pydna-group · manulera · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/docs/notebooks/history.ipynb b/docs/notebooks/history.ipynb
@@ -186,13 +186,13 @@
       "╙── product (Dseqrecord(-18))\n",
       "    └─╼ LigationSource\n",
       "        ├─╼ c (Dseqrecord(-7))\n",
-      "        │   └─╼ Source\n",
-      "        │       └─╼ a (Dseqrecord(-18)) ╾ Source, Source\n",
+      "        │   └─╼ RestrictionEnzymeDigestionSource\n",
+      "        │       └─╼ a (Dseqrecord(-18)) ╾ RestrictionEnzymeDigestionSource, RestrictionEnzymeDigestionSource\n",
       "        ├─╼ d (Dseqrecord(-12))\n",
-      "        │   └─╼ Source\n",
+      "        │   └─╼ RestrictionEnzymeDigestionSource\n",
       "        │       └─╼  ...\n",
       "        └─╼ e (Dseqrecord(-7))\n",
-      "            └─╼ Source\n",
+      "            └─╼ RestrictionEnzymeDigestionSource\n",
       "                └─╼  ...\n"
      ]
     }
@@ -354,8 +354,8 @@
       "    └─╼ CreLoxRecombinationSource\n",
       "        └─╼ integration_product (Dseqrecord(-84))\n",
       "            └─╼ CreLoxRecombinationSource\n",
-      "                ├─╼ a (Dseqrecord(-45))\n",
-      "                └─╼ b (Dseqrecord(o39))\n"
+      "                ├─╼ genome (Dseqrecord(-45))\n",
+      "                └─╼ plasmid (Dseqrecord(o39))\n"
      ]
     }
    ],

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -60,7 +60,7 @@ scipy = [
 ]
 seguid = ">=0.0.5"
 regex = "^2024.11.6"
-opencloning-linkml = "0.4.5"
+opencloning-linkml = "^0.4.9"
 [tool.poetry.extras]
 clipboard = ["pyperclip"]
 download = ["pyparsing", "requests"]

diff --git a/src/pydna/afile.fasta b/src/pydna/afile.fasta
@@ -0,0 +1,2 @@
+>fn
+gatc
diff --git a/src/pydna/assembly2.py b/src/pydna/assembly2.py
@@ -2036,7 +2036,7 @@ def _recast_sources(
     """
     for prod in products:
         prod.source = source_cls(
-            **prod.source.model_dump(),
+            **prod.source.to_unserialized_dict(),
             **extra_fields,
         )
     return products
@@ -2805,7 +2805,8 @@ def crispr_integration(
         # The second element of product.source.input is conventionally the insert/repair fragment
         # The other two (first and third) are the two bits of the genome
         repair_start = _location_boundaries(product.source.input[0].right_location)[0]
-        repair_end = _location_boundaries(product.source.input[2].left_location)[1]
+        # Add 1 because the position of a cut marks the boundary between fragments (e.g. a cut at pos 10 gives 0:10 and 10:20)
+        repair_end = _location_boundaries(product.source.input[2].left_location)[1] + 1
         repair_location = create_location(repair_start, repair_end, len(genome))
         some_cuts_inside_repair = []
         all_cuts_inside_repair = []

diff --git a/src/pydna/genbank.py b/src/pydna/genbank.py
@@ -12,21 +12,17 @@
 `pydna.ini` file. See the documentation of :func:`pydna.open_config_folder`"""
 
 # from pydna.utils import memorize as _memorize
+from pydna.opencloning_models import NCBISequenceSource
 from pydna.genbankrecord import GenbankRecord as _GenbankRecord
 from pydna.readers import read as _read
 
 from Bio import Entrez as _Entrez
+from Bio.SeqFeature import SimpleLocation
+
 from typing import Literal as _Literal, Optional as _Optional
 import re as _re
 import os as _os
 
-# import logging as _logging
-
-# _module_logger = _logging.getLogger("pydna." + __name__)
-
-
-# TODO http://httpbin.org/ use for testing?
-
 
 class Genbank:
     """Class to facilitate download from genbank. It is easier and
@@ -179,12 +175,29 @@ def nucleotide(
 
         # _module_logger.info("text[:160]  %s", text[:160])
 
-        return _GenbankRecord(
-            _read(text), item=item, start=seq_start, stop=seq_stop, strand=strand
+        result = _read(text)
+        # TODO: handle the case where only one of seq_start / seq_stop is given (the coordinates are currently dropped)
+        if seq_start is not None and seq_stop is not None:
+            location = SimpleLocation(
+                int(seq_start) - 1, int(seq_stop), -1 if strand == 2 else strand
+            )
+        else:
+            location = None
+
+        result.source = NCBISequenceSource(
+            repository_id=item,
+            coordinates=location,
         )
+        return result
+
+        # return _GenbankRecord(
+        #     _read(text), item=item, start=seq_start, stop=seq_stop, strand=strand
+        # )
 
 
-def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
+def genbank(
+    accession: str = "CS570233.1", *args, email=None, **kwargs
+) -> _GenbankRecord:
     """
     Download a genbank nuclotide record.
 
@@ -229,9 +242,6 @@ def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
         //
 
     """
-    email = _os.getenv("pydna_email")
-    # _module_logger.info("#### genbank function called ####")
-    # _module_logger.info("email      %s", email)
-    # _module_logger.info("accession  %s", email)
+    email = email or _os.getenv("pydna_email")
     gb = Genbank(email)
     return gb.nucleotide(accession, *args, **kwargs)
diff --git a/src/pydna/oligonucleotide_hybridization.py b/src/pydna/oligonucleotide_hybridization.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+"""
+This module contains the functions for oligonucleotide hybridization.
+"""
+
+from pydna.common_sub_strings import common_sub_strings
+from Bio.Seq import reverse_complement
+from pydna.primer import Primer
+from pydna.dseqrecord import Dseqrecord
+from pydna.dseq import Dseq
+from pydna.opencloning_models import OligoHybridizationSource, SourceInput
+
+
+def oligonucleotide_hybridization_overhangs(
+    fwd_oligo_seq: str, rvs_oligo_seq: str, minimal_annealing: int
+) -> list[int]:
+    """
+    Returns possible overhangs between two oligos given a minimal annealing length, and
+    raises a ValueError if mismatches are found.
+
+    see https://github.com/manulera/OpenCloning_backend/issues/302 for notation
+
+    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization_overhangs
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCAT", 3)
+    [0]
+    >>> oligonucleotide_hybridization_overhangs("aATGGC", "GCCAT", 5)
+    [-1]
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATa", 5)
+    [1]
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 5)
+    [0, 7]
+
+    If the minimal annealing length is longer than the length of the shorter oligo, it returns an empty list.
+
+    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 100)
+    []
+
+    If it's possible to anneal for ``minimal_annealing`` length, but with mismatches, it raises an error.
+
+    >>> oligonucleotide_hybridization_overhangs("cATGGC", "GCCATa", 5)
+    Traceback (most recent call last):
+        ...
+    ValueError: The oligonucleotides can anneal with mismatches
+    """
+    matches = common_sub_strings(
+        fwd_oligo_seq.lower(),
+        reverse_complement(rvs_oligo_seq.lower()),
+        minimal_annealing,
+    )
+
+    for pos_fwd, pos_rvs, length in matches:
+
+        if (pos_fwd != 0 and pos_rvs != 0) or (
+            pos_fwd + length < len(fwd_oligo_seq)
+            and pos_rvs + length < len(rvs_oligo_seq)
+        ):
+            raise ValueError("The oligonucleotides can anneal with mismatches")
+
+    # Return possible overhangs
+    return [pos_rvs - pos_fwd for pos_fwd, pos_rvs, length in matches]
+
+
+def oligonucleotide_hybridization(
+    fwd_primer: Primer, rvs_primer: Primer, minimal_annealing: int
+) -> list[Dseqrecord]:
+    """
+    Returns a list of Dseqrecord objects representing the hybridization of two primers.
+
+    >>> from pydna.primer import Primer
+    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization
+    >>> fwd_primer = Primer("ATGGC")
+    >>> rvs_primer = Primer("GCCA")
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 3)[0].seq
+    Dseq(-5)
+    ATGGC
+     ACCG
+
+    Multiple values can be returned:
+
+    >>> rvs_primer2 = Primer("GCCATaaGCCAT")
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[0].seq
+    Dseq(-12)
+    ATGGC
+    TACCGaaTACCG
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[1].seq
+    Dseq(-12)
+           ATGGC
+    TACCGaaTACCG
+
+    If no possible overhangs are found, it returns an empty list.
+
+    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 100)
+    []
+
+    If there are mismatches given the minimal annealing length, it raises an error.
+
+    >>> fwd_primer3 = Primer("cATGGC")
+    >>> rvs_primer3 = Primer("GCCATa")
+    >>> oligonucleotide_hybridization(fwd_primer3, rvs_primer3, 5)
+    Traceback (most recent call last):
+        ...
+    ValueError: The oligonucleotides can anneal with mismatches
+    """
+    possible_overhangs = oligonucleotide_hybridization_overhangs(
+        str(fwd_primer.seq), str(rvs_primer.seq), minimal_annealing
+    )
+    sources = [
+        OligoHybridizationSource(
+            overhang_crick_3prime=pos,
+            input=[SourceInput(sequence=fwd_primer), SourceInput(sequence=rvs_primer)],
+        )
+        for pos in possible_overhangs
+    ]
+    return [
+        Dseqrecord(
+            Dseq(
+                str(fwd_primer.seq),
+                str(rvs_primer.seq),
+                ovhg=source.overhang_crick_3prime,
+            ),
+            source=source,
+        )
+        for source in sources
+    ]