Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions docs/notebooks/history.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,13 @@
"╙── product (Dseqrecord(-18))\n",
" └─╼ LigationSource\n",
" ├─╼ c (Dseqrecord(-7))\n",
" │ └─╼ Source\n",
" │ └─╼ a (Dseqrecord(-18)) ╾ Source, Source\n",
" │ └─╼ RestrictionEnzymeDigestionSource\n",
" │ └─╼ a (Dseqrecord(-18)) ╾ RestrictionEnzymeDigestionSource, RestrictionEnzymeDigestionSource\n",
" ├─╼ d (Dseqrecord(-12))\n",
" │ └─╼ Source\n",
" │ └─╼ RestrictionEnzymeDigestionSource\n",
" │ └─╼ ...\n",
" └─╼ e (Dseqrecord(-7))\n",
" └─╼ Source\n",
" └─╼ RestrictionEnzymeDigestionSource\n",
" └─╼ ...\n"
]
}
Expand Down Expand Up @@ -354,8 +354,8 @@
" └─╼ CreLoxRecombinationSource\n",
" └─╼ integration_product (Dseqrecord(-84))\n",
" └─╼ CreLoxRecombinationSource\n",
" ├─╼ a (Dseqrecord(-45))\n",
" └─╼ b (Dseqrecord(o39))\n"
" ├─╼ genome (Dseqrecord(-45))\n",
" └─╼ plasmid (Dseqrecord(o39))\n"
]
}
],
Expand Down
482 changes: 132 additions & 350 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ scipy = [
]
seguid = ">=0.0.5"
regex = "^2024.11.6"
opencloning-linkml = "0.4.5"
opencloning-linkml = "^0.4.9"
[tool.poetry.extras]
clipboard = ["pyperclip"]
download = ["pyparsing", "requests"]
Expand Down
5 changes: 3 additions & 2 deletions src/pydna/assembly2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2036,7 +2036,7 @@ def _recast_sources(
"""
for prod in products:
prod.source = source_cls(
**prod.source.model_dump(),
**prod.source.to_unserialized_dict(),
**extra_fields,
)
return products
Expand Down Expand Up @@ -2805,7 +2805,8 @@ def crispr_integration(
# The second element of product.source.input is conventionally the insert/repair fragment
# The other two (first and third) are the two bits of the genome
repair_start = _location_boundaries(product.source.input[0].right_location)[0]
repair_end = _location_boundaries(product.source.input[2].left_location)[1]
# Here we do +1 because the position of the cut marks the boundary (e.g. 0:10, 10:20 if a cut is at pos 10)
repair_end = _location_boundaries(product.source.input[2].left_location)[1] + 1
repair_location = create_location(repair_start, repair_end, len(genome))
some_cuts_inside_repair = []
all_cuts_inside_repair = []
Expand Down
15 changes: 11 additions & 4 deletions src/pydna/genbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
`pydna.ini` file. See the documentation of :func:`pydna.open_config_folder`"""

# from pydna.utils import memorize as _memorize
from pydna.opencloning_models import RepositoryIdSource
from pydna.opencloning_models import NCBISequenceSource
from pydna.genbankrecord import GenbankRecord as _GenbankRecord
from pydna.readers import read as _read

from Bio import Entrez as _Entrez
from Bio.SeqFeature import SimpleLocation

from typing import Literal as _Literal, Optional as _Optional
import re as _re
import os as _os
Expand Down Expand Up @@ -175,11 +176,17 @@ def nucleotide(
# _module_logger.info("text[:160] %s", text[:160])

result = _read(text)
# TODO: Address this for cases where only one is defined
if seq_start is not None and seq_stop is not None:
location = SimpleLocation(
int(seq_start) - 1, int(seq_stop), -1 if strand == 2 else strand
)
else:
location = None

result.source = RepositoryIdSource(
result.source = NCBISequenceSource(
repository_id=item,
repository_name="genbank",
location=SimpleLocation(seq_start, seq_stop, strand),
coordinates=location,
)
return result

Expand Down
124 changes: 124 additions & 0 deletions src/pydna/oligonucleotide_hybridization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
"""
This module contains the functions for oligonucleotide hybridization.
"""

from pydna.common_sub_strings import common_sub_strings
from Bio.Seq import reverse_complement
from pydna.primer import Primer
from pydna.dseqrecord import Dseqrecord
from pydna.dseq import Dseq
from pydna.opencloning_models import OligoHybridizationSource, SourceInput


def oligonucleotide_hybridization_overhangs(
    fwd_oligo_seq: str, rvs_oligo_seq: str, minimal_annealing: int
) -> list[int]:
    """
    Compute the overhangs with which two oligos can anneal perfectly.

    The forward oligo is compared (case-insensitively) with the reverse
    complement of the reverse oligo, looking for shared stretches of at least
    ``minimal_annealing`` bases. Each stretch yields one overhang value,
    computed as its position in the reverse-complemented reverse oligo minus
    its position in the forward oligo.

    see https://github.com/manulera/OpenCloning_backend/issues/302 for notation

    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization_overhangs
    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCAT", 3)
    [0]
    >>> oligonucleotide_hybridization_overhangs("aATGGC", "GCCAT", 5)
    [-1]
    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATa", 5)
    [1]
    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 5)
    [0, 7]

    When ``minimal_annealing`` exceeds what the oligos can share, the result
    is an empty list.

    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 100)
    []

    A ``ValueError`` is raised when a stretch of ``minimal_annealing`` bases
    can anneal but is flanked by bases of both oligos on the same side, i.e.
    the duplex would contain mismatches.

    >>> oligonucleotide_hybridization_overhangs("cATGGC", "GCCATa", 5)
    Traceback (most recent call last):
        ...
    ValueError: The oligonucleotides can anneal with mismatches
    """
    watson = fwd_oligo_seq.lower()
    crick_rc = reverse_complement(rvs_oligo_seq.lower())
    shared_stretches = common_sub_strings(watson, crick_rc, minimal_annealing)

    fwd_len = len(fwd_oligo_seq)
    rvs_len = len(rvs_oligo_seq)

    def _has_mismatched_flank(start_fwd: int, start_rvs: int, span: int) -> bool:
        # A clean duplex needs the shared stretch to reach the end of at least
        # one oligo on each side; otherwise both oligos carry unpaired bases
        # facing each other there.
        if start_fwd != 0 and start_rvs != 0:
            return True
        return start_fwd + span < fwd_len and start_rvs + span < rvs_len

    if any(_has_mismatched_flank(*stretch) for stretch in shared_stretches):
        raise ValueError("The oligonucleotides can anneal with mismatches")

    # One overhang per shared stretch, in the order the stretches were reported
    return [start_rvs - start_fwd for start_fwd, start_rvs, _ in shared_stretches]


def oligonucleotide_hybridization(
    fwd_primer: Primer, rvs_primer: Primer, minimal_annealing: int
) -> list[Dseqrecord]:
    """
    Anneal two primers and return every resulting double-stranded product.

    One Dseqrecord is built per overhang reported by
    ``oligonucleotide_hybridization_overhangs``; each record carries an
    ``OligoHybridizationSource`` that stores the overhang and both primers
    as inputs.

    >>> from pydna.primer import Primer
    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization
    >>> fwd_primer = Primer("ATGGC")
    >>> rvs_primer = Primer("GCCA")
    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 3)[0].seq
    Dseq(-5)
    ATGGC
     ACCG

    Several products are returned when the primers can anneal in more than
    one register:

    >>> rvs_primer2 = Primer("GCCATaaGCCAT")
    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[0].seq
    Dseq(-12)
    ATGGC
    TACCGaaTACCG
    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[1].seq
    Dseq(-12)
           ATGGC
    TACCGaaTACCG

    An empty list is returned when no annealing of the requested length exists.

    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 100)
    []

    A ``ValueError`` is raised when the primers can only anneal with mismatches
    for the given minimal annealing length.

    >>> fwd_primer3 = Primer("cATGGC")
    >>> rvs_primer3 = Primer("GCCATa")
    >>> oligonucleotide_hybridization(fwd_primer3, rvs_primer3, 5)
    Traceback (most recent call last):
        ...
    ValueError: The oligonucleotides can anneal with mismatches
    """
    watson = str(fwd_primer.seq)
    crick = str(rvs_primer.seq)

    products = []
    for overhang in oligonucleotide_hybridization_overhangs(
        watson, crick, minimal_annealing
    ):
        source = OligoHybridizationSource(
            overhang_crick_3prime=overhang,
            input=[
                SourceInput(sequence=fwd_primer),
                SourceInput(sequence=rvs_primer),
            ],
        )
        # The overhang is read back from the source so the record and its
        # provenance always agree on the value used.
        duplex = Dseq(watson, crick, ovhg=source.overhang_crick_3prime)
        products.append(Dseqrecord(duplex, source=source))
    return products
Loading
Loading