From 162109242a2cff1ab5a278dd9d7b05063dda97a2 Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 18 Nov 2025 12:09:40 +0000 Subject: [PATCH 1/3] switch from namedtuple to dataclass --- src/pydna/alphabet.py | 57 ++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/src/pydna/alphabet.py b/src/pydna/alphabet.py index d0ba5805..b0c1635f 100644 --- a/src/pydna/alphabet.py +++ b/src/pydna/alphabet.py @@ -75,8 +75,8 @@ """ -from collections import namedtuple import re as _re +from dataclasses import dataclass # An alias for whitespace emptyspace = chr(32) @@ -452,7 +452,40 @@ annealing_dict_w_holes.update(mixed_case_dict) -def get_parts(datastring: str) -> namedtuple: +@dataclass +class DseqParts: + sticky_left5: str + sticky_left3: str + middle: str + sticky_right3: str + sticky_right5: str + single_watson: str + single_crick: str + + def __iter__(self): + """ + Allow unpacking DseqParts instances. + >>> from pydna.alphabet import get_parts + >>> sticky_left5, sticky_left3, middle, sticky_right3, sticky_right5, single_watson, single_crick = get_parts("eeATCGuggCCGgg") + >>> sticky_left5 + 'ee' + >>> middle + 'ATCGuggCCGgg' + """ + return iter( + ( + self.sticky_left5, + self.sticky_left3, + self.middle, + self.sticky_right3, + self.sticky_right5, + self.single_watson, + self.single_crick, + ) + ) + + +def get_parts(datastring: str) -> DseqParts: """ A namedtuple containing the parts of a dsDNA sequence. @@ -552,20 +585,16 @@ def get_parts(datastring: str) -> namedtuple: result = ["" if e is None else e for e in result] - field_names = ( - "sticky_left5", - "sticky_left3", - "middle", - "sticky_right3", - "sticky_right5", - "single_watson", - "single_crick", + return DseqParts( + sticky_left5=result[0], + sticky_left3=result[1], + middle=result[2], + sticky_right3=result[3], + sticky_right5=result[4], + single_watson=result[5], + single_crick=result[6], ) - fragment = namedtuple("fragment", field_names) - - return fragment(*result) - def dsbreaks(data: str): From 167a3a9cfd2a2413b4de16bb187e7a9a23a5c08a Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 18 Nov 2025 12:17:07 +0000 Subject: [PATCH 2/3] remove FIXMEs --- tests/test_module_assembly2.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/test_module_assembly2.py b/tests/test_module_assembly2.py index e91f77a2..f16cb103 100644 --- a/tests/test_module_assembly2.py +++ b/tests/test_module_assembly2.py @@ -1868,7 +1868,7 @@ def test_assemble_function(): assembly_plan = [ (1, 2, loc_end, loc_start), ] - # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + assert (fragments[0] + fragments[1]).seq == assembly.assemble( fragments, assembly_plan ).seq @@ -1878,7 +1878,7 @@ def test_assemble_function(): (1, 2, loc_end, loc_start), (2, 1, loc_end, loc_start), ] - # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + assert (fragments[0] + fragments[1]).looped().seq == assembly.assemble( fragments, assembly_plan ).seq @@ -2156,9 +2156,7 @@ def test_ligation_assembly(): # Blunt ligation combined with sticky end fragments = Dseqrecord("AAAGAATTCAAA").cut(EcoRI) - result = assembly.ligation_assembly( - fragments, allow_blunt=True - ) # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + result = assembly.ligation_assembly(fragments, allow_blunt=True) result_str = [str(x.seq) for x in result] assert sorted(result_str) == sorted(["AAAGAATTCAAA"]) assert result[0].circular @@ -2180,9 +2178,7 @@ def test_blunt_assembly(): use_fragment_order=False, ) - assert dseqrecord_list_to_dseq_list(asm.assemble_linear()) == [ - (b + a).seq - ] # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + assert dseqrecord_list_to_dseq_list(asm.assemble_linear()) == [(b + a).seq] assert asm.assemble_circular() == [] # Circular assembly From c395cf8e390108519be643126bca0bc3e00e7b9c Mon Sep 17 00:00:00 2001 From: Manuel Lera-Ramirez Date: Tue, 18 Nov 2025 12:30:06 +0000 Subject: [PATCH 3/3] add getitem to DseqParts dataclass --- src/pydna/alphabet.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/pydna/alphabet.py b/src/pydna/alphabet.py index b0c1635f..8e13bb82 100644 --- a/src/pydna/alphabet.py +++ b/src/pydna/alphabet.py @@ -484,17 +484,29 @@ def __iter__(self): ) ) + def __getitem__(self, index: int) -> str: + """ + Allow indexing DseqParts instances. + >>> from pydna.alphabet import get_parts + >>> parts = get_parts("eeATCGuggCCGgg") + >>> parts[0] + 'ee' + >>> parts[2] + 'ATCGuggCCGgg' + """ + return tuple(self)[index] + def get_parts(datastring: str) -> DseqParts: """ - A namedtuple containing the parts of a dsDNA sequence. + Returns a DseqParts instance containing the parts of a dsDNA sequence. The datastring should contain a string with dscode symbols. A regex is used to capture the single stranded regions at the ends as well as the regiond in the middle. The figure below numbers the regex capture groups and what they capture - as well as the namedtuple field name. + as well as the DseqParts instance field name. ::