diff --git a/src/pydna/alphabet.py b/src/pydna/alphabet.py index d0ba5805..8e13bb82 100644 --- a/src/pydna/alphabet.py +++ b/src/pydna/alphabet.py @@ -75,8 +75,8 @@ """ -from collections import namedtuple import re as _re +from dataclasses import dataclass # An alias for whitespace emptyspace = chr(32) @@ -452,16 +452,61 @@ annealing_dict_w_holes.update(mixed_case_dict) -def get_parts(datastring: str) -> namedtuple: +@dataclass +class DseqParts: + sticky_left5: str + sticky_left3: str + middle: str + sticky_right3: str + sticky_right5: str + single_watson: str + single_crick: str + + def __iter__(self): + """ + Allow unpacking DseqParts instances. + >>> from pydna.alphabet import get_parts + >>> sticky_left5, sticky_left3, middle, sticky_right3, sticky_right5, single_watson, single_crick = get_parts("eeATCGuggCCGgg") + >>> sticky_left5 + 'ee' + >>> middle + 'ATCGuggCCGgg' + """ + return iter( + ( + self.sticky_left5, + self.sticky_left3, + self.middle, + self.sticky_right3, + self.sticky_right5, + self.single_watson, + self.single_crick, + ) + ) + + def __getitem__(self, index: int) -> str: + """ + Allow indexing DseqParts instances. + >>> from pydna.alphabet import get_parts + >>> parts = get_parts("eeATCGuggCCGgg") + >>> parts[0] + 'ee' + >>> parts[2] + 'ATCGuggCCGgg' + """ + return tuple(self)[index] + + +def get_parts(datastring: str) -> DseqParts: """ - A namedtuple containing the parts of a dsDNA sequence. + Returns a DseqParts instance containing the parts of a dsDNA sequence. The datastring should contain a string with dscode symbols. A regex is used to capture the single stranded regions at the ends as well as the regiond in the middle. The figure below numbers the regex capture groups and what they capture - as well as the namedtuple field name. + as well as the DseqParts instance field name. :: @@ -552,20 +597,16 @@ def get_parts(datastring: str) -> namedtuple: result = ["" if e is None else e for e in result] - field_names = ( - "sticky_left5", - "sticky_left3", - "middle", - "sticky_right3", - "sticky_right5", - "single_watson", - "single_crick", + return DseqParts( + sticky_left5=result[0], + sticky_left3=result[1], + middle=result[2], + sticky_right3=result[3], + sticky_right5=result[4], + single_watson=result[5], + single_crick=result[6], ) - fragment = namedtuple("fragment", field_names) - - return fragment(*result) - def dsbreaks(data: str): diff --git a/tests/test_module_assembly2.py b/tests/test_module_assembly2.py index e91f77a2..f16cb103 100644 --- a/tests/test_module_assembly2.py +++ b/tests/test_module_assembly2.py @@ -1868,7 +1868,7 @@ def test_assemble_function(): assembly_plan = [ (1, 2, loc_end, loc_start), ] - # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + assert (fragments[0] + fragments[1]).seq == assembly.assemble( fragments, assembly_plan ).seq @@ -1878,7 +1878,7 @@ def test_assemble_function(): (1, 2, loc_end, loc_start), (2, 1, loc_end, loc_start), ] - # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + assert (fragments[0] + fragments[1]).looped().seq == assembly.assemble( fragments, assembly_plan ).seq @@ -2156,9 +2156,7 @@ def test_ligation_assembly(): # Blunt ligation combined with sticky end fragments = Dseqrecord("AAAGAATTCAAA").cut(EcoRI) - result = assembly.ligation_assembly( - fragments, allow_blunt=True - ) # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + result = assembly.ligation_assembly(fragments, allow_blunt=True) result_str = [str(x.seq) for x in result] assert sorted(result_str) == sorted(["AAAGAATTCAAA"]) assert result[0].circular @@ -2180,9 +2178,7 @@ def test_blunt_assembly(): use_fragment_order=False, ) - assert dseqrecord_list_to_dseq_list(asm.assemble_linear()) == [ - (b + a).seq - ] # FIXME: The assert below fails in the Sanity check on line 770 in assembly2, but gives the expected result. + assert dseqrecord_list_to_dseq_list(asm.assemble_linear()) == [(b + a).seq] assert asm.assemble_circular() == [] # Circular assembly