diff --git a/src/biotite/structure/__init__.py b/src/biotite/structure/__init__.py index df9776324..c8ecec129 100644 --- a/src/biotite/structure/__init__.py +++ b/src/biotite/structure/__init__.py @@ -57,14 +57,15 @@ The following annotation categories are optionally used by some functions: -========= =========== ================= ============================ +========= =========== ================= ========================================= Category Type Examples Description -========= =========== ================= ============================ +========= =========== ================= ========================================= atom_id int 1,2,3, ... Atom serial number b_factor float 0.9, 12.3, ... Temperature factor occupancy float .1, .3, .9, ... Occupancy charge int -2,-1,0,1,2, ... Electric charge of the atom -========= =========== ================= ============================ +sym_id int 0,1,2, ... Symmetry ID for assemblies/symmetry mates +========= =========== ================= ========================================= For each type, the attributes can be accessed directly. Both :class:`AtomArray` and :class:`AtomArrayStack` support diff --git a/src/biotite/structure/io/pdb/convert.py b/src/biotite/structure/io/pdb/convert.py index 127e49fbb..1971bc1ef 100644 --- a/src/biotite/structure/io/pdb/convert.py +++ b/src/biotite/structure/io/pdb/convert.py @@ -218,6 +218,8 @@ def get_assembly( assembly : AtomArray or AtomArrayStack The assembly. The return type depends on the `model` parameter. + Contains the `sym_id` annotation, which enumerates the copies of the asymmetric + unit in the assembly. Examples -------- diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index 6d192dac6..4ae2bb516 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -790,6 +790,8 @@ def get_assembly( assembly : AtomArray or AtomArrayStack The assembly.
The return type depends on the `model` parameter. + Contains the `sym_id` annotation, which enumerates the copies of the + asymmetric unit in the assembly. Examples -------- @@ -853,8 +855,12 @@ def get_assembly( affected_chain_ids = [] transform_start = None for j, line in enumerate(assembly_lines[start:stop]): - if line.startswith("APPLY THE FOLLOWING TO CHAINS:") or line.startswith( - " AND CHAINS:" + if any( + line.startswith(chain_signal_string) + for chain_signal_string in [ + "APPLY THE FOLLOWING TO CHAINS:", + " AND CHAINS:", + ] ): affected_chain_ids += [ chain_id.strip() for chain_id in line[30:].split(",") @@ -1148,7 +1154,11 @@ def _apply_transformations(structure, rotations, translations): coord += translation assembly_coord[i] = coord - return repeat(structure, assembly_coord) + assembly = repeat(structure, assembly_coord) + assembly.set_annotation( + "sym_id", np.repeat(np.arange(len(rotations)), structure.array_length()) + ) + return assembly def _check_pdb_compatibility(array, hybrid36): diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py index f5661f58b..5367c74bd 100644 --- a/src/biotite/structure/io/pdbx/convert.py +++ b/src/biotite/structure/io/pdbx/convert.py @@ -1557,7 +1557,10 @@ def get_assembly( Returns ------- assembly : AtomArray or AtomArrayStack - The assembly. The return type depends on the `model` parameter. + The assembly. + The return type depends on the `model` parameter. + Contains the `sym_id` annotation, which enumerates the copies of the asymmetric + unit in the assembly. 
Examples -------- @@ -1646,7 +1649,6 @@ def _apply_transformations(structure, transformation_dict, operations): """ # Additional first dimesion for 'structure.repeat()' assembly_coord = np.zeros((len(operations),) + structure.coord.shape) - # Apply corresponding transformation for each copy in the assembly for i, operation in enumerate(operations): coord = structure.coord @@ -1660,7 +1662,11 @@ def _apply_transformations(structure, transformation_dict, operations): coord += translation_vector assembly_coord[i] = coord - return repeat(structure, assembly_coord) + assembly = repeat(structure, assembly_coord) + assembly.set_annotation( + "sym_id", np.repeat(np.arange(len(operations)), structure.array_length()) + ) + return assembly def _get_transformations(struct_oper): diff --git a/tests/structure/io/test_pdbx.py b/tests/structure/io/test_pdbx.py index 96123294f..2b80ecfda 100644 --- a/tests/structure/io/test_pdbx.py +++ b/tests/structure/io/test_pdbx.py @@ -430,7 +430,7 @@ def test_list_assemblies(format): "format, pdb_id, model", itertools.product(["cif", "bcif"], ["1f2n", "5zng"], [None, 1, -1]), ) -def test_get_assembly(format, pdb_id, model): +def test_assembly_chain_count(format, pdb_id, model): """ Test whether the :func:`get_assembly()` function produces the same number of peptide chains as the @@ -478,6 +478,39 @@ def test_get_assembly(format, pdb_id, model): assert assembly.array_length() % monomer_atom_count == 0 +@pytest.mark.parametrize( + "pdb_id, assembly_id, symmetric_unit_count", + [ + # Single operation + ("5zng", "1", 1), + # Multiple operations with continuous operation IDs + ("1f2n", "1", 60), + # Multiple operations with discontinuous operation IDs + ("1f2n", "4", 6), + # Multiple combined operations + ("1f2n", "6", 60), + ], +) +def test_assembly_sym_id(pdb_id, assembly_id, symmetric_unit_count): + """ + Check if the :func:`get_assembly()` function returns the correct + symmetry ID annotation for a known example. 
+ """ + pdbx_file = pdbx.BinaryCIFFile.read(join(data_dir("structure"), f"{pdb_id}.bcif")) + assembly = pdbx.get_assembly(pdbx_file, assembly_id=assembly_id) + # 'unique_indices' contains the FIRST occurrence of each unique value + unique_sym_ids, unique_indices = np.unique(assembly.sym_id, return_index=True) + # Sort by first occurrence + order = np.argsort(unique_indices) + unique_sym_ids = unique_sym_ids[order] + unique_indices = unique_indices[order] + assert unique_sym_ids.tolist() == list(range(symmetric_unit_count)) + # Every asymmetric unit should have the same length, + # as each operation is applied to all atoms in the asymmetric unit + asym_lengths = np.diff(np.append(unique_indices, assembly.array_length())) + assert (asym_lengths == asym_lengths[0]).all() + + @pytest.mark.parametrize( "path, use_ideal_coord", itertools.product(