diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index eb7804bc8..0714410f3 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -6,12 +6,16 @@ __author__ = "Patrick Kunzmann, Daniel Bauer, Claude J. Rogers" __all__ = ["PDBFile"] +import itertools import warnings from collections import namedtuple import numpy as np from biotite.file import InvalidFileError, TextFile from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat -from biotite.structure.bonds import BondList, connect_via_residue_names +from biotite.structure.bonds import ( + BondList, + connect_via_residue_names, +) from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell from biotite.structure.error import BadStructureError from biotite.structure.filter import ( @@ -19,6 +23,7 @@ filter_highest_occupancy_altloc, filter_solvent, ) +from biotite.structure.info.bonds import bonds_in_residue from biotite.structure.io.pdb.hybrid36 import ( decode_hybrid36, encode_hybrid36, @@ -544,7 +549,16 @@ def get_structure( # Read bonds if include_bonds: bond_list = self._get_bonds(atom_id) - bond_list = bond_list.merge(connect_via_residue_names(array)) + # Create bond dict containing only non-hetero residues (+ water) + custom_bond_dict = { + res_name: bonds_in_residue(res_name) + for res_name in itertools.chain( + np.unique(array[..., ~array.hetero].res_name), ["HOH"] + ) + } + bond_list = bond_list.merge( + connect_via_residue_names(array, custom_bond_dict=custom_bond_dict) + ) array.bonds = bond_list return array diff --git a/tests/structure/data/hetatm/ligand.pdb b/tests/structure/data/hetatm/ligand.pdb new file mode 100644 index 000000000..729fbe36f --- /dev/null +++ b/tests/structure/data/hetatm/ligand.pdb @@ -0,0 +1,10 @@ +HETATM 704 C7 LIG B 101 -17.432 24.497 -0.918 1.00 26.28 C +HETATM 705 C8 LIG B 101 -17.432 24.497 -0.918 1.00 26.28 C +HETATM 706 C13 LIG B 101 -17.432 24.497 -0.918 1.00 26.28 
C +HETATM 707 C14 LIG B 101 -17.432 24.497 -0.918 1.00 26.28 C +HETATM 708 C15 LIG B 101 -17.432 24.497 -0.918 1.00 26.28 C +CONECT 704 705 +CONECT 705 704 706 +CONECT 706 705 707 +CONECT 707 706 708 +CONECT 708 707 \ No newline at end of file diff --git a/tests/structure/io/test_pdb.py b/tests/structure/io/test_pdb.py index e966e10a8..37be2ab7b 100644 --- a/tests/structure/io/test_pdb.py +++ b/tests/structure/io/test_pdb.py @@ -473,7 +473,6 @@ def test_bond_parsing(): ref_bonds = struc.connect_via_residue_names(atoms) ref_bonds.remove_bond_order() - assert test_bonds.as_set() == ref_bonds.as_set() @@ -572,3 +571,26 @@ def test_setting_incompatible_structure(annotation, value, warning_only): else: with pytest.raises(struc.BadStructureError): pdb_file.set_structure(atoms) + + +def test_hetatm_intra_residue_bonds(): + """ + Expect that HETATM intra-residue bonds are only parsed from CONECT records + and not looked up via residue names. + """ + expected_bonds = np.array( + [ + [0, 1, 0], + [1, 2, 0], + [2, 3, 0], + [3, 4, 0], + ], + dtype=np.uint32, + ) + path = join(data_dir("structure"), "hetatm/ligand.pdb") + + pdb_file = pdb.PDBFile.read(path) + structure = pdb.get_structure(pdb_file, model=1, include_bonds=True) + actual_bonds = structure.bonds.as_array() + + np.testing.assert_array_equal(actual_bonds, expected_bonds)