Skip to content

Commit ec9c0eb

Browse files
committed
Infer hetero residue IDs from author annotation
1 parent c4b16ed commit ec9c0eb

File tree

6 files changed

+335
-7
lines changed

6 files changed

+335
-7
lines changed

src/biotite/structure/io/pdbx/convert.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
5656
from biotite.structure.io.pdbx.component import MaskValue
5757
from biotite.structure.io.pdbx.encoding import StringArrayEncoding
58+
from biotite.structure.repair import create_continuous_res_ids
5859
from biotite.structure.residues import (
5960
get_residue_count,
6061
get_residue_positions,
@@ -496,12 +497,6 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
496497
atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
497498
).as_array(str),
498499
)
499-
array.set_annotation(
500-
"res_id",
501-
_get_or_fallback(
502-
atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
503-
).as_array(int, -1),
504-
)
505500
array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(str, ""))
506501
array.set_annotation(
507502
"res_name",
@@ -518,6 +513,22 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
518513
)
519514
array.set_annotation("element", atom_site["type_symbol"].as_array(str))
520515

516+
# Special handling for `res_id`, as the `label_seq_id` is equal (`.`) for all
517+
# hetero residues, which makes distinguishing subsequent residues from one another
518+
# difficult (https://github.com/biotite-dev/biotite/issues/553)
519+
res_id = _get_or_fallback(
520+
atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
521+
).as_array(int, -1)
522+
if not use_author_fields and "auth_seq_id" in atom_site:
523+
# Therefore, the `auth_seq_id` is still used to determine residue starts
524+
# in `create_continuous_res_ids()`, even if `use_author_fields = False`.
525+
res_id_for_residue_starts = atom_site["auth_seq_id"].as_array(int, -1)
526+
array.set_annotation("res_id", res_id_for_residue_starts)
527+
fallback_res_ids = create_continuous_res_ids(array)
528+
array.set_annotation("res_id", np.where(res_id == -1, fallback_res_ids, res_id))
529+
else:
530+
array.set_annotation("res_id", res_id)
531+
521532
if "atom_id" in extra_fields:
522533
if "id" in atom_site:
523534
array.set_annotation("atom_id", atom_site["id"].as_array(int))
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Collection of structure file edge cases
2+
3+
- ``hetatm.pdb``: A simple PDB file containing a custom ligand, whose name is already
4+
taken by the CCD.
5+
However, since it contains only ``HETATM`` records, the bonds should not be taken from
6+
the CCD but from the ``CONECT`` records.
7+
- ``res_ids.cif``: Subsequent residues have the same ``label_xxx`` annotation, which
8+
makes it hard to determine where a new residue starts.
9+
However, using ``auth_seq_id`` as fallback allows resolving the residue starts.
10+
Derived from PDB entry ``5HU8``.
File renamed without changes.

0 commit comments

Comments
 (0)