Skip to content

Commit

Permalink
Merge pull request #678 from padix-key/pdbx-inter-bonds
Browse files Browse the repository at this point in the history
Omit 'standard' bonds when writing `struct_conn` category
  • Loading branch information
padix-key authored Oct 21, 2024
2 parents 267f469 + 2f08dc1 commit 737c1b6
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/biotite/structure/io/pdbx/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ def compress(data, float_tolerance=1e-6):
>>> pdbx_file.write(uncompressed_file)
>>> _ = uncompressed_file.seek(0)
>>> print(f"{len(uncompressed_file.read()) // 1000} KB")
931 KB
927 KB
>>> # Write compressed file
>>> pdbx_file = compress(pdbx_file)
>>> compressed_file = BytesIO()
>>> pdbx_file.write(compressed_file)
>>> _ = compressed_file.seek(0)
>>> print(f"{len(compressed_file.read()) // 1000} KB")
113 KB
111 KB
"""
match type(data):
case bcif.BinaryCIFFile:
Expand Down
38 changes: 37 additions & 1 deletion src/biotite/structure/io/pdbx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
from biotite.structure.error import BadStructureError
from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
from biotite.structure.filter import (
_canonical_nucleotide_list as canonical_nucleotide_list,
)
from biotite.structure.filter import (
filter_first_altloc,
filter_highest_occupancy_altloc,
Expand All @@ -36,7 +40,11 @@
from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
from biotite.structure.io.pdbx.component import MaskValue
from biotite.structure.io.pdbx.encoding import StringArrayEncoding
from biotite.structure.residues import get_residue_count, get_residue_starts_for
from biotite.structure.residues import (
get_residue_count,
get_residue_positions,
get_residue_starts_for,
)
from biotite.structure.util import matrix_rotate

# Bond types in `struct_conn` category that refer to covalent bonds
Expand Down Expand Up @@ -89,6 +97,7 @@
# Reverse lookup of `COMP_BOND_ORDER_TO_TYPE`: each value of that mapping
# becomes a key here and points back to its original key
COMP_BOND_TYPE_TO_ORDER = {
bond_type: order for order, bond_type in COMP_BOND_ORDER_TO_TYPE.items()
}
# Residue names considered 'canonical': the canonical amino acid names
# combined with the canonical nucleotide names, both imported from
# `biotite.structure.filter` (presumably plain lists, so `+` concatenates)
CANONICAL_RESIDUE_LIST = canonical_aa_list + canonical_nucleotide_list

_proteinseq_type_list = ["polypeptide(D)", "polypeptide(L)"]
_nucleotideseq_type_list = [
Expand Down Expand Up @@ -1101,6 +1110,12 @@ def _set_inter_residue_bonds(array, atom_site):
if len(bond_array) == 0:
return None

# Filter out 'standard' links, i.e. backbone bonds between adjacent canonical
# nucleotide/amino acid residues
bond_array = bond_array[~_filter_canonical_links(array, bond_array)]
if len(bond_array) == 0:
return None

struct_conn = Category()
struct_conn["id"] = np.arange(1, len(bond_array) + 1)
struct_conn["conn_type_id"] = [
Expand Down Expand Up @@ -1145,6 +1160,27 @@ def _filter_bonds(array, connection):
raise ValueError("Invalid 'connection' option")


def _filter_canonical_links(array, bond_array):
    """
    Identify 'standard' links, i.e. backbone bonds between adjacent
    canonical amino acid or nucleotide residues.

    Parameters
    ----------
    array : object with ``res_name`` and ``atom_name`` annotation arrays
        The structure the bonds refer to (presumably an
        :class:`AtomArray`).
    bond_array : ndarray, dtype=int
        Two-dimensional array with one row per bond.
        The first two columns are the indices of the bonded atoms in
        `array`; any further columns are ignored.

    Returns
    -------
    mask : ndarray, shape=(n,), dtype=bool
        True for each bond that fulfills all of the following
        conditions:

        - both bonded atoms belong to residues whose name is in
          ``CANONICAL_RESIDUE_LIST``
        - the first atom is named ``C`` or ``O3'`` and the second atom
          is named ``N`` or ``P``
        - the residue of the second atom directly follows the residue
          of the first atom
    """
    first_atoms = bond_array[:, 0]
    second_atoms = bond_array[:, 1]

    # Get the residue index for each bonded atom
    residue_indices = get_residue_positions(
        array, bond_array[:, :2].flatten()
    ).reshape(-1, 2)

    # Must be canonical residues
    is_canonical = (
        np.isin(array.res_name[first_atoms], CANONICAL_RESIDUE_LIST)
        & np.isin(array.res_name[second_atoms], CANONICAL_RESIDUE_LIST)
    )
    # Must be backbone bond
    is_backbone_bond = (
        np.isin(array.atom_name[first_atoms], ("C", "O3'"))
        & np.isin(array.atom_name[second_atoms], ("N", "P"))
    )
    # Must connect adjacent residues
    # The comparison is evaluated on its own before being combined with the
    # other masks: `&` binds tighter than `==`, so chaining it directly would
    # bitwise-AND the integer difference into the mask and accept every odd
    # residue distance (e.g. 3 or -1) instead of only a distance of exactly 1
    is_adjacent = (residue_indices[:, 1] - residue_indices[:, 0]) == 1

    return is_canonical & is_backbone_bond & is_adjacent


def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
"""
Create an :class:`AtomArray` for a chemical component from the
Expand Down

0 comments on commit 737c1b6

Please sign in to comment.