Skip to content

Commit

Permalink
Make dihedral_backbone() more robust against missing atoms
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Nov 4, 2024
1 parent 66d51bc commit d89e5ab
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 148 deletions.
10 changes: 6 additions & 4 deletions doc/examples/scripts/structure/protein/ramachandran.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
# Download and parse file
file = rcsb.fetch("3vkh", "cif", gettempdir())
atom_array = strucio.load_structure(file)
# Extract first peptide chain
peptide = atom_array[struc.filter_amino_acids(atom_array)]
chain = peptide[peptide.chain_id == "A"]
# Calculate backbone dihedral angles
# from one of the two identical chains in the asymmetric unit
phi, psi, omega = struc.dihedral_backbone(atom_array[atom_array.chain_id == "A"])
# Conversion from radians into degree
phi *= 180 / np.pi
psi *= 180 / np.pi
phi, psi, omega = struc.dihedral_backbone(chain)
phi = np.rad2deg(phi)
psi = np.rad2deg(psi)
# Remove invalid values (NaN) at first and last position
phi = phi[1:-1]
psi = psi[1:-1]
Expand Down
4 changes: 2 additions & 2 deletions doc/tutorial/structure/measurement.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ simple *Ramachandran* plot for the first frame of *TC5b*.
)
ax.set_xlim(-180,180)
ax.set_ylim(-180,180)
ax.set_xlabel("$\phi$")
_ = ax.set_ylabel("$\psi$")
ax.set_xlabel(r"$\phi$")
_ = ax.set_ylabel(r"$\psi$")

Surface area
------------
Expand Down
173 changes: 60 additions & 113 deletions src/biotite/structure/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@
import numpy as np
from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
from biotite.structure.chains import chain_iter
from biotite.structure.error import BadStructureError
from biotite.structure.filter import filter_peptide_backbone
from biotite.structure.util import norm_vector, vector_dot
from biotite.structure.filter import filter_amino_acids
from biotite.structure.util import (
coord_for_atom_name_per_residue,
norm_vector,
vector_dot,
)


def displacement(atoms1, atoms2, box=None):
Expand Down Expand Up @@ -480,139 +482,84 @@ def index_dihedral(*args, **kwargs):

def dihedral_backbone(atom_array):
"""
Measure the characteristic backbone dihedral angles of a protein
structure.
Measure the characteristic backbone dihedral angles of a chain.
Parameters
----------
atom_array: AtomArray or AtomArrayStack
The protein structure. A complete backbone, without gaps,
is required here.
Chain transitions are allowed, the angles at the transition are
`NaN`.
The order of the backbone atoms for each residue must be
(N, CA, C).
atoms: AtomArray or AtomArrayStack
The protein structure to measure the dihedral angles for.
For missing backbone atoms the corresponding angles are `NaN`.
Returns
-------
phi, psi, omega : ndarray
An array containing the 3 backbone dihedral angles for every
CA. 'phi' is not defined at the N-terminus, 'psi' and 'omega'
are not defined at the C-terminus. In these places the arrays
have *NaN* values. If an :class:`AtomArrayStack` is given, the
output angles are 2-dimensional, the first dimension corresponds
to the model number.
Raises
------
BadStructureError
If the amount of backbone atoms is not equal to amount of
residues times 3 (for N, CA and C).
See Also
--------
dihedral
Examples
--------
>>> phi, psi, omega = dihedral_backbone(atom_array)
>>> print(np.stack([np.rad2deg(phi), np.rad2deg(psi)]).T)
[[ nan -56.145]
[ -43.980 -51.309]
[ -66.466 -30.898]
[ -65.219 -45.945]
[ -64.747 -30.346]
[ -73.136 -43.425]
[ -64.882 -43.255]
[ -59.509 -25.698]
[ -77.989 -8.823]
[ 110.784 8.079]
[ 55.244 -124.371]
[ -57.983 -28.766]
[ -81.834 19.125]
[-124.057 13.401]
[ 67.931 25.218]
[-143.952 131.297]
[ -70.100 160.068]
[ -69.484 145.669]
[ -77.264 124.223]
[ -78.100 nan]]
An array containing the 3 backbone dihedral angles for every CA atom.
`phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
C-terminus.
In these places the arrays have *NaN* values.
If an :class:`AtomArrayStack` is given, the output angles are 2-dimensional,
the first dimension corresponds to the model number.
"""
bb_filter = filter_peptide_backbone(atom_array)
backbone = atom_array[..., bb_filter]

if (
backbone.array_length() % 3 != 0
or (backbone.atom_name[0::3] != "N").any()
or (backbone.atom_name[1::3] != "CA").any()
or (backbone.atom_name[2::3] != "C").any()
):
raise BadStructureError(
"The backbone is invalid, must be repeats of (N, CA, C), "
"maybe a backbone atom is missing"
)
phis = []
psis = []
omegas = []
for chain_bb in chain_iter(backbone):
phi, psi, omega = _dihedral_backbone(chain_bb)
phis.append(phi)
psis.append(psi)
omegas.append(omega)
return (
np.concatenate(phis, axis=-1),
np.concatenate(psis, axis=-1),
np.concatenate(omegas, axis=-1),
)
amino_acid_mask = filter_amino_acids(atom_array)

# Coordinates for dihedral angle calculation
coord_n, coord_ca, coord_c = coord_for_atom_name_per_residue(
atom_array,
("N", "CA", "C"),
amino_acid_mask,
)
n_residues = coord_n.shape[-2]

def _dihedral_backbone(chain_bb):
bb_coord = chain_bb.coord
# Coordinates for dihedral angle calculation
# Dim 0: Model index (only for atom array stacks)
# Dim 1: Angle index
# Dim 2: X, Y, Z coordinates
# Dim 3: Atoms involved in dihedral angle
if isinstance(chain_bb, AtomArray):
angle_coord_shape = (len(bb_coord) // 3, 3, 4)
elif isinstance(chain_bb, AtomArrayStack):
angle_coord_shape = (bb_coord.shape[0], bb_coord.shape[1] // 3, 3, 4)
phi_coord = np.full(angle_coord_shape, np.nan)
psi_coord = np.full(angle_coord_shape, np.nan)
omega_coord = np.full(angle_coord_shape, np.nan)

# Indices for coordinates of CA atoms
ca_i = np.arange(bb_coord.shape[-2] // 3) * 3 + 1
if isinstance(atom_array, AtomArray):
angle_coord_shape: tuple[int, ...] = (n_residues, 3, 4)
elif isinstance(atom_array, AtomArrayStack):
angle_coord_shape = (atom_array.stack_depth(), n_residues, 3, 4)
coord_for_phi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
coord_for_psi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
coord_for_omg = np.full(angle_coord_shape, np.nan, dtype=np.float32)

# fmt: off
phi_coord [..., 1:, :, 0] = bb_coord[..., ca_i[1: ]-2, :]
phi_coord [..., 1:, :, 1] = bb_coord[..., ca_i[1: ]-1, :]
phi_coord [..., 1:, :, 2] = bb_coord[..., ca_i[1: ], :]
phi_coord [..., 1:, :, 3] = bb_coord[..., ca_i[1: ]+1, :]
psi_coord [..., :-1, :, 0] = bb_coord[..., ca_i[:-1]-1, :]
psi_coord [..., :-1, :, 1] = bb_coord[..., ca_i[:-1], :]
psi_coord [..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+1, :]
psi_coord [..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+2, :]
omega_coord[..., :-1, :, 0] = bb_coord[..., ca_i[:-1], :]
omega_coord[..., :-1, :, 1] = bb_coord[..., ca_i[:-1]+1, :]
omega_coord[..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+2, :]
omega_coord[..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+3, :]
coord_for_phi[..., 1:, :, 0] = coord_c[..., 0:-1, :]
coord_for_phi[..., 1:, :, 1] = coord_n[..., 1:, :]
coord_for_phi[..., 1:, :, 2] = coord_ca[..., 1:, :]
coord_for_phi[..., 1:, :, 3] = coord_c[..., 1:, :]

coord_for_psi[..., 0:-1, :, 0] = coord_n[..., 0:-1, :]
coord_for_psi[..., 0:-1, :, 1] = coord_ca[..., 0:-1, :]
coord_for_psi[..., 0:-1, :, 2] = coord_c[..., 0:-1, :]
coord_for_psi[..., 0:-1, :, 3] = coord_n[..., 1:, :]

coord_for_omg[..., 0:-1, :, 0] = coord_ca[..., 0:-1, :]
coord_for_omg[..., 0:-1, :, 1] = coord_c[..., 0:-1, :]
coord_for_omg[..., 0:-1, :, 2] = coord_n[..., 1:, :]
coord_for_omg[..., 0:-1, :, 3] = coord_ca[..., 1:, :]
# fmt: on

phi = dihedral(
phi_coord[..., 0], phi_coord[..., 1], phi_coord[..., 2], phi_coord[..., 3]
coord_for_phi[..., 0],
coord_for_phi[..., 1],
coord_for_phi[..., 2],
coord_for_phi[..., 3],
)
psi = dihedral(
psi_coord[..., 0], psi_coord[..., 1], psi_coord[..., 2], psi_coord[..., 3]
coord_for_psi[..., 0],
coord_for_psi[..., 1],
coord_for_psi[..., 2],
coord_for_psi[..., 3],
)
omega = dihedral(
omega_coord[..., 0],
omega_coord[..., 1],
omega_coord[..., 2],
omega_coord[..., 3],
omg = dihedral(
coord_for_omg[..., 0],
coord_for_omg[..., 1],
coord_for_omg[..., 2],
coord_for_omg[..., 3],
)

return phi, psi, omega
return phi, psi, omg


def centroid(atoms):
Expand Down
32 changes: 22 additions & 10 deletions src/biotite/structure/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
]

import numpy as np
from biotite.structure.atoms import AtomArray
from biotite.structure.atoms import AtomArrayStack
from biotite.structure.error import BadStructureError
from biotite.structure.residues import get_residue_masks, get_residue_starts

Expand Down Expand Up @@ -105,7 +105,7 @@ def matrix_rotate(v, matrix):
return v


def coord_for_atom_name_per_residue(atoms, atom_names):
def coord_for_atom_name_per_residue(atoms, atom_names, mask=None):
"""
Get the coordinates of a specific atom for every residue.
Expand All @@ -118,39 +118,51 @@ def coord_for_atom_name_per_residue(atoms, atom_names):
atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
The atom array or stack to get the residue-wise coordinates from.
atom_names : list of str, length=k
The atom names to get the coordinates for.
mask : ndarray, shape=(n,), dtype=bool, optional
A boolean mask to further select valid atoms from `atoms`.
Returns
-------
coord: ndarray, shape=(k, m, r, 3) or shape=(k, r, 3)
The coordinates of the specified atom for each residue.
"""
is_multi_model = isinstance(atoms, AtomArrayStack)
residue_starts = get_residue_starts(atoms)
all_residue_masks = get_residue_masks(atoms, residue_starts)

if isinstance(atoms, AtomArray):
if is_multi_model:
coord = np.full(
(len(atom_names), len(residue_starts), 3),
(len(atom_names), atoms.stack_depth(), len(residue_starts), 3),
np.nan,
dtype=np.float32,
)
else:
coord = np.full(
(len(atom_names), atoms.stack_depth(), len(residue_starts), 3),
(len(atom_names), len(residue_starts), 3),
np.nan,
dtype=np.float32,
)

for i, atom_name in enumerate(atom_names):
atom_mask_for_name = atoms.atom_name == atom_name
all_residue_masks_for_specified_atom = all_residue_masks & atom_mask_for_name
specified_atom_mask = atoms.atom_name == atom_name
if mask is not None:
specified_atom_mask &= mask
all_residue_masks_for_specified_atom = all_residue_masks & specified_atom_mask
number_of_specified_atoms_per_residue = np.count_nonzero(
all_residue_masks_for_specified_atom, axis=-1
)
if np.any(number_of_specified_atoms_per_residue > 1):
raise BadStructureError(f"Multiple '{atom_name}' atoms per residue")
residues_with_specified_atom = number_of_specified_atoms_per_residue == 1
coord[i, ..., residues_with_specified_atom, :] = atoms.coord[
..., atom_mask_for_name, :
]
coord_of_specified_atoms = atoms.coord[..., specified_atom_mask, :]
if is_multi_model:
# Swap dimensions due to NumPy's behavior when using advanced indexing
# (https://numpy.org/devdocs/user/basics.indexing.html#combining-advanced-and-basic-indexing)
coord[i, ..., residues_with_specified_atom, :] = (
coord_of_specified_atoms.transpose(1, 0, 2)
)
else:
coord[i, residues_with_specified_atom, :] = coord_of_specified_atoms

return coord
45 changes: 26 additions & 19 deletions tests/structure/test_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,23 @@ def test_dihedral():
assert struc.dihedral(coord1, coord2, coord3, coord4) == pytest.approx(0.5 * np.pi)


@pytest.mark.parametrize("multiple_chains", [False, True])
def test_dihedral_backbone_general(multiple_chains):
@pytest.mark.parametrize("multi_model", [False, True])
def test_dihedral_backbone_general(multi_model):
stack = strucio.load_structure(join(data_dir("structure"), "1l2y.bcif"))
n_models = stack.stack_depth()
n_res = stack.res_id[-1]
if multiple_chains:
stack2 = stack.copy()
stack2.chain_id[:] = "B"
stack = stack + stack2
n_res = n_res * 2
array = stack[0]

if multi_model:
atoms = stack
expected_shape = (n_models, n_res)
else:
atoms = stack[0]
expected_shape = (n_res,)
# Test array
phi, psi, omega = struc.dihedral_backbone(array)
assert phi.shape == (n_res,)
assert psi.shape == (n_res,)
assert omega.shape == (n_res,)
_assert_plausible_omega(omega)
# Test stack
phi, psi, omega = struc.dihedral_backbone(stack)
assert phi.shape == (n_models, n_res)
assert psi.shape == (n_models, n_res)
assert omega.shape == (n_models, n_res)
phi, psi, omega = struc.dihedral_backbone(atoms)
assert phi.shape == expected_shape
assert psi.shape == expected_shape
assert omega.shape == expected_shape
_assert_plausible_omega(omega)


Expand All @@ -72,13 +67,17 @@ def _assert_plausible_omega(omega):
assert omega.tolist() == pytest.approx([np.pi] * len(omega), rel=0.6)


def test_dihedral_backbone_consistency():
@pytest.mark.parametrize("multi_model", [False, True])
def test_dihedral_backbone_consistency(multi_model):
"""
Check if the computed dihedral angles are equal to the reference computed with
MDTraj.
"""
pdbx_file = pdbx.BinaryCIFFile.read(join(data_dir("structure"), "1l2y.bcif"))
atoms = pdbx.get_structure(pdbx_file, model=1)
if multi_model:
# Use models with equal coordinates
atoms = struc.stack([atoms] * 2)

test_phi, test_psi, test_ome = struc.dihedral_backbone(atoms)

Expand All @@ -105,6 +104,14 @@ def test_dihedral_backbone_consistency():
[-1.363, np.nan, np.nan],
]) # fmt: skip

if multi_model:
assert np.array_equal(test_phi[1], test_phi[0], equal_nan=True)
assert np.array_equal(test_psi[1], test_psi[0], equal_nan=True)
assert np.array_equal(test_ome[1], test_ome[0], equal_nan=True)
test_phi = test_phi[0]
test_psi = test_psi[0]
test_ome = test_ome[0]

assert test_phi == pytest.approx(ref_dihedrals[:, 0], abs=1e-3, nan_ok=True)
assert test_psi == pytest.approx(ref_dihedrals[:, 1], abs=1e-3, nan_ok=True)
assert test_ome == pytest.approx(ref_dihedrals[:, 2], abs=1e-3, nan_ok=True)
Expand Down

0 comments on commit d89e5ab

Please sign in to comment.