|
19 | 19 | "dihedral",
|
20 | 20 | "index_dihedral",
|
21 | 21 | "dihedral_backbone",
|
| 22 | + "dihedral_side_chain", |
22 | 23 | "centroid",
|
23 | 24 | ]
|
24 | 25 |
|
| 26 | +import functools |
25 | 27 | import numpy as np
|
26 | 28 | from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
|
27 | 29 | from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
|
28 |
| -from biotite.structure.filter import filter_amino_acids |
| 30 | +from biotite.structure.filter import filter_amino_acids, filter_canonical_amino_acids |
| 31 | +from biotite.structure.residues import get_residue_starts |
29 | 32 | from biotite.structure.util import (
|
30 | 33 | coord_for_atom_name_per_residue,
|
31 | 34 | norm_vector,
|
32 | 35 | vector_dot,
|
33 | 36 | )
|
34 | 37 |
|
| 38 | +# The names of the atoms participating in chi angle |
| 39 | +_CHI_ATOMS = { |
| 40 | + "ARG": [ |
| 41 | + ("N", "CA", "CB", "CG"), |
| 42 | + ("CA", "CB", "CG", "CD"), |
| 43 | + ("CB", "CG", "CD", "NE"), |
| 44 | + ("CG", "CD", "NE", "CZ"), |
| 45 | + ], |
| 46 | + "LEU": [ |
| 47 | + ("N", "CA", "CB", "CG"), |
| 48 | + # By convention chi2 is defined using CD1 instead of CD2 |
| 49 | + ("CA", "CB", "CG", "CD1"), |
| 50 | + ], |
| 51 | + "VAL": [("N", "CA", "CB", "CG1")], |
| 52 | + "ILE": [("N", "CA", "CB", "CG1"), ("CA", "CB", "CG1", "CD1")], |
| 53 | + "MET": [ |
| 54 | + ("N", "CA", "CB", "CG"), |
| 55 | + ("CA", "CB", "CG", "SD"), |
| 56 | + ("CB", "CG", "SD", "CE"), |
| 57 | + ], |
| 58 | + "LYS": [ |
| 59 | + ("N", "CA", "CB", "CG"), |
| 60 | + ("CA", "CB", "CG", "CD"), |
| 61 | + ("CB", "CG", "CD", "CE"), |
| 62 | + ("CG", "CD", "CE", "NZ"), |
| 63 | + ], |
| 64 | + "PHE": [ |
| 65 | + ("N", "CA", "CB", "CG"), |
| 66 | + ("CA", "CB", "CG", "CD1"), |
| 67 | + ], |
| 68 | + "TRP": [ |
| 69 | + ("N", "CA", "CB", "CG"), |
| 70 | + ("CA", "CB", "CG", "CD1"), |
| 71 | + ], |
| 72 | + "TYR": [ |
| 73 | + ("N", "CA", "CB", "CG"), |
| 74 | + ("CA", "CB", "CG", "CD1"), |
| 75 | + ], |
| 76 | + "ASN": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")], |
| 77 | + "GLN": [ |
| 78 | + ("N", "CA", "CB", "CG"), |
| 79 | + ("CA", "CB", "CG", "CD"), |
| 80 | + ("CB", "CG", "CD", "OE1"), |
| 81 | + ], |
| 82 | + "ASP": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "OD1")], |
| 83 | + "GLU": [ |
| 84 | + ("N", "CA", "CB", "CG"), |
| 85 | + ("CA", "CB", "CG", "CD"), |
| 86 | + ("CB", "CG", "CD", "OE1"), |
| 87 | + ], |
| 88 | + "CYS": [("N", "CA", "CB", "SG")], |
| 89 | + "HIS": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "ND1")], |
| 90 | + "PRO": [("N", "CA", "CB", "CG"), ("CA", "CB", "CG", "CD")], |
| 91 | + "SER": [("N", "CA", "CB", "OG")], |
| 92 | + "THR": [("N", "CA", "CB", "OG1")], |
| 93 | +} |
| 94 | + |
35 | 95 |
|
36 | 96 | def displacement(atoms1, atoms2, box=None):
|
37 | 97 | """
|
@@ -492,7 +552,7 @@ def dihedral_backbone(atom_array):
|
492 | 552 |
|
493 | 553 | Returns
|
494 | 554 | -------
|
495 |
| - phi, psi, omega : ndarray |
| 555 | + phi, psi, omega : ndarray, shape=(m,n) or shape=(n,), dtype=float |
496 | 556 | An array containing the 3 backbone dihedral angles for every CA atom.
|
497 | 557 | `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
|
498 | 558 | C-terminus.
|
@@ -562,6 +622,96 @@ def dihedral_backbone(atom_array):
|
562 | 622 | return phi, psi, omg
|
563 | 623 |
|
564 | 624 |
|
| 625 | +def dihedral_side_chain(atoms): |
| 626 | + r""" |
| 627 | + Measure the side chain :math:`\chi` dihedral angles of amino acid residues. |
| 628 | +
|
| 629 | + Parameters |
| 630 | + ---------- |
| 631 | + atoms : AtomArray or AtomArrayStack |
| 632 | + The protein structure to measure the side chain dihedral angles for. |
| 633 | +
|
| 634 | + Returns |
| 635 | + ------- |
| 636 | + chi : ndarray, shape=(m, n, 4) or shape=(n, 4), dtype=float |
| 637 | + An array containing the up to four side chain dihedral angles for every |
| 638 | + amino acid residue. |
| 639 | + Trailing :math:`\chi` angles that are not defined for an amino acid are filled |
| 640 | + with :math:`NaN` values. |
| 641 | + The same is True for all residues that are not canonical amino acids. |
| 642 | +
|
| 643 | + Notes |
| 644 | + ----- |
| 645 | + By convention, the :math:`\chi_2` angle of leucine is defined using ``CD1`` |
| 646 | + instead of ``CD2``. |
| 647 | +
|
| 648 | + Examples |
| 649 | + -------- |
| 650 | +
|
| 651 | + >>> res_ids, res_names = get_residues(atom_array) |
| 652 | + >>> dihedrals = dihedral_side_chain(atom_array) |
| 653 | + >>> for res_id, res_name, dihedrals in zip(res_ids, res_names, dihedrals): |
| 654 | + ... print(f"{res_name.capitalize()}{res_id:<2d}:", dihedrals) |
| 655 | + Asn1 : [-1.180 -0.066 nan nan] |
| 656 | + Leu2 : [0.923 1.866 nan nan] |
| 657 | + Tyr3 : [-2.593 -1.487 nan nan] |
| 658 | + Ile4 : [-0.781 -0.972 nan nan] |
| 659 | + Gln5 : [-2.557 1.410 -1.776 nan] |
| 660 | + Trp6 : [3.117 1.372 nan nan] |
| 661 | + Leu7 : [-1.33 3.08 nan nan] |
| 662 | + Lys8 : [ 1.320 1.734 3.076 -2.022] |
| 663 | + Asp9 : [-1.623 0.909 nan nan] |
| 664 | + Gly10: [nan nan nan nan] |
| 665 | + Gly11: [nan nan nan nan] |
| 666 | + Pro12: [-0.331 0.539 nan nan] |
| 667 | + Ser13: [-1.067 nan nan nan] |
| 668 | + Ser14: [-2.514 nan nan nan] |
| 669 | + Gly15: [nan nan nan nan] |
| 670 | + Arg16: [ 1.032 -3.063 1.541 -1.568] |
| 671 | + Pro17: [ 0.522 -0.601 nan nan] |
| 672 | + Pro18: [ 0.475 -0.577 nan nan] |
| 673 | + Pro19: [ 0.561 -0.602 nan nan] |
| 674 | + Ser20: [-1.055 nan nan nan] |
| 675 | + """ |
| 676 | + is_multi_model = isinstance(atoms, AtomArrayStack) |
| 677 | + |
| 678 | + chi_atoms = _all_chi_atoms() |
| 679 | + res_names = atoms.res_name[get_residue_starts(atoms)] |
| 680 | + chi_atom_coord = coord_for_atom_name_per_residue( |
| 681 | + atoms, chi_atoms, filter_canonical_amino_acids(atoms) |
| 682 | + ) |
| 683 | + chi_atoms_to_coord_index = {atom_name: i for i, atom_name in enumerate(chi_atoms)} |
| 684 | + |
| 685 | + if is_multi_model: |
| 686 | + shape = (atoms.stack_depth(), len(res_names), 4) |
| 687 | + else: |
| 688 | + shape = (len(res_names), 4) |
| 689 | + chi_angles = np.full(shape, np.nan, dtype=np.float32) |
| 690 | + for res_name, chi_atom_names_for_all_angles in _CHI_ATOMS.items(): |
| 691 | + res_mask = res_names == res_name |
| 692 | + for chi_i, chi_atom_names in enumerate(chi_atom_names_for_all_angles): |
| 693 | + dihedrals = dihedral( |
| 694 | + chi_atom_coord[ |
| 695 | + chi_atoms_to_coord_index[chi_atom_names[0]], ..., res_mask, : |
| 696 | + ], |
| 697 | + chi_atom_coord[ |
| 698 | + chi_atoms_to_coord_index[chi_atom_names[1]], ..., res_mask, : |
| 699 | + ], |
| 700 | + chi_atom_coord[ |
| 701 | + chi_atoms_to_coord_index[chi_atom_names[2]], ..., res_mask, : |
| 702 | + ], |
| 703 | + chi_atom_coord[ |
| 704 | + chi_atoms_to_coord_index[chi_atom_names[3]], ..., res_mask, : |
| 705 | + ], |
| 706 | + ) |
| 707 | + if is_multi_model: |
| 708 | + # Swap dimensions due to NumPy's behavior when using advanced indexing |
| 709 | + # (https://numpy.org/devdocs/user/basics.indexing.html#combining-advanced-and-basic-indexing) |
| 710 | + dihedrals = dihedrals.T |
| 711 | + chi_angles[..., res_mask, chi_i] = dihedrals |
| 712 | + return chi_angles |
| 713 | + |
| 714 | + |
565 | 715 | def centroid(atoms):
|
566 | 716 | """
|
567 | 717 | Measure the centroid of a structure.
|
@@ -653,3 +803,15 @@ def _displacement_triclinic_box(fractions, box, disp):
|
653 | 803 | disp[:] = shifted_diffs[
|
654 | 804 | np.arange(len(shifted_diffs)), np.argmin(sq_distance, axis=1)
|
655 | 805 | ]
|
| 806 | + |
| 807 | + |
| 808 | +@functools.cache |
| 809 | +def _all_chi_atoms(): |
| 810 | + """ |
| 811 | + Get the names of the atoms participating in any chi angle. |
| 812 | + """ |
| 813 | + atom_names = set() |
| 814 | + for angles in _CHI_ATOMS.values(): |
| 815 | + for angle in angles: |
| 816 | + atom_names.update(angle) |
| 817 | + return sorted(atom_names) |
0 commit comments