Skip to content

Commit

Permalink
Merge pull request #576 from padix-key/repair
Browse files Browse the repository at this point in the history
Add repair functions
  • Loading branch information
padix-key authored May 30, 2024
2 parents 7ae1da6 + 4dc6c91 commit 427cea1
Show file tree
Hide file tree
Showing 11 changed files with 324 additions and 179 deletions.
6 changes: 6 additions & 0 deletions doc/apidoc.json
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,12 @@
"check_bond_continuity",
"check_linear_continuity"
],
"Repair" : [
"renumber_atom_ids",
"renumber_res_ids",
"create_continuous_res_ids",
"infer_elements"
],
"Residue level utility" : [
"get_residue_starts",
"get_residues",
Expand Down
1 change: 1 addition & 0 deletions src/biotite/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
from .molecules import *
from .pseudoknots import *
from .rdf import *
from .repair import *
from .residues import *
from .chains import *
from .sasa import *
Expand Down
83 changes: 16 additions & 67 deletions src/biotite/structure/integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
__all__ = ["check_id_continuity", "check_atom_id_continuity",
"check_res_id_continuity", "check_backbone_continuity",
"check_duplicate_atoms", "check_bond_continuity",
"check_linear_continuity", "renumber_atom_ids", "renumber_res_ids"]
"check_linear_continuity"]

import numpy as np
import warnings
Expand All @@ -32,17 +32,17 @@ def check_id_continuity(array):
"""
Check if the residue IDs are incremented by more than 1 or
decremented, from one atom to the next one.
An increment by more than 1 is as strong clue for missing residues,
a decrement means probably a start of a new chain.
DEPRECATED: Use :func:`check_res_id_continuity()` instead.
Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
Returns
-------
discontinuity : ndarray, dtype=int
Expand All @@ -60,14 +60,14 @@ def check_atom_id_continuity(array):
"""
Check if the atom IDs are incremented by more than 1 or
decremented, from one atom to the next one.
An increment by more than 1 is as strong clue for missing atoms.
Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
Returns
-------
discontinuity : ndarray, dtype=int
Expand All @@ -81,15 +81,15 @@ def check_res_id_continuity(array):
"""
Check if the residue IDs are incremented by more than 1 or
decremented, from one atom to the next one.
An increment by more than 1 is as strong clue for missing residues,
a decrement means probably a start of a new chain.
Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
Returns
-------
discontinuity : ndarray, dtype=int
Expand Down Expand Up @@ -168,7 +168,7 @@ def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
"""
Check if the (peptide or phosphate) backbone atoms have
non-reasonable distance to the next atom.
A large or very small distance is a very strong clue, that there is
no bond between those atoms, therefore the chain is discontinued.
Expand Down Expand Up @@ -206,16 +206,16 @@ def check_duplicate_atoms(array):
"""
Check if a structure contains duplicate atoms, i.e. two atoms in a
structure have the same annotations (coordinates may be different).
Duplicate atoms may appear, when a structure has occupancy for an
atom at two or more positions or when the *altloc* positions are
improperly read.
Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
Returns
-------
duplicate : ndarray, dtype=int
Expand All @@ -232,7 +232,7 @@ def check_duplicate_atoms(array):
for annot in annots:
# For each annotation array filter out the atoms until
# index i that have an unequal annotation
# to the atom at index i
# to the atom at index i
is_dublicate &= (annot[:i] == annot[i])
# After checking all annotation arrays,
# if there still is any duplicate to the atom at index i,
Expand All @@ -255,7 +255,7 @@ def check_in_box(array):
----------
array : AtomArray or AtomArrayStack
The array to be checked.
Returns
-------
outside : ndarray, dtype=int
Expand All @@ -266,54 +266,3 @@ def check_in_box(array):
box = array.box
fractions = coord_to_fraction(array, box)
return np.where(((fractions >= 0) & (fractions < 1)).all(axis=-1))[0]


def renumber_atom_ids(array, start=None):
"""
Renumber the atom IDs of the given array.
Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
start : int, optional
The starting index for renumbering.
The first ID in the array is taken by default.
Returns
-------
array : AtomArray or AtomArrayStack
The renumbered array.
"""
if "atom_id" not in array.get_annotation_categories():
raise ValueError("The atom array must have the 'atom_id' annotation")
if start is None:
start = array.atom_id[0]
array.atom_id = np.arange(start, array.shape[-1]+1)
return array


def renumber_res_ids(array, start=None):
"""
Renumber the residue IDs of the given array.
Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
start : int, optional
The starting index for renumbering.
The first ID in the array is taken by default.
Returns
-------
array : AtomArray or AtomArrayStack
The renumbered array.
"""
if start is None:
start = array.res_id[0]
diff = np.diff(array.res_id)
diff[diff != 0] = 1
new_res_ids = np.concatenate(([start], diff)).cumsum()
array.res_id = new_res_ids
return array
41 changes: 1 addition & 40 deletions src/biotite/structure/io/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import os.path
import io
from ..atoms import AtomArray, AtomArrayStack
from ..atoms import AtomArrayStack


def load_structure(file_path, template=None, **kwargs):
Expand Down Expand Up @@ -224,42 +224,3 @@ def _as_single_model_if_possible(atoms):
return atoms[0]
else:
return atoms


# Helper function to estimate elements from atom names
_elements = [elem.upper() for elem in
["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg",
"Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe",
"Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
"Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te",
"I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb",
"Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt",
"Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa",
"U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", "Rf",
"Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts",
"Og"]
]
def _guess_element(atom_name):
# remove digits (1H -> H)
elem = "".join([i for i in atom_name if not i.isdigit()])
elem = elem.upper()
if len(elem) == 0:
return ""

# Some often used elements for biomolecules
if elem.startswith("C") or elem.startswith("N") or \
elem.startswith("O") or elem.startswith("S") or \
elem.startswith("H"):
return elem[0]

# Exactly match element abbreviations
try:
return _elements[_elements.index(elem[:2])]
except ValueError:
try:
return _elements[_elements.index(elem[0])]
except ValueError:
pass

return ""

32 changes: 16 additions & 16 deletions src/biotite/structure/io/gro/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ...atoms import AtomArray, AtomArrayStack
from ...box import is_orthogonal
from ....file import TextFile, InvalidFileError
from ..general import _guess_element as guess_element
from ...repair import infer_elements
from ...error import BadStructureError
import copy
from datetime import datetime
Expand Down Expand Up @@ -38,15 +38,15 @@ class GROFile(TextFile):
--------
Load a `\\*.gro` file, modify the structure and save the new
structure into a new file:
>>> import os.path
>>> file = GROFile.read(os.path.join(path_to_structures, "1l2y.gro"))
>>> array_stack = file.get_structure()
>>> array_stack_mod = rotate(array_stack, [1,2,3])
>>> file = GROFile()
>>> file.set_structure(array_stack_mod)
>>> file.write(os.path.join(path_to_directory, "1l2y_mod.gro"))
"""
def get_model_count(self):
"""
Expand All @@ -68,7 +68,7 @@ def get_structure(self, model=None):
"""
Get an :class:`AtomArray` or :class:`AtomArrayStack` from the
GRO file.
Parameters
----------
model : int, optional
Expand All @@ -80,21 +80,21 @@ def get_structure(self, model=None):
If this parameter is omitted, an :class:`AtomArrayStack`
containing all models will be returned, even if the
structure contains only one model.
Returns
-------
array : AtomArray or AtomArrayStack
The return type depends on the `model` parameter.
"""

def get_atom_line_i(model_start_i, model_atom_counts):
"""
Helper function to get the indices of all atoms for a model
"""
return np.arange(
model_start_i+1, model_start_i+1+model_atom_counts
)

def set_box_dimen(box_param):
"""
Helper function to create the box vectors from the values
Expand All @@ -104,7 +104,7 @@ def set_box_dimen(box_param):
----------
box_param : list of float
The box dimensions in the GRO file.
Returns
-------
box_vectors : ndarray, dtype=float, shape=(3,3)
Expand Down Expand Up @@ -171,7 +171,7 @@ def set_box_dimen(box_param):
array.res_id[i] = int(line[0:5])
array.res_name[i] = line[5:10].strip()
array.atom_name[i] = line[10:15].strip()
array.element[i] = guess_element(line[10:15].strip())
array.element = infer_elements(array.atom_name)

# Fill in coordinates and boxes
if isinstance(array, AtomArray):
Expand All @@ -186,7 +186,7 @@ def set_box_dimen(box_param):
box_i = atom_i[-1] + 1
box_param = [float(e)*10 for e in self.lines[box_i].split()]
array.box = set_box_dimen(box_param)

elif isinstance(array, AtomArrayStack):
for m in range(len(model_start_i)):
atom_i = get_atom_line_i(
Expand All @@ -204,18 +204,18 @@ def set_box_dimen(box_param):
# Create a box in the stack if not already existing
# and the box is not a dummy
if box is not None:
if array.box is None:
if array.box is None:
array.box = np.zeros((array.stack_depth(), 3, 3))
array.box[m] = box

return array


def set_structure(self, array):
"""
Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
file.
Parameters
----------
array : AtomArray or AtomArrayStack
Expand All @@ -235,7 +235,7 @@ def get_box_dimen(array):
----------
array : AtomArray
The atom array to get the box dimensions from.
Returns
-------
box : str
Expand All @@ -259,7 +259,7 @@ def get_box_dimen(array):
box[2,0], box[2,1],
)
return " ".join([f"{e:>9.5f}" for e in box_elements])

if "atom_id" in array.get_annotation_categories():
atom_id = array.atom_id
else:
Expand Down
Loading

0 comments on commit 427cea1

Please sign in to comment.