Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add repair functions #576

Merged
merged 4 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/apidoc.json
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,12 @@
"check_bond_continuity",
"check_linear_continuity"
],
"Repair" : [
"renumber_atom_ids",
"renumber_res_ids",
"create_continuous_res_ids",
"infer_elements"
],
"Residue level utility" : [
"get_residue_starts",
"get_residues",
Expand Down
1 change: 1 addition & 0 deletions src/biotite/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
from .molecules import *
from .pseudoknots import *
from .rdf import *
from .repair import *
from .residues import *
from .chains import *
from .sasa import *
Expand Down
83 changes: 16 additions & 67 deletions src/biotite/structure/integrity.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
__all__ = ["check_id_continuity", "check_atom_id_continuity",
"check_res_id_continuity", "check_backbone_continuity",
"check_duplicate_atoms", "check_bond_continuity",
"check_linear_continuity", "renumber_atom_ids", "renumber_res_ids"]
"check_linear_continuity"]

import numpy as np
import warnings
Expand All @@ -32,17 +32,17 @@ def check_id_continuity(array):
"""
Check if the residue IDs are incremented by more than 1 or
decremented, from one atom to the next one.

An increment by more than 1 is as strong clue for missing residues,
a decrement means probably a start of a new chain.

DEPRECATED: Use :func:`check_res_id_continuity()` instead.

Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.

Returns
-------
discontinuity : ndarray, dtype=int
Expand All @@ -60,14 +60,14 @@ def check_atom_id_continuity(array):
"""
Check if the atom IDs are incremented by more than 1 or
decremented, from one atom to the next one.

An increment by more than 1 is as strong clue for missing atoms.

Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.

Returns
-------
discontinuity : ndarray, dtype=int
Expand All @@ -81,15 +81,15 @@ def check_res_id_continuity(array):
"""
Check if the residue IDs are incremented by more than 1 or
decremented, from one atom to the next one.

An increment by more than 1 is as strong clue for missing residues,
a decrement means probably a start of a new chain.

Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.

Returns
-------
discontinuity : ndarray, dtype=int
Expand Down Expand Up @@ -168,7 +168,7 @@ def check_backbone_continuity(array, min_len=1.2, max_len=1.8):
"""
Check if the (peptide or phosphate) backbone atoms have
non-reasonable distance to the next atom.

A large or very small distance is a very strong clue, that there is
no bond between those atoms, therefore the chain is discontinued.

Expand Down Expand Up @@ -206,16 +206,16 @@ def check_duplicate_atoms(array):
"""
Check if a structure contains duplicate atoms, i.e. two atoms in a
structure have the same annotations (coordinates may be different).

Duplicate atoms may appear, when a structure has occupancy for an
atom at two or more positions or when the *altloc* positions are
improperly read.

Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.

Returns
-------
duplicate : ndarray, dtype=int
Expand All @@ -232,7 +232,7 @@ def check_duplicate_atoms(array):
for annot in annots:
# For each annotation array filter out the atoms until
# index i that have an unequal annotation
# to the atom at index i
# to the atom at index i
is_dublicate &= (annot[:i] == annot[i])
# After checking all annotation arrays,
# if there still is any duplicate to the atom at index i,
Expand All @@ -255,7 +255,7 @@ def check_in_box(array):
----------
array : AtomArray or AtomArrayStack
The array to be checked.

Returns
-------
outside : ndarray, dtype=int
Expand All @@ -266,54 +266,3 @@ def check_in_box(array):
box = array.box
fractions = coord_to_fraction(array, box)
return np.where(((fractions >= 0) & (fractions < 1)).all(axis=-1))[0]


def renumber_atom_ids(array, start=None):
"""
Renumber the atom IDs of the given array.

Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
start : int, optional
The starting index for renumbering.
The first ID in the array is taken by default.

Returns
-------
array : AtomArray or AtomArrayStack
The renumbered array.
"""
if "atom_id" not in array.get_annotation_categories():
raise ValueError("The atom array must have the 'atom_id' annotation")
if start is None:
start = array.atom_id[0]
array.atom_id = np.arange(start, array.shape[-1]+1)
return array


def renumber_res_ids(array, start=None):
"""
Renumber the residue IDs of the given array.

Parameters
----------
array : AtomArray or AtomArrayStack
The array to be checked.
start : int, optional
The starting index for renumbering.
The first ID in the array is taken by default.

Returns
-------
array : AtomArray or AtomArrayStack
The renumbered array.
"""
if start is None:
start = array.res_id[0]
diff = np.diff(array.res_id)
diff[diff != 0] = 1
new_res_ids = np.concatenate(([start], diff)).cumsum()
array.res_id = new_res_ids
return array
41 changes: 1 addition & 40 deletions src/biotite/structure/io/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import os.path
import io
from ..atoms import AtomArray, AtomArrayStack
from ..atoms import AtomArrayStack


def load_structure(file_path, template=None, **kwargs):
Expand Down Expand Up @@ -224,42 +224,3 @@ def _as_single_model_if_possible(atoms):
return atoms[0]
else:
return atoms


# Helper function to estimate elements from atom names
_elements = [elem.upper() for elem in
["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg",
"Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe",
"Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
"Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te",
"I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb",
"Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt",
"Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa",
"U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", "Rf",
"Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts",
"Og"]
]
def _guess_element(atom_name):
# remove digits (1H -> H)
elem = "".join([i for i in atom_name if not i.isdigit()])
elem = elem.upper()
if len(elem) == 0:
return ""

# Some often used elements for biomolecules
if elem.startswith("C") or elem.startswith("N") or \
elem.startswith("O") or elem.startswith("S") or \
elem.startswith("H"):
return elem[0]

# Exactly match element abbreviations
try:
return _elements[_elements.index(elem[:2])]
except ValueError:
try:
return _elements[_elements.index(elem[0])]
except ValueError:
pass

return ""

32 changes: 16 additions & 16 deletions src/biotite/structure/io/gro/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ...atoms import AtomArray, AtomArrayStack
from ...box import is_orthogonal
from ....file import TextFile, InvalidFileError
from ..general import _guess_element as guess_element
from ...repair import infer_elements
from ...error import BadStructureError
import copy
from datetime import datetime
Expand Down Expand Up @@ -38,15 +38,15 @@ class GROFile(TextFile):
--------
Load a `\\*.gro` file, modify the structure and save the new
structure into a new file:

>>> import os.path
>>> file = GROFile.read(os.path.join(path_to_structures, "1l2y.gro"))
>>> array_stack = file.get_structure()
>>> array_stack_mod = rotate(array_stack, [1,2,3])
>>> file = GROFile()
>>> file.set_structure(array_stack_mod)
>>> file.write(os.path.join(path_to_directory, "1l2y_mod.gro"))

"""
def get_model_count(self):
"""
Expand All @@ -68,7 +68,7 @@ def get_structure(self, model=None):
"""
Get an :class:`AtomArray` or :class:`AtomArrayStack` from the
GRO file.

Parameters
----------
model : int, optional
Expand All @@ -80,21 +80,21 @@ def get_structure(self, model=None):
If this parameter is omitted, an :class:`AtomArrayStack`
containing all models will be returned, even if the
structure contains only one model.

Returns
-------
array : AtomArray or AtomArrayStack
The return type depends on the `model` parameter.
"""

def get_atom_line_i(model_start_i, model_atom_counts):
"""
Helper function to get the indices of all atoms for a model
"""
return np.arange(
model_start_i+1, model_start_i+1+model_atom_counts
)

def set_box_dimen(box_param):
"""
Helper function to create the box vectors from the values
Expand All @@ -104,7 +104,7 @@ def set_box_dimen(box_param):
----------
box_param : list of float
The box dimensions in the GRO file.

Returns
-------
box_vectors : ndarray, dtype=float, shape=(3,3)
Expand Down Expand Up @@ -171,7 +171,7 @@ def set_box_dimen(box_param):
array.res_id[i] = int(line[0:5])
array.res_name[i] = line[5:10].strip()
array.atom_name[i] = line[10:15].strip()
array.element[i] = guess_element(line[10:15].strip())
array.element = infer_elements(array.atom_name)

# Fill in coordinates and boxes
if isinstance(array, AtomArray):
Expand All @@ -186,7 +186,7 @@ def set_box_dimen(box_param):
box_i = atom_i[-1] + 1
box_param = [float(e)*10 for e in self.lines[box_i].split()]
array.box = set_box_dimen(box_param)

elif isinstance(array, AtomArrayStack):
for m in range(len(model_start_i)):
atom_i = get_atom_line_i(
Expand All @@ -204,18 +204,18 @@ def set_box_dimen(box_param):
# Create a box in the stack if not already existing
# and the box is not a dummy
if box is not None:
if array.box is None:
if array.box is None:
array.box = np.zeros((array.stack_depth(), 3, 3))
array.box[m] = box

return array


def set_structure(self, array):
"""
Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
file.

Parameters
----------
array : AtomArray or AtomArrayStack
Expand All @@ -235,7 +235,7 @@ def get_box_dimen(array):
----------
array : AtomArray
The atom array to get the box dimensions from.

Returns
-------
box : str
Expand All @@ -259,7 +259,7 @@ def get_box_dimen(array):
box[2,0], box[2,1],
)
return " ".join([f"{e:>9.5f}" for e in box_elements])

if "atom_id" in array.get_annotation_categories():
atom_id = array.atom_id
else:
Expand Down
Loading
Loading