Skip to content

Commit

Permalink
Merge pull request #512 from padix-key/issue-500
Browse files Browse the repository at this point in the history
Allow 5-character residue names
  • Loading branch information
padix-key authored Dec 8, 2023
2 parents c76616b + 5c91d57 commit 3b194c4
Show file tree
Hide file tree
Showing 10 changed files with 4,603 additions and 192 deletions.
2 changes: 1 addition & 1 deletion src/biotite/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
chain_id string (U4) 'A','S','AB', ... Polypeptide chain
res_id int 1,2,3, ... Sequence position of residue
ins_code string (U1) '', 'A','B',.. PDB insertion code (iCode)
res_name string (U3) 'GLY','ALA', ... Residue name
res_name string (U5) 'GLY','ALA', ... Residue name
hetero bool True, False False for ``ATOM``, true for ``HETATM``
atom_name string (U6) 'CA','N', ... Atom name
element string (U2) 'C','O','SE', ... Chemical Element
Expand Down
2 changes: 1 addition & 1 deletion src/biotite/structure/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self, length):
self.add_annotation("chain_id", dtype="U4")
self.add_annotation("res_id", dtype=int)
self.add_annotation("ins_code", dtype="U1")
self.add_annotation("res_name", dtype="U3")
self.add_annotation("res_name", dtype="U5")
self.add_annotation("hetero", dtype=bool)
self.add_annotation("atom_name", dtype="U6")
self.add_annotation("element", dtype="U2")
Expand Down
68 changes: 34 additions & 34 deletions src/biotite/structure/io/mmtf/convertfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ def get_model_count(file):
"""
return file["numModels"]


def get_structure(file, model=None, altloc="first",
extra_fields=[], include_bonds=False):
"""
get_structure(file, model=None, altloc="first", extra_fields=[],
include_bonds=False)
Get an :class:`AtomArray` or :class:`AtomArrayStack` from the MMTF file.
Parameters
----------
file : MMTFFile
Expand Down Expand Up @@ -86,12 +86,12 @@ def get_structure(file, model=None, altloc="first",
If set to true, a :class:`BondList` will be created for the
resulting :class:`AtomArray` containing the bond information
from the file.
Returns
-------
array : AtomArray or AtomArrayStack
The return type depends on the `model` parameter.
Examples
--------
Expand All @@ -105,7 +105,7 @@ def get_structure(file, model=None, altloc="first",
38 304
"""
cdef int i, j, m


# Obtain (and potentially decode) required arrays/values from file
cdef int atom_count = file["numAtoms"]
Expand All @@ -130,7 +130,7 @@ def get_structure(file, model=None, altloc="first",
cdef np.ndarray inscode
all_altloc_ids = file.get("altLocList")
inscode = file.get("insCodeList")


# Create arrays from 'groupList' list of dictionaries
cdef list group_list = file["groupList"]
Expand All @@ -142,8 +142,8 @@ def get_structure(file, model=None, altloc="first",
for i in range(len(group_list)):
atoms_per_res[i] = len(group_list[i]["atomNameList"])
cdef int32 max_atoms_per_res = np.max(atoms_per_res)
#Create the arrays
cdef np.ndarray res_names = np.zeros(len(group_list), dtype="U3")
# Create the arrays
cdef np.ndarray res_names = np.zeros(len(group_list), dtype="U5")
cdef np.ndarray hetero_res = np.zeros(len(group_list), dtype=bool)
cdef np.ndarray atom_names = np.zeros((len(group_list), max_atoms_per_res),
dtype="U6")
Expand All @@ -159,16 +159,16 @@ def get_structure(file, model=None, altloc="first",
atom_names[i, :atoms_per_res[i]] = residue["atomNameList"]
elements[i, :atoms_per_res[i]] = residue["elementList"]
charges[i, :atoms_per_res[i]] = residue["formalChargeList"]


# Create the atom array (stack)
cdef int depth, length
cdef int start_i, stop_i
cdef bint extra_charge
cdef np.ndarray altloc_ids
cdef np.ndarray inscode_array


if model == None:
lengths = _get_model_lengths(res_type_i, chains_per_model,
res_per_chain, atoms_per_res)
Expand All @@ -181,22 +181,22 @@ def get_structure(file, model=None, altloc="first",
length = lengths[0]

depth = model_count


array = AtomArrayStack(depth, length)
array.coord = np.stack(
[x_coord,
y_coord,
z_coord],
axis=1
).reshape(depth, length, 3)

# Create altloc array for the final filtering
if all_altloc_ids is not None:
altloc_ids = all_altloc_ids[:length]
else:
altloc_ids = None

extra_charge = False
if "ins_code" in extra_fields:
extra_inscode = True
Expand All @@ -210,19 +210,19 @@ def get_structure(file, model=None, altloc="first",
array.set_annotation("b_factor", b_factor[:length])
if "occupancy" in extra_fields:
array.set_annotation("occupancy", occupancy[:length])

_fill_annotations(1, array, extra_charge,
chain_names, chains_per_model, res_per_chain,
res_type_i, res_ids, inscode, atoms_per_res,
res_names, hetero_res, atom_names, elements, charges)

if include_bonds:
array.bonds = _create_bond_list(
1, file["bondAtomList"], file["bondOrderList"],
0, length, file["numAtoms"], group_list, res_type_i,
atoms_per_res, res_per_chain, chains_per_model
)


else:
lengths = _get_model_lengths(res_type_i, chains_per_model,
Expand All @@ -242,18 +242,18 @@ def get_structure(file, model=None, altloc="first",
# for the specified model
start_i = np.sum(lengths[:model-1])
stop_i = start_i + length

array = AtomArray(length)
array.coord[:,0] = x_coord[start_i : stop_i]
array.coord[:,1] = y_coord[start_i : stop_i]
array.coord[:,2] = z_coord[start_i : stop_i]

# Create altloc array for the final filtering
if all_altloc_ids is not None:
altloc_ids = np.array(all_altloc_ids[start_i : stop_i], dtype="U1")
else:
altloc_ids = None

extra_charge = False
if "charge" in extra_fields:
extra_charge = True
Expand All @@ -264,19 +264,19 @@ def get_structure(file, model=None, altloc="first",
array.set_annotation("b_factor", b_factor[start_i : stop_i])
if "occupancy" in extra_fields:
array.set_annotation("occupancy", occupancy[start_i : stop_i])

_fill_annotations(model, array, extra_charge,
chain_names, chains_per_model, res_per_chain,
res_type_i, res_ids, inscode, atoms_per_res,
res_names, hetero_res, atom_names, elements, charges)

if include_bonds:
array.bonds = _create_bond_list(
model, file["bondAtomList"], file["bondOrderList"],
start_i, stop_i, file["numAtoms"], group_list, res_type_i,
atoms_per_res, res_per_chain, chains_per_model
)

# Get box
if "unitCell" in file:
a_len, b_len, c_len, alpha, beta, gamma = file["unitCell"]
Expand All @@ -293,8 +293,8 @@ def get_structure(file, model=None, altloc="first",
else:
# AtomArray
array.box = box


# Filter altloc IDs and return
if altloc_ids is None:
return array
Expand Down Expand Up @@ -343,7 +343,7 @@ def _get_model_lengths(int32[:] res_type_i,
model_i += 1
return np.asarray(model_lengths)


def _fill_annotations(int model, array,
bint extra_charge,
np.ndarray chain_names,
Expand Down Expand Up @@ -388,7 +388,7 @@ def _fill_annotations(int model, array,
# is equal to the total number of residues
for res_i in range(res_type_i.shape[0]):
# Wait for the data of the given model
if model_i == model-1:
if model_i == model-1:
chain_id_for_chain = chain_names[chain_i]
res_id_for_res = res_ids[res_i]
if res_inscodes is not None:
Expand All @@ -408,12 +408,12 @@ def _fill_annotations(int model, array,
if extra_charge:
charge[atom_i] = charges[type_i][atom_index_in_res]
atom_i += 1

elif model_i > model-1:
# The given model has already been parsed
# -> parsing is finished
break

res_count_in_chain += 1
if res_count_in_chain == res_per_chain[chain_i]:
# Chain is full -> Bump chain index and reset residue count
Expand Down Expand Up @@ -466,14 +466,14 @@ def _create_bond_list(int model, np.ndarray bonds, np.ndarray bond_types,
# is equal to the total number of residues
for res_i in range(res_type_i.shape[0]):
# Wait for the data of the given model
if model_i == model-1:
if model_i == model-1:
type_i = res_type_i[res_i]
bond_list_per_res = BondList(
atoms_per_res[type_i],
intra_bonds[type_i, :bonds_per_res[type_i]]
)
intra_bond_list += bond_list_per_res

elif model_i > model-1:
# The given model has already been parsed
# -> parsing is finished
Expand All @@ -489,7 +489,7 @@ def _create_bond_list(int model, np.ndarray bonds, np.ndarray bond_types,
# Model is full -> Bump model index and reset chain count
chain_count_in_model = 0
model_i += 1

# Add inter-residue bonds to BondList
cdef np.ndarray inter_bonds = np.zeros((len(bond_types), 3),
dtype=np.uint32)
Expand Down
Loading

0 comments on commit 3b194c4

Please sign in to comment.