Skip to content

Commit 9946981

Browse files
committed
Add residue level functionality to get atom names per residue
1 parent af14b30 commit 9946981

File tree

6 files changed

+303
-23
lines changed

6 files changed

+303
-23
lines changed

doc/apidoc.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,8 +331,10 @@
331331
"get_residue_masks",
332332
"get_residue_starts_for",
333333
"get_residue_positions",
334+
"get_all_residue_positions",
334335
"get_residue_count",
335-
"residue_iter"
336+
"residue_iter",
337+
"get_atom_name_indices"
336338
],
337339
"Chain level utility" : [
338340
"get_chain_starts",
@@ -341,6 +343,7 @@
341343
"get_chain_masks",
342344
"get_chain_starts_for",
343345
"get_chain_positions",
346+
"get_all_chain_positions",
344347
"get_chains",
345348
"get_chain_count",
346349
"chain_iter"

src/biotite/structure/chains.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"get_chain_masks",
1717
"get_chain_starts_for",
1818
"get_chain_positions",
19+
"get_all_chain_positions",
1920
"chain_iter",
2021
"get_chains",
2122
"get_chain_count",
@@ -24,6 +25,7 @@
2425

2526
from biotite.structure.segments import (
2627
apply_segment_wise,
28+
get_all_segment_positions,
2729
get_segment_masks,
2830
get_segment_positions,
2931
get_segment_starts,
@@ -212,11 +214,43 @@ def get_chain_positions(array, indices):
212214
-------
213215
start_indices : ndarray, dtype=int, shape=(k,)
214216
The indices that point to the position of the chains.
217+
218+
See Also
219+
--------
220+
get_all_chain_positions :
221+
Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
215222
"""
216223
starts = get_chain_starts(array, add_exclusive_stop=True)
217224
return get_segment_positions(starts, indices)
218225

219226

227+
def get_all_chain_positions(array):
    """
    Determine, for every atom in the input `array`, the position of the
    chain this atom belongs to.

    Chain positions are counted from zero in order of appearance:
    atoms of the first chain get ``0``, atoms of the second chain get
    ``1``, etc.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to determine the chains from.

    Returns
    -------
    chain_indices : ndarray, dtype=int, shape=(n,)
        For each of the *n* atoms, the position of the chain the atom is part of.

    See Also
    --------
    get_chain_positions :
        Similar to this function, but for a given subset of atom indices.
    """
    # The chain starts carry the exclusive stop as last element,
    # as expected by the generic segment implementation
    return get_all_segment_positions(
        get_chain_starts(array, add_exclusive_stop=True),
        array.array_length(),
    )
252+
253+
220254
def get_chains(array):
221255
"""
222256
Get the chain IDs of an atom array (stack).

src/biotite/structure/residues.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,17 @@
1616
"get_residue_masks",
1717
"get_residue_starts_for",
1818
"get_residue_positions",
19+
"get_all_residue_positions",
1920
"get_residues",
2021
"get_residue_count",
2122
"residue_iter",
23+
"get_atom_name_indices",
2224
]
2325

26+
import numpy as np
2427
from biotite.structure.segments import (
2528
apply_segment_wise,
29+
get_all_segment_positions,
2630
get_segment_masks,
2731
get_segment_positions,
2832
get_segment_starts,
@@ -361,6 +365,11 @@ def get_residue_positions(array, indices):
361365
residue_indices : ndarray, dtype=int, shape=(k,)
362366
The indices that point to the position of the residues.
363367
368+
See Also
369+
--------
370+
get_all_residue_positions :
371+
Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
372+
364373
Examples
365374
--------
366375
>>> atom_index = [5, 42]
@@ -380,6 +389,50 @@ def get_residue_positions(array, indices):
380389
return get_segment_positions(starts, indices)
381390

382391

392+
def get_all_residue_positions(array):
    """
    Determine, for every atom in the input `array`, the position of the
    residue this atom belongs to.

    Residue positions are counted from zero in order of appearance:
    atoms of the first residue get ``0``, atoms of the second residue get
    ``1``, etc.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to determine the residues from.

    Returns
    -------
    residue_indices : ndarray, dtype=int, shape=(n,)
        For each of the *n* atoms, the position of the residue the atom is part of.

    See Also
    --------
    get_residue_positions :
        Similar to this function, but for a given subset of atom indices.

    Examples
    --------
    >>> print(get_all_residue_positions(atom_array))
    [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
     1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2
     2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
     3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5
     5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6
     6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7
     7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8
     8 9 9 9 9 9 9 9 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11
     11 11 11 11 11 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13
     13 13 13 14 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 15 15 15 15
     15 15 15 15 15 15 15 15 15 15 16 16 16 16 16 16 16 16 16 16 16 16 16 16
     17 17 17 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18
     18 18 18 18 19 19 19 19 19 19 19 19 19 19 19 19]
    """
    # The residue starts carry the exclusive stop as last element,
    # as expected by the generic segment implementation
    return get_all_segment_positions(
        get_residue_starts(array, add_exclusive_stop=True),
        array.array_length(),
    )
434+
435+
383436
def get_residues(array):
384437
"""
385438
Get the residue IDs and names of an atom array (stack).
@@ -542,3 +595,122 @@ def residue_iter(array):
542595
starts = get_residue_starts(array, add_exclusive_stop=True)
543596
for residue in segment_iter(array, starts):
544597
yield residue
598+
599+
600+
def get_atom_name_indices(atoms, atom_names):
    """
    For each residue, get the index of the atom with the given atom name.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        Search for the indices of the given atom names in this structure.
    atom_names : list of (str or None), length=p
        The names of the atoms to get the indices of.
        If an entry is ``None``, the atoms flagged in the ``hetero`` annotation
        are selected for that column instead of comparing atom names.

    Returns
    -------
    indices : ndarray, dtype=int, shape=(k, p)
        For every residue and atom name, the return value contains the atom index in
        the :class:`AtomArray` where the sought atom name is located.
        Where the atom name is not present in a residue, the array is filled with `-1`.
        If the structure contains no atoms, the array has the shape ``(0, p)``.

    Notes
    -----
    If a residue contains several atoms matching the same entry of `atom_names`,
    only one of their indices is reported and it is not guaranteed which one.

    Examples
    --------
    >>> indices = get_atom_name_indices(atom_array, ["CA", "CB"])
    >>> print(indices)
    [[ 1 4]
     [ 17 20]
     [ 36 39]
     [ 57 60]
     [ 76 79]
     [ 93 96]
     [117 120]
     [136 139]
     [158 161]
     [170 -1]
     [177 -1]
     [184 187]
     [198 201]
     [209 212]
     [220 -1]
     [227 230]
     [251 254]
     [265 268]
     [279 282]
     [293 296]]
    >>> for row in indices:
    ...     for index in row:
    ...         if index != -1:
    ...             print(atom_array[index])
    ...     print()
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN CB C -9.437 3.396 -2.889
    <BLANKLINE>
    A 2 LEU CA C -4.923 4.002 -2.452
    A 2 LEU CB C -4.411 5.450 -2.619
    <BLANKLINE>
    A 3 TYR CA C -3.690 2.738 0.981
    A 3 TYR CB C -3.964 3.472 2.302
    <BLANKLINE>
    A 4 ILE CA C -5.857 -0.449 0.613
    A 4 ILE CB C -7.386 -0.466 0.343
    <BLANKLINE>
    A 5 GLN CA C -4.122 -1.167 -2.743
    A 5 GLN CB C -4.292 -0.313 -4.013
    <BLANKLINE>
    A 6 TRP CA C -0.716 -0.631 -0.993
    A 6 TRP CB C -0.221 0.703 -0.417
    <BLANKLINE>
    A 7 LEU CA C -1.641 -2.932 1.963
    A 7 LEU CB C -2.710 -2.645 3.033
    <BLANKLINE>
    A 8 LYS CA C -3.024 -5.791 -0.269
    A 8 LYS CB C -4.224 -5.697 -1.232
    <BLANKLINE>
    A 9 ASP CA C 0.466 -6.016 -1.905
    A 9 ASP CB C 1.033 -4.839 -2.724
    <BLANKLINE>
    A 10 GLY CA C 2.060 -6.618 1.593
    <BLANKLINE>
    A 11 GLY CA C 2.626 -2.967 2.723
    <BLANKLINE>
    A 12 PRO CA C 6.333 -2.533 3.806
    A 12 PRO CB C 6.740 -2.387 5.279
    <BLANKLINE>
    A 13 SER CA C 7.049 -6.179 2.704
    A 13 SER CB C 6.458 -7.371 3.472
    <BLANKLINE>
    A 14 SER CA C 6.389 -5.315 -1.015
    A 14 SER CB C 4.914 -4.993 -1.265
    <BLANKLINE>
    A 15 GLY CA C 9.451 -3.116 -1.870
    <BLANKLINE>
    A 16 ARG CA C 7.289 0.084 -2.054
    A 16 ARG CB C 6.110 -0.243 -2.994
    <BLANKLINE>
    A 17 PRO CA C 6.782 3.088 0.345
    A 17 PRO CB C 7.554 4.394 0.119
    <BLANKLINE>
    A 18 PRO CA C 3.287 4.031 1.686
    A 18 PRO CB C 3.035 4.190 3.187
    <BLANKLINE>
    A 19 PRO CA C 1.185 6.543 -0.353
    A 19 PRO CB C 0.048 6.014 -1.229
    <BLANKLINE>
    A 20 SER CA C 0.852 10.027 1.285
    A 20 SER CB C 1.972 11.071 1.284
    <BLANKLINE>
    """
    residue_indices = get_all_residue_positions(atoms)
    # Residue positions count up from zero, so the last one fixes the number of
    # residues; a structure without atoms has no last element and hence no residues
    residue_count = residue_indices[-1] + 1 if len(residue_indices) > 0 else 0
    indices = np.full((residue_count, len(atom_names)), fill_value=-1, dtype=int)
    for i, atom_name in enumerate(atom_names):
        if atom_name is None:
            # No name given: select the atoms flagged as hetero instead
            atom_name_indices = np.where(atoms.hetero)[0]
        else:
            atom_name_indices = np.where(atoms.atom_name == atom_name)[0]
        # Scatter each matching atom index into the row of its residue;
        # rows without a match keep the fill value of -1
        indices[residue_indices[atom_name_indices], i] = atom_name_indices
    return indices

src/biotite/structure/segments.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"get_segment_masks",
1212
"get_segment_starts_for",
1313
"get_segment_positions",
14+
"get_all_segment_positions",
1415
"segment_iter",
1516
]
1617

@@ -252,6 +253,11 @@ def get_segment_positions(starts, indices):
252253
-------
253254
segment_indices : ndarray, shape=(k,)
254255
The indices that point to the position of the segments.
256+
257+
See Also
258+
--------
259+
get_all_segment_positions :
260+
Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
255261
"""
256262
indices = np.asarray(indices)
257263
length = starts[-1]
@@ -269,6 +275,36 @@ def get_segment_positions(starts, indices):
269275
return np.searchsorted(starts, indices, side="right") - 1
270276

271277

278+
def get_all_segment_positions(starts, length):
    """
    Generalized version of :func:`get_all_residue_positions()`
    for residues and chains.

    Parameters
    ----------
    starts : array-like, dtype=int
        The sorted start indices of segments.
        Includes exclusive stop, i.e. the length of the corresponding
        atom array.
    length : int
        The length of the corresponding :class:`struc.AtomArray`.

    Returns
    -------
    segment_indices : ndarray, dtype=int, shape=(length,)
        For each atom the indices that point to the corresponding position of the
        segments.

    See Also
    --------
    get_segment_positions :
        Similar to this function, but for a given subset of atom indices.
    """
    # Normalize the input, so that tuples and empty sequences are also valid
    # fancy indices below (a tuple would be read as a multi-dimensional index,
    # an empty list as a float index)
    starts = np.asarray(starts, dtype=int)
    segment_changes = np.zeros(length, dtype=int)
    # Mark the first atom of every segment except the first one;
    # the last element is the exclusive stop and not the start of a segment
    segment_changes[starts[1:-1]] = 1
    # The running sum increases by one at each marked atom,
    # which yields the zero-based segment position of each atom
    return np.cumsum(segment_changes)
306+
307+
272308
def segment_iter(array, starts):
273309
"""
274310
Generalized version of :func:`residue_iter()`

0 commit comments

Comments
 (0)