16
16
"get_residue_masks" ,
17
17
"get_residue_starts_for" ,
18
18
"get_residue_positions" ,
19
+ "get_all_residue_positions" ,
19
20
"get_residues" ,
20
21
"get_residue_count" ,
21
22
"residue_iter" ,
23
+ "get_atom_name_indices" ,
22
24
]
23
25
26
+ import numpy as np
24
27
from biotite .structure .segments import (
25
28
apply_segment_wise ,
29
+ get_all_segment_positions ,
26
30
get_segment_masks ,
27
31
get_segment_positions ,
28
32
get_segment_starts ,
@@ -361,6 +365,11 @@ def get_residue_positions(array, indices):
361
365
residue_indices : ndarray, dtype=int, shape=(k,)
362
366
The indices that point to the position of the residues.
363
367
368
+ See Also
369
+ --------
370
+ get_all_residue_positions :
371
+ Similar to this function, but for all atoms in the :class:`struc.AtomArray`.
372
+
364
373
Examples
365
374
--------
366
375
>>> atom_index = [5, 42]
@@ -380,6 +389,50 @@ def get_residue_positions(array, indices):
380
389
return get_segment_positions (starts , indices )
381
390
382
391
392
def get_all_residue_positions(array):
    """
    Map every atom of the input `array` to the position of the residue
    it belongs to.

    Residue positions are counted from zero: atoms of the first residue
    in the atom array get ``0``, atoms of the second residue get ``1``,
    and so on.

    Parameters
    ----------
    array : AtomArray or AtomArrayStack
        The atom array (stack) to determine the residues from.

    Returns
    -------
    residue_indices : ndarray, dtype=int, shape=(n,)
        For each atom, the position of the residue this atom is part of.

    See Also
    --------
    get_residue_positions :
        Similar to this function, but for a given subset of atom indices.

    Examples
    --------
    >>> print(get_all_residue_positions(atom_array))
    [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
    1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2
    2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
    3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 5 5
    5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6
    6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7
    7 7 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8
    8 9 9 9 9 9 9 9 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11
    11 11 11 11 11 12 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13
    13 13 13 14 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 15 15 15 15
    15 15 15 15 15 15 15 15 15 15 16 16 16 16 16 16 16 16 16 16 16 16 16 16
    17 17 17 17 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18
    18 18 18 18 19 19 19 19 19 19 19 19 19 19 19 19]
    """
    # Residue start indices, with the exclusive stop appended as requested
    # via `add_exclusive_stop`.
    residue_bounds = get_residue_starts(array, add_exclusive_stop=True)
    atom_count = array.array_length()
    return get_all_segment_positions(residue_bounds, atom_count)
434
+
435
+
383
436
def get_residues (array ):
384
437
"""
385
438
Get the residue IDs and names of an atom array (stack).
@@ -542,3 +595,122 @@ def residue_iter(array):
542
595
starts = get_residue_starts (array , add_exclusive_stop = True )
543
596
for residue in segment_iter (array , starts ):
544
597
yield residue
598
+
599
+
600
def get_atom_name_indices(atoms, atom_names):
    """
    For each residue, get the index of the atom with the given atom name.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        Search for the indices of the given atom names in this structure.
    atom_names : list of str, length=p
        The names of the atoms to get the indices of.

    Returns
    -------
    indices : ndarray, dtype=int, shape=(k, p)
        For every residue and atom name, the return value contains the atom index in
        the :class:`AtomArray` where the sought atom name is located.
        Where the atom name is not present in a residue, the array is filled with `-1`.
        If `atoms` contains no atoms at all, an empty array with shape ``(0, p)``
        is returned.

    Examples
    --------

    >>> indices = get_atom_name_indices(atom_array, ["CA", "CB"])
    >>> print(indices)
    [[ 1 4]
    [ 17 20]
    [ 36 39]
    [ 57 60]
    [ 76 79]
    [ 93 96]
    [117 120]
    [136 139]
    [158 161]
    [170 -1]
    [177 -1]
    [184 187]
    [198 201]
    [209 212]
    [220 -1]
    [227 230]
    [251 254]
    [265 268]
    [279 282]
    [293 296]]
    >>> for row in indices:
    ...     for index in row:
    ...         if index != -1:
    ...             print(atom_array[index])
    ...     print()
    A 1 ASN CA C -8.608 3.135 -1.618
    A 1 ASN CB C -9.437 3.396 -2.889
    <BLANKLINE>
    A 2 LEU CA C -4.923 4.002 -2.452
    A 2 LEU CB C -4.411 5.450 -2.619
    <BLANKLINE>
    A 3 TYR CA C -3.690 2.738 0.981
    A 3 TYR CB C -3.964 3.472 2.302
    <BLANKLINE>
    A 4 ILE CA C -5.857 -0.449 0.613
    A 4 ILE CB C -7.386 -0.466 0.343
    <BLANKLINE>
    A 5 GLN CA C -4.122 -1.167 -2.743
    A 5 GLN CB C -4.292 -0.313 -4.013
    <BLANKLINE>
    A 6 TRP CA C -0.716 -0.631 -0.993
    A 6 TRP CB C -0.221 0.703 -0.417
    <BLANKLINE>
    A 7 LEU CA C -1.641 -2.932 1.963
    A 7 LEU CB C -2.710 -2.645 3.033
    <BLANKLINE>
    A 8 LYS CA C -3.024 -5.791 -0.269
    A 8 LYS CB C -4.224 -5.697 -1.232
    <BLANKLINE>
    A 9 ASP CA C 0.466 -6.016 -1.905
    A 9 ASP CB C 1.033 -4.839 -2.724
    <BLANKLINE>
    A 10 GLY CA C 2.060 -6.618 1.593
    <BLANKLINE>
    A 11 GLY CA C 2.626 -2.967 2.723
    <BLANKLINE>
    A 12 PRO CA C 6.333 -2.533 3.806
    A 12 PRO CB C 6.740 -2.387 5.279
    <BLANKLINE>
    A 13 SER CA C 7.049 -6.179 2.704
    A 13 SER CB C 6.458 -7.371 3.472
    <BLANKLINE>
    A 14 SER CA C 6.389 -5.315 -1.015
    A 14 SER CB C 4.914 -4.993 -1.265
    <BLANKLINE>
    A 15 GLY CA C 9.451 -3.116 -1.870
    <BLANKLINE>
    A 16 ARG CA C 7.289 0.084 -2.054
    A 16 ARG CB C 6.110 -0.243 -2.994
    <BLANKLINE>
    A 17 PRO CA C 6.782 3.088 0.345
    A 17 PRO CB C 7.554 4.394 0.119
    <BLANKLINE>
    A 18 PRO CA C 3.287 4.031 1.686
    A 18 PRO CB C 3.035 4.190 3.187
    <BLANKLINE>
    A 19 PRO CA C 1.185 6.543 -0.353
    A 19 PRO CB C 0.048 6.014 -1.229
    <BLANKLINE>
    A 20 SER CA C 0.852 10.027 1.285
    A 20 SER CB C 1.972 11.071 1.284
    <BLANKLINE>
    """
    # A structure without atoms has no residues; without this guard the
    # lookup of the last residue position below would raise an IndexError.
    if atoms.array_length() == 0:
        return np.full((0, len(atom_names)), fill_value=-1, dtype=int)

    residue_indices = get_all_residue_positions(atoms)
    # The last atom carries the highest residue position,
    # so the residue count is that position plus one.
    residue_count = residue_indices[-1] + 1
    # Start from -1 everywhere; cells stay -1 where a residue lacks the atom name.
    indices = np.full((residue_count, len(atom_names)), fill_value=-1, dtype=int)
    for i, atom_name in enumerate(atom_names):
        if atom_name is None:
            # NOTE(review): a `None` entry selects the atoms flagged in
            # `atoms.hetero` instead of matching a name; this is not covered
            # by the documented parameters - confirm it is intended.
            atom_name_indices = np.where(atoms.hetero)[0]
        else:
            atom_name_indices = np.where(atoms.atom_name == atom_name)[0]
        # Write each matching atom index into the row of its residue.
        # If a residue has several matching atoms, the same cell is written
        # repeatedly; presumably the last match is kept - verify if relevant.
        indices[residue_indices[atom_name_indices], i] = atom_name_indices
    return indices
0 commit comments