11
11
import numpy as np
12
12
from biotite .application .application import AppState , requires_state
13
13
from biotite .application .localapp import LocalApp , cleanup_tempfile , get_version
14
- from biotite .structure .io .pdbx .cif import CIFFile
14
+ from biotite .structure .error import BadStructureError
15
+ from biotite .structure .filter import filter_amino_acids
16
+ from biotite .structure .io .pdbx .cif import CIFCategory , CIFColumn , CIFFile
17
+ from biotite .structure .io .pdbx .component import MaskValue
15
18
from biotite .structure .io .pdbx .convert import set_structure
19
+ from biotite .structure .repair import create_continuous_res_ids
20
+ from biotite .structure .residues import get_residue_starts
16
21
17
22
18
23
class DsspApp (LocalApp ):
@@ -49,17 +54,19 @@ class DsspApp(LocalApp):
49
54
>>> app.start()
50
55
>>> app.join()
51
56
>>> print(app.get_sse())
52
- ['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'C ' 'C '
53
- 'C ' 'C']
57
+ ['C' 'H' 'H' 'H' 'H' 'H' 'H' 'H' 'T' 'T' 'G' 'G' 'G' 'G' 'T' 'C' 'P ' 'P '
58
+ 'P ' 'C']
54
59
"""
55
60
56
61
def __init__ (self , atom_array , bin_path = "mkdssp" ):
57
62
super ().__init__ (bin_path )
58
63
59
- # mkdssp requires also the
60
- # 'occupancy', 'b_factor' and 'charge' fields
61
- # -> Add these annotations to a copy of the input structure
64
+ if not np .all (filter_amino_acids (atom_array )):
65
+ raise BadStructureError ("The input structure must contain only amino acids" )
62
66
self ._array = atom_array .copy ()
67
+ # DSSP requires also the
68
+ # 'occupancy', 'b_factor' and 'charge' fields
69
+ # -> Add these placeholder values
63
70
categories = self ._array .get_annotation_categories ()
64
71
if "charge" not in categories :
65
72
self ._array .set_annotation (
@@ -73,6 +80,10 @@ def __init__(self, atom_array, bin_path="mkdssp"):
73
80
self ._array .set_annotation (
74
81
"occupancy" , np .ones (self ._array .array_length (), dtype = float )
75
82
)
83
+ # DSSP>=4 complains about the `pdbx_poly_seq_scheme` category,
84
+ # if `seq_id` does not start at 1
85
+ self ._array .res_id = create_continuous_res_ids (self ._array )
86
+
76
87
try :
77
88
# The parameters have changed in version 4
78
89
self ._new_cli = get_version (bin_path )[0 ] >= 4
@@ -86,6 +97,9 @@ def __init__(self, atom_array, bin_path="mkdssp"):
86
97
def run (self ):
87
98
in_file = CIFFile ()
88
99
set_structure (in_file , self ._array )
100
+ in_file .block ["pdbx_poly_seq_scheme" ] = _create_pdbx_poly_seq_scheme (
101
+ self ._array , in_file .block ["atom_site" ]["label_entity_id" ].as_array (str )
102
+ )
89
103
in_file .write (self ._in_file )
90
104
self ._in_file .flush ()
91
105
if self ._new_cli :
@@ -157,3 +171,46 @@ def annotate_sse(atom_array, bin_path="mkdssp"):
157
171
app .start ()
158
172
app .join ()
159
173
return app .get_sse ()
174
+
175
+
176
+ def _create_pdbx_poly_seq_scheme (atom_array , entity_ids ):
177
+ """
178
+ Create the ``pdbx_poly_seq_scheme`` category, as required by DSSP.
179
+
180
+ Parameters
181
+ ----------
182
+ atom_array : AtomArray
183
+ The atom array to create the category from.
184
+ entity_ids : ndarray, dtype=str
185
+ The entity IDs for each atoms.
186
+
187
+ Returns
188
+ -------
189
+ pdbx_poly_seq_scheme : CIFCategory
190
+ The ``pdbx_poly_seq_scheme`` category.
191
+ """
192
+ res_start_indices = get_residue_starts (atom_array )
193
+ chain_id = atom_array .chain_id [res_start_indices ]
194
+ res_name = atom_array .res_name [res_start_indices ]
195
+ res_id = atom_array .res_id [res_start_indices ]
196
+ ins_code = atom_array .ins_code [res_start_indices ]
197
+ hetero = atom_array .hetero [res_start_indices ]
198
+ entity_id = entity_ids [res_start_indices ]
199
+
200
+ poly_seq_scheme = CIFCategory ()
201
+ poly_seq_scheme ["asym_id" ] = chain_id
202
+ poly_seq_scheme ["entity_id" ] = entity_id
203
+ poly_seq_scheme ["seq_id" ] = res_id
204
+ poly_seq_scheme ["mon_id" ] = res_name
205
+ poly_seq_scheme ["ndb_seq_num" ] = res_id
206
+ poly_seq_scheme ["pdb_seq_num" ] = res_id
207
+ poly_seq_scheme ["auth_seq_num" ] = res_id
208
+ poly_seq_scheme ["pdb_mon_id" ] = res_name
209
+ poly_seq_scheme ["auth_mon_id" ] = res_name
210
+ poly_seq_scheme ["pdb_strand_id" ] = chain_id
211
+ poly_seq_scheme ["pdb_ins_code" ] = CIFColumn (
212
+ ins_code , np .where (ins_code == "" , MaskValue .MISSING , MaskValue .PRESENT )
213
+ )
214
+ poly_seq_scheme ["hetero" ] = np .where (hetero , "y" , "n" )
215
+
216
+ return poly_seq_scheme
0 commit comments