@@ -149,10 +149,15 @@ def select_altloc(fhandle, selloc=None, byocc=False):
149
149
prev_resname = ''
150
150
prev_resnum = ''
151
151
152
+ # uses the same function names in the loop below. However, depending
153
+ # on the input options, the functions used are different. One is
154
+ # specific for byocc=True, and the other for selection by altloc identifier
152
155
flush_func_multi_residues = flush_resloc_occ if byocc else flush_resloc
156
+
153
157
flush_func_single_residues = \
154
158
flush_resloc_occ_same_residue if byocc else flush_resloc_id_same_residue
155
159
160
+ # defines records and terminators
156
161
records = ('ATOM' , 'HETATM' , 'ANISOU' )
157
162
terminators = ('TER' , 'END' , 'CONECT' , 'END' , 'ENDMDL' )
158
163
@@ -170,16 +175,19 @@ def select_altloc(fhandle, selloc=None, byocc=False):
170
175
# if we see the altloc group has changed, we should flush
171
176
# the lines observed for the previous altloc group
172
177
173
- # uses for loop instead of "yield from" to maintain compatibility
174
- # with older python version
178
+ # uses a "for" loop instead of "yield from" to maintain
179
+ # compatibility with older Python versions
175
180
if partial_altloc (altloc_lines ):
176
181
flush_func = flush_func_single_residues
177
182
else :
178
183
flush_func = flush_func_multi_residues
179
184
180
- for __line in flush_func (selloc = selloc , altloc_lines = altloc_lines , res_per_loc = res_per_loc ):
185
+ for __line in flush_func (selloc = selloc , altloc_lines = altloc_lines ):
181
186
yield __line
182
187
188
+ altloc_lines = {}
189
+ res_per_loc = {}
190
+
183
191
# saves the line per altloc identifier
184
192
current_loc = altloc_lines .setdefault (altloc , [])
185
193
current_loc .append (line )
@@ -200,9 +208,12 @@ def select_altloc(fhandle, selloc=None, byocc=False):
200
208
flush_func = flush_func_single_residues
201
209
else :
202
210
flush_func = flush_func_multi_residues
203
- for __line in flush_func (selloc = selloc , altloc_lines = altloc_lines , res_per_loc = res_per_loc ):
211
+ for __line in flush_func (selloc = selloc , altloc_lines = altloc_lines ):
204
212
yield __line
205
213
214
+ altloc_lines = {}
215
+ res_per_loc = {}
216
+
206
217
prev_altloc = ''
207
218
prev_resname = ''
208
219
prev_resnum = ''
@@ -224,9 +235,12 @@ def select_altloc(fhandle, selloc=None, byocc=False):
224
235
else :
225
236
flush_func = flush_func_multi_residues
226
237
227
- for __line in flush_func (selloc = selloc , altloc_lines = altloc_lines , res_per_loc = res_per_loc ):
238
+ for __line in flush_func (selloc = selloc , altloc_lines = altloc_lines ):
228
239
yield __line
229
240
241
+ altloc_lines = []
242
+ res_per_loc = {}
243
+
230
244
231
245
def is_another_altloc_group (
232
246
altloc ,
@@ -238,7 +252,7 @@ def is_another_altloc_group(
238
252
altloc_lines ,
239
253
rploc ,
240
254
):
241
- """Detect if current line because to another altloc group."""
255
+ """Detect if current line belongs to a new altloc group."""
242
256
a0 = prev_altloc
243
257
a1 = altloc
244
258
ra0 = prev_resname
@@ -266,7 +280,7 @@ def is_another_altloc_group(
266
280
return is_another
267
281
268
282
269
- def flush_resloc (selloc , altloc_lines , res_per_loc ):
283
+ def flush_resloc (selloc , altloc_lines ):
270
284
"""Flush the captured altloc lines."""
271
285
# only the selected altloc is yielded
272
286
if selloc in altloc_lines :
@@ -280,12 +294,8 @@ def flush_resloc(selloc, altloc_lines, res_per_loc):
280
294
for line2flush in lines2flush :
281
295
yield line2flush
282
296
283
- # clears the altloc group dictionary. Ready for the next one!
284
- altloc_lines .clear ()
285
- res_per_loc .clear ()
286
297
287
-
288
- def flush_resloc_occ (altloc_lines , res_per_loc , ** kw ):
298
+ def flush_resloc_occ (altloc_lines , ** kw ):
289
299
"""Flush the captured altloc lines by highest occupancy."""
290
300
# only the selected altloc is yielded
291
301
highest = 0.00
@@ -303,30 +313,21 @@ def flush_resloc_occ(altloc_lines, res_per_loc, **kw):
303
313
for line2flush in altloc_lines [altloc ]:
304
314
yield line2flush [:16 ] + ' ' + line2flush [17 :]
305
315
306
- # clears the altloc group dictionary. Ready for the next one!
307
- altloc_lines .clear ()
308
- res_per_loc .clear ()
309
-
310
316
311
- def flush_resloc_id_same_residue (selloc , altloc_lines , res_per_loc ):
317
+ def flush_resloc_id_same_residue (selloc , altloc_lines ):
312
318
"""Flush altloc if altloc are atoms in the same residue - by ID."""
313
319
# places all lines in a single list
314
- all_lines = []
315
- for altloc , lines in altloc_lines .items ():
316
- all_lines .extend (lines )
317
-
318
- # organize by atoms
319
- atoms = {}
320
- for line in all_lines :
321
- atom_number = int (line [6 :11 ])
322
- atom = line [12 :16 ]
323
- alist = atoms .setdefault ((atom_number , atom ), [])
324
- alist .append (line )
320
+ sorted_atoms = _get_sort_atoms (altloc_lines )
325
321
326
- sorted_atoms = sorted (list (atoms .items ()), key = lambda x : x [0 ][0 ])
322
+ for atom , linet in sorted_atoms :
323
+ to_yield = []
324
+ # remember linet is a tuple, where the first item is the atom number
325
+ lines = linet [1 ]
327
326
328
- to_yield = []
329
- for atom , lines in sorted_atoms :
327
+ # here we don't need to care about anisou lines as in
328
+ # `flush_resloc_occ_same_residue` because ATOM/HETATM and ANISOU
329
+ # are already sorted by definition and lines are yielded from the
330
+ # altloc record
330
331
for line in lines :
331
332
if line [16 ] == selloc :
332
333
to_yield .append (line )
@@ -338,41 +339,69 @@ def flush_resloc_id_same_residue(selloc, altloc_lines, res_per_loc):
338
339
for line in lines :
339
340
yield line
340
341
341
- altloc_lines .clear ()
342
- res_per_loc .clear ()
343
342
344
-
345
- def flush_resloc_occ_same_residue (altloc_lines , res_per_loc , ** kw ):
343
+ def flush_resloc_occ_same_residue (altloc_lines , ** kw ):
346
344
"""Flush altloc if altloc are atoms in the same residue - by occ."""
347
- # places all lines in a single list
345
+ sorted_atoms = _get_sort_atoms (altloc_lines )
346
+
347
+ for atom , linest in sorted_atoms :
348
+ lines = linest [1 ]
349
+
350
+ atom_lines = [l for l in lines if l .startswith (("ATOM" , "HETATM" ))]
351
+ anisou_lines = [l for l in lines if l .startswith (("ANISOU" ))]
352
+
353
+ if anisou_lines :
354
+ new = []
355
+
356
+ if len (atom_lines ) != len (anisou_lines ):
357
+ emsg = (
358
+ "There is an error with this PDB. "
359
+ "We expect one ANISOU line per ATOM/HETATM lines. "
360
+ "But the number of ATOM/HETATM and ANISOU lines differ."
361
+ )
362
+ raise ValueError (emsg )
363
+
364
+ for _a , _b in zip (atom_lines , anisou_lines ):
365
+ new .append ((_a , _b ))
366
+
367
+ new .sort (key = lambda x : float (x [0 ][54 :60 ]), reverse = True )
368
+
369
+ # ATOM/HETATM
370
+ yield new [0 ][0 ][:16 ] + ' ' + new [0 ][0 ][17 :]
371
+ # ANISOU
372
+ yield new [0 ][1 ][:16 ] + ' ' + new [0 ][1 ][17 :]
373
+
374
+ else :
375
+ atom_lines .sort (key = lambda x : float (x [54 :60 ]), reverse = True )
376
+ yield atom_lines [0 ][:16 ] + ' ' + atom_lines [0 ][17 :]
377
+
378
+
379
+ def _get_sort_atoms (altloc_lines ):
380
+ # this function is used by both:
381
+ # flush_resloc_occ_same_residue
382
+ # flush_resloc_id_same_residue
348
383
all_lines = []
349
384
for altloc , lines in altloc_lines .items ():
350
385
all_lines .extend (lines )
351
386
352
387
# organize by atoms
353
388
atoms = {}
389
+ # each key uniquely identifies an atom (residue number, residue name, atom name, chain ID)
354
390
for line in all_lines :
391
+ res_number = int (line [22 :26 ])
392
+ res_name = line [17 :20 ].strip ()
393
+ atom_name = line [12 :16 ]
355
394
atom_number = int (line [6 :11 ])
356
- atom = line [12 :16 ]
357
- alist = atoms .setdefault ((atom_number , atom ), [])
358
- alist .append (line )
359
-
360
- sorted_atoms = sorted (list (atoms .items ()), key = lambda x : x [0 ][0 ])
361
-
362
- A = {
363
- 'ATOM' : 1 ,
364
- 'HETA' : 1 ,
365
- 'ANIS' : 0 ,
366
- }
367
-
368
- for atom , lines in sorted_atoms :
369
- lines .sort (key = lambda x : (A [x [:4 ]], float (x [54 :60 ])), reverse = True )
370
- yield lines [0 ][:16 ] + ' ' + lines [0 ][17 :]
371
- if lines [1 :] and lines [1 ].startswith ('ANISOU' ):
372
- yield lines [1 ][:16 ] + ' ' + lines [1 ][17 :]
373
-
374
- altloc_lines .clear ()
375
- res_per_loc .clear ()
395
+ chain_id = line [21 ]
396
+ key = (res_number , res_name , atom_name , chain_id )
397
+ # the atom number is saved so that the original order can be kept
398
+ alist = atoms .setdefault (key , (atom_number , []))
399
+ alist [1 ].append (line )
400
+
401
+ # entries at this point are not sorted. Sorts entries by residue
402
+ # number followed by atom number
403
+ sorted_atoms = sorted (list (atoms .items ()), key = lambda x : (x [0 ][0 ], x [1 ][0 ]))
404
+ return sorted_atoms
376
405
377
406
378
407
def all_same_residue (altloc_lines ):
0 commit comments