File tree 2 files changed +34
-151
lines changed
2 files changed +34
-151
lines changed Original file line number Diff line number Diff line change @@ -304,6 +304,40 @@ def gc(self):
304
304
c = self .seq .count ('C' )
305
305
c += self .seq .count ('c' )
306
306
return (g + c ) / len (self .seq )
307
+
308
@property
def gc_strict(self):
    """Return the GC content of seq as a float, ignoring non-ACGT characters.

    Only the unambiguous bases A, C, G and T (upper or lower case) are
    kept; every other character is removed before counting, so it affects
    neither the numerator nor the denominator.

    Returns 0.0 when no unambiguous bases remain (empty or all-ambiguous
    sequence) instead of raising ZeroDivisionError.

    >>> x = Sequence(name='chr1', seq='NMRATCGTA')
    >>> y = round(x.gc_strict, 2)
    >>> y == 0.33
    True
    """
    # Filter first, then upper-case: upper-casing arbitrary text could turn
    # unrelated characters into base letters.
    bases = re.sub(r'[^ACGTacgt]', '', self.seq).upper()
    if not bases:
        return 0.0
    return (bases.count('G') + bases.count('C')) / len(bases)
322
+
323
@property
def gc_iupac(self):
    """Return the GC content of seq as a float, accounting for IUPAC ambiguity.

    Each IUPAC nucleotide code contributes the fraction of the bases it
    can stand for that are G or C:

    * S, C, G       -> 1
    * B, V          -> 0.67 (two of three possible bases)
    * M, R, Y, K    -> 0.5
    * H, D          -> 0.33 (one of three possible bases)
    * N             -> 0.25
    * A, T, W       -> 0

    Characters that are not IUPAC nucleotide codes are ignored. Codes are
    matched case-insensitively. Returns 0.0 when no IUPAC codes remain
    (for example an empty sequence) instead of raising ZeroDivisionError.

    >>> x = Sequence(name='chr1', seq='NMRATCGTA')
    >>> y = round(x.gc_iupac, 2)
    >>> y == 0.36
    True
    """
    from collections import Counter

    # Filter first, then upper-case, so lowercase codes are weighted the
    # same as uppercase ones and no unrelated character is promoted to a
    # code by case conversion.
    codes = re.sub(r'[^ACGTMRWSYKVHDBNacgtmrwsykvhdbn]', '', self.seq).upper()
    if not codes:
        return 0.0
    counts = Counter(codes)
    gc = counts['S'] + counts['C'] + counts['G']
    gc += 0.67 * (counts['B'] + counts['V'])
    gc += 0.5 * (counts['M'] + counts['R'] + counts['Y'] + counts['K'])
    gc += 0.33 * (counts['H'] + counts['D'])
    gc += 0.25 * counts['N']
    return gc / len(codes)
340
+
307
341
308
342
309
343
class IndexRecord (
Load Diff This file was deleted.
You can’t perform that action at this time.
0 commit comments