Skip to content

Commit f773018

Browse files
committed
implemented change into __init__ Sequence class
1 parent 0ceef4b commit f773018

File tree

2 files changed

+34
-151
lines changed

2 files changed

+34
-151
lines changed

pyfaidx/__init__.py

+34
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,40 @@ def gc(self):
304304
c = self.seq.count('C')
305305
c += self.seq.count('c')
306306
return (g + c) / len(self.seq)
307+
308+
@property
def gc_strict(self):
    """ Return the GC content of seq as a float, ignoring non ACGT characters

    Only unambiguous bases (A, C, G, T in either case) contribute to the
    denominator. If the sequence contains no such bases (e.g. it is empty
    or consists solely of N), 0.0 is returned rather than raising
    ZeroDivisionError.

    >>> x = Sequence(name='chr1', seq='NMRATCGTA')
    >>> y = round(x.gc_strict, 2)
    >>> y == 0.33
    True
    """
    # Strip every character that is not an unambiguous base before counting,
    # so ambiguity codes and gaps do not dilute the fraction.
    trimmed = re.sub(r'[^ACGTacgt]', '', self.seq)
    if not trimmed:
        return 0.0
    g = trimmed.count('G') + trimmed.count('g')
    c = trimmed.count('C') + trimmed.count('c')
    return (g + c) / len(trimmed)
322+
323+
@property
def gc_iupac(self):
    """ Return the GC content of seq as a float, accounting for IUPAC ambiguity

    Each IUPAC nucleotide code contributes the fraction of the bases it can
    stand for that are G or C: S/C/G count 1, B/V count 0.67, M/R/Y/K count
    0.5, H/D count 0.33, N counts 0.25, and A/T/W count 0. Matching is
    case-insensitive. Characters outside the IUPAC nucleotide alphabet are
    ignored. If no IUPAC characters remain, 0.0 is returned rather than
    raising ZeroDivisionError.

    >>> x = Sequence(name='chr1', seq='NMRATCGTA')
    >>> y = round(x.gc_iupac, 2)
    >>> y == 0.36
    True
    """
    from collections import Counter

    trimmed = re.sub(r'[^ACGTMRWSYKVHDBNacgtmrwsykvhdbn]', '', self.seq)
    if not trimmed:
        return 0.0
    # Fold case before counting: the filter above keeps lowercase
    # (soft-masked) letters in the denominator, so they must also be
    # credited in the numerator.
    counts = Counter(trimmed.upper())
    gc = counts['S'] + counts['C'] + counts['G']
    gc += 0.67 * (counts['B'] + counts['V'])
    gc += 0.5 * (counts['M'] + counts['R'] + counts['Y'] + counts['K'])
    gc += 0.33 * (counts['H'] + counts['D'])
    gc += 0.25 * (counts['N'])
    return gc / len(trimmed)
340+
307341

308342

309343
class IndexRecord(

testenv.ipynb

-151
This file was deleted.

0 commit comments

Comments
 (0)