-
Notifications
You must be signed in to change notification settings - Fork 0
/
vcf_to_table.py
executable file
·118 lines (101 loc) · 4.39 KB
/
vcf_to_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
"""convert a vcf to a sample-per-row tsv table.
simple implementation only really intended to work on SNPs.
"""
import sys
import os
import errno
import time
import argparse
import logging
from collections import OrderedDict
import vcf
MAX_LOGGING_LEVEL = logging.CRITICAL
DEFAULT_LOGGING_LEVEL = logging.INFO
def setup_logger(verbose_level):
fmt=('%(levelname)s %(asctime)s [%(module)s:%(lineno)s %(funcName)s] :: '
'%(message)s')
logging.basicConfig(format=fmt, level=max((0, min((MAX_LOGGING_LEVEL,
DEFAULT_LOGGING_LEVEL-(verbose_level*10))))))
def Main(argv=None):
tic_total = time.time()
#parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser = argparse.ArgumentParser()
parser.add_argument('vcf', nargs='?', metavar='VCF', type=argparse.FileType('r'),
default='-',
help="VCF file to convert. '-' for stdin (default).")
parser.add_argument('-d', '--min-depth', type=int, default=0,
help="filter calls made with less than this FMT/DP (high-quality reads)")
parser.add_argument('-c', '--report-counts', action='store_true',
help="Output supporting read counts")
parser.add_argument('-F', '--soft-filter', action='store_true',
help="instead of dropping calls failing filter, just prefix '?' to them")
parser.add_argument('-R', '--no-report-ref', action='store_true',
help="Do not report the reference as first data row (and ALT as second row).")
parser.add_argument('-v', '--verbose', action='count', default=0,
help="increase logging verbosity")
parser.add_argument('-q', '--quiet', action='count', default=0,
help="decrease logging verbosity")
args = parser.parse_args()
setup_logger(verbose_level=args.verbose-args.quiet)
logging.info("vcf = "+str(args.vcf))
logging.info("min_depth = "+str(args.min_depth))
vcf_reader = vcf.Reader(args.vcf)
samples = vcf_reader.samples
dat = []
pos = []
ref_call = []
alt_call = []
for record in vcf_reader:
logging.info(str(record))
pos.append(str(record.CHROM)+':'+str(record.POS))
d = []
ref_call.append(record.REF)
alt_call.append(','.join((str(x) for x in record.ALT)))
for sample in samples:
c = record.genotype(sample).gt_bases
if ( c is None or
record.genotype(sample)['DP'] < args.min_depth ):
if args.soft_filter:
#c = '?'+''.join(sorted(str(c).split('/')))
c = '?'+str(c)
else:
c = '?'
#c = '?DP='+str(record.genotype(sample)['DP'])
else:
c = str(c)
#c = ''.join(sorted(str(c).split('/')))
if args.report_counts:
c += '\t'
c += str(record.genotype(sample)['RO'])
c += ','
#print(len(record.ALT), str(record.genotype(sample)['AO']))
try:
c += ','.join((str(_) for _ in record.genotype(sample)['AO']))
except TypeError:
c += str(record.genotype(sample)['AO'])
d.append(c)
dat.append(d)
print('# file:', args.vcf.name)
print('# min_depth:', args.min_depth)
if args.report_counts:
print('', *[_2 for _1 in zip(pos,['RO,AO']*len(pos)) for _2 in _1], sep='\t')
if not args.no_report_ref:
print('REF', end='\t')
print(*ref_call, sep='\t\t')
print('ALT', end='\t')
print(*alt_call, sep='\t\t')
for i,s in enumerate(samples):
print(s, *[dat[j][i] for j in range(len(pos))], sep='\t')
else:
print('', *pos, sep='\t')
if not args.no_report_ref:
print('REF', *ref_call, sep='\t')
print('ALT', *alt_call, sep='\t')
for i,s in enumerate(samples):
print(s, *[dat[j][i] for j in range(len(pos))], sep='\t')
logging.info("Done: {:.2f} sec elapsed".format(time.time()-tic_total))
#########################################################################
# Main loop hook... if run as script run main, else this is just a module
if __name__ == "__main__":
sys.exit(Main(argv=None))