-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvcf2Loter.py
42 lines (31 loc) · 939 Bytes
/
vcf2Loter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 10 21:29:52 2020
@author: YudongCai
@Email: [email protected]
"""
import click
import allel
import numpy as np
def vcf2npy(vcffile, samples):
    """Read genotype calls from a VCF and return them as a haplotype matrix.

    Parameters
    ----------
    vcffile
        Path of the VCF file, passed straight to ``allel.read_vcf``.
    samples
        Sample names to extract, passed as ``samples=`` to ``allel.read_vcf``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(2 * n_samples, n_variants)``. Row ``2*i``
        holds the first allele of sample ``i`` at every variant and row
        ``2*i + 1`` holds the second allele.

    Raises
    ------
    ValueError
        If ``allel.read_vcf`` returns no data (e.g. the VCF has no variants).

    Notes
    -----
    Negative allele codes wrap around when stored as ``uint8``, so a missing
    call encoded as -1 ends up as 255.
    NOTE(review): row order follows whatever sample order ``allel.read_vcf``
    returns, which may be the VCF header order rather than the order of
    ``samples`` -- confirm before relying on it.
    """
    # Only the genotype calls are used below, so skip parsing other fields.
    callset = allel.read_vcf(vcffile, samples=samples,
                             fields=['calldata/GT'])
    if callset is None:
        raise ValueError(f'no genotype data could be read from {vcffile!r}')

    genotypes = callset['calldata/GT']
    n_variants, n_samples = genotypes.shape[:2]

    # Allocate the result in its final dtype: this avoids a float64 scratch
    # matrix (8x the memory) and the platform-dependent float -> unsigned
    # cast of negative values that it would entail.
    mat_haplo = np.empty((2 * n_samples, n_variants), dtype=np.uint8)
    mat_haplo[::2] = genotypes[:, :, 0].T   # first allele of each sample
    mat_haplo[1::2] = genotypes[:, :, 1].T  # second allele of each sample
    return mat_haplo
# Command-line entry point: reads a sample list (one name per line), extracts
# those samples' haplotypes from the VCF via vcf2npy, and stores the resulting
# matrix with numpy.save. The docstring below is kept short on purpose because
# click prints it as the command's --help text.
@click.command()
@click.option('--vcffile', help='input VCF file')
@click.option('--samplelist', help='text file with one sample name per line')
@click.option('--outprefix',
              help='output file name (numpy.save appends .npy if missing)')
def main(vcffile, samplelist, outprefix):
    """
    convert vcf to Loter input
    """
    # Close the sample list deterministically and skip blank lines so that a
    # trailing newline does not turn into an empty sample name.
    with open(samplelist) as handle:
        stripped = (line.strip() for line in handle)
        samples = [name for name in stripped if name]

    mat_haplo = vcf2npy(vcffile, samples)
    np.save(f'{outprefix}', mat_haplo)
# Run the click command only when executed as a script, not when imported.
if __name__ == "__main__":
    main()