-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgenerate_Dstat_parfile.py
95 lines (76 loc) · 3.28 KB
/
generate_Dstat_parfile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 20 10:33:02 2018
@author: YudongCai
@Email: [email protected]
"""
import os
import click
from itertools import combinations
def load_one_column(file):
out_list = []
with open(file) as f:
for line in f:
out_list.append(line.strip())
try:
out_list.remove("")
except ValueError:
pass
return out_list
def load_popIDs(pop):
if os.path.isfile(pop):
print(f'load pop file: {pop}')
outlist = load_one_column(pop)
print(f'{len(outlist)} popIDs loaded')
return outlist
else:
print(f'popID: {pop} loaded')
return [pop]
def chunks(l, n):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(l), n):
yield l[i:i + n]
def generate_popfile(popids, outprefix, nquadrapules):
combs = []
for pop1 in popids['pop1']:
for pop2 in popids['pop2']:
for pop3 in popids['pop3']:
for pop4 in popids['pop4']:
if len(set([pop1, pop2, pop3, pop4])) == 4:
combs.append(f'{pop1}\t{pop2}\t{pop3}\t{pop4}')
else:
print(f'ID repeated in ({pop1}, {pop2}, {pop3}, {pop4})')
for nfile, comb in enumerate(chunks(combs, nquadrapules), 1):
with open(f'{outprefix}_{nfile}_popfile', 'w') as f:
f.write('\n'.join(comb))
return nfile
def generate_parfile(genotype, snp, indiv, outprefix, nfile):
for i in range(1, nfile+1):
with open(f'{outprefix}_{i}_par', 'w') as f:
f.write(f'genotypename: {genotype}\nsnpname: {snp}\nindivname: {indiv}\npopfilename: {outprefix}_{i}_popfile')
def generate_shell(outprefix, nfile):
for i in range(1, nfile+1):
with open(f'{outprefix}_{i}.sh', 'w') as f:
f.write(f'/stor9000/apps/users/NWSUAF/2015051660/software/AdmixTools-master/src/qpDstat -p {outprefix}_{i}_par numchrom 29 > {outprefix}_{i}.out')
@click.command()
@click.option('--pop1', help='Pop1(W), popID in .ind file, or a file name contain several popIDs')
@click.option('--pop2', help='Pop2(X), popID in .ind file, or a file name contain several popIDs')
@click.option('--pop3', help='Pop3(Y), popID in .ind file, or a file name contain several popIDs')
@click.option('--pop4', help='Pop4(Z), popID in .ind file, or a file name contain several popIDs')
@click.option('--genotype', help='input genotype file name')
@click.option('--snp', help='input snp file name')
@click.option('--indiv', help='input indiv file name')
@click.option('--outprefix', help='output files prefix')
@click.option('--nquadrapules', help='number of quadrapules per file, default is 20', default=20, type=int)
def main(pop1, pop2, pop3, pop4, genotype, snp, indiv, outprefix, nquadrapules):
popids = {}
for i, pop in enumerate((pop1, pop2, pop3, pop4), 1):
print(f'Pop{i}:')
popids[f'pop{i}'] = load_popIDs(pop)
nfile = generate_popfile(popids, outprefix, nquadrapules)
print(f'chunck size: {nquadrapules}')
print(f'file number: {nfile}')
generate_parfile(genotype, snp, indiv, outprefix, nfile)
generate_shell(outprefix, nfile)
if __name__ == '__main__':
main()