-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheigenstrat_rename_snps.py
executable file
·41 lines (33 loc) · 1.97 KB
/
eigenstrat_rename_snps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python3
import sys,argparse
# Version string of this script; reported by the -v/--version option.
VERSION = "1.1.0"
def read_line(line, format):
    """Parse one line of a SNP file into a ``(position_key, snp_id)`` pair.

    The line is split on whitespace and only the first four columns are used:

    * ``"EIGENSTRAT"``: SNP id, chromosome, (ignored), position
    * ``"PLINK"``: chromosome, SNP id, (ignored), position

    Args:
        line: A single line of text from the SNP file.
        format: Column layout of the line, ``"EIGENSTRAT"`` or ``"PLINK"``.
            (The name shadows the builtin but is kept for keyword callers.)

    Returns:
        A tuple ``(chrom + "_" + pos, snp_id)``.

    Raises:
        ValueError: If ``format`` is not a supported layout, or if the line
            has fewer than four whitespace-separated fields.
    """
    fields = line.strip().split()
    if format == "EIGENSTRAT":
        id_col, chrom_col = 0, 1
    elif format == "PLINK":
        chrom_col, id_col = 0, 1
    else:
        # Previously an unknown format fell through both branches and failed
        # at the return statement with an UnboundLocalError.
        raise ValueError(
            "Unsupported SNP file format '{}': expected 'EIGENSTRAT' or 'PLINK'".format(format)
        )
    if len(fields) < 4:
        raise ValueError(
            "Expected at least 4 fields in SNP line, got {}: '{}'".format(len(fields), line.strip())
        )
    snp_id = fields[id_col]
    chrom = fields[chrom_col]
    pos = fields[3]
    return (chrom + "_" + pos, snp_id)
# Command-line interface: an input .snp file, a reference file carrying the
# desired SNP names, and the column layout of that reference file.
parser = argparse.ArgumentParser(
    description="Rename the SNPs in an eigenstrat snp file based on a reference eigenstrat or plink snp file. Uses the genetic coordinate of SNPs to determine identity.",
    usage="%(prog)s (-i <INPUT .SNP FILE>) (-n <NAMED SNP FILE>) [-f <NAMED FILE FORMAT>]",
)
# Sets the title of argparse's optional-arguments group (a private attribute).
parser._optionals.title = "Available options"
parser.add_argument(
    "-i",
    "--Input",
    required=True,
    metavar="<INPUT .SNP FILE>",
    type=str,
    help="The input snp file.",
)
parser.add_argument(
    "-n",
    "--Name_file",
    required=True,
    metavar="<NAMED SNP FILE>",
    type=str,
    help="An Eigenstrat or plink formatted snp file with the desired SNP names.",
)
parser.add_argument(
    "-f",
    "--Format",
    required=False,
    default="EIGENSTRAT",
    metavar="<NAMED FILE FORMAT>",
    type=str,
    help="The format of the desired snp name file. Can be either 'EIGENSTRAT' or 'PLINK' [default: EIGENSTRAT]",
)
parser.add_argument(
    "-v",
    "--version",
    action='version',
    version="%(prog)s {}".format(VERSION),
    help="Print the version and exit.",
)
args = parser.parse_args()

# Reject any name-file format other than the two supported layouts.
if args.Format != "PLINK" and args.Format != "EIGENSTRAT":
    raise ValueError("Invalid format specified for desired SNP name file: '{}'".format(args.Format))
## First, create an index of names for each position in the named snp file.
## Keys are "<chrom>_<pos>" strings as returned by read_line; if a position
## occurs more than once in the name file, the last occurrence wins.
names = {}
with open(args.Name_file) as name_file:
    for line in name_file:
        if not line.strip():
            # Blank lines carry no SNP and would otherwise fail to unpack.
            continue
        (pos_key, real_id) = read_line(line, args.Format)
        names[pos_key] = real_id

## Then read the input snp file and rename the snps that can be renamed.
## Columns are read as: SNP id, chromosome, dist, position; any further
## columns are passed through unchanged. Output is tab-separated on stdout.
with open(args.Input, 'r') as input_file:
    for line in input_file:
        fields = line.strip().split()
        if not fields:
            # Skip blank lines instead of crashing on the unpack below.
            continue
        (snp_id, chrom, dist, pos) = fields[:4]
        # SNPs whose position is absent from the name file keep their id.
        new_id = names.get(chrom + "_" + pos, snp_id)
        print(new_id, chrom, dist, pos, *fields[4:], sep="\t", file=sys.stdout)