-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathvar_phase.R
65 lines (54 loc) · 2.34 KB
/
var_phase.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Function: var_phase.R
# License: GPLv3 or later
# Modification date: 11 May 2021
# Written by: Yuri Tani Utsunomiya
# Contact: [email protected]
# Description: phase variant data
# Usage: Rscript var_phase.R [bedfile] [autonum] [outfile] [ncores]
# Programs ----------------------------------------------------------------------------------------
plink <- "/usr/bin/plink1.9"
eagle <- "/usr/bin/eagle"
tabix <- "/usr/bin/tabix"
# User arguments ----------------------------------------------------------------------------------
argv <- commandArgs(T)
bedfile <- argv[1]
autonum <- as.integer(argv[2])
outfile <- argv[3]
ncores <- argv[4]
# Temporary file ID generator ---------------------------------------------------------------------
tmpfile <- paste0("tmp_", paste(sample(LETTERS, size = 10, replace = T), collapse=""))
tmpfile <- gsub(pattern = "\\.", replacement = "", paste0(tmpfile,as.numeric(Sys.time())))
# Build genetic map ---------------------------------------------------------------------------------------------
bim <- read.table(file = paste0(bedfile,".bim"), header = F, stringsAsFactors = F)
genmap <- bim[,c(1,4,4,4)]
genmap[,3] <- 1
genmap[,4] <- genmap[,4]/1e+6
colnames(genmap) <- c("chr","position","COMBINED_rate(cM/Mb)","Genetic_Map(cM)")
# Run phasing algorithm ---------------------------------------------------------------------------
for(i in 1:autonum){
# Make VCF file
command <- paste(plink, "--chr-set", autonum,
"--bfile", bedfile,
"--chr",i,
"--recode vcf-iid",
"--out", tmpfile)
system(command)
# Index with tabix
command <- paste0("cat ", tmpfile, ".vcf | bgzip > ", tmpfile, ".vcf.gz")
system(command)
command <- paste0(tabix, " -p vcf ", tmpfile, ".vcf.gz")
system(command)
# Export genetic map
write.table(x = genmap[which(genmap$chr == i),], file = paste0(tmpfile, "_genmap.txt"), quote = F,
sep = " ", row.names = F, col.names = T)
# Phasing
command <- paste0(eagle, " --vcf ", tmpfile, ".vcf.gz",
" --geneticMapFile=", tmpfile, "_genmap.txt",
" --chromX ", autonum+1,
" --numThreads ", ncores,
" --outPrefix=", outfile, "_chr", i)
system(command)
# Clear directory
command <- paste0("rm ", tmpfile, "*")
system(command)
}