-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathseq_alignreads.R
76 lines (62 loc) · 2.79 KB
/
seq_alignreads.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Function: seq_alignreads.R
# License: GPLv3 or later
# Modification date: 07 Mar 2021
# Written by: Yuri Tani Utsunomiya
# Contact: [email protected]
# Description: read alignment against reference genome
# Usage: Rscript seq_alignreads.R [refgenome] [readsfile] [ncores]
# Libraries ---------------------------------------------------------------------------------------
library(parallel)
# Programs ----------------------------------------------------------------------------------------
bwa <- "/usr/bin/bwa"
samtools <- "/usr/bin/samtools"
picard <- "/usr/bin/picard.jar"
dumpfolder <- "/media/TRI_storage/DUMP"
# User arguments ----------------------------------------------------------------------------------
argv <- commandArgs(T)
refgenome <- argv[1]
readfile <- argv[2]
ncores <- argv[3]
# Get readfile ------------------------------------------------------------------------------------
readfile <- read.table(file = readfile, header = T, stringsAsFactors = F)
readfile$out <- gsub(pattern = "\\.bam$", replacement = "", x = readfile$out)
# Run Pipeline ------------------------------------------------------------------------------------
for(i in 1:nrow(readfile)){
# Time stamp
timestart <- Sys.time()
# Perform alignments
command <- paste(bwa, "mem -t", ncores, refgenome, readfile$R1[i], readfile$R2[i])
command <- paste0(command, " | ", samtools, " view -o ",
readfile$out[i], "_unsorted_unmarked.bam -")
system(command)
# Sort alignments
command <- paste0(samtools," sort ", readfile$out[i], "_unsorted_unmarked.bam > ",
readfile$out[i], "_unmarked.bam")
system(command)
# ReadGroup
name <- basename(readfile$out[i])
command <- paste0("java -Xmx80g -jar ",picard," AddOrReplaceReadGroups INPUT=",readfile$out[i],"_unmarked.bam",
" OUTPUT=",readfile$out[i],"_unmarkedRG.bam"," RGID=",name,
" RGLB=",name," RGPL=Illumina RGPU=collapsed RGSM=",name," VALIDATION_STRINGENCY=LENIENT")
system(command)
# Mark duplicates
command <- paste0("java -Xmx80g -jar ",picard, " MarkDuplicates ",
" VALIDATION_STRINGENCY=LENIENT",
" TMP_DIR=", dumpfolder,
" ASSUME_SORTED=true REMOVE_DUPLICATES=false",
" INPUT=", readfile$out[i], "_unmarkedRG.bam",
" METRICS_FILE=", readfile$out[i], "_marked_dup_metrics.txt",
" OUTPUT=", readfile$out[i], ".bam")
system(command)
# Index bam
command <- paste0(samtools, " index ", readfile$out[i], ".bam")
system(command)
# Run flagstats
command <- paste0(samtools, " flagstat ", readfile$out[i], ".bam > ", readfile$out[i], ".flagstat")
system(command)
# Time stamp
timeend <- Sys.time()
timestart
timeend
timeend - timestart
}