-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.nf
executable file
·99 lines (66 loc) · 1.85 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env nextflow
// Pipeline parameters. Both are required; the empty defaults only exist so the
// names are declared and can be overridden from the command line or a config.
//   --input   tab-separated table with a header row containing a 'Run_s' column
//   --output  directory that receives the FASTQ files
params.input = ""
params.output = ""

// Fail early with a clear message instead of letting file("") / splitCsv
// produce an obscure error (or silently write to an unintended location).
if (!params.input) {
    exit 1, "Missing required parameter: --input (tab-separated table with a 'Run_s' column)"
}
if (!params.output) {
    exit 1, "Missing required parameter: --output (directory for the FASTQ files)"
}

outputDir = file(params.output)

// Emit the value of the 'Run_s' column of every row of the input table.
sraIds = Channel.create()
Channel
    .from(file(params.input))
    .splitCsv(sep: '\t', header: true)
    .map { row -> row.Run_s }
    .into(sraIds)
// Queries the ENA file report for each run accession and prints, on stdout,
// either the accession itself (the report contained at least one line after
// the first) or the sentinel string "not_found" (nothing was left).
// The printed value is what flows into `testedSraIds`.
//
// NOTE(review): the script carries its own `#!/bin/bash` line and does not set
// `pipefail`, so the status of `wget ... | tail` is the status of `tail`. A
// failed download therefore looks the same as an empty report and is reported
// as "not_found"; the retry settings below would then never be triggered by a
// network error. Confirm whether that is intended.
// NOTE(review): the report is fetched over plain http from a fixed URL -
// verify that this endpoint is still served by ENA.
process filter {
errorStrategy 'retry'
maxRetries 5
maxErrors 50000
// Label each task with the accession it is checking.
tag { id + '_filter'}
input:
val id from sraIds
output:
stdout out into testedSraIds
// `shell:` with a single-quoted block: !{id} is substituted by Nextflow, while
// $READ_RUN and $(...) are left for bash. `tail -n +2` drops the first line of
// the response before the emptiness test; printf is used so that no trailing
// newline is appended to the emitted value.
shell:
'''
#!/bin/bash
READ_RUN=$(wget -qO- 'http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=!{id}&result=read_run' | tail -n +2)
if [[ -z "$READ_RUN" ]]; then
printf "not_found"
else
printf "!{id}"
fi
'''
}
// Keep only real accessions: the `filter` process prints the sentinel
// "not_found" for runs it could not find, and those are dropped here.
filteredSraIds = Channel.create()
testedSraIds
    .filter { accession -> accession != "not_found" }
    .into(filteredSraIds)
// Downloads the reads of one run with fastq-dump, writing gzip-compressed FASTQ
// files directly into the shared output directory (not the task work dir).
// The accession is passed through unchanged to `fastqIds` once the command
// has finished successfully.
//
// Reads params.SRA_TOOLKIT_DIR (directory containing fastq-dump) and
// params.READ_LENGTH (value for --minReadLen); neither is given a default in
// this script, so both are expected to come from a config file or the
// command line.
process fetchSRA {
    tag { id + '_fastq_dump' }

    // Retry a failing download up to 5 times before giving up.
    errorStrategy 'retry'
    maxRetries 5
    maxErrors 50000

    input:
    val id from filteredSraIds

    output:
    val id into fastqIds

    script:
    // The output directory is single-quoted (as the paths in seqPurge are) so
    // that a path containing whitespace reaches fastq-dump as one argument.
    """
    ${params.SRA_TOOLKIT_DIR}/fastq-dump --readids --gzip --minReadLen ${params.READ_LENGTH} --split-3 ${id} -O '${outputDir}'
    """
}
// Forward only the runs for which exactly two files matching
// <output>/<run>_?.fastq.gz exist after the download step.
existingFastqSraIds = Channel.create()
fastqIds
    .filter { runId ->
        def matchingFastqs = file(params.output + "/" + runId + "_?.fastq.gz", glob: true)
        matchingFastqs.size() == 2
    }
    .into(existingFastqSraIds)
// Runs the tool configured in params.SEQPURGE on the _1/_2 FASTQ pair of one
// run. Inputs are read from, and the three outputs written to, the shared
// output directory; the run id is emitted on `result` when the command
// succeeds.
process seqPurge {
    tag { fileId + '_seqPurge' }
    cpus 1

    errorStrategy 'retry'
    maxRetries 5
    maxErrors 50000

    input:
    val fileId from existingFastqSraIds

    output:
    val fileId into result

    script:
    // Common "<output dir>/<run id>" prefix shared by every file argument.
    def prefix = "${outputDir}/${fileId}"
    """
    ${params.SEQPURGE} -in1 '${prefix}_1.fastq.gz' -in2 '${prefix}_2.fastq.gz' -out1 '${prefix}_1.seqpurge.fastq.gz' -out2 '${prefix}_2.seqpurge.fastq.gz' -out3 '${prefix}_3.seqpurge'
    """
}
// After seqPurge has finished for a run, remove that run's input FASTQ files
// (<output dir>/<run>_?.fastq.gz). The single-character '?' wildcard does not
// match the '*.seqpurge.fastq.gz' outputs, so those are kept.
//
// Two fixes over the previous one-liner:
//  - `delete` must be invoked as a method; without parentheses Groovy treats
//    it as a property access and no file is removed.
//  - the pattern is resolved inside the output directory, which is where the
//    download step writes the files, rather than relative to the launch dir.
result.subscribe { String fileId ->
    def rawFastqs = file("${outputDir}/${fileId}_?.fastq.gz", glob: true)
    // file() yields a list for glob patterns; tolerate a single path as well.
    def targets = (rawFastqs instanceof Collection) ? rawFastqs : [rawFastqs]
    targets.each { fastq -> fastq.delete() }
}