-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sfitz concat vcf #213
Sfitz concat vcf #213
Changes from 17 commits
d64ad13
c11fc68
06c4254
998703c
764d04f
9879500
515c12a
26fcfc9
8ba1b41
10817de
9b4bc69
269d016
db0133c
027f02c
18fe92b
836db28
8816004
3fab4c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
// Other processes after create_IndelCandidate_SAMtools will only run one at a time, so | ||
// Other processes will only run one at a time, so | ||
// we don't need to control their resources. | ||
|
||
process { | ||
|
@@ -78,4 +78,24 @@ process { | |
} | ||
} | ||
} | ||
withName: plot_VennDiagram_R { | ||
cpus = 2 | ||
memory = 5.GB | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can VennDiagram take 2 CPUs? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As I sort of mentioned in the PR description, these are just placeholders until I do a large BAM test run before the release. I will adjust these and the processes added in |
||
retry_strategy { | ||
memory { | ||
strategy = 'add' | ||
operand = 10.GB | ||
} | ||
} | ||
} | ||
withName: concat_VCFs_BCFtools { | ||
cpus = 2 | ||
memory = 5.GB | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like this process doesn't use 2 CPUs? |
||
retry_strategy { | ||
memory { | ||
strategy = 'add' | ||
operand = 10.GB | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,14 +14,16 @@ process intersect_VCFs_BCFtools { | |
pattern: "*.vcf.gz*" | ||
publishDir path: "${params.workflow_output_dir}/output", | ||
mode: "copy", | ||
pattern: "isec-2-or-more" | ||
pattern: "isec-2-or-more/*.txt", | ||
saveAs: { "${file(it).getParent().getName()}/${params.output_filename}_${file(it).getName()}" } | ||
publishDir path: "${params.workflow_output_dir}/output", | ||
mode: "copy", | ||
pattern: "isec-1-or-more/*.txt" | ||
pattern: "isec-1-or-more/*.txt", | ||
saveAs: { "${file(it).getParent().getName()}/${params.output_filename}_${file(it).getName()}" } | ||
publishDir path: "${params.workflow_log_output_dir}", | ||
mode: "copy", | ||
pattern: ".command.*", | ||
saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" } | ||
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } | ||
tyamaguchi-ucla marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
input: | ||
path vcfs | ||
|
@@ -33,8 +35,8 @@ process intersect_VCFs_BCFtools { | |
path "*.vcf.gz", emit: consensus_vcf | ||
path "*.vcf.gz.tbi", emit: consensus_idx | ||
path ".command.*" | ||
path "isec-2-or-more" | ||
path "isec-1-or-more", emit: isec_dir | ||
path "isec-2-or-more/*.txt" | ||
path "isec-1-or-more/*.txt", emit: isec | ||
|
||
script: | ||
vcf_list = vcfs.join(' ') | ||
|
@@ -58,11 +60,11 @@ process intersect_VCFs_BCFtools { | |
publishDir path: "${params.workflow_log_output_dir}", | ||
mode: "copy", | ||
pattern: ".command.*", | ||
saveAs: { "${task.process.replace(':', '/')}-${task.index}/log${file(it).getName()}" } | ||
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } | ||
|
||
input: | ||
path script_dir | ||
path isec_dir | ||
path isec | ||
|
||
output: | ||
path ".command.*" | ||
|
@@ -71,6 +73,36 @@ process intersect_VCFs_BCFtools { | |
script: | ||
""" | ||
set -euo pipefail | ||
Rscript ${script_dir}/plot-venn.R --isec_dir ${isec_dir} --outfile ${params.output_filename}_Venn-diagram.tiff | ||
Rscript ${script_dir}/plot-venn.R --isec_readme README.txt --isec_sites sites.txt --outfile ${params.output_filename}_Venn-diagram.tiff | ||
tyamaguchi-ucla marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
} | ||
|
||
process concat_VCFs_BCFtools { | ||
container params.docker_image_BCFtools | ||
publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", | ||
mode: "copy", | ||
pattern: "*concat.vcf", | ||
enabled: params.save_intermediate_files | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This intermediate file will be used by |
||
publishDir path: "${params.workflow_log_output_dir}", | ||
mode: "copy", | ||
pattern: ".command.*", | ||
saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } | ||
|
||
input: | ||
path vcfs | ||
path indices | ||
|
||
output: | ||
path "*concat.vcf", emit: concat_vcf | ||
path ".command.*" | ||
|
||
script: | ||
vcf_list = vcfs.join(' ') | ||
""" | ||
set -euo pipefail | ||
# BCFtools concat to create a single VCF with all nfiles +2 variants | ||
# output header is a uniquified concatenation of all headers | ||
# output `INFO` `FORMAT` `NORMAL` and `TUMOR` fields are from the first listed VCF that has the variant | ||
yashpatel6 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
bcftools concat --output-type v --output ${params.output_filename}_SNV-concat.vcf --allow-overlaps --rm-dups all ${vcf_list} | ||
yashpatel6 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I haven't done any runs with large samples since adding `plot_VennDiagram_R` or `concat_VCFs_BCFtools`, so these are just guesses. These two processes will run together, but only after everything else is done. I doubt they use much memory, so I don't think it matters much. The next PR, `add maf`, will add one more process and may be the last PR before release. With that I could test with large samples and look at memory as well as which processes will use more `cpus`.