Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pgs subworkflow #8

Merged
merged 6 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,3 @@ ENV JAVA_TOOL_OPTIONS="-Djdk.lang.Process.launchMechanism=vfork"

COPY files/bin/trace /usr/bin/.
COPY files/bin/vcf2geno /usr/bin/.



2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ params.minimac_window = 100000
### Build docker image locally

```
docker build -t genepi/nf-imputationserver:latest .
docker build -t genepi/imputation-docker:latest .
```

### Run testcases
Expand Down
14 changes: 8 additions & 6 deletions modules/local/ancestry_estimation/execute_trace.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ process EXECUTE_TRACE {
batch_name = "batch_${samples.baseName}"

"""
# extract samples form vcf
bcftools view --samples-file ${samples} -Oz ${vcf_file} > ${batch_name}.vcf.gz
tabix ${batch_name}.vcf.gz
tabix ${vcf_file}

# convert to geno. TODO: check peopleIncludeFile option instead of bcftools.
vcf2geno --inVcf ${batch_name}.vcf.gz --rangeFile ${reference_range} --out ${batch_name}
# convert to geno
vcf2geno \
--inVcf ${vcf_file} \
--rangeFile ${reference_range} \
--peopleIncludeFile ${samples} \
--out ${batch_name}

# write config file for trace
echo "GENO_FILE ${reference_geno}" > trace.config
Expand All @@ -34,7 +36,7 @@ process EXECUTE_TRACE {
echo "OUT_PREFIX ${batch_name}" >> trace.config

# execute trace with config file
trace -p trace.config > trace.log
trace -p trace.config
"""

}
2 changes: 0 additions & 2 deletions modules/local/ancestry_estimation/prepare_trace.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ import groovy.json.JsonOutput

process PREPARE_TRACE {

publishDir params.output, mode: 'copy'

input:
path(vcf_files)
path(reference_sites)
Expand Down
2 changes: 1 addition & 1 deletion modules/local/ancestry_estimation/visualize_ancestry.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ process VISUALIZE_ANCESTRY {
),
intermediates_dir='\$PWD',
knit_root_dir='\$PWD',
output_file='\$PWD/08-estimated-populations.html'
output_file='\$PWD/estimated-populations.html'
)"
"""

Expand Down
2 changes: 1 addition & 1 deletion modules/local/compression/compression_encryption_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ process COMPRESSION_ENCRYPTION_VCF {
tag "Merge Chromosome ${chr}"

input:
tuple val(chr), path(imputed_vcf_data), path(imputed_info), path(imputed_meta_vcf_data)
tuple val(chr), val(start), val(end), path(imputed_vcf_data), path(imputed_info), path(imputed_meta_vcf_data)

output:
path("*.zip"), emit: encrypted_file
Expand Down
2 changes: 1 addition & 1 deletion modules/local/imputation/minimac4.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process MINIMAC4 {
path minimac_map

output:
tuple val(chr), file("*.dose.vcf.gz"), file("*.info"), file("*.empiricalDose.vcf.gz"), emit: imputed_chunks
tuple val(chr), val(start), val(end), file("*.dose.vcf.gz"), file("*.info"), file("*.empiricalDose.vcf.gz"), emit: imputed_chunks

script:
def map = minimac_map ? '--referenceEstimates --map ' + minimac_map : ''
Expand Down
30 changes: 30 additions & 0 deletions modules/local/pgs_calculation/calculate_chunks.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
process CALCULATE_CHUNKS {

    tag "${vcf_file}"

    input:
    tuple val(chr), val(start), val(end), file(vcf_file), file(info_file), file(empirical_vcf_file)
    path(scores)

    output:
    path "*.txt", emit: scores_chunks
    path "*.info", emit: info_chunks

    script:
    // Unique chunk identifier: source VCF name plus the genomic interval.
    name = "${vcf_file.baseName}_${chr}_${start}_${end}"
    // Optional flag, emitted only when strand-flip fixing is enabled in params.
    def strand_flips_arg = params.pgs.fix_strand_flips ? "--fix-strand-flips" : ""

    // Apply all reference score files to this imputed chunk; pgs-calc writes
    // per-chunk score values (*.scores.txt) and coverage info (*.scores.info).
    """

    pgs-calc apply ${vcf_file} \
    --ref ${scores.join(',')} \
    --out ${name}.scores.txt \
    --info ${name}.scores.info \
    --start ${start} \
    --end ${end} \
    ${strand_flips_arg} \
    --min-r2 ${params.pgs.min_r2} \
    --no-ansi

    """

}
34 changes: 34 additions & 0 deletions modules/local/pgs_calculation/create_html_report.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
process CREATE_HTML_REPORT {

    publishDir params.output, mode: 'copy'

    input:
    path(merged_score)
    path(merged_info)
    path(scores_meta)
    path(estimated_ancestry)

    output:
    path "*.html", emit: html_report
    path "*.coverage.txt", emit: coverage_report

    script:
    // Per-sample ancestry assignments exist only when ancestry estimation ran;
    // otherwise the --samples option is omitted entirely.
    samples_args = params.ancestry.enabled ? "--samples ${estimated_ancestry}" : ""

    // First call renders the interactive HTML report, second call renders the
    // same data with the plain-text template as a coverage summary.
    """
    pgs-calc report \
    --data ${merged_score} \
    --info ${merged_info} \
    --meta ${scores_meta} \
    $samples_args \
    --out scores.html

    pgs-calc report \
    --data ${merged_score} \
    --info ${merged_info} \
    --meta ${scores_meta} \
    --template txt \
    --out scores.coverage.txt
    """

}
18 changes: 18 additions & 0 deletions modules/local/pgs_calculation/merge_chunks_infos.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Merges per-chunk pgs-calc coverage/info files into a single project-wide
// .info file and publishes it to the output directory.
process MERGE_CHUNKS_INFOS {

    publishDir params.output, mode: 'copy'

    input:
    path(report_chunks)

    output:
    path "*.info", emit: merged_info_files

    // Explicit script: label for consistency with the other pgs_calculation
    // processes (the block was previously an implicit script section).
    script:
    """

    pgs-calc merge-info ${report_chunks} \
    --out ${params.project}.info

    """

}
18 changes: 18 additions & 0 deletions modules/local/pgs_calculation/merge_chunks_scores.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Merges per-chunk pgs-calc score files into a single project-wide
// .scores.txt file and publishes it to the output directory.
process MERGE_CHUNKS_SCORES {

    publishDir params.output, mode: 'copy'

    input:
    path(score_chunks)

    output:
    path "*.txt", emit: merged_score_files

    // Explicit script: label for consistency with the other pgs_calculation
    // processes (the block was previously an implicit script section).
    script:
    """

    pgs-calc merge-score ${score_chunks} \
    --out ${params.project}.scores.txt

    """

}
12 changes: 11 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

manifest {
name = 'imputationserver2'
version = '2.0.0-beta1'
Expand Down Expand Up @@ -52,6 +51,17 @@ params {
threshold: 0.75
]

pgs = [
enabled: false,
min_r2: 0,
fix_strand_flips: false
]

pgscatalog = [
scores: "",
meta: ""
]

// header information
r2Filter = 0
password = null
Expand Down
3 changes: 3 additions & 0 deletions workflows/ancestry_estimation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,7 @@ workflow ANCESTRY_ESTIMATION {
references.first{it.getExtension()=="samples"}
)

emit:
estimated_ancestry = ESTIMATE_ANCESTRY.out.populations.collect()

}
2 changes: 1 addition & 1 deletion workflows/imputation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ workflow IMPUTATION {
)

emit:
imputed_chunks = MINIMAC4.out.imputed_chunks.groupTuple()
imputed_chunks = MINIMAC4.out.imputed_chunks
}
11 changes: 10 additions & 1 deletion workflows/imputationserver.nf
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ workflow IMPUTATIONSERVER {
)

ENCRYPTION(
IMPUTATION.out
IMPUTATION.out.groupTuple()
)
}
}
Expand All @@ -88,6 +88,15 @@ workflow IMPUTATIONSERVER {
if (params.ancestry.enabled){
ANCESTRY_ESTIMATION()
}

if (params.pgs.enabled) {

PGS_CALCULATION(
IMPUTATION.out,
params.ancestry.enabled ? ANCESTRY_ESTIMATION.out : Channel.empty()
)

}

}

Expand Down
36 changes: 36 additions & 0 deletions workflows/pgs_calculation.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
include { CALCULATE_CHUNKS } from '../modules/local/pgs_calculation/calculate_chunks'
include { MERGE_CHUNKS_INFOS } from '../modules/local/pgs_calculation/merge_chunks_infos'
include { MERGE_CHUNKS_SCORES } from '../modules/local/pgs_calculation/merge_chunks_scores'
include { CREATE_HTML_REPORT } from '../modules/local/pgs_calculation/create_html_report'

// Polygenic-score calculation subworkflow: scores each imputed chunk against
// the configured PGS Catalog score files, merges the per-chunk results and
// renders the final HTML / coverage reports.
workflow PGS_CALCULATION {

    take:
    imputed_chunks      // tuples of (chr, start, end, dose vcf, info, empirical dose vcf)
    estimated_ancestry  // per-sample ancestry files, or an empty channel when ancestry is disabled


    main:
    // All score files matching the configured glob, staged together as one list.
    scores = Channel.fromPath(params.pgscatalog.scores, checkIfExists:true).collect()

    // Score every imputed chunk independently (parallel per chunk).
    CALCULATE_CHUNKS(
    imputed_chunks,
    scores
    )

    // Merge all per-chunk score files into one project-wide score file.
    MERGE_CHUNKS_SCORES(
    CALCULATE_CHUNKS.out.scores_chunks.collect()
    )

    // Merge all per-chunk info files into one project-wide info file.
    MERGE_CHUNKS_INFOS(
    CALCULATE_CHUNKS.out.info_chunks.collect()
    )

    // ifEmpty([]) keeps the report step runnable when ancestry estimation was
    // disabled and estimated_ancestry is an empty channel.
    CREATE_HTML_REPORT(
    MERGE_CHUNKS_SCORES.out.collect(),
    MERGE_CHUNKS_INFOS.out.collect(),
    file(params.pgscatalog.meta, checkIfExists:true),
    estimated_ancestry.collect().ifEmpty([])
    )

}