Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions modules/local/compression/compression_encryption_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,41 @@ process COMPRESSION_ENCRYPTION_VCF {
def zip_name = "chr_${chr}.zip"
def info_name = "${prefix}.info"
def aes = params.encryption.aes ? "-mem=AES256" : ""

def panel_version = RefPanelUtil.loadFromFile(params.refpanel_yaml).id
def phasing_version = "no_phasing"
switch(params.phasing) {
case "eagle": phasing_version ="${params.eagle_version}"; break;
case "beagle": phasing_version ="${params.beagle_version}"; break;
}

"""
bcftools concat -n ${imputed_joined} -o ${imputed_name} -Oz
tabix ${imputed_name}
# concat info files
csvtk concat ${info_joined} > ${info_name}
bgzip ${info_name}

if [[ "${params.meta}" == "true" ]]
# concat dosage files and update header
# TODO: a new file is written with the new header, should we add this on a chunk level and ove this block to imputation module and re-header there?
bcftools concat -n ${imputed_joined} -o tmp_${imputed_name} -Oz
echo "##mis_pipeline=${params.pipeline_version}" > add_header.txt
echo "##mis_imputation=${params.imputation_version}" >> add_header.txt
echo "##mis_phasing=${phasing_version}" >> add_header.txt
echo "##mis_panel=${panel_version}" >> add_header.txt
bcftools annotate -h add_header.txt tmp_${imputed_name} -o ${imputed_name} -Oz
rm tmp_${imputed_name}
tabix ${imputed_name}

# write meta files
if [[ ${params.meta} ]]
then
bcftools concat -n ${meta_joined} -o ${meta_name} -Oz
tabix ${meta_name}
fi

# zip files
7z a -tzip ${aes} -p"${params.encryption_password}" ${zip_name} ${prefix}*
rm *vcf.gz* *info

if [[ "${params.md5}" == "true" ]]
if [[ ${params.md5} ]]
then
md5sum ${zip_name} > ${zip_name}.md5
fi
Expand Down
2 changes: 1 addition & 1 deletion nf-test.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ config {
profile "development"

plugins {
load "nft-vcf@1.0.0"
load "nft-vcf@1.0.1"
}

}
111 changes: 108 additions & 3 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,81 @@ nextflow_pipeline {

}

test("Should run without phasing") {
test("Should fail with phased data and empty phasing option") {

when {
params {
project = "test-job"
project = "testPipelineWithPhasedAndEmptyPhasing"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = ""
}
}

then {
assert workflow.failed

}

}

// this testcould be deleted later, since now phasing must be actively deactivated
test("Should run with phased data and no phasing selected") {

when {
params {
project = "testPipelineWithPhasedAndNoPhasingSelected"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "no_phasing"
}
}

then {
assert workflow.success

def quality_control_log = file("${outputDir}/cloudgene.report.json")
assert quality_control_log.exists()
assert quality_control_log.text.contains("Remaining sites in total: 7,735")

def imputed_chr_20 = file("${outputDir}/chr_20.zip");
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");
def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

//check correct number of snps in info.gz file
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

}

}

test("Should run with eagle") {

when {
params {
project = "testPipelineWithEagle"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
phasing = "no_phasing"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "eagle"
}
}

Expand All @@ -174,13 +237,55 @@ nextflow_pipeline {
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");

def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37

//check correct number of snps in info.gz file
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37

}

}

test("Should run with eagle and validate header") {

when {
params {
project = "testValidatePanelWithEagle"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
phasing = "no_phasing"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
}
}

then {
assert workflow.success

def quality_control_log = file("${outputDir}/cloudgene.report.json")
assert quality_control_log.exists()
assert quality_control_log.text.contains("Remaining sites in total: 7,735")

def imputed_chr_20 = file("${outputDir}/chr_20.zip");
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");

def header = getVcfHeader(path("${outputDir}/chr20.dose.vcf.gz"))
assert header.getOtherHeaderLine("mis_phasing").getValue() == params.phasing
assert header.getOtherHeaderLine("mis_imputation").getKey()
assert header.getOtherHeaderLine("mis_pipeline").getKey()
assert header.getOtherHeaderLine("mis_panel").getKey()
}

}

test("Should run phasing-only") {

when {
Expand Down
40 changes: 17 additions & 23 deletions workflows/imputationserver.nf
Original file line number Diff line number Diff line change
Expand Up @@ -61,43 +61,37 @@ workflow IMPUTATIONSERVER {

imputation_ch = QUALITY_CONTROL.out.qc_metafiles

if ("${params.phasing}" != 'no_phasing') {
if (params.phasing != 'no_phasing') {

PHASING(
imputation_ch
)

if (params.mode == 'imputation') {
imputation_ch = PHASING.out.phased_ch
}
imputation_ch = PHASING.out.phased_ch

}


if (params.mode == 'imputation') {

IMPUTATION(
imputation_ch
)

ENCRYPTION(
IMPUTATION.out.groupTuple()
)

IMPUTATION(
imputation_ch
)
ENCRYPTION(
IMPUTATION.out
)
}
}
}

if (params.ancestry.enabled) {
if (params.ancestry.enabled){
ANCESTRY_ESTIMATION()
}

if(params.pgs.enabled) {

PGS_CALCULATION(
IMPUTATION.out,
params.ancestry.enabled ? ANCESTRY_ESTIMATION.out : Channel.empty()
)

}


}


workflow.onComplete {
//TODO: use templates
//TODO: move in EmailHelper class
Expand Down
10 changes: 0 additions & 10 deletions workflows/phasing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ workflow PHASING {

chromosomes = Channel.of(1..22, 'X.nonPAR', 'X.PAR1', 'X.PAR2', 'MT')

//TODO: phasing currently always executed, indepedent of detected phasing status in input files
if (params.phasing == 'eagle' && params.refpanel.refEagle != null) {

phasing_map_ch = file(params.refpanel.mapEagle, checkIfExists: false)
Expand Down Expand Up @@ -60,15 +59,6 @@ workflow PHASING {

}

if ("${params.phasing}" == 'no_phasing') {


metafiles_ch
.map {it -> tuple(it[0],it[1],it[2],it[3],file(it[4])) }
.set {phased_ch}

}

emit:
phased_ch

Expand Down