Skip to content

Commit

Permalink
Add MIS vcf tags to dosage files
Browse files Browse the repository at this point in the history
  • Loading branch information
seppinho committed Nov 21, 2023
1 parent 617dc7b commit 9e92f24
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 42 deletions.
28 changes: 23 additions & 5 deletions modules/local/compression/compression_encryption_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,41 @@ process COMPRESSION_ENCRYPTION_VCF {
def zip_name = "chr_${chr}.zip"
def info_name = "${prefix}.info"
def aes = params.encryption.aes ? "-mem=AES256" : ""

def panel_version = RefPanelUtil.loadFromFile(params.refpanel_yaml).id
def phasing_version = "no_phasing"
switch(params.phasing) {
case "eagle": phasing_version ="${params.eagle_version}"; break;
case "beagle": phasing_version ="${params.beagle_version}"; break;
}

"""
bcftools concat -n ${imputed_joined} -o ${imputed_name} -Oz
tabix ${imputed_name}
# concat info files
csvtk concat ${info_joined} > ${info_name}
bgzip ${info_name}
if [[ "${params.meta}" == "true" ]]
# concat dosage files and update header
# TODO: a new file is written with the new header, should we add this on a chunk level and ove this block to imputation module and re-header there?
bcftools concat -n ${imputed_joined} -o tmp_${imputed_name} -Oz
echo "##mis_pipeline=${params.pipeline_version}" > add_header.txt
echo "##mis_imputation=${params.imputation_version}" >> add_header.txt
echo "##mis_phasing=${phasing_version}" >> add_header.txt
echo "##mis_panel=${panel_version}" >> add_header.txt
bcftools annotate -h add_header.txt tmp_${imputed_name} -o ${imputed_name} -Oz
rm tmp_${imputed_name}
tabix ${imputed_name}
# write meta files
if [[ ${params.meta} ]]
then
bcftools concat -n ${meta_joined} -o ${meta_name} -Oz
tabix ${meta_name}
fi
# zip files
7z a -tzip ${aes} -p"${params.encryption_password}" ${zip_name} ${prefix}*
rm *vcf.gz* *info
if [[ "${params.md5}" == "true" ]]
if [[ ${params.md5} ]]
then
md5sum ${zip_name} > ${zip_name}.md5
fi
Expand Down
2 changes: 1 addition & 1 deletion nf-test.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ config {
profile "development"

plugins {
load "[email protected].0"
load "[email protected].1"
}

}
111 changes: 108 additions & 3 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,81 @@ nextflow_pipeline {

}

test("Should run without phasing") {
test("Should fail with phased data and empty phasing option") {

when {
params {
project = "test-job"
project = "testPipelineWithPhasedAndEmptyPhasing"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = ""
}
}

then {
assert workflow.failed

}

}

// this testcould be deleted later, since now phasing must be actively deactivated
test("Should run with phased data and no phasing selected") {

when {
params {
project = "testPipelineWithPhasedAndNoPhasingSelected"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "no_phasing"
}
}

then {
assert workflow.success

def quality_control_log = file("${outputDir}/cloudgene.report.json")
assert quality_control_log.exists()
assert quality_control_log.text.contains("Remaining sites in total: 7,735")

def imputed_chr_20 = file("${outputDir}/chr_20.zip");
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");
def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

//check correct number of snps in info.gz file
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

}

}

test("Should run with eagle") {

when {
params {
project = "testPipelineWithEagle"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
phasing = "no_phasing"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "eagle"
}
}

Expand All @@ -174,13 +237,55 @@ nextflow_pipeline {
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");

def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37

//check correct number of snps in info.gz file
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37

}

}

test("Should run with eagle and validate header") {

when {
params {
project = "testValidatePanelWithEagle"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
phasing = "no_phasing"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
}
}

then {
assert workflow.success

def quality_control_log = file("${outputDir}/cloudgene.report.json")
assert quality_control_log.exists()
assert quality_control_log.text.contains("Remaining sites in total: 7,735")

def imputed_chr_20 = file("${outputDir}/chr_20.zip");
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");

def header = getVcfHeader(path("${outputDir}/chr20.dose.vcf.gz"))
assert header.getOtherHeaderLine("mis_phasing").getValue() == params.phasing
assert header.getOtherHeaderLine("mis_imputation").getKey()
assert header.getOtherHeaderLine("mis_pipeline").getKey()
assert header.getOtherHeaderLine("mis_panel").getKey()
}

}

test("Should run phasing-only") {

when {
Expand Down
40 changes: 17 additions & 23 deletions workflows/imputationserver.nf
Original file line number Diff line number Diff line change
Expand Up @@ -61,43 +61,37 @@ workflow IMPUTATIONSERVER {

imputation_ch = QUALITY_CONTROL.out.qc_metafiles

if ("${params.phasing}" != 'no_phasing') {
if (params.phasing != 'no_phasing') {

PHASING(
imputation_ch
)

if (params.mode == 'imputation') {
imputation_ch = PHASING.out.phased_ch
}
imputation_ch = PHASING.out.phased_ch

}


if (params.mode == 'imputation') {

IMPUTATION(
imputation_ch
)

ENCRYPTION(
IMPUTATION.out.groupTuple()
)

IMPUTATION(
imputation_ch
)
ENCRYPTION(
IMPUTATION.out
)
}
}
}

if (params.ancestry.enabled) {
if (params.ancestry.enabled){
ANCESTRY_ESTIMATION()
}

if(params.pgs.enabled) {

PGS_CALCULATION(
IMPUTATION.out,
params.ancestry.enabled ? ANCESTRY_ESTIMATION.out : Channel.empty()
)

}


}


workflow.onComplete {
//TODO: use templates
//TODO: move in EmailHelper class
Expand Down
10 changes: 0 additions & 10 deletions workflows/phasing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ workflow PHASING {

chromosomes = Channel.of(1..22, 'X.nonPAR', 'X.PAR1', 'X.PAR2', 'MT')

//TODO: phasing currently always executed, indepedent of detected phasing status in input files
if (params.phasing == 'eagle' && params.refpanel.refEagle != null) {

phasing_map_ch = file(params.refpanel.mapEagle, checkIfExists: false)
Expand Down Expand Up @@ -60,15 +59,6 @@ workflow PHASING {

}

if ("${params.phasing}" == 'no_phasing') {


metafiles_ch
.map {it -> tuple(it[0],it[1],it[2],it[3],file(it[4])) }
.set {phased_ch}

}

emit:
phased_ch

Expand Down

0 comments on commit 9e92f24

Please sign in to comment.