Skip to content

Commit

Permalink
Merge branch 'main' into features/pgs-calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
seppinho authored Nov 21, 2023
2 parents 831961e + c71eee1 commit 010981e
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 52 deletions.
1 change: 0 additions & 1 deletion cloudgene.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ workflow:
values:
qconly: Quality Control Only
imputation: Quality Control & Imputation
phasing: Quality Control & Phasing Only

- id: aesEncryption
description: AES 256 encryption
Expand Down
21 changes: 16 additions & 5 deletions modules/local/compression/compression_encryption_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,34 @@ process COMPRESSION_ENCRYPTION_VCF {
def zip_name = "chr_${chr}.zip"
def info_name = "${prefix}.info"
def aes = params.encryption.aes ? "-mem=AES256" : ""

def panel_version = RefPanelUtil.loadFromFile(params.refpanel_yaml).id

"""
bcftools concat -n ${imputed_joined} -o ${imputed_name} -Oz
tabix ${imputed_name}
# concat info files
csvtk concat ${info_joined} > ${info_name}
bgzip ${info_name}
if [[ "${params.meta}" == "true" ]]
# concat dosage files and update header
bcftools concat -n ${imputed_joined} -o tmp_${imputed_name} -Oz
echo "##mis_pipeline=${params.pipeline_version}" > add_header.txt
echo "##mis_phasing=${params.phasing}" >> add_header.txt
echo "##mis_panel=${panel_version}" >> add_header.txt
bcftools annotate -h add_header.txt tmp_${imputed_name} -o ${imputed_name} -Oz
rm tmp_${imputed_name}
tabix ${imputed_name}
# write meta files
if [[ ${params.meta} ]]
then
bcftools concat -n ${meta_joined} -o ${meta_name} -Oz
tabix ${meta_name}
fi
# zip files
7z a -tzip ${aes} -p"${params.encryption_password}" ${zip_name} ${prefix}*
rm *vcf.gz* *info
if [[ "${params.md5}" == "true" ]]
if [[ ${params.md5} ]]
then
md5sum ${zip_name} > ${zip_name}.md5
fi
Expand Down
3 changes: 0 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,6 @@ params {
r2Filter = 0
password = null
pipeline_version = 'michigan-imputationserver-1.6.0-rc5'
eagle_version = 'eagle-2.4'
beagle_version = 'beagle.18May20.d20.jar'
imputation_version = 'minimac4-1.0.2'

config = [
send_mail: false
Expand Down
2 changes: 1 addition & 1 deletion nf-test.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ config {
profile "development"

plugins {
load "[email protected].0"
load "[email protected].1"
}

}
Binary file not shown.
Binary file not shown.
96 changes: 83 additions & 13 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,81 @@ nextflow_pipeline {

}

test("Should run without phasing") {
test("Should fail with phased data and empty phasing option") {

when {
params {
project = "test-job"
project = "testPipelineWithPhasedAndEmptyPhasing"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = ""
}
}

then {
assert workflow.failed

}

}

// this testcould be deleted later, since now phasing must be actively deactivated
test("Should run with phased data and no phasing selected") {

when {
params {
project = "testPipelineWithPhasedAndNoPhasingSelected"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "no_phasing"
}
}

then {
assert workflow.success

def quality_control_log = file("${outputDir}/cloudgene.report.json")
assert quality_control_log.exists()
assert quality_control_log.text.contains("Remaining sites in total: 7,735")

def imputed_chr_20 = file("${outputDir}/chr_20.zip");
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");
def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

//check correct number of snps in info.gz file
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

}

}

test("Should run with eagle") {

when {
params {
project = "testPipelineWithEagle"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
phasing = "no_phasing"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "eagle"
}
}

Expand All @@ -174,22 +237,28 @@ nextflow_pipeline {
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");

def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37

//check correct number of snps in info.gz file
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37

}

}

test("Should run phasing-only") {
test("Should run with eagle and validate header") {

when {
params {
project = "test-job"
project = "testValidatePanelWithEagle"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
mode = "phasing_only"
phasing = "no_phasing"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
Expand All @@ -203,14 +272,15 @@ nextflow_pipeline {
assert quality_control_log.exists()
assert quality_control_log.text.contains("Remaining sites in total: 7,735")

//TODO: add phasing only mode in workflow!
//def imputed_chr_20 = file("${outputDir}/chr_20.zip");
//assert imputed_chr_20.exists()
//ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
//zipFile.extractAll("${outputDir}");


def imputed_chr_20 = file("${outputDir}/chr_20.zip");
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");

def header = getVcfHeader(path("${outputDir}/chr20.dose.vcf.gz"))
assert header.getOtherHeaderLine("mis_phasing").getValue() == params.phasing
assert header.getOtherHeaderLine("mis_pipeline").getKey()
assert header.getOtherHeaderLine("mis_panel").getKey()
}

}
Expand Down
43 changes: 24 additions & 19 deletions workflows/imputationserver.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ for (param in requiredParams) {
}
}

//TODO create json validation file
if (params.phasing != 'eagle' && !params.phasing != 'beagle' && params.phasing != 'no_phasing' ) {
exit 1, "For phasing, only options 'eagle', 'beagle' or 'no_phasing' are allowed."
}


// create random password when not set by user
if (params.password == null) {
Expand All @@ -38,8 +43,6 @@ include { PHASING } from './phasing'
include { IMPUTATION } from './imputation'
include { ENCRYPTION } from './encryption'
include { ANCESTRY_ESTIMATION } from './ancestry_estimation'
include { PGS_CALCULATION } from './pgs_calculation'


workflow IMPUTATIONSERVER {

Expand All @@ -56,43 +59,45 @@ workflow IMPUTATIONSERVER {

if (params.mode != 'qc-only') {

imputation_ch = QUALITY_CONTROL.out.qc_metafiles
phased_ch = QUALITY_CONTROL.out.qc_metafiles

if ("${params.phasing}" != 'no_phasing') {
if (params.phasing != 'no_phasing') {

PHASING(
imputation_ch
QUALITY_CONTROL.out.qc_metafiles
)

if (params.mode == 'imputation') {
imputation_ch = PHASING.out.phased_ch
}
phased_ch = PHASING.out.phased_ch

}


if (params.mode == 'imputation') {

IMPUTATION(
imputation_ch
)

ENCRYPTION(
IMPUTATION.out.groupTuple()
)

IMPUTATION(
phased_ch
)
ENCRYPTION(
IMPUTATION.out.groupTuple()
)
}
}
}

if (params.ancestry.enabled) {
if (params.ancestry.enabled){
ANCESTRY_ESTIMATION()
}

if(params.pgs.enabled) {
if (params.pgs.enabled) {

PGS_CALCULATION(
IMPUTATION.out,
params.ancestry.enabled ? ANCESTRY_ESTIMATION.out : Channel.empty()
)

}

}


Expand Down
10 changes: 0 additions & 10 deletions workflows/phasing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ workflow PHASING {

chromosomes = Channel.of(1..22, 'X.nonPAR', 'X.PAR1', 'X.PAR2', 'MT')

//TODO: phasing currently always executed, indepedent of detected phasing status in input files
if (params.phasing == 'eagle' && params.refpanel.refEagle != null) {

phasing_map_ch = file(params.refpanel.mapEagle, checkIfExists: false)
Expand Down Expand Up @@ -60,15 +59,6 @@ workflow PHASING {

}

if ("${params.phasing}" == 'no_phasing') {


metafiles_ch
.map {it -> tuple(it[0],it[1],it[2],it[3],file(it[4])) }
.set {phased_ch}

}

emit:
phased_ch

Expand Down

0 comments on commit 010981e

Please sign in to comment.