Skip to content

Commit

Permalink
Merge branch 'main' into features/pgs-calculation
Browse files (browse the repository at this point in the history)
  • Loading branch information
lukfor authored Nov 21, 2023
2 parents 9ea2973 + 594c40a commit 1c0081c
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 21 deletions.
6 changes: 2 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
FROM ubuntu:18.04
FROM ubuntu:22.04
MAINTAINER Lukas Forer <[email protected]> / Sebastian Schönherr <[email protected]>

# Install compilers
RUN apt-get update && apt-get install -y wget build-essential zlib1g-dev liblzma-dev libbz2-dev libxau-dev

# Install miniconda
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py39_23.9.0-0-Linux-x86_64.sh -O ~/miniconda.sh && \
/bin/bash ~/miniconda.sh -b -p /opt/conda
ENV PATH=/opt/conda/bin:${PATH}

Expand Down Expand Up @@ -79,5 +79,3 @@ ENV JAVA_TOOL_OPTIONS="-Djdk.lang.Process.launchMechanism=vfork"

COPY files/bin/trace /usr/bin/.
COPY files/bin/vcf2geno /usr/bin/.


2 changes: 1 addition & 1 deletion modules/local/compression/compression_encryption_vcf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ process COMPRESSION_ENCRYPTION_VCF {
def info_joined = ArrayUtil.sort(imputed_info)
def prefix = "chr${chr}"
def imputed_name = "${prefix}.dose.vcf.gz"
def meta_name = "${prefix}_empiricalDose.vcf.gz"
def meta_name = "${prefix}.empiricalDose.vcf.gz"
def zip_name = "chr_${chr}.zip"
def info_name = "${prefix}.info"
def aes = params.encryption.aes ? "-mem=AES256" : ""
Expand Down
2 changes: 1 addition & 1 deletion modules/local/phasing/beagle.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process BEAGLE {
tag "${chunkfile}"

input:
tuple val(chr), path(bcf), val(start), val(end), val(phasing_status), path(chunkfile), val(snps), val(in_reference), path(map_beagle)
tuple val(chr), path(bcf), val(start), val(end), val(phasing_status), path(chunkfile), path(map_beagle)
val phasing_method

output:
Expand Down
2 changes: 1 addition & 1 deletion modules/local/phasing/eagle.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process EAGLE {
tag "${chunkfile}"

input:
tuple val(chr), path(bcf), path(bcf_csi), val(start), val(end), val(phasing_status), path(chunkfile), val(snps), val(in_reference)
tuple val(chr), path(bcf), path(bcf_csi), val(start), val(end), val(phasing_status), path(chunkfile)
path map_eagle

output:
Expand Down
4 changes: 4 additions & 0 deletions nf-test.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@ config {
configFile "tests/nextflow.config"
profile "development"

plugins {
load "[email protected]"
}

}
56 changes: 47 additions & 9 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,19 @@ nextflow_pipeline {
}

}

test("Should run with phased data") {

when {
params {
project = "test-job"
project = "testPipelineWithPhased"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "no_phasing"
}
}

Expand All @@ -92,19 +93,56 @@ nextflow_pipeline {
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");
def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

//check correct number of snps in info.gz file
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

}

}

test("Should run with phased data and meta option") {

when {
params {
project = "testPipelineWithPhasedAndMetaOption"
build = "hg19"
files = "$projectDir/tests/input/chr20-phased/*.vcf.gz"
population = "eur"
password = PASSWORD
refpanel_yaml = "$projectDir/tests/data/refpanels/hapmap2-chr20/cloudgene.yaml"
output = "${outputDir}"
phasing = "no_phasing"
}
}

//TODO: this is from imputationserver-utils.jar. How to include? use alternative?
/*VcfFile file = VcfFileUtil.load("${outputDir}/chr20.dose.vcf.gz", 100000000, false);
then {
assert workflow.success

def quality_control_log = file("${outputDir}/cloudgene.report.json")
assert quality_control_log.exists()
assert quality_control_log.text.contains("Remaining sites in total: 7,735")

def imputed_chr_20 = file("${outputDir}/chr_20.zip");
assert imputed_chr_20.exists()
ZipFile zipFile = new ZipFile(imputed_chr_20, PASSWORD.toCharArray());
zipFile.extractAll("${outputDir}");
def file = path("${outputDir}/chr20.dose.vcf.gz").vcf
def fileMeta = path("${outputDir}/chr20.empiricalDose.vcf.gz").vcf
assert fileMeta.getNoSnps() == 7735
assert file.getChromosome() == "20"
assert file.getNoSamples() == 51;
assert file.isPhased()
assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT*/

assert file.getNoSnps() == TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT
//check correct number of snps in info.gz file
//TODO: phasing executed every time, therefore ONLY_IN_INPUT is currently not available
// assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37
assert path("${outputDir}/chr20.info.gz").linesGzip.size() == 1 + TOTAL_REFPANEL_CHR20_B37 + ONLY_IN_INPUT

}

}
Expand Down
15 changes: 11 additions & 4 deletions workflows/imputationserver.nf
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,24 @@ workflow IMPUTATIONSERVER {
INPUT_VALIDATION.out,
legend_files_ch.collect()
)
//TODO: add phasing only mode

if (params.mode != 'qc-only') {

PHASING(
QUALITY_CONTROL.out.qc_metafiles
imputation_ch = QUALITY_CONTROL.out.qc_metafiles

if ("${params.phasing}" != 'no_phasing') {

PHASING(
imputation_ch
)

if (params.mode == 'imputation') {
imputation_ch = PHASING.out.phased_ch

}

IMPUTATION(
PHASING.out.phased_ch
imputation_ch
)

ENCRYPTION(
Expand Down
2 changes: 1 addition & 1 deletion workflows/quality_control.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ workflow QUALITY_CONTROL {
chunks_csv_index
.combine(chunks_vcf_index, by: 0)
.map{
row-> tuple(row[1], row[2], row[3], row[4], file(row[8]), row[6], row[7])
row-> tuple(row[1], row[2], row[3], row[4], file(row[8]))
}
.set { metafiles_ch }

Expand Down

0 comments on commit 1c0081c

Please sign in to comment.