add tests for normalization and concat + normalize #1815

Merged
4 changes: 3 additions & 1 deletion .github/workflows/nf-test.yml
@@ -5,7 +5,9 @@ on:
# https://docs.renovatebot.com/key-concepts/automerge/#branch-vs-pr-automerging
- "renovate/**" # branches Renovate creates
pull_request:
branches: [dev]
branches:
- dev
- dev_normalization #TODO: remove this when merging normalization
Comment (Member Author): TODO in #1770

workflow_dispatch:
inputs:
runners:
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -39,6 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [1803](https://github.com/nf-core/sarek/pull/1803) - Back to dev
- [1806](https://github.com/nf-core/sarek/pull/1806) - Use `nft-vcf` for nf-test vcf assertions
- [1814](https://github.com/nf-core/sarek/pull/1814) - Added link to Bluesky
- [1815](https://github.com/nf-core/sarek/pull/1815) - Create nf-test pipeline pytest vcf concatenation + normalize tests

#### Changed

@@ -47,6 +48,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [1809](https://github.com/nf-core/sarek/pull/1809) - Replace `getReadsMD5()` by `readsMD5` from `nft-bam` plugin for more global cohesion with usage of `nft-vcf` plugin
- [1810](https://github.com/nf-core/sarek/pull/1810) - Implement automatic sharding for nf-test tests
- [1810](https://github.com/nf-core/sarek/pull/1810) - Skip all CI but linting on docs changes
- [1815](https://github.com/nf-core/sarek/pull/1815) - Migrate pipeline pytest vcf normalize tests to nf-test

#### Fixed

13 changes: 7 additions & 6 deletions conf/modules/post_variant_calling.config
@@ -22,8 +22,8 @@ process {
publishDir = [ enabled: false ]
}

withName: 'ADD_INFO_TO_VCF'{
ext.when = { params.concatenate_vcfs }
withName: 'ADD_INFO_TO_VCF' {
ext.when = { params.concatenate_vcfs || params.normalize_vcfs }
Comment (Member Author): This is where our issue was

Comment (Contributor): what happens if you only normalize? no info needed?

publishDir = [ enabled: false ]
}

@@ -49,8 +49,8 @@ process {

withName: 'VCFS_NORM' {
ext.args = { [
'--multiallelics -both', //split multiallelic sites into biallelic records and both SNPs and indels should be merged separately into two records
'--rm-dup all' //output only the first instance of a record which is present multiple times
'--multiallelics -both', // split multiallelic sites into biallelic records and both SNPs and indels should be merged separately into two records
'--rm-dup all' // output only the first instance of a record which is present multiple times
].join(' ') }
ext.when = { params.normalize_vcfs }
publishDir = [
@@ -62,9 +62,10 @@
withName: 'TABIX_EXT_VCF' {
ext.prefix = { "${input.baseName}" }
ext.when = { params.concatenate_vcfs || params.normalize_vcfs }
publishDir = [ enabled: false ]
}

withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{
withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT' {
ext.prefix = { "${meta.id}.germline" }
ext.when = { params.concatenate_vcfs }
publishDir = [
Expand All @@ -74,7 +75,7 @@ process {
]
}

withName: 'TABIX_VCFS_NORM_SORT'{
withName: 'TABIX_VCFS_NORM_SORT' {
ext.prefix = { "${meta.id}.${meta.variantcaller}.norm" }
ext.when = { params.normalize_vcfs }
publishDir = [
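The `VCFS_NORM` flags in this config map to two normalization steps: `--multiallelics -both` splits each multiallelic site into one biallelic record per alternate allele, and `--rm-dup all` outputs only the first instance of a record that appears multiple times. A toy Python model of the record-level effect (illustrative only — not bcftools itself, and it ignores that `-both` handles SNPs and indels separately):

```python
# Toy model of `bcftools norm --multiallelics -both --rm-dup all`
# on simplified (CHROM, POS, REF, ALT) records.

def split_multiallelics(records):
    """Split each multiallelic record (comma-separated ALT) into
    one biallelic record per alternate allele."""
    out = []
    for chrom, pos, ref, alt in records:
        for allele in alt.split(","):
            out.append((chrom, pos, ref, allele))
    return out

def rm_dup_all(records):
    """Keep only the first instance of a record that occurs more
    than once (the effect of `--rm-dup all` on exact duplicates)."""
    seen = set()
    out = []
    for rec in records:
        if rec not in seen:
            seen.add(rec)
            out.append(rec)
    return out

records = [
    ("chr1", 100, "A", "C,G"),  # multiallelic site
    ("chr1", 100, "A", "C"),    # duplicate once split
    ("chr1", 200, "T", "TA"),   # indel, untouched
]

normalized = rm_dup_all(split_multiallelics(records))
print(normalized)
# → [('chr1', 100, 'A', 'C'), ('chr1', 100, 'A', 'G'), ('chr1', 200, 'T', 'TA')]
```

Splitting before deduplicating matters: the duplicate of `chr1:100 A>C` only becomes an exact duplicate after the multiallelic record is decomposed.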
17 changes: 10 additions & 7 deletions subworkflows/local/post_variantcalling/main.nf
@@ -2,11 +2,10 @@
// POST VARIANT CALLING: processes run on variantcalled but not annotated VCFs
//

include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline/main'
include { NORMALIZE_VCFS } from '../vcf_normalization/main'
include { CONCATENATE_GERMLINE_VCFS } from '../vcf_concatenate_germline'
include { NORMALIZE_VCFS } from '../vcf_normalization'

workflow POST_VARIANTCALLING {

take:
germline_vcfs
tumor_only_vcfs
@@ -19,22 +18,26 @@ workflow POST_VARIANTCALLING {
versions = Channel.empty()
vcfs = Channel.empty()

if (concatenate_vcfs){
if (concatenate_vcfs) {
CONCATENATE_GERMLINE_VCFS(germline_vcfs)

vcfs = vcfs.mix(CONCATENATE_GERMLINE_VCFS.out.vcfs)
versions = versions.mix(CONCATENATE_GERMLINE_VCFS.out.versions)
}

if (normalize_vcfs){
if (normalize_vcfs) {
germline_vcfs.view { "germline:" + it }
tumor_only_vcfs.view { "tumor_only:" + it }
somatic_vcfs.view { "somatic:" + it }

NORMALIZE_VCFS(germline_vcfs, tumor_only_vcfs, somatic_vcfs, fasta)

vcfs = vcfs.mix(NORMALIZE_VCFS.out.vcfs)

versions = versions.mix(NORMALIZE_VCFS.out.versions)
}

emit:
vcfs // post processed vcfs

vcfs // post processed vcfs
versions // channel: [ versions.yml ]
}
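Stripped of Nextflow channel semantics, the control flow of `POST_VARIANTCALLING` is a conditional accumulate-and-merge: each flag-enabled step mixes its outputs into the shared `vcfs` and `versions` channels, so disabling both flags yields empty outputs. A hedged Python analogue (function and step names here are hypothetical stand-ins, not the pipeline's API):

```python
def post_variantcalling(germline_vcfs, concatenate=False, normalize=False):
    """Toy model of POST_VARIANTCALLING: each enabled step
    contributes its outputs to shared collections ('mix')."""
    vcfs, versions = [], []
    if concatenate:
        # stand-in for CONCATENATE_GERMLINE_VCFS
        vcfs += [f"{v}.germline.concat" for v in germline_vcfs]
        versions.append("bcftools/concat")
    if normalize:
        # stand-in for NORMALIZE_VCFS
        vcfs += [f"{v}.norm" for v in germline_vcfs]
        versions.append("bcftools/norm")
    return vcfs, versions

print(post_variantcalling(["s1"], concatenate=True, normalize=True))
# → (['s1.germline.concat', 's1.norm'], ['bcftools/concat', 'bcftools/norm'])
```

The real subworkflow does the same with `Channel.empty()` plus `mix(...)`, which is why both steps can run independently or together without changing the emit structure.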
18 changes: 8 additions & 10 deletions subworkflows/local/vcf_normalization/main.nf
@@ -1,15 +1,14 @@
// Normalize all unannotated VCFs

// Import modules
include { ADD_INFO_TO_VCF } from '../../../modules/local/add_info_to_vcf/main'
include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix/main'
include { BCFTOOLS_NORM as VCFS_NORM } from '../../../modules/nf-core/bcftools/norm/main'
include { BCFTOOLS_SORT as VCFS_NORM_SORT } from '../../../modules/nf-core/bcftools/sort/main'
include { TABIX_TABIX as TABIX_VCFS_NORM_SORT } from '../../../modules/nf-core/tabix/tabix/main'
include { ADD_INFO_TO_VCF } from '../../../modules/local/add_info_to_vcf'
include { TABIX_BGZIPTABIX as TABIX_EXT_VCF } from '../../../modules/nf-core/tabix/bgziptabix'
include { BCFTOOLS_NORM as VCFS_NORM } from '../../../modules/nf-core/bcftools/norm'
include { BCFTOOLS_SORT as VCFS_NORM_SORT } from '../../../modules/nf-core/bcftools/sort'
include { TABIX_TABIX as TABIX_VCFS_NORM_SORT } from '../../../modules/nf-core/tabix/tabix'

// Workflow to normalize, compress, and index VCF files
workflow NORMALIZE_VCFS {

take:
germline_vcfs
tumor_only_vcfs
@@ -38,13 +37,12 @@ workflow NORMALIZE_VCFS {

// Gather versions of all tools used
versions = versions.mix(ADD_INFO_TO_VCF.out.versions)
versions = versions.mix(VCFS_NORM.out.versions)
versions = versions.mix(TABIX_EXT_VCF.out.versions)
versions = versions.mix(VCFS_NORM_SORT.out.versions)
versions = versions.mix(TABIX_VCFS_NORM_SORT.out.versions)
versions = versions.mix(VCFS_NORM.out.versions)
versions = versions.mix(VCFS_NORM_SORT.out.versions)

emit:
vcfs = VCFS_NORM_SORT.out.vcf // normalized vcfs
vcfs = VCFS_NORM_SORT.out.vcf // normalized vcfs
versions // Channel: [versions.yml]
}

34 changes: 0 additions & 34 deletions tests/config/pytesttags.yml
@@ -341,40 +341,6 @@ tiddit:
- tests/csv/3.0/recalibrated_tumoronly.csv
- tests/test_tiddit.yml

# postprocessing

## normalize all vcfs
normalize_vcfs:
- conf/modules/post_variant_calling.config
- modules/nf-core/bcftools/concat/**
- modules/nf-core/bcftools/mpileup/**
- modules/nf-core/bcftools/norm/**
- modules/nf-core/bcftools/sort/**
- modules/nf-core/deepvariant/**
- modules/nf-core/freebayes/**
- modules/nf-core/gatk4/haplotypecaller/**
- modules/nf-core/gatk4/mergevcfs/**
- modules/nf-core/manta/germline/**
- modules/nf-core/samtools/mpileup/**
- modules/nf-core/strelka/germline/**
- modules/nf-core/tabix/bgziptabix/**
- modules/nf-core/tabix/tabix/**
- modules/nf-core/tiddit/sv/**
- subworkflows/local/bam_variant_calling_deepvariant/**
- subworkflows/local/bam_variant_calling_freebayes/**
- subworkflows/local/bam_variant_calling_germline_all/**
- subworkflows/local/bam_variant_calling_germline_manta/**
- subworkflows/local/bam_variant_calling_haplotypecaller/**
- subworkflows/local/bam_variant_calling_mpileup/**
- subworkflows/local/bam_variant_calling_single_strelka/**
- subworkflows/local/bam_variant_calling_single_tiddit/**
- subworkflows/local/bam_variant_calling_somatic_all/**
- subworkflows/local/bam_variant_calling_tumor_only_all/**
- subworkflows/local/post_variantcalling/**
- subworkflows/local/vcf_normalization/**
- tests/csv/3.0/mapped_joint_bam.csv
- tests/test_normalize_vcfs.yml

# sampleqc

## ngscheckmate
File renamed without changes.
@@ -1,7 +1,7 @@
{
"-profile test --concatenate_vcfs --tools freebayes,haplotypecaller": {
"content": [
51,
57,
{
"ADD_INFO_TO_VCF": {
"gawk": "5.1.0"
@@ -27,6 +27,9 @@
"GERMLINE_VCFS_CONCAT": {
"bcftools": 1.2
},
"GERMLINE_VCFS_CONCAT_SORT": {
"bcftools": 1.2
},
"TABIX_EXT_VCF": {
"tabix": 1.2
},
@@ -159,11 +162,9 @@
"variant_calling/concat/testN",
"variant_calling/concat/testN/testN.germline.vcf.gz",
"variant_calling/concat/testN/testN.germline.vcf.gz.tbi",
"variant_calling/concat/testN/versions.yml",
"variant_calling/concat/testT",
"variant_calling/concat/testT/testT.germline.vcf.gz",
"variant_calling/concat/testT/testT.germline.vcf.gz.tbi",
"variant_calling/concat/testT/versions.yml",
"variant_calling/freebayes",
"variant_calling/freebayes/testN",
"variant_calling/freebayes/testN/testN.freebayes.vcf.gz",
@@ -217,18 +218,16 @@
"testN.haplotypecaller.filtered.FILTER.summary:md5,4e2ceea7f3ff998004691fd71192d9ee",
"testN.haplotypecaller.filtered.TsTv.count:md5,b77c120ee5cc0423267200c67d60c663",
"testT.haplotypecaller.filtered.FILTER.summary:md5,bedf7a690b985e8ff74eef9f4c1a8c34",
"testT.haplotypecaller.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc",
"versions.yml:md5,69c573f208c12410e6a0ec7059a0f24e",
"versions.yml:md5,69c573f208c12410e6a0ec7059a0f24e"
"testT.haplotypecaller.filtered.TsTv.count:md5,803d74e40f7716202bae2a3a81c1ddfc"
],
[
[
"testN.germline.vcf.gz",
"722f25aa13e7b6f34e8fff98202495cb"
"141a2cc53b0a9d4a0ab4d779cb1e487"
],
[
"testT.germline.vcf.gz",
"ba2aba068afc37bf3a973b188b2c4bcc"
"9119ee2c970c37729dcb35d4b88f9d10"
],
[
"testN.freebayes.vcf.gz",
@@ -258,8 +257,8 @@
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.4"
"nextflow": "25.01.0"
},
"timestamp": "2025-02-24T18:36:32.697622794"
"timestamp": "2025-03-03T18:24:54.468039835"
}
}
47 changes: 47 additions & 0 deletions tests/postprocess_concatenation_normalization.nf.test
@@ -0,0 +1,47 @@
nextflow_pipeline {

name "Test pipeline"
script "../main.nf"
tag "pipeline"
tag "pipeline_sarek"

test("-profile test --normalize_vcfs --concatenate_vcfs --tools freebayes,haplotypecaller") {

when {
params {

input = "${projectDir}/tests/csv/3.0/mapped_joint_bam.csv"
step = 'variant_calling'
normalize_vcfs = true
concatenate_vcfs = true
tools = 'freebayes,haplotypecaller'
modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
outdir = "$outputDir"
}
}

then {
// stable_name: All files + folders in ${params.outdir}/ with a stable name
def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}'])
// stable_path: All files in ${params.outdir}/ with stable content
def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore')
// vcf_files: All vcf files
def vcf_files = getAllFilesFromDir(params.outdir, include: ['**/*.vcf.gz'])
assertAll(
{ assert workflow.success },
{ assert snapshot(
// Number of successful tasks
workflow.trace.succeeded().size(),
// pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions
removeNextflowVersion("$outputDir/pipeline_info/nf_core_sarek_software_mqc_versions.yml"),
// All stable path name, with a relative path
stable_name,
// All files with stable contents
stable_path,
// All vcf files
vcf_files.collect { file -> [file.getName(), path(file.toString()).vcf.variantsMD5] }
).match() }
)
}
}
}
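The snapshot above pins `path(file).vcf.variantsMD5` from the `nft-vcf` plugin, which (as used here) checksums the variant records rather than the raw file, so header-only or compression-level differences don't invalidate the snapshot. A minimal sketch of that idea, assuming the plugin hashes the non-header lines — its exact normalization may differ:

```python
import gzip
import hashlib

def variants_md5(vcf_gz_path):
    """MD5 over the variant records of a gzipped VCF, skipping
    '#' header lines, so header-only edits keep the checksum
    stable. Sketch of the idea behind nft-vcf's variantsMD5;
    the plugin's exact normalization may differ."""
    digest = hashlib.md5()
    with gzip.open(vcf_gz_path, "rt") as handle:
        for line in handle:
            if not line.startswith("#"):
                digest.update(line.encode())
    return digest.hexdigest()
```

This is why the snapshot stores per-file variant checksums (e.g. for `testN.germline.vcf.gz`) alongside whole-file assertions: the variant-level digest survives cosmetic header churn that would break a plain file MD5.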