From f8b0d2b5998e81fa4a0cf56db781caa3902fd4ee Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 28 Feb 2025 11:12:58 +0100 Subject: [PATCH 01/24] adding modern test --- conf/test_modern.config | 57 +++++++++++++++++++++++++++++++++++++++++ nextflow.config | 1 + 2 files changed, 58 insertions(+) create mode 100644 conf/test_modern.config diff --git a/conf/test_modern.config b/conf/test_modern.config new file mode 100644 index 00000000..53ff7f72 --- /dev/null +++ b/conf/test_modern.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/eager -profile test_modern,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test modern profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'eager/testdata/Human/human_design_bam_eager3.tsv' + + // Genome references + fasta = params.pipelines_testdata_base_path + 'eager/reference/Human/hs37d5_chr21-MT.fa.gz' + + + // Preprocessing + sequencing_qc_tool = 'falco' + preprocessing_tool = 'fastp' + convert_inputbam = true + + // Mapping + mapping_tool = 'bwamem' + + // BAM filtering + run_bamfiltering = true + bamfiltering_minreadlength = 30 + bamfiltering_mappingquality = 37 + + // 
Metagenomics + run_metagenomics = true + metagenomics_complexity_tool = 'prinseq' + metagenomics_profiling_tool = 'kraken2' + metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/kraken/eager_test.tar.gz' + metagenomics_run_postprocessing = true + + // Genotyping + genotyping_tool = 'hc' + +} diff --git a/nextflow.config b/nextflow.config index a5d4f694..d6cee232 100644 --- a/nextflow.config +++ b/nextflow.config @@ -393,6 +393,7 @@ profiles { test_malt { includeConfig 'conf/test_malt.config' } test_krakenuniq { includeConfig 'conf/test_krakenuniq.config'} test_metaphlan { includeConfig 'conf/test_metaphlan.config' } + test_modern { includeConfig 'conf/test_modern.config' } } // Load nf-core custom profiles from different Institutions From e44842f0c048f7f90ada1fb2c78a1c18618d5322 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 28 Feb 2025 11:57:43 +0100 Subject: [PATCH 02/24] adding microbial genome test, but missing samplesheet dsl2 to test --- conf/test_microbial.config | 57 ++++++++++++++++++++++++++++++++++++++ conf/test_modern.config | 2 -- nextflow.config | 3 +- 3 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 conf/test_microbial.config diff --git a/conf/test_microbial.config b/conf/test_microbial.config new file mode 100644 index 00000000..48842532 --- /dev/null +++ b/conf/test_microbial.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/eager -profile test_microbial,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test microbial profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_PE_only.tsv' + + // Genome references + fasta_sheet = params.pipelines_testdata_base_path + 'eager/reference/reference_sheet_multiref.csv' + + + // Preprocessing + sequencing_qc_tool = 'falco' + + // Mapping + mapping_tool = 'circularmapper' + + // BAM filtering + deduplication_tool = "dedup" + run_bamfiltering = true + bamfiltering_minreadlength = 30 + bamfiltering_mappingquality = 37 + + // Metagenomics + run_metagenomics = true + metagenomics_profiling_tool = 'krakenuniq' + metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/krakenuniq/testdb-krakenuniq.tar.gz' + + // Manioulate Damage + run_mapdamage_rescaling = true + run_pmd_filtering = true + + // Genotyping + genotyping_tool = 'freebayes' + + +} diff --git a/conf/test_modern.config b/conf/test_modern.config index 53ff7f72..5add473c 100644 --- a/conf/test_modern.config +++ b/conf/test_modern.config @@ -23,8 +23,6 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = params.pipelines_testdata_base_path + 'eager/testdata/Human/human_design_bam_eager3.tsv' // Genome references diff --git a/nextflow.config b/nextflow.config index d6cee232..45b23490 100644 --- a/nextflow.config +++ b/nextflow.config @@ -393,7 +393,8 
@@ profiles { test_malt { includeConfig 'conf/test_malt.config' } test_krakenuniq { includeConfig 'conf/test_krakenuniq.config'} test_metaphlan { includeConfig 'conf/test_metaphlan.config' } - test_modern { includeConfig 'conf/test_modern.config' } + test_modern { includeConfig 'conf/test_modern.config' } + test_microbial { includeConfig 'conf/test_microbial.config' } } // Load nf-core custom profiles from different Institutions From 24cc1093620b67c91eb802deb442d799fd90f8ab Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 7 Mar 2025 10:39:50 +0100 Subject: [PATCH 03/24] adding short dna, still need the ncbi dir to be set --- conf/test_shortdna.config | 56 +++++++++++++++++++++++++++++++++++++++ nextflow.config | 1 + 2 files changed, 57 insertions(+) create mode 100644 conf/test_shortdna.config diff --git a/conf/test_shortdna.config b/conf/test_shortdna.config new file mode 100644 index 00000000..c1079fdd --- /dev/null +++ b/conf/test_shortdna.config @@ -0,0 +1,56 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/eager -profile test_shortdna,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test very short DNA profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' + + // Genome references + fasta_sheet = params.pipelines_testdata_base_path + 'eager/reference/reference_sheet_multiref.csv' + + + // Preprocessing + sequencing_qc_tool = 'falco' + + // Mapping + // TO DO: Change when mapAD is there. + // mapping_tool = 'mapad' + + + // Metagenomics + run_metagenomics = true + metagenomics_complexity_tool = 'bbduk' + metagenomics_profiling_tool = 'malt' + metagenomics_profiling_database = params.pipelines_testdata_base_path + '/eager/databases/malt/eager_test.tar.gz' + metagenomics_run_postprocessing = true + metagenomics_maltextract_taxonlist = params.pipelines_testdata_base_path + '/eager/testdata/Mammoth/maltextract/MaltExtract_list.txt' + metagenomics_maltextract_ncbidir = 'https://github.com/rhuebler/HOPS/raw/external/Resources/' + + // Manioulate Damage + run_pmd_filtering = true + + // Genotyping + genotyping_tool = 'angsd' + + +} diff --git a/nextflow.config b/nextflow.config index 45b23490..1e96ef98 100644 --- a/nextflow.config +++ b/nextflow.config @@ -395,6 +395,7 @@ profiles { test_metaphlan { includeConfig 'conf/test_metaphlan.config' } test_modern { includeConfig 'conf/test_modern.config' } test_microbial { includeConfig 'conf/test_microbial.config' } + test_shortdna { includeConfig 'conf/test_shortdna.config' } } // Load nf-core custom profiles from different Institutions From ead8adaa77eb37d0cb5c15e9dd8b2e1ade42431d Mon Sep 17 00:00:00 2001 From: Judith Ballesteros 
<judith.vballesteros@gmail.com> Date: Fri, 7 Mar 2025 11:09:55 +0100 Subject: [PATCH 04/24] adding test_humanpopgen --- conf/test_humanpopgen.config | 57 ++++++++++++++++++++++++++++++++++++ nextflow.config | 25 ++++++++-------- 2 files changed, 70 insertions(+), 12 deletions(-) create mode 100644 conf/test_humanpopgen.config diff --git a/conf/test_humanpopgen.config b/conf/test_humanpopgen.config new file mode 100644 index 00000000..e3a0da7f --- /dev/null +++ b/conf/test_humanpopgen.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/eager -profile test_humanpopgen,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test human popgen profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = params.pipelines_testdata_base_path + 'eager/testdata/Human/human_design_bam_eager3.tsv' + + // Genome references + fasta = params.pipelines_testdata_base_path + 'eager/reference/Human/hs37d5_chr21-MT.fa.gz' + + // Mapping + mapping_tool = 'bowtie2' + + // BAM filtering + run_bamfiltering = true + bamfiltering_minreadlength = 30 + bamfiltering_mappingquality = 37 + + // Damage + damagecalculation_tool = 'mapdamage' + run_mapdamage_rescaling = true + run_trim_bam = true + + // Contamination + run_mtnucratio = true + run_contamination_estimation_angsd = true + + // Genotyping + genotyping_tool = 'pileupcaller' + genotyping_pileupcaller_bedfile = params.pipelines_testdata_base_path + 
'eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' + genotyping_pileupcaller_snpfile = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K_covered_in_JK2067_downsampled_s0.1.numeric_chromosomes.snp' + + //Sex Determination + run_sexdeterrmine = true + sexdeterrmine_bedfile = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' + +} diff --git a/nextflow.config b/nextflow.config index 1e96ef98..0762b42d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -384,18 +384,19 @@ profiles { test_full { includeConfig 'conf/test_full.config' } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - test_humanbam { includeConfig 'conf/test_humanbam.config' } - test_multiref { includeConfig 'conf/test_multiref.config' } - test_kraken2 { includeConfig 'conf/test_kraken2.config' } - test_malt { includeConfig 'conf/test_malt.config' } - test_krakenuniq { includeConfig 'conf/test_krakenuniq.config'} - test_metaphlan { includeConfig 'conf/test_metaphlan.config' } - test_modern { includeConfig 'conf/test_modern.config' } - test_microbial { includeConfig 'conf/test_microbial.config' } - test_shortdna { includeConfig 'conf/test_shortdna.config' } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } + test_nothing { includeConfig 'conf/test_nothing.config' } + test_humanbam { includeConfig 'conf/test_humanbam.config' } + test_multiref { includeConfig 'conf/test_multiref.config' } + test_kraken2 { includeConfig 'conf/test_kraken2.config' } + test_malt { includeConfig 'conf/test_malt.config' } + test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' } + test_metaphlan { includeConfig 'conf/test_metaphlan.config' } + test_modern { includeConfig 'conf/test_modern.config' } + test_microbial { includeConfig 'conf/test_microbial.config' } + test_shortdna { includeConfig 
'conf/test_shortdna.config' } + test_humanpopgen { includeConfig 'conf/test_humanpopgen.config' } } // Load nf-core custom profiles from different Institutions From 66aafdf4bbe1026dc0e06ff014caeb4d78bb68aa Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 7 Mar 2025 11:33:29 +0100 Subject: [PATCH 05/24] adding test default --- conf/test_default.config | 67 ++++++++++++++++++++++++++++++++++++++++ nextflow.config | 1 + 2 files changed, 68 insertions(+) create mode 100644 conf/test_default.config diff --git a/conf/test_default.config b/conf/test_default.config new file mode 100644 index 00000000..39a141ea --- /dev/null +++ b/conf/test_default.config @@ -0,0 +1,67 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/eager -profile test_default,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +// TO DO: Change name to test.config once migration is complete. 
+params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' + + // Genome references + fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' + + // Preprocessing + preprocessing_tool = 'adapterremoval' + + // Sharding FASTQ + run_fastq_sharding = true + fastq_shard_size = 5000 + + // Mapping + mapping_tool = 'bwaaln' + skip_qualimap = false + + // BAM filtering + run_bamfiltering = true + bamfiltering_minreadlength = 30 + bamfiltering_mappingquality = 37 + deduplication_tool = 'markduplicates' + + // PreSeq + mapstats_preseq_mode = 'c_curve' + + // Damage calculation + damagecalculation_tool = 'damageprofiler' + + // Genotyping + genotyping_tool = 'ug' + + // Map Stats + run_bedtools_coverage = true + mapstats_bedtools_featurefile = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.gff3' + + // Metagenomic screening + run_metagenomics = true + metagenomics_profiling_tool = 'metaphlan' + metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/metaphlan/metaphlan4_database.tar.gz' + metagenomics_run_postprocessing = true +} diff --git a/nextflow.config b/nextflow.config index 0762b42d..1d565018 100644 --- a/nextflow.config +++ b/nextflow.config @@ -393,6 +393,7 @@ profiles { test_malt { includeConfig 'conf/test_malt.config' } test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' } test_metaphlan { includeConfig 'conf/test_metaphlan.config' } + test_default { includeConfig 'conf/test_default.config' } test_modern { includeConfig 'conf/test_modern.config' } test_microbial { includeConfig 'conf/test_microbial.config' } test_shortdna { includeConfig 'conf/test_shortdna.config' } From 2689873ed3ef43f8760e6d489d6bca4af5c1ab24 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros 
<judith.vballesteros@gmail.com> Date: Fri, 7 Mar 2025 11:59:33 +0100 Subject: [PATCH 06/24] minor updates --- conf/test_default.config | 4 ---- conf/test_humanpopgen.config | 2 +- conf/test_microbial.config | 4 ++-- conf/test_modern.config | 5 ----- conf/test_shortdna.config | 5 ----- 5 files changed, 3 insertions(+), 17 deletions(-) diff --git a/conf/test_default.config b/conf/test_default.config index 39a141ea..fbd211c5 100644 --- a/conf/test_default.config +++ b/conf/test_default.config @@ -55,10 +55,6 @@ params { // Genotyping genotyping_tool = 'ug' - // Map Stats - run_bedtools_coverage = true - mapstats_bedtools_featurefile = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.gff3' - // Metagenomic screening run_metagenomics = true metagenomics_profiling_tool = 'metaphlan' diff --git a/conf/test_humanpopgen.config b/conf/test_humanpopgen.config index e3a0da7f..da2efa16 100644 --- a/conf/test_humanpopgen.config +++ b/conf/test_humanpopgen.config @@ -30,6 +30,7 @@ params { // Mapping mapping_tool = 'bowtie2' + convert_inputbam = true // BAM filtering run_bamfiltering = true @@ -38,7 +39,6 @@ params { // Damage damagecalculation_tool = 'mapdamage' - run_mapdamage_rescaling = true run_trim_bam = true // Contamination diff --git a/conf/test_microbial.config b/conf/test_microbial.config index 48842532..0c25d5d5 100644 --- a/conf/test_microbial.config +++ b/conf/test_microbial.config @@ -45,10 +45,10 @@ params { run_metagenomics = true metagenomics_profiling_tool = 'krakenuniq' metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/krakenuniq/testdb-krakenuniq.tar.gz' + run_host_removal = true - // Manioulate Damage + // Manipulate Damage run_mapdamage_rescaling = true - run_pmd_filtering = true // Genotyping genotyping_tool = 'freebayes' diff --git a/conf/test_modern.config b/conf/test_modern.config index 5add473c..beee31c3 100644 --- a/conf/test_modern.config +++ b/conf/test_modern.config @@ -37,11 
+37,6 @@ params { // Mapping mapping_tool = 'bwamem' - // BAM filtering - run_bamfiltering = true - bamfiltering_minreadlength = 30 - bamfiltering_mappingquality = 37 - // Metagenomics run_metagenomics = true metagenomics_complexity_tool = 'prinseq' diff --git a/conf/test_shortdna.config b/conf/test_shortdna.config index c1079fdd..672cb7f3 100644 --- a/conf/test_shortdna.config +++ b/conf/test_shortdna.config @@ -28,15 +28,10 @@ params { // Genome references fasta_sheet = params.pipelines_testdata_base_path + 'eager/reference/reference_sheet_multiref.csv' - - // Preprocessing - sequencing_qc_tool = 'falco' - // Mapping // TO DO: Change when mapAD is there. // mapping_tool = 'mapad' - // Metagenomics run_metagenomics = true metagenomics_complexity_tool = 'bbduk' From 981cd92d51108f49024a11934aff6016a798bfc5 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 14 Mar 2025 11:57:57 +0100 Subject: [PATCH 07/24] finished v1 test configs --- conf/test_default.config | 1 + conf/test_humanpopgen.config | 5 +++++ subworkflows/local/utils_nfcore_eager_pipeline/main.nf | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/conf/test_default.config b/conf/test_default.config index fbd211c5..a7403317 100644 --- a/conf/test_default.config +++ b/conf/test_default.config @@ -51,6 +51,7 @@ params { // Damage calculation damagecalculation_tool = 'damageprofiler' + skip_qualimap = false // Genotyping genotyping_tool = 'ug' diff --git a/conf/test_humanpopgen.config b/conf/test_humanpopgen.config index da2efa16..bd55ae97 100644 --- a/conf/test_humanpopgen.config +++ b/conf/test_humanpopgen.config @@ -16,6 +16,11 @@ process { memory: '15.GB', time: '1.h' ] + + // To avoid pipeline failure due to not having X reads and to not have overcrowded datasets in the test + withName: ANGSD_CONTAMINATION { + errorStrategy = { task.exitStatus in [134] ? 
'ignore' : 'finish' } + } } params { diff --git a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf index 0fcf7be6..5cfe596f 100644 --- a/subworkflows/local/utils_nfcore_eager_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_eager_pipeline/main.nf @@ -215,7 +215,7 @@ def validateInputParameters() { if ( !params.fasta && !params.fasta_sheet ) { exit 1, "[nf-core/eager] ERROR: Neither FASTA file --fasta nor reference sheet --fasta_sheet have been provided."} if ( params.fasta && params.fasta_sheet ) { exit 1, "[nf-core/eager] ERROR: A FASTA file --fasta and a reference sheet --fasta_sheet have been provided. These parameters are mutually exclusive."} if ( params.preprocessing_adapterlist && params.preprocessing_skipadaptertrim ) { log.warn("[nf-core/eager] WARNING: --preprocessing_skipadaptertrim will override --preprocessing_adapterlist. Adapter trimming will be skipped!") } - if ( params.deduplication_tool == 'dedup' && ! params.preprocessing_excludeunmerged ) { exit 1, "[nf-core/eager] ERROR: Dedup can only be used on collapsed (i.e. merged) PE reads. For all other cases, please set --deduplication_tool to 'markduplicates'."} + if ( params.deduplication_tool == 'dedup' && ! params.preprocessing_excludeunmerged ) { exit 1, "[nf-core/eager] ERROR: Dedup can only be used on collapsed (i.e. merged) PE reads without singletons. If you want to use Dedup, please provide --preprocessing_excludeunmerged. For all other cases, please set --deduplication_tool to 'markduplicates'."} if ( params.bamfiltering_retainunmappedgenomicbam && params.bamfiltering_mappingquality > 0 ) { exit 1, ("[nf-core/eager] ERROR: You cannot both retain unmapped reads and perform quality filtering, as unmapped reads have a mapping quality of 0. Pick one or the other functionality.") } if ( params.genotyping_source == 'trimmed' && ! 
params.run_trim_bam ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'trimmed' unless BAM trimming is turned on with `--run_trim_bam`.") } if ( params.genotyping_source == 'pmd' && ! params.run_pmd_filtering ) { exit 1, ("[nf-core/eager] ERROR: --genotyping_source cannot be 'pmd' unless PMD-filtering is ran.") } From 3597d6f441569c07e031bf90d2e46d61dd279c80 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 14 Mar 2025 11:58:50 +0100 Subject: [PATCH 08/24] finished but error with mapDamage --rescale. Mixing different references --- conf/test_microbial.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/test_microbial.config b/conf/test_microbial.config index 0c25d5d5..269c4fc7 100644 --- a/conf/test_microbial.config +++ b/conf/test_microbial.config @@ -23,7 +23,7 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_PE_only.tsv' + input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_PE_only_v3.tsv' // Genome references fasta_sheet = params.pipelines_testdata_base_path + 'eager/reference/reference_sheet_multiref.csv' @@ -31,6 +31,7 @@ params { // Preprocessing sequencing_qc_tool = 'falco' + preprocessing_excludeunmerged = true // Mapping mapping_tool = 'circularmapper' From 67bde36da42d0ad3191e99476672a02e569a4200 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Mon, 24 Mar 2025 14:14:39 +0100 Subject: [PATCH 09/24] removing old profiles --- conf/test_default.config | 64 ------------------------------------- conf/test_humanbam.config | 55 ------------------------------- conf/test_kraken2.config | 35 -------------------- conf/test_krakenuniq.config | 36 --------------------- conf/test_malt.config | 36 --------------------- conf/test_metaphlan.config | 37 --------------------- 
conf/test_multiref.config | 39 ---------------------- conf/test_nothing.config | 48 ---------------------------- 8 files changed, 350 deletions(-) delete mode 100644 conf/test_default.config delete mode 100644 conf/test_humanbam.config delete mode 100644 conf/test_kraken2.config delete mode 100644 conf/test_krakenuniq.config delete mode 100644 conf/test_malt.config delete mode 100644 conf/test_metaphlan.config delete mode 100644 conf/test_multiref.config delete mode 100644 conf/test_nothing.config diff --git a/conf/test_default.config b/conf/test_default.config deleted file mode 100644 index a7403317..00000000 --- a/conf/test_default.config +++ /dev/null @@ -1,64 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. - - Use as follows: - nextflow run nf-core/eager -profile test_default,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -// TO DO: Change name to test.config once migration is complete. 
-params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' - - // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' - - // Preprocessing - preprocessing_tool = 'adapterremoval' - - // Sharding FASTQ - run_fastq_sharding = true - fastq_shard_size = 5000 - - // Mapping - mapping_tool = 'bwaaln' - skip_qualimap = false - - // BAM filtering - run_bamfiltering = true - bamfiltering_minreadlength = 30 - bamfiltering_mappingquality = 37 - deduplication_tool = 'markduplicates' - - // PreSeq - mapstats_preseq_mode = 'c_curve' - - // Damage calculation - damagecalculation_tool = 'damageprofiler' - skip_qualimap = false - - // Genotyping - genotyping_tool = 'ug' - - // Metagenomic screening - run_metagenomics = true - metagenomics_profiling_tool = 'metaphlan' - metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/metaphlan/metaphlan4_database.tar.gz' - metagenomics_run_postprocessing = true -} diff --git a/conf/test_humanbam.config b/conf/test_humanbam.config deleted file mode 100644 index 21c35179..00000000 --- a/conf/test_humanbam.config +++ /dev/null @@ -1,55 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/eager -profile test_tsv_humanbam,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Human BAM test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'eager/testdata/Human/human_design_bam_eager3.tsv' - - // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Human/hs37d5_chr21-MT.fa.gz' - - // Contamination estimation - contamination_estimation_angsd_mapq = 0 - contamination_estimation_angsd_minq = 0 - - // Qualimap - snpcapture_bed = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' - - // TODO Reactivate sexDet and genotyping params when those steps get implemented. 
- // //Sex Determination - sexdeterrmine_bedfile = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' - // // Genotyping - genotyping_pileupcaller_bedfile = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' - genotyping_pileupcaller_snpfile = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K_covered_in_JK2067_downsampled_s0.1.numeric_chromosomes.snp' - - - // BAM filtering - run_bamfiltering = true - bamfiltering_minreadlength = 30 - bamfiltering_mappingquality = 37 - - // Metagenomic screening - run_metagenomics = false -} diff --git a/conf/test_kraken2.config b/conf/test_kraken2.config deleted file mode 100644 index b976d179..00000000 --- a/conf/test_kraken2.config +++ /dev/null @@ -1,35 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test for - metagenomics krakenuniq. 
- - Use as follows: - nextflow run nf-core/eager -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} -params { - config_profile_name = 'Kraken2 test profile' - config_profile_description = 'Minimal test dataset to check the metagenomics kraken2 pipeline function' - - // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' - - // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' - - // Metagenomics - run_metagenomics = true - metagenomics_profiling_tool = 'kraken2' - metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/kraken/eager_test.tar.gz' -} diff --git a/conf/test_krakenuniq.config b/conf/test_krakenuniq.config deleted file mode 100644 index 5b25c1d6..00000000 --- a/conf/test_krakenuniq.config +++ /dev/null @@ -1,36 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test for - metagenomics krakenuniq. 
- - Use as follows: - nextflow run nf-core/eager -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'KrakenUniq test profile' - config_profile_description = 'Minimal test dataset to check the metagenomics krakenuniq pipeline function' - - // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' - - // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' - - // Metagenomics - run_metagenomics = true - metagenomics_profiling_tool = 'krakenuniq' - metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/krakenuniq/testdb-krakenuniq.tar.gz' -} diff --git a/conf/test_malt.config b/conf/test_malt.config deleted file mode 100644 index 43b905cc..00000000 --- a/conf/test_malt.config +++ /dev/null @@ -1,36 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test for - metagenomics malt. 
- - Use as follows: - nextflow run nf-core/eager -profile test_malt,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'MALT test profile' - config_profile_description = 'Minimal test dataset to check the metagenomics MALT pipeline function' - - // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' - - // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' - - // Metagenomics - run_metagenomics = true - metagenomics_profiling_tool = 'malt' - metagenomics_profiling_database = params.pipelines_testdata_base_path + '/eager/databases/malt/eager_test.tar.gz' -} diff --git a/conf/test_metaphlan.config b/conf/test_metaphlan.config deleted file mode 100644 index 85343cb7..00000000 --- a/conf/test_metaphlan.config +++ /dev/null @@ -1,37 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test for - metagenomics krakenuniq. 
- - Use as follows: - nextflow run nf-core/eager -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'MetaPhlAn4 test profile' - config_profile_description = 'Minimal test dataset to check the metagenomics metaphlan4 pipeline function' - - // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' - - // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' - - // Metagenomics - run_metagenomics = true - metagenomics_profiling_tool = 'metaphlan' - metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/metaphlan/metaphlan4_database.tar.gz' - metagenomics_run_postprocessing = true -} diff --git a/conf/test_multiref.config b/conf/test_multiref.config deleted file mode 100644 index 6917a1a6..00000000 --- a/conf/test_multiref.config +++ /dev/null @@ -1,39 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - fasta_sheet = 'https://github.com/nf-core/test-datasets/raw/eager/reference/reference_sheet_multiref.csv' - nextflow run nf-core/eager -profile test_multiref,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_multilane_multilib.tsv' - - // Genome references - fasta_sheet = params.pipelines_testdata_base_path + 'eager/reference/reference_sheet_multiref.csv' - - // BAM filtering - run_bamfiltering = true - bamfiltering_minreadlength = 30 - bamfiltering_mappingquality = 37 - - // Metagenomics - run_metagenomics = false -} diff --git a/conf/test_nothing.config b/conf/test_nothing.config deleted file mode 100644 index a16745b6..00000000 --- a/conf/test_nothing.config +++ /dev/null @@ -1,48 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. 
- - Use as follows: - nextflow run nf-core/eager -profile test_nothing,<docker/singularity> --outdir <OUTDIR> - ----------------------------------------------------------------------------------------- -*/ - -process { - resourceLimits = [ - cpus: 4, - memory: '15.GB', - time: '1.h' - ] -} - -params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' - - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'eager/testdata/Human/human_design_bam_eager3.tsv' - - // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Human/hs37d5_chr21-MT.fa.gz' - - skip_preprocessing = true - skip_deduplication = true - skip_qualimap = true - skip_damagecalculation = true - mapstats_skip_preseq = true - - run_fastq_sharding = false - run_bamfiltering = false - run_bedtools_coverage = false - run_metagenomics = false - run_contamination_estimation_angsd = false - run_mtnucratio = false - run_mapdamage_rescaling = false - run_pmd_filtering = false - run_trim_bam = false -} From 56a3ab51dc7687ca4347b5baa604cb21e746d405 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Mon, 24 Mar 2025 14:50:15 +0100 Subject: [PATCH 10/24] replacing test.config --- conf/test.config | 50 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/conf/test.config b/conf/test.config index e358134a..b9f6b7eb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/eager -profile test,<docker/singularity> --outdir <OUTDIR> + nextflow run nf-core/eager -profile test_default,<docker/singularity> --outdir <OUTDIR> ---------------------------------------------------------------------------------------- */ @@ -18,31 +18,49 @@ process { ] } +// TO DO: Change name to test.config once migration is complete. params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' + input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/samplesheet_v3.tsv' // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' + fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' + + // Preprocessing + preprocessing_tool = 'adapterremoval' // Sharding FASTQ - run_fastq_sharding = true - fastq_shard_size = 5000 + run_fastq_sharding = true + fastq_shard_size = 5000 + + // Mapping + mapping_tool = 'bwaaln' + skip_qualimap = false // BAM filtering - run_bamfiltering = true - bamfiltering_minreadlength = 30 - bamfiltering_mappingquality = 37 + run_bamfiltering = true + bamfiltering_minreadlength = 30 + bamfiltering_mappingquality = 37 + deduplication_tool = 'markduplicates' + + // PreSeq + mapstats_preseq_mode = 'c_curve' + + // Damage calculation + damagecalculation_tool = 'damageprofiler' + skip_qualimap = false - // Map Stats - run_bedtools_coverage = true - mapstats_bedtools_featurefile = params.pipelines_testdata_base_path + 
'eager/reference/Mammoth/Mammoth_MT_Krause.gff3' + // Genotyping + run_genotyping = true + genotyping_source = 'raw' + genotyping_tool = 'ug' // Metagenomic screening - run_metagenomics = false + run_metagenomics = true + metagenomics_profiling_tool = 'metaphlan' + metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/metaphlan/metaphlan4_database.tar.gz' + metagenomics_run_postprocessing = true } From ff452d91a85e09bb7678f5c07204391f3cd53946 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Mon, 24 Mar 2025 14:50:32 +0100 Subject: [PATCH 11/24] replacing nothing to minimal --- conf/test_minimal.config | 48 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 conf/test_minimal.config diff --git a/conf/test_minimal.config b/conf/test_minimal.config new file mode 100644 index 00000000..a16745b6 --- /dev/null +++ b/conf/test_minimal.config @@ -0,0 +1,48 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/eager -profile test_nothing,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'eager/testdata/Human/human_design_bam_eager3.tsv' + + // Genome references + fasta = params.pipelines_testdata_base_path + 'eager/reference/Human/hs37d5_chr21-MT.fa.gz' + + skip_preprocessing = true + skip_deduplication = true + skip_qualimap = true + skip_damagecalculation = true + mapstats_skip_preseq = true + + run_fastq_sharding = false + run_bamfiltering = false + run_bedtools_coverage = false + run_metagenomics = false + run_contamination_estimation_angsd = false + run_mtnucratio = false + run_mapdamage_rescaling = false + run_pmd_filtering = false + run_trim_bam = false +} From dd12a5b7aa3788fa4bbd92cf9ef17a66946eed2e Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Mon, 24 Mar 2025 14:50:47 +0100 Subject: [PATCH 12/24] adding genotyping source --- conf/test_humanpopgen.config | 2 ++ conf/test_microbial.config | 7 +++++++ conf/test_modern.config | 2 ++ conf/test_shortdna.config | 2 ++ 4 files changed, 13 insertions(+) diff --git a/conf/test_humanpopgen.config b/conf/test_humanpopgen.config index bd55ae97..9368bdc6 100644 --- a/conf/test_humanpopgen.config +++ b/conf/test_humanpopgen.config @@ -52,6 +52,8 @@ params { // Genotyping genotyping_tool = 'pileupcaller' + run_genotyping = true + genotyping_source = 'trimmed' genotyping_pileupcaller_bedfile = 
params.pipelines_testdata_base_path + 'eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' genotyping_pileupcaller_snpfile = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K_covered_in_JK2067_downsampled_s0.1.numeric_chromosomes.snp' diff --git a/conf/test_microbial.config b/conf/test_microbial.config index 269c4fc7..dd9eec06 100644 --- a/conf/test_microbial.config +++ b/conf/test_microbial.config @@ -28,6 +28,8 @@ params { // Genome references fasta_sheet = params.pipelines_testdata_base_path + 'eager/reference/reference_sheet_multiref.csv' + // Host Removal + run_host_removal = true // Preprocessing sequencing_qc_tool = 'falco' @@ -51,7 +53,12 @@ params { // Manipulate Damage run_mapdamage_rescaling = true + // Bedtools coverage + run_bedtools_coverage = true + // Genotyping + run_genotyping = true + genotyping_source = 'rescaled' genotyping_tool = 'freebayes' diff --git a/conf/test_modern.config b/conf/test_modern.config index beee31c3..3494098b 100644 --- a/conf/test_modern.config +++ b/conf/test_modern.config @@ -45,6 +45,8 @@ params { metagenomics_run_postprocessing = true // Genotyping + run_genotyping = true + genotyping_source = 'raw' genotyping_tool = 'hc' } diff --git a/conf/test_shortdna.config b/conf/test_shortdna.config index 672cb7f3..2a1af8de 100644 --- a/conf/test_shortdna.config +++ b/conf/test_shortdna.config @@ -45,6 +45,8 @@ params { run_pmd_filtering = true // Genotyping + run_genotyping = true + genotyping_source = 'pmd' genotyping_tool = 'angsd' From 2c0bb13a59fd26b548c2c812acf4b631400a33b4 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Mon, 24 Mar 2025 14:58:53 +0100 Subject: [PATCH 13/24] update nextflow.config --- nextflow.config | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index 1d565018..d63a9fb3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -386,14 +386,7 @@ profiles { } test { includeConfig 
'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } - test_nothing { includeConfig 'conf/test_nothing.config' } - test_humanbam { includeConfig 'conf/test_humanbam.config' } - test_multiref { includeConfig 'conf/test_multiref.config' } - test_kraken2 { includeConfig 'conf/test_kraken2.config' } - test_malt { includeConfig 'conf/test_malt.config' } - test_krakenuniq { includeConfig 'conf/test_krakenuniq.config' } - test_metaphlan { includeConfig 'conf/test_metaphlan.config' } - test_default { includeConfig 'conf/test_default.config' } + test_minimal { includeConfig 'conf/test_minimal.config' } test_modern { includeConfig 'conf/test_modern.config' } test_microbial { includeConfig 'conf/test_microbial.config' } test_shortdna { includeConfig 'conf/test_shortdna.config' } From 57809e50a6d1f358c9262497dd721287830f37a8 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" <jfy133@gmail.com> Date: Tue, 25 Mar 2025 08:04:23 +0100 Subject: [PATCH 14/24] Update conf/test.config --- conf/test.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index b9f6b7eb..e5d5c7eb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/eager -profile test_default,<docker/singularity> --outdir <OUTDIR> + nextflow run nf-core/eager -profile test,<docker/singularity> --outdir <OUTDIR> ---------------------------------------------------------------------------------------- */ From 636e5d550182daf567c04a407df8d9f40a254298 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" <jfy133@gmail.com> Date: Tue, 25 Mar 2025 08:05:23 +0100 Subject: [PATCH 15/24] Update conf/test.config --- conf/test.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index e5d5c7eb..81ec0bb9 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,7 +18,6 @@ process { ] } -// TO DO: Change name to test.config once migration is complete. params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' From b08e6ac5835643e36d24935690955d08063dc006 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" <jfy133@gmail.com> Date: Tue, 25 Mar 2025 08:45:22 +0000 Subject: [PATCH 16/24] Move kraken2 to humanpopgen --- conf/test_humanpopgen.config | 12 +++++++++--- conf/test_modern.config | 12 ++---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/conf/test_humanpopgen.config b/conf/test_humanpopgen.config index 9368bdc6..11605769 100644 --- a/conf/test_humanpopgen.config +++ b/conf/test_humanpopgen.config @@ -14,12 +14,12 @@ process { resourceLimits = [ cpus: 4, memory: '15.GB', - time: '1.h' + time: '1.h', ] // To avoid pipeline failure due to not having X reads and to not have overcrowded datasets in the test withName: ANGSD_CONTAMINATION { - errorStrategy = { task.exitStatus in [134] ? 'ignore' : 'finish' } + errorStrategy = { task.exitStatus in [134] ? 
'ignore' : 'finish' } } } @@ -35,7 +35,7 @@ params { // Mapping mapping_tool = 'bowtie2' - convert_inputbam = true + convert_inputbam = true // BAM filtering run_bamfiltering = true @@ -61,4 +61,10 @@ params { run_sexdeterrmine = true sexdeterrmine_bedfile = params.pipelines_testdata_base_path + 'eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' + // Metagenomics + run_metagenomics = true + metagenomics_complexity_tool = 'prinseq' + metagenomics_profiling_tool = 'kraken2' + metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/kraken/eager_test.tar.gz' + metagenomics_run_postprocessing = true } diff --git a/conf/test_modern.config b/conf/test_modern.config index 3494098b..f958dd69 100644 --- a/conf/test_modern.config +++ b/conf/test_modern.config @@ -14,7 +14,7 @@ process { resourceLimits = [ cpus: 4, memory: '15.GB', - time: '1.h' + time: '1.h', ] } @@ -32,21 +32,13 @@ params { // Preprocessing sequencing_qc_tool = 'falco' preprocessing_tool = 'fastp' - convert_inputbam = true + convert_inputbam = false // Mapping mapping_tool = 'bwamem' - // Metagenomics - run_metagenomics = true - metagenomics_complexity_tool = 'prinseq' - metagenomics_profiling_tool = 'kraken2' - metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/kraken/eager_test.tar.gz' - metagenomics_run_postprocessing = true - // Genotyping run_genotyping = true genotyping_source = 'raw' genotyping_tool = 'hc' - } From ccf180ca34b58acd99c3a75089b2334a4c488767 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Tue, 25 Mar 2025 10:26:12 +0100 Subject: [PATCH 17/24] removing fastp --- conf/test_microbial.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_microbial.config b/conf/test_microbial.config index dd9eec06..27fbb4a6 100644 --- a/conf/test_microbial.config +++ b/conf/test_microbial.config @@ -33,6 +33,7 @@ params { // Preprocessing sequencing_qc_tool = 'falco' + 
preprocessing_tool = 'fastp' preprocessing_excludeunmerged = true // Mapping From b1bba769dc52f7a8f3c3b595db9050eb34b479bd Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Tue, 25 Mar 2025 10:26:38 +0100 Subject: [PATCH 18/24] adding fastp and changing input and fasta --- conf/test_modern.config | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conf/test_modern.config b/conf/test_modern.config index f958dd69..84b6923e 100644 --- a/conf/test_modern.config +++ b/conf/test_modern.config @@ -23,15 +23,14 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = params.pipelines_testdata_base_path + 'eager/testdata/Human/human_design_bam_eager3.tsv' + input = params.pipelines_testdata_base_path + 'eager/testdata/Mammoth/mammooth_design_bam_v3.tsv' // Genome references - fasta = params.pipelines_testdata_base_path + 'eager/reference/Human/hs37d5_chr21-MT.fa.gz' + fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta' // Preprocessing sequencing_qc_tool = 'falco' - preprocessing_tool = 'fastp' convert_inputbam = false // Mapping From 52df92bb34c44ca490899113eafaeaf9c3a06b9a Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Tue, 25 Mar 2025 11:33:33 +0100 Subject: [PATCH 19/24] excluding fasta --- subworkflows/local/genotype.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/genotype.nf b/subworkflows/local/genotype.nf index a674bf85..fc5a723b 100644 --- a/subworkflows/local/genotype.nf +++ b/subworkflows/local/genotype.nf @@ -393,17 +393,17 @@ workflow GENOTYPE { addNewMetaFromAttributes( it, "id" , "reference" , false ) } // RESULT: [ [combination_meta], [ref_meta], fasta, fai, dict, dbsnp ] + // TO DO: Module fails if we don't give the fai but the fasta. However is not specified atm. Re-add fasta once this issue is solved. 
ch_input_for_angsd = ch_bams_for_multimap .combine( ch_fasta_for_multimap , by:0 ) .multiMap { ignore_me, meta, bam, bai, ref_meta, fasta, fai, dict, dbsnp -> bam: [ meta, bam ] - fasta: [ ref_meta, fasta ] } ANGSD_GL( ch_input_for_angsd.bam, - ch_input_for_angsd.fasta, + [[], []], // No fasta file [[], []], // No errors file ) ch_angsd_genotype_likelihoods = ANGSD_GL.out.genotype_likelihood From 22433a9a08bf9f5caa82fba76d050f01a703a7a0 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" <jfy133@gmail.com> Date: Tue, 25 Mar 2025 14:11:44 +0000 Subject: [PATCH 20/24] Deactivate rescaling, fix freebayes problem --- conf/modules.config | 825 +++++++++++------------- conf/test_microbial.config | 11 +- subworkflows/local/manipulate_damage.nf | 248 ++++--- workflows/eager.nf | 18 +- 4 files changed, 502 insertions(+), 600 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index daf87aee..c102e4b9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,15 +15,15 @@ process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } @@ -31,7 +31,7 @@ process { // CONVERT INPUT BAM // withName: SAMTOOLS_CONVERT_BAM_INPUT { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ enabled: false @@ -39,7 +39,7 @@ process { } withName: CAT_FASTQ_CONVERTED_BAM { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ enabled: false @@ -50,12 +50,11 @@ process { // READ PREPROCESSING // withName: FASTQC { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = '--quiet' + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // stats path: { "${params.outdir}/preprocessing/fastqc_raw/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, @@ -64,12 +63,11 @@ process { } withName: FASTQC_PROCESSED { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = '--quiet' + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // stats path: { "${params.outdir}/preprocessing/fastqc_processed/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, @@ -78,12 +76,11 @@ process { } withName: FALCO { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = '--quiet' + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // stats path: { "${params.outdir}/preprocessing/falco_raw/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, @@ -92,12 +89,11 @@ process { } withName: FALCO_PROCESSED { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = '--quiet' + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // stats path: { "${params.outdir}/preprocessing/falco_processed/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, @@ -106,71 +102,61 @@ process { } withName: FASTP_SINGLE { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = [ - // TRIMMING OPTIONS params.preprocessing_trim5p != 0 ? "--trim_front1 ${params.preprocessing_trim5p}" : "", params.preprocessing_trim3p != 0 ? "--trim_tail1 ${params.preprocessing_trim3p}" : "", - params.preprocessing_skipadaptertrim ? "--disable_adapter_trimming" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter1 ? "--adapter_sequence ${params.preprocessing_adapter1}" : "", // adding adapter list happens at module input channel level - // FILTERING OPTIONS + params.preprocessing_skipadaptertrim ? "--disable_adapter_trimming" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter1 ? 
"--adapter_sequence ${params.preprocessing_adapter1}" : "", "--length_required ${params.preprocessing_minlength}", - params.preprocessing_fastp_complexityfilter ? "--low_complexity_filter --complexity_threshold ${params.preprocessing_fastp_complexityfilter_threshold}" : "" + params.preprocessing_fastp_complexityfilter ? "--low_complexity_filter --complexity_threshold ${params.preprocessing_fastp_complexityfilter_threshold}" : "", ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // data path: { "${params.outdir}/preprocessing/fastp/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.preprocessing_savepreprocessedreads + enabled: params.preprocessing_savepreprocessedreads, ], [ - //stats path: { "${params.outdir}/preprocessing/fastp/stats/" }, mode: params.publish_dir_mode, - pattern: '*.{log,html,json}' - ] + pattern: '*.{log,html,json}', + ], ] } withName: FASTP_PAIRED { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = [ - // COLLAPSING OPTIONS - option to retain singletons params.preprocessing_excludeunmerged ? "" : "--include_unmerged", - // TRIMMING OPTIONS params.preprocessing_trim5p != 0 ? "--trim_front1 ${params.preprocessing_trim5p} --trim_front2 ${params.preprocessing_trim5p}" : "", params.preprocessing_trim3p != 0 ? "--trim_tail1 ${params.preprocessing_trim3p} --trim_tail2 ${params.preprocessing_trim3p}" : "", - params.preprocessing_skipadaptertrim ? "--disable_adapter_trimming" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter1 ? "--adapter_sequence ${params.preprocessing_adapter1}" : "", // adding adapter list happens at module input channel level - params.preprocessing_skipadaptertrim ? "" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter2 ? 
"--adapter_sequence_r2 ${params.preprocessing_adapter2}" : "", // adding adapter list happens at module input channel level - // FILTERING OPTIONS--disable_adapter_trimming + params.preprocessing_skipadaptertrim ? "--disable_adapter_trimming" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter1 ? "--adapter_sequence ${params.preprocessing_adapter1}" : "", + params.preprocessing_skipadaptertrim ? "" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter2 ? "--adapter_sequence_r2 ${params.preprocessing_adapter2}" : "", "--length_required ${params.preprocessing_minlength}", - params.preprocessing_fastp_complexityfilter ? "--low_complexity_filter --complexity_threshold ${params.preprocessing_fastp_complexityfilter_threshold}" : "" + params.preprocessing_fastp_complexityfilter ? "--low_complexity_filter --complexity_threshold ${params.preprocessing_fastp_complexityfilter_threshold}" : "", ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // data path: { "${params.outdir}/preprocessing/fastp/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.preprocessing_savepreprocessedreads + enabled: params.preprocessing_savepreprocessedreads, ], [ - // stats path: { "${params.outdir}/preprocessing/fastp/stats/" }, mode: params.publish_dir_mode, - pattern: '*.{log,html,json}' - ] + pattern: '*.{log,html,json}', + ], ] } withName: ADAPTERREMOVAL_SINGLE { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = [ - // trimming options - note: adding adapter list happens at module input channel level - params.preprocessing_skipadaptertrim ? "--adapter1 ''" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter1 ? "--adapter1 ${params.preprocessing_adapter1}" : "", // adding adapter list happens at module input channel level + params.preprocessing_skipadaptertrim ? 
"--adapter1 ''" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter1 ? "--adapter1 ${params.preprocessing_adapter1}" : "", params.preprocessing_skipadaptertrim ? "--adapter2 ''" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter2 ? "--adapter2 ${params.preprocessing_adapter2}" : "", "--minadapteroverlap ${params.preprocessing_adapterremoval_adapteroverlap}", params.preprocessing_adapterremoval_preserve5p ? "--preserve5p" : "", @@ -179,33 +165,28 @@ process { !params.preprocessing_adapterremoval_skipqualitytrimming ? "--trimqualities --minquality ${params.preprocessing_adapterremoval_trimbasequalitymin}" : "", !params.preprocessing_adapterremoval_skipntrimming ? "--trimns" : "", "--qualitymax ${params.preprocessing_adapterremoval_qualitymax}", - // filtering options - "--minlength ${params.preprocessing_minlength}" + "--minlength ${params.preprocessing_minlength}", ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // data path: { "${params.outdir}/preprocessing/adapterremoval/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.preprocessing_savepreprocessedreads + enabled: params.preprocessing_savepreprocessedreads, ], [ - // stats path: { "${params.outdir}/preprocessing/adapterremoval/stats/" }, mode: params.publish_dir_mode, - pattern: '*.settings' - ] + pattern: '*.settings', + ], ] } withName: ADAPTERREMOVAL_PAIRED { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = [ - // COLLAPSING OPTIONS params.preprocessing_skippairmerging ? "" : "--collapse", - // TRIMMING OPTIONS - note: adding adapter list happens at module input channel level params.preprocessing_skipadaptertrim ? "--adapter1 ''" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter1 ? 
"--adapter1 ${params.preprocessing_adapter1}" : "", params.preprocessing_skipadaptertrim ? "--adapter2 ''" : params.preprocessing_adapterlist ? "" : params.preprocessing_adapter2 ? "--adapter2 ${params.preprocessing_adapter2}" : "", "--minadapteroverlap ${params.preprocessing_adapterremoval_adapteroverlap}", @@ -215,39 +196,35 @@ process { !params.preprocessing_adapterremoval_skipqualitytrimming ? "--trimqualities --minquality ${params.preprocessing_adapterremoval_trimbasequalitymin}" : "", !params.preprocessing_adapterremoval_skipntrimming ? "--trimns" : "", "--qualitymax ${params.preprocessing_adapterremoval_qualitymax}", - // FILTERING OPTIONS - "--minlength ${params.preprocessing_minlength}" + "--minlength ${params.preprocessing_minlength}", ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - // data path: { "${params.outdir}/preprocessing/adapterremoval/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.preprocessing_savepreprocessedreads + enabled: params.preprocessing_savepreprocessedreads, ], [ - // stats path: { "${params.outdir}/preprocessing/adapterremoval/stats/" }, mode: params.publish_dir_mode, - pattern: '*.settings' - ] + pattern: '*.settings', + ], ] } withName: CAT_FASTQ_ADAPTERREMOVAL { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ - [ - // data - path: { "${params.outdir}/preprocessing/adapterremoval/data/" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.preprocessing_savepreprocessedreads - ] + [ + path: { "${params.outdir}/preprocessing/adapterremoval/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.preprocessing_savepreprocessedreads, ] + ] } @@ -257,11 +234,10 @@ process 
{ withName: GUNZIP_FASTA { publishDir = [ [ - // data path: { "${params.outdir}/references/fasta/data/" }, mode: params.publish_dir_mode, pattern: '*.{fasta,fna,fas,fa}', - enabled: params.save_reference + enabled: params.save_reference, ] ] } @@ -275,11 +251,10 @@ process { withName: SAMTOOLS_FAIDX { publishDir = [ [ - // data path: { "${params.outdir}/references/fasta/data/" }, mode: params.publish_dir_mode, pattern: '*.fai', - enabled: params.save_reference + enabled: params.save_reference, ] ] } @@ -287,11 +262,10 @@ process { withName: PICARD_CREATESEQUENCEDICTIONARY { publishDir = [ [ - // data path: { "${params.outdir}/references/fasta/data/" }, mode: params.publish_dir_mode, pattern: '*.dict', - enabled: params.save_reference + enabled: params.save_reference, ] ] } @@ -299,11 +273,10 @@ process { withName: BOWTIE2_BUILD { publishDir = [ [ - // data path: { "${params.outdir}/references/bowtie2/data/" }, mode: params.publish_dir_mode, pattern: 'bowtie2', - enabled: params.save_reference + enabled: params.save_reference, ] ] } @@ -311,11 +284,10 @@ process { withName: BWA_INDEX { publishDir = [ [ - // data path: { "${params.outdir}/references/bwa/data/" }, mode: params.publish_dir_mode, pattern: 'bwa', - enabled: params.save_reference + enabled: params.save_reference, ] ] } @@ -324,24 +296,22 @@ process { withName: GUNZIP_ELONGATED_FASTA { publishDir = [ [ - // data path: { "${params.outdir}/references/fasta/data/" }, mode: params.publish_dir_mode, pattern: '*_*[0-9].{fasta,fna,fas,fa}', - enabled: params.save_reference + enabled: params.save_reference, ] ] } withName: CIRCULARMAPPER_CIRCULARGENERATOR { - tag = { "${meta.id}_${params.fasta_circularmapper_elongationfactor}" } + tag = { "${meta.id}_${params.fasta_circularmapper_elongationfactor}" } publishDir = [ [ - // data path: { "${params.outdir}/references/fasta/data/" }, mode: params.publish_dir_mode, pattern: '*_*[0-9].fasta', - enabled: params.save_reference + enabled: params.save_reference, ] ] } @@ 
-349,11 +319,10 @@ process { withName: BWA_INDEX_CIRCULARISED { publishDir = [ [ - // data path: { "${params.outdir}/references/bwa/data/" }, mode: params.publish_dir_mode, pattern: 'bwa', - enabled: params.save_reference + enabled: params.save_reference, ] ] } @@ -361,24 +330,20 @@ process { // // BAM INPUT // - withName: 'SAMTOOLS_FLAGSTATS_BAM_INPUT' { - // NOTE This step becomes obsolete once a lane-merging step is added for input BAMs. - // TODO Once a lane-merging step is added for input BAMs, the lane should be dropped from this tag. - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + withName: SAMTOOLS_FLAGSTATS_BAM_INPUT { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.lane}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/mapping/bam_input/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] ] } withName: SAMTOOLS_INDEX_BAM_INPUT { - // NOTE This step becomes obsolete once a lane-merging step is added for input BAMs. 
- tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ enabled: false ] @@ -388,105 +353,98 @@ process { // BAM FILTERING // withName: FILTER_BAM_FRAGMENT_LENGTH { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = "-l ${params.bamfiltering_minreadlength}" + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = "-l ${params.bamfiltering_minreadlength}" ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/read_filtering/filtered_bams/data/" }, mode: params.publish_dir_mode, pattern: '*.filtered.bam', - enabled: params.bamfiltering_savefilteredbams + enabled: params.bamfiltering_savefilteredbams, ] ] } withName: SAMTOOLS_VIEW_BAM_FILTERING { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ "-q ${params.bamfiltering_mappingquality}", params.bamfiltering_retainunmappedgenomicbam ? '' : "-F ${params.bamfiltering_genomicbamfilterflag}", ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" } publishDir = [ [ - // data path: { "${params.outdir}/read_filtering/filtered_bams/data/" }, mode: params.publish_dir_mode, pattern: '*.bam', - enabled: params.bamfiltering_savefilteredbams + enabled: params.bamfiltering_savefilteredbams, ] ] } withName: 'SAMTOOLS_LENGTH_FILTER_INDEX|SAMTOOLS_FILTER_INDEX' { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" } publishDir = [ [ - // data path: { "${params.outdir}/read_filtering/filtered_bams/data/" }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}', - enabled: params.bamfiltering_savefilteredbams + enabled: params.bamfiltering_savefilteredbams, ] ] } withName: SAMTOOLS_FLAGSTAT_FILTERED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" } publishDir = [ [ - // stats path: { "${params.outdir}/read_filtering/filtered_bams/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] ] } withName: SAMTOOLS_FASTQ_SAVEBAMFILTERINGREADS { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_bamfiltering_fastq" } publishDir = [ [ - // data path: { "${params.outdir}/read_filtering/fastq/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.bamfiltering_generatefastq + enabled: params.bamfiltering_generatefastq, ] ] } withName: SAMTOOLS_FASTQ_METAGENOMICS { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ - params.metagenomics_input == 'mapped' ? '-F 4': '', - params.metagenomics_input == 'unmapped' ? '-f 4': '', - // 'all' is left then with NO -F or -f flag, therefore all reads get sent to fastq + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ + params.metagenomics_input == 'mapped' ? '-F 4' : '', + params.metagenomics_input == 'unmapped' ? 
'-f 4' : '', ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_metagenomics_fastq_${params.metagenomics_input}" } publishDir = [ [ - // data path: { "${params.outdir}/read_filtering/fastq/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.metagenomics_input_savefastq + enabled: params.metagenomics_input_savefastq, ] ] } - withName: 'CAT_FASTQ_METAGENOMICS' { - tag = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + withName: CAT_FASTQ_METAGENOMICS { + tag = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - enabled: false // NO publishing of concatenated fastq files for metagenomics, only outputs from SAMTOOLS_FASTQ_METAGENOMICS + enabled: false ] ] } @@ -495,9 +453,9 @@ process { // SHARDING FASTQS // withName: SEQKIT_SPLIT2 { - tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.prefix = "out" - ext.args = "-s ${params.fastq_shard_size}" + ext.args = "-s ${params.fastq_shard_size}" publishDir = [ enabled: false ] @@ -507,8 +465,8 @@ process { // READ MAPPING // withName: BWA_ALN { - tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" } + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.id_index}" } publishDir = [ enabled: false @@ -516,21 +474,22 @@ process { } withName: 'BWA_SAMSE|BWA_SAMPE' { - tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { + tag = { 
"${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { se_pe_string = meta.single_end ? "SE" : "PE" [ "-r '@RG\\tID:ILLUMINA-${meta.sample_id}_${meta.library_id}\\tSM:${meta.sample_id}\\tLB:${meta.library_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}_stranded-${se_pe_string}'" - ].join(' ').trim() } + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.id_index}" } publishDir = [ enabled: false ] } - withName: ".*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX" { - tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + withName: '.*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX' { + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false @@ -538,14 +497,15 @@ process { } withName: BWA_MEM { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { se_pe_string = meta.single_end ? 
"SE" : "PE" [ "-k ${params.mapping_bwamem_k}", "-r ${params.mapping_bwamem_r}", - "-R '@RG\\tID:ILLUMINA-${meta.sample_id}_${meta.library_id}\\tSM:${meta.sample_id}\\tLB:${meta.library_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}_stranded-${se_pe_string}'" - ].join(' ').trim() } + "-R '@RG\\tID:ILLUMINA-${meta.sample_id}_${meta.library_id}\\tSM:${meta.sample_id}\\tLB:${meta.library_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}_stranded-${se_pe_string}'", + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false @@ -553,8 +513,8 @@ process { } withName: BOWTIE2_ALIGN { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { se_pe_string = meta.single_end ? "SE" : "PE" [ "-N ${params.mapping_bowtie2_n}", @@ -567,8 +527,9 @@ process { "--rg LB:${meta.library_id}", "--rg PL:illumina", "--rg PU:ILLUMINA-${meta.sample_id}_${meta.library_id}-${meta.strandedness}_stranded-${se_pe_string}", - "${params.mapping_bowtie2_alignmode}" == 'local' ? "--local --${params.mapping_bowtie2_sensitivity}-local" : ( "${params.mapping_bowtie2_alignmode}" == 'end-to-end' ? "--end-to-end --${params.mapping_bowtie2_sensitivity}" : "" ) - ].join(' ').trim()} + "${params.mapping_bowtie2_alignmode}" == 'local' ? "--local --${params.mapping_bowtie2_sensitivity}-local" : ("${params.mapping_bowtie2_alignmode}" == 'end-to-end' ? 
"--end-to-end --${params.mapping_bowtie2_sensitivity}" : ""), + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false @@ -576,8 +537,8 @@ process { } withName: 'SAMTOOLS_INDEX_MEM|SAMTOOLS_INDEX_BT2' { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false @@ -585,50 +546,47 @@ process { } withName: SAMTOOLS_MERGE_LANES { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ enabled: false ] - ext.args = { params.run_fastq_sharding ? "-c -p" : "" } + ext.args = { params.run_fastq_sharding ? "-c -p" : "" } } withName: SAMTOOLS_SORT_MERGED_LANES { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ [ - // data path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bam}' + pattern: '*.{bam}', ] ] } withName: SAMTOOLS_INDEX_MERGED_LANES { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.{bai,csi}', ] ] } withName: SAMTOOLS_FLAGSTAT_MERGED_LANES { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ [ - // stats path: { "${params.outdir}/mapping/${params.mapping_tool}/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] ] } @@ -636,26 +594,26 @@ process { // Circular mapping // Configuration for BWA_ALN and BWA_SAMSE/SAMPE is the same as for the non-circular mapping withName: CIRCULARMAPPER_REALIGNSAMFILE { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { params.mapping_circularmapper_circularfilter ? "-f true -x true" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.mapping_circularmapper_circularfilter ? "-f true -x true" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false ] } - withName: ".*MAP:CIRCULARMAPPER:FASTQ_ALIGN_BWAALN_ELONGATED:SAMTOOLS_INDEX" { - tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + withName: '.*MAP:CIRCULARMAPPER:FASTQ_ALIGN_BWAALN_ELONGATED:SAMTOOLS_INDEX' { + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false ] } - withName: ".*MAP:CIRCULARMAPPER:SAMTOOLS_INDEX_REALIGNED" { - tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + withName: '.*MAP:CIRCULARMAPPER:SAMTOOLS_INDEX_REALIGNED' { + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ enabled: false @@ -666,11 +624,11 @@ process { // DEDUPLICATION // withName: PICARD_MARKDUPLICATES { - tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ + tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ "--REMOVE_DUPLICATES", "--VALIDATION_STRINGENCY SILENT", - "--ASSUME_SORTED" + "--ASSUME_SORTED", ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_${meta.genomic_region}_MarkDuplicates" } publishDir = [ @@ -679,8 +637,8 @@ process { } withName: DEDUP { - tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } - ext.args = "--merged" + tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } + ext.args = "--merged" ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_${meta.genomic_region}_DeDup" } publishDir = [ enabled: false @@ -688,31 +646,30 @@ process { } withName: SAMTOOLS_MERGE_DEDUPPED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ enabled: false ] } - withName: ".*DEDUPLICATE:BUILD_INTERVALS" { + withName: 
'.*DEDUPLICATE:BUILD_INTERVALS' { publishDir = [ enabled: false ] } // Overwrite default SWF tag and prefix - withName: ".*BAM_SPLIT_BY_REGION:SAMTOOLS_VIEW" { - tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } + withName: '.*BAM_SPLIT_BY_REGION:SAMTOOLS_VIEW' { + tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_${meta.genomic_region}_dedupped" } publishDir = [ enabled: false ] } - withName: ".*BAM_SPLIT_BY_REGION:SAMTOOLS_INDEX" { - // The BAM_SPLIT_BY_REGION SWF only works with bais, so `params.fasta_largeref` should not be passed to it. - tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } + withName: '.*BAM_SPLIT_BY_REGION:SAMTOOLS_INDEX' { + tag = { "${meta.reference}:${meta.genomic_region}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_${meta.genomic_region}_dedupped" } publishDir = [ enabled: false @@ -720,41 +677,38 @@ process { } withName: SAMTOOLS_SORT_DEDUPPED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_dedupped" } publishDir = [ [ - // data path: { "${params.outdir}/deduplication/${params.deduplication_tool}/data/" }, mode: params.publish_dir_mode, - pattern: '*.bam' + pattern: '*.bam', ] ] } withName: SAMTOOLS_INDEX_DEDUPPED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_dedupped" } publishDir = [ [ - // data path: { "${params.outdir}/deduplication/${params.deduplication_tool}/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.{bai,csi}', ] ] } withName: SAMTOOLS_FLAGSTAT_DEDUPPED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_dedupped" } publishDir = [ [ - // stats path: { "${params.outdir}/deduplication/${params.deduplication_tool}/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] ] } @@ -763,17 +717,18 @@ process { // HOST REMOVAL // withName: HOST_REMOVAL { - tag = { "${meta.reference}|${meta_fastqs.sample_id}_${meta_fastqs.library_id}_L${meta_fastqs.lane}" } - ext.args = {[ - "-m ${params.host_removal_mode}", - "${meta_fastqs.single_end}" == false && "${params.preprocessing_skippairmerging}" == false ? "-merged" : "" - ].join(' ').trim()} + tag = { "${meta.reference}|${meta_fastqs.sample_id}_${meta_fastqs.library_id}_L${meta_fastqs.lane}" } + ext.args = { + [ + "-m ${params.host_removal_mode}", + "${meta_fastqs.single_end}" == false && "${params.preprocessing_skippairmerging}" == false ? 
"-merged" : "", + ].join(' ').trim() + } publishDir = [ [ - // data path: { "${params.outdir}/read_filtering/host_removal/data/" }, mode: params.publish_dir_mode, - pattern: '*.fq.gz' + pattern: '*.fq.gz', ] ] } @@ -782,25 +737,23 @@ process { // AUTHENTICATION // withName: ENDORSPY { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/endorspy/stats/" }, mode: params.publish_dir_mode, - pattern: '*.json' + pattern: '*.json', ] ] } withName: PRESEQ_CCURVE { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = "-B -s ${params.mapstats_preseq_stepsize}" + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = "-B -s ${params.mapstats_preseq_stepsize}" ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/preseq/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, @@ -809,7 +762,7 @@ process { } withName: PRESEQ_LCEXTRAP { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.args = [ "-B", "-s ${params.mapstats_preseq_stepsize}", @@ -817,12 +770,11 @@ process { "-x ${params.mapstats_preseq_terms}", "-n ${params.mapstats_preseq_bootstrap}", "-c ${params.mapstats_preseq_cval}", - params.mapstats_preseq_defects_mode ? '-D' : '' + params.mapstats_preseq_defects_mode ? '-D' : '', ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/preseq/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, @@ -831,19 +783,18 @@ process { } withName: SAMTOOLS_VIEW_GENOME { - tag = { "${meta.reference}|${meta.sample_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } publishDir = [ enabled: false ] } withName: BEDTOOLS_COVERAGE_DEPTH { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = '-mean -nonamecheck' + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = '-mean -nonamecheck' ext.prefix = { "${meta.sample_id}_${meta.reference}_depth" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/bedtools/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, @@ -852,12 +803,11 @@ process { } withName: BEDTOOLS_COVERAGE_BREADTH { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = '-nonamecheck' + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = '-nonamecheck' ext.prefix = { "${meta.sample_id}_${meta.reference}_breadth" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/bedtools/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, @@ -867,12 +817,12 @@ process { withName: ANGSD_DOCOUNTS { ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ "-iCounts 1", "-r ${params.contamination_estimation_angsd_chrom_name}:${params.contamination_estimation_angsd_range_from}-${params.contamination_estimation_angsd_range_to}", "-minMapQ ${params.contamination_estimation_angsd_mapq}", - "-minQ ${params.contamination_estimation_angsd_minq}" + "-minQ ${params.contamination_estimation_angsd_minq}", ].join(' ').trim() publishDir = [ enabled: false @@ -880,14 +830,13 @@ process { } withName: ANGSD_CONTAMINATION { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // stats - path: { "${params.outdir}/authentication/angsd_nuclear_contamination/stats/"}, + path: { "${params.outdir}/authentication/angsd_nuclear_contamination/stats/" }, mode: params.publish_dir_mode, - pattern: '*.txt' + pattern: '*.txt', ] ] } @@ -895,26 +844,24 @@ process { withName: PRINT_CONTAMINATION_ANGSD { publishDir = [ [ - // stats - path: { "${params.outdir}/authentication/angsd_nuclear_contamination/stats/"}, + path: { "${params.outdir}/authentication/angsd_nuclear_contamination/stats/" }, mode: params.publish_dir_mode, - pattern: 'nuclear_contamination*' + pattern: 'nuclear_contamination*', ] ] } withName: MTNUCRATIO { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } publishDir = [ enabled: false ] } withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' { - tag = { "${meta.reference}|${meta.sample_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } publishDir = [ [ 
- // stats path: { "${params.outdir}/authentication/qualimap/stats/${meta.reference}/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, @@ -923,19 +870,19 @@ process { } withName: BBMAP_BBDUK { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { "entropymask=f entropy=${params.metagenomics_complexity_entropy}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { "entropymask=f entropy=${params.metagenomics_complexity_entropy}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_complexity" } publishDir = [ path: { "${params.outdir}/metagenomics/complexity_filter/bbduk/" }, mode: params.publish_dir_mode, pattern: '*.{fastq.gz,log}', - enabled: params.metagenomics_complexity_savefastq + enabled: params.metagenomics_complexity_savefastq, ] } withName: MALT_RUN { - ext.args = [ + ext.args = [ "-m ${params.metagenomics_malt_mode}", "-at ${params.metagenomics_malt_alignmentmode}", "-top ${params.metagenomics_malt_toppercent}", @@ -943,12 +890,12 @@ process { "-mq ${params.metagenomics_malt_maxqueries}", "--memoryMode ${params.metagenomics_malt_memorymode}", params.metagenomics_malt_minsupportmode == "percent" ? "-supp ${params.metagenomics_malt_minsupportpercent}" : "-sup ${params.metagenomics_malt_minsupportreads}", - params.metagenomics_malt_savereads ? "--alignments ./" : "" + params.metagenomics_malt_savereads ? 
"--alignments ./" : "", ].join(' ').trim() publishDir = [ path: { "${params.outdir}/metagenomics/profiling/malt/" }, mode: params.publish_dir_mode, - pattern: '*.{rma6,log,sam.gz}' + pattern: '*.{rma6,log,sam.gz}', ] ext.prefix = { "${meta.label}_${meta.id}-run" } } @@ -958,29 +905,29 @@ process { publishDir = [ path: { "${params.outdir}/metagenomics/profiling/malt/" }, mode: params.publish_dir_mode, - pattern: '*.{log}' + pattern: '*.{log}', ] } withName: KRAKEN2_KRAKEN2 { - tag = { "${meta.sample_id}|single_end_mode_${meta.single_end}" } - ext.args = [ + tag = { "${meta.sample_id}|single_end_mode_${meta.single_end}" } + ext.args = [ params.metagenomics_kraken2_saveminimizers ? "--report-minimizer-data" : "" ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ path: { "${params.outdir}/metagenomics/profiling/kraken2/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,fastq.gz}' + pattern: '*.{txt,fastq.gz}', ] } withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ { - tag = { "single_end_mode_${meta.single_end}" } + tag = { "single_end_mode_${meta.single_end}" } publishDir = [ path: { "${params.outdir}/metagenomics/profiling/krakenuniq/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,fastq.gz}' + pattern: '*.{txt,fastq.gz}', ] ext.prefix = { "${meta.single_end}" } } @@ -989,13 +936,13 @@ process { publishDir = [ path: { "${params.outdir}/metagenomics/profiling/metaphlan/" }, mode: params.publish_dir_mode, - pattern: '*.{biom,txt}' + pattern: '*.{biom,txt}', ] ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } } withName: MALTEXTRACT { - ext.args = [ + ext.args = [ "-f ${params.metagenomics_maltextract_filter}", "-a ${params.metagenomics_maltextract_toppercent}", "--minPI ${params.metagenomics_maltextract_minpercentidentity}", @@ -1004,53 +951,53 @@ process { params.metagenomics_maltextract_duplicateremovaloff ? "--dupRemOff" : "", params.metagenomics_maltextract_matches ? 
"--matches" : "", params.metagenomics_maltextract_megansummary ? "--meganSummary" : "", - params.metagenomics_maltextract_usetopalignment ? "--useTopAlignment" : "", + params.metagenomics_maltextract_usetopalignment ? "--useTopAlignment" : "", { meta.strandedness } == "single" ? '--singleStranded' : '', ].join(' ').trim() publishDir = [ path: { "${params.outdir}/metagenomics/postprocessing/maltextract/" }, mode: params.publish_dir_mode, pattern: 'results', - saveAs: { "${meta.id}" } + saveAs: { "${meta.id}" }, ] } withName: MEGAN_RMA2INFO { - tag = {"${meta.id}"} - ext.args = "-c2c Taxonomy" + tag = { "${meta.id}" } + ext.args = "-c2c Taxonomy" ext.prefix = { "${meta.id}" } publishDir = [ path: { "${params.outdir}/metagenomics/postprocessing/megan_summaries/" }, mode: params.publish_dir_mode, - pattern: '*.{txt.gz,megan}' + pattern: '*.{txt.gz,megan}', ] } withName: AMPS { - publishDir = [ + publishDir = [ path: { "${params.outdir}/metagenomics/postprocessing/maltextract/" }, mode: params.publish_dir_mode, - pattern: 'results' + pattern: 'results', ] - errorStrategy = 'ignore' // required as it fails the run for low reads: https://github.com/rhuebler/HOPS/issues/9 + errorStrategy = 'ignore' } withName: TAXPASTA_MERGE { publishDir = [ path: { "${params.outdir}/metagenomics/postprocessing/taxpasta/" }, mode: params.publish_dir_mode, - pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' + pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}', ] - ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" } + ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" } } withName: TAXPASTA_STANDARDISE { publishDir = [ path: { "${params.outdir}/metagenomics/postprocessing/taxpasta/" }, mode: params.publish_dir_mode, - pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' + pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}', ] - ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}taxpasta_table.tsv" } + 
ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}taxpasta_table.tsv" } } // @@ -1058,7 +1005,7 @@ process { // withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' { - tag = { "${meta.reference}|${meta.sample_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } publishDir = [ path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/" }, mode: params.publish_dir_mode, @@ -1070,16 +1017,15 @@ process { // DAMAGE CALCULATION // withName: DAMAGEPROFILER { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.args = [ "-l ${params.damagecalculation_damageprofiler_length}", "-t ${params.damagecalculation_xaxis}", - "-yaxis_dp_max ${params.damagecalculation_yaxis}" + "-yaxis_dp_max ${params.damagecalculation_yaxis}", ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/damageprofiler/stats/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, @@ -1088,40 +1034,40 @@ process { } withName: CALCULATE_MAPDAMAGE2 { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { [ - "--no-stats", - "-y ${params.damagecalculation_yaxis}", - params.damagecalculation_mapdamage_downsample != 0 ? "-n ${params.damagecalculation_mapdamage_downsample} --downsample-seed=1" : "", - { meta.strandedness } == "single" ? '--single-stranded' : '', - "-m ${params.damagecalculation_xaxis}" - ].join(' ').trim() } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { + [ + "--no-stats", + "-y ${params.damagecalculation_yaxis}", + params.damagecalculation_mapdamage_downsample != 0 ? "-n ${params.damagecalculation_mapdamage_downsample} --downsample-seed=1" : "", + { meta.strandedness } == "single" ? 
'--single-stranded' : '', + "-m ${params.damagecalculation_xaxis}", + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/mapdamage2/stats/" }, mode: params.publish_dir_mode, - pattern: 'results_*/*' + pattern: 'results_*/*', ] ] } withName: SAMTOOLS_DEPTH_SEXDETERRMINE { - tag = { "${meta1.reference}|${meta1.sample_id}" } + tag = { "${meta1.reference}|${meta1.sample_id}" } ext.prefix = { "${meta2.id}_samtoolsdepth" } - ext.args = '-aa -q30 -Q30 -H' + ext.args = '-aa -q30 -Q30 -H' publishDir = [ enabled: false ] } withName: SEXDETERRMINE { - tag = { "${meta.reference}|${meta.sample_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.reference}_sexdeterrmine" } publishDir = [ [ - // stats path: { "${params.outdir}/authentication/sexdeterrmine/stats/" }, mode: params.publish_dir_mode, pattern: '*{_sexdeterrmine}*', @@ -1136,125 +1082,119 @@ process { ext.prefix = { "${meta.id}.masked" } publishDir = [ [ - // data path: { "${params.outdir}/references/masked_reference/data/" }, mode: params.publish_dir_mode, - pattern: '*.masked.fa' + pattern: '*.masked.fa', ] ] } withName: MAPDAMAGE2 { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { [ - "--rescale", - // "--rescale-out=${task.ext.prefix}.bam", // This doesn't work, because the output is expected to be in the mapdamage results dir. - "--seq-length=${params.damage_manipulation_rescale_seqlength}", - params.damage_manipulation_rescale_length_3p != 0 ? "--rescale-length-3p=${params.damage_manipulation_rescale_length_3p}" : "", - params.damage_manipulation_rescale_length_5p != 0 ? "--rescale-length-5p=${params.damage_manipulation_rescale_length_5p}" : "", - { meta.strandedness } == "single" ? 
'--single-stranded' : '' - ].join(' ').trim() } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { + [ + "--rescale", + "--seq-length=${params.damage_manipulation_rescale_seqlength}", + params.damage_manipulation_rescale_length_3p != 0 ? "--rescale-length-3p=${params.damage_manipulation_rescale_length_3p}" : "", + params.damage_manipulation_rescale_length_5p != 0 ? "--rescale-length-5p=${params.damage_manipulation_rescale_length_5p}" : "", + { meta.strandedness } == "single" ? '--single-stranded' : '', + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled" } publishDir = [ [ - // data path: { "${params.outdir}/damage_manipulation/mapdamage2/data/" }, mode: params.publish_dir_mode, pattern: 'result*/*.rescaled.bam', - saveAs: { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam" } // Needed to save the bam directly in the output dir instead of within the mapdamage results dir. + saveAs: { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam" }, ], [ - // stats path: { "${params.outdir}/damage_manipulation/mapdamage2/stats/" }, mode: params.publish_dir_mode, - pattern: 'results_*/Stats_out_MCMC_*' - ] + pattern: 'results_*/Stats_out_MCMC_*', + ], ] } withName: SAMTOOLS_INDEX_DAMAGE_RESCALED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? "-c" : "" } publishDir = [ [ - // stats path: { "${params.outdir}/damage_manipulation/mapdamage2/data/" }, mode: params.publish_dir_mode, pattern: '*.{bai,csi}', - saveAs: { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam.bai" } // ext.prefix cannot be used here, so rename for publishing instead. 
+ saveAs: { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam.bai" }, ] ] } withName: PMDTOOLS_FILTER { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args2 = {[ - "${meta.damage_treatment}" == 'none' ? '--UDGminus' : "${meta.damage_treatment}" == 'half' ? '--UDGhalf' : '--UDGplus' , - "${meta.strandedness}" == 'single' ? '--ss' : '' - ].join(' ').trim()} - ext.args3 = { "-h" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args2 = { + [ + "${meta.damage_treatment}" == 'none' ? '--UDGminus' : "${meta.damage_treatment}" == 'half' ? '--UDGhalf' : '--UDGplus', + "${meta.strandedness}" == 'single' ? '--ss' : '', + ].join(' ').trim() + } + ext.args3 = { "-h" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered" } publishDir = [ [ - // data path: { "${params.outdir}/damage_manipulation/pmdtools/data/" }, mode: params.publish_dir_mode, - pattern: '*_pmdfiltered.bam' + pattern: '*_pmdfiltered.bam', ] ] } withName: SAMTOOLS_INDEX_DAMAGE_FILTERED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } publishDir = [ [ - // data path: { "${params.outdir}/damage_manipulation/pmdtools/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.{bai,csi}', ] ] } withName: SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered" } publishDir = [ [ - // stats path: { "${params.outdir}/damage_manipulation/pmdtools/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] ] } withName: BAMUTIL_TRIMBAM { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ params.damage_manipulation_bamutils_softclip ? '-c' : '' ].join(' ').trim() ext.prefix = { params.run_pmd_filtering ? "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered_trimmed" : "${meta.sample_id}_${meta.library_id}_${meta.reference}_trimmed" } publishDir = [ [ - // data path: { "${params.outdir}/damage_manipulation/bamutils_trimbam/data/" }, mode: params.publish_dir_mode, - pattern: '*_trimmed.bam' + pattern: '*_trimmed.bam', ] ] } withName: SAMTOOLS_INDEX_DAMAGE_TRIMMED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? "-c" : "" } publishDir = [ [ - // data path: { "${params.outdir}/damage_manipulation/bamutils_trimbam/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.{bai,csi}', ] ] } @@ -1263,55 +1203,50 @@ process { // METAGENOMIC SCREENING // withName: PRINSEQPLUSPLUS { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ - params.metagenomics_prinseq_mode == 'dust' ? 
"-lc_dust=${params.metagenomics_prinseq_dustscore}" : "-lc_entropy=${params.metagenomics_complexity_entropy}", - "-trim_qual_left=0 -trim_qual_left=0 -trim_qual_window=0 -trim_qual_step=0", - ].join(' ').trim() + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ + params.metagenomics_prinseq_mode == 'dust' ? "-lc_dust=${params.metagenomics_prinseq_dustscore}" : "-lc_entropy=${params.metagenomics_complexity_entropy}", + "-trim_qual_left=0 -trim_qual_left=0 -trim_qual_window=0 -trim_qual_step=0", + ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_complexity" } publishDir = [ [ - // data path: { "${params.outdir}/metagenomics/prinseq/data/" }, mode: params.publish_dir_mode, pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz}', - enabled: params.metagenomics_complexity_savefastq + enabled: params.metagenomics_complexity_savefastq, ], [ - // stats path: { "${params.outdir}/metagenomics/prinseq/stats/" }, mode: params.publish_dir_mode, pattern: '*log', - enabled: params.metagenomics_complexity_savefastq - ] + enabled: params.metagenomics_complexity_savefastq, + ], ] } withName: BBMAP_BBDUK { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { "entropymask=f entropy=${params.metagenomics_complexity_entropy}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { "entropymask=f entropy=${params.metagenomics_complexity_entropy}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_complexity" } publishDir = [ [ - // data path: { "${params.outdir}/metagenomics/bbduk/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', - enabled: params.metagenomics_complexity_savefastq + enabled: params.metagenomics_complexity_savefastq, ], [ - // stats path: { "${params.outdir}/metagenomics/bbduk/stats/" }, mode: params.publish_dir_mode, pattern: '*.log', - enabled: 
params.metagenomics_complexity_savefastq - ] + enabled: params.metagenomics_complexity_savefastq, + ], ] } withName: MALT_RUN { - // TODO needs a tag - ext.args = [ + ext.args = [ "-m ${params.metagenomics_malt_mode}", "-at ${params.metagenomics_malt_alignmentmode}", "-top ${params.metagenomics_malt_toppercent}", @@ -1319,34 +1254,30 @@ process { "-mq ${params.metagenomics_malt_maxqueries}", "--memoryMode ${params.metagenomics_malt_memorymode}", params.metagenomics_malt_minsupportmode == "percent" ? "-supp ${params.metagenomics_malt_minsupportpercent}" : "-sup ${params.metagenomics_malt_minsupportreads}", - params.metagenomics_malt_savereads ? "--alignments ./" : "" + params.metagenomics_malt_savereads ? "--alignments ./" : "", ].join(' ').trim() ext.prefix = { "${meta.label}_${meta.id}-run" } publishDir = [ [ - // data path: { "${params.outdir}/metagenomics/malt/data/" }, mode: params.publish_dir_mode, - pattern: '*.{rma6,sam.gz}' + pattern: '*.{rma6,sam.gz}', ], [ - // stats path: { "${params.outdir}/metagenomics/malt/stats/" }, mode: params.publish_dir_mode, - pattern: '*.log' - ] + pattern: '*.log', + ], ] } withName: CAT_CAT_MALT { - // TODO needs a tag ext.prefix = { "${meta.id}_runtime_log_concatenated.log" } publishDir = [ [ - // stats path: { "${params.outdir}/metagenomics/malt/stats/" }, mode: params.publish_dir_mode, - pattern: '*.log' + pattern: '*.log', ] ] } @@ -1358,40 +1289,36 @@ process { } withName: KRAKEN2_KRAKEN2 { - ext.args = [ + ext.args = [ params.metagenomics_kraken2_saveminimizers ? 
"--report-minimizer-data" : "" ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/metagenomics/kraken2/data/" }, mode: params.publish_dir_mode, - pattern: '*.fastq.gz' + pattern: '*.fastq.gz', ], [ - // stats path: { "${params.outdir}/metagenomics/kraken2/stats/" }, mode: params.publish_dir_mode, - pattern: '*.txt' - ] + pattern: '*.txt', + ], ] } withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ { publishDir = [ [ - // data path: { "${params.outdir}/metagenomics/krakenuniq/data/" }, mode: params.publish_dir_mode, - pattern: '*.fastq.gz' + pattern: '*.fastq.gz', ], [ - // stats path: { "${params.outdir}/metagenomics/krakenuniq/stats/" }, mode: params.publish_dir_mode, - pattern: '*.txt' - ] + pattern: '*.txt', + ], ] } @@ -1399,17 +1326,15 @@ process { ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/metagenomics/metaphlan/stats/" }, mode: params.publish_dir_mode, - pattern: '*.{biom,txt}' + pattern: '*.{biom,txt}', ] ] } withName: MALTEXTRACT { - // TODO needs a tag - ext.args = [ + ext.args = [ "-f ${params.metagenomics_maltextract_filter}", "-a ${params.metagenomics_maltextract_toppercent}", "--minPI ${params.metagenomics_maltextract_minpercentidentity}", @@ -1418,63 +1343,59 @@ process { params.metagenomics_maltextract_duplicateremovaloff ? "--dupRemOff" : "", params.metagenomics_maltextract_matches ? "--matches" : "", params.metagenomics_maltextract_megansummary ? "--meganSummary" : "", - params.metagenomics_maltextract_usetopalignment ? "--useTopAlignment" : "", + params.metagenomics_maltextract_usetopalignment ? "--useTopAlignment" : "", { meta.strandedness } == "single" ? 
'--singleStranded' : '', ].join(' ').trim() publishDir = [ [ - // stats path: { "${params.outdir}/metagenomics/maltextract/stats/" }, mode: params.publish_dir_mode, pattern: 'results', - saveAs: { "${meta.id}" } + saveAs: { "${meta.id}" }, ] ] } withName: MEGAN_RMA2INFO { - tag = {"${meta.id}"} - ext.args = "-c2c Taxonomy" + tag = { "${meta.id}" } + ext.args = "-c2c Taxonomy" ext.prefix = { "${meta.id}" } publishDir = [ [ - // stats path: { "${params.outdir}/metagenomics/megan_summaries/stats/" }, mode: params.publish_dir_mode, - pattern: '*.{txt.gz,megan}' + pattern: '*.{txt.gz,megan}', ] ] } withName: AMPS { - publishDir = [ + publishDir = [ path: { "${params.outdir}/metagenomics/amps/stats/" }, mode: params.publish_dir_mode, - pattern: 'results' + pattern: 'results', ] - errorStrategy = 'ignore' // required as it fails the run for low reads: https://github.com/rhuebler/HOPS/issues/9 + errorStrategy = 'ignore' } withName: TAXPASTA_MERGE { - ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" } + ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" } publishDir = [ [ - // stats path: { "${params.outdir}/metagenomics/taxpasta/stats/" }, mode: params.publish_dir_mode, - pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' + pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}', ] ] } withName: TAXPASTA_STANDARDISE { - ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}taxpasta_table.tsv" } + ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}taxpasta_table.tsv" } publishDir = [ [ - // stats path: { "${params.outdir}/metagenomics/taxpasta/stats/" }, mode: params.publish_dir_mode, - pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' + pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}', ] ] } @@ -1482,98 +1403,92 @@ process { // // LIBRARY MERGE // - withName: ".*MERGE_LIBRARIES:SAMTOOLS_MERGE_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } + withName: 
'.*MERGE_LIBRARIES:SAMTOOLS_MERGE_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}_unsorted" } publishDir = [ enabled: false ] } - withName: ".*MERGE_LIBRARIES:SAMTOOLS_SORT_MERGED_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } + withName: '.*MERGE_LIBRARIES:SAMTOOLS_SORT_MERGED_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/final_bams/raw/data/" }, mode: params.publish_dir_mode, - pattern: '*.bam' + pattern: '*.bam', ] ] } - withName: ".*MERGE_LIBRARIES:SAMTOOLS_INDEX_MERGED_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + withName: '.*MERGE_LIBRARIES:SAMTOOLS_INDEX_MERGED_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/final_bams/raw/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.{bai,csi}', ] ] } - withName: ".*MERGE_LIBRARIES:SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } + withName: '.*MERGE_LIBRARIES:SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/final_bams/raw/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] ] } - withName: ".*MERGE_LIBRARIES:SAMTOOLS_MERGE_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } + withName: '.*MERGE_LIBRARIES:SAMTOOLS_MERGE_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}_unsorted" } publishDir = [ enabled: false ] } - withName: 
".*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_SORT_MERGED_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } + withName: '.*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_SORT_MERGED_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/final_bams/${params.genotyping_source}/data/" }, mode: params.publish_dir_mode, - pattern: '*.bam' + pattern: '*.bam', ] ] } - withName: ".*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_INDEX_MERGED_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + withName: '.*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_INDEX_MERGED_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/final_bams/${params.genotyping_source}/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.{bai,csi}', ] ] } - withName: ".*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES" { - tag = { "${meta.reference}|${meta.sample_id}" } + withName: '.*MERGE_LIBRARIES_GENOTYPING:SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES' { + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/final_bams/${params.genotyping_source}/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] ] } @@ -1582,8 +1497,8 @@ process { // GENOTYPING // withName: SAMTOOLS_MPILEUP_PILEUPCALLER { - tag = { "${meta.reference}|${meta.strandedness}" } - ext.args = [ + tag = { "${meta.reference}|${meta.strandedness}" } + ext.args = [ "-B", "-q ${params.genotyping_pileupcaller_min_base_quality}", "-Q ${params.genotyping_pileupcaller_min_map_quality}", @@ -1595,26 +1510,28 @@ process { } withName: SEQUENCETOOLS_PILEUPCALLER { 
- tag = { "${meta.reference}|${meta.strandedness}" } - ext.args = {[ - "--${params.genotyping_pileupcaller_method}", - params.genotyping_pileupcaller_transitions_mode == "SkipTransitions" ? "--skipTransitions" : params.genotyping_pileupcaller_transitions_mode == "TransitionsMissing" ? "--transitionsMissing" : "", - "${meta.strandedness}" == 'single' ? "--singleStrandMode" : "" , - "--sampleNames", meta.sample_id.join(","), - "-e pileupcaller.${meta.strandedness}.${meta.reference}" - ].join(' ').trim() } + tag = { "${meta.reference}|${meta.strandedness}" } + ext.args = { + [ + "--${params.genotyping_pileupcaller_method}", + params.genotyping_pileupcaller_transitions_mode == "SkipTransitions" ? "--skipTransitions" : params.genotyping_pileupcaller_transitions_mode == "TransitionsMissing" ? "--transitionsMissing" : "", + "${meta.strandedness}" == 'single' ? "--singleStrandMode" : "", + "--sampleNames", + meta.sample_id.join(","), + "-e pileupcaller.${meta.strandedness}.${meta.reference}", + ].join(' ').trim() + } ext.prefix = { "${meta.strandedness}_${meta.reference}" } publishDir = [ - enabled: false // Not published because the output goes through COLLECT_GENOTYPES + enabled: false ] } withName: COLLECT_GENOTYPES { - tag = { "${meta.reference}" } + tag = { "${meta.reference}" } ext.prefix = { "pileupcaller_genotypes_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/genotyping/pileupcaller/data/" }, mode: params.publish_dir_mode, pattern: '*.{geno,snp,ind}', @@ -1623,12 +1540,11 @@ process { } withName: EIGENSTRATDATABASETOOLS_EIGENSTRATSNPCOVERAGE { - tag = { "${meta.reference}" } - ext.args = { "-j ${prefix}.json" } + tag = { "${meta.reference}" } + ext.args = { "-j ${prefix}.json" } ext.prefix = { "pileupcaller_genotypes_${meta.reference}_coverage" } publishDir = [ [ - // stats path: { "${params.outdir}/genotyping/pileupcaller/stats/" }, mode: params.publish_dir_mode, pattern: '*.tsv', @@ -1637,9 +1553,9 @@ process { } withName: 
GATK_REALIGNERTARGETCREATOR { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = [ - params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "", // Empty string since GATK complains if its default of -1 is provided. + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = [ + params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "" ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.reference}_realigntarget" } publishDir = [ @@ -1648,14 +1564,13 @@ process { } withName: GATK_INDELREALIGNER { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = [ - params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "", // Empty string since GATK complains if its default of -1 is provided. + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = [ + params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "" ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.reference}_realigned" } publishDir = [ [ - // data path: { "${params.outdir}/genotyping/gatk_ug/data/" }, mode: params.publish_dir_mode, pattern: '*.{bam,bai}', @@ -1665,19 +1580,20 @@ process { } withName: GATK_UNIFIEDGENOTYPER { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = {[ - "--sample_ploidy ${meta2.ploidy}", - "-stand_call_conf ${params.genotyping_gatk_call_conf}", - "-dcov ${params.genotyping_gatk_ug_downsample}", - "--output_mode ${params.genotyping_gatk_ug_out_mode}", - "--genotype_likelihoods_model ${params.genotyping_gatk_ug_genotype_mode}", - params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "", // Empty string since GATK complains if its default of -1 is provided. 
- ].join(' ').trim() } + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = { + [ + "--sample_ploidy ${meta2.ploidy}", + "-stand_call_conf ${params.genotyping_gatk_call_conf}", + "-dcov ${params.genotyping_gatk_ug_downsample}", + "--output_mode ${params.genotyping_gatk_ug_out_mode}", + "--genotype_likelihoods_model ${params.genotyping_gatk_ug_genotype_mode}", + params.genotyping_gatk_ug_defaultbasequalities > 0 ? "--defaultBaseQualities ${params.genotyping_gatk_ug_defaultbasequalities}" : "", + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/genotyping/gatk_ug/data/" }, mode: params.publish_dir_mode, pattern: '*.vcf.gz', @@ -1686,12 +1602,11 @@ process { } withName: BCFTOOLS_INDEX_UG { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = "--tbi" //tbi indices for consistency with GATK HC + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = "--tbi" ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/genotyping/gatk_ug/data/" }, mode: params.publish_dir_mode, pattern: '*.vcf.gz.tbi', @@ -1700,18 +1615,18 @@ process { } withName: GATK4_HAPLOTYPECALLER { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = {[ - // Option names have changed from underscore_separated to hyphen-separated in GATK4 - "--sample-ploidy ${meta2.ploidy}", - "-stand-call-conf ${params.genotyping_gatk_call_conf}", - "--output-mode ${params.genotyping_gatk_hc_out_mode}", - "--emit-ref-confidence ${params.genotyping_gatk_hc_emitrefconf}", - ].join(' ').trim() } + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = { + [ + "--sample-ploidy ${meta2.ploidy}", + "-stand-call-conf ${params.genotyping_gatk_call_conf}", + "--output-mode ${params.genotyping_gatk_hc_out_mode}", + "--emit-ref-confidence ${params.genotyping_gatk_hc_emitrefconf}", + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.reference}" } 
publishDir = [ [ - // data path: { "${params.outdir}/genotyping/gatk_hc/data/" }, mode: params.publish_dir_mode, pattern: '*.{vcf.gz,vcf.gz.tbi}', @@ -1720,16 +1635,17 @@ process { } withName: FREEBAYES { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = {[ - "-p ${ref_meta.ploidy}", - "-C ${params.genotyping_freebayes_min_alternate_count}", - params.genotyping_freebayes_skip_coverage != 0 ? "-g ${params.genotyping_freebayes_skip_coverage}" : "", - ].join(' ').trim() } + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = { + [ + ref_meta.ploidy ? "-p ${ref_meta.ploidy}" : '', + "-C ${params.genotyping_freebayes_min_alternate_count}", + params.genotyping_freebayes_skip_coverage != 0 ? "-g ${params.genotyping_freebayes_skip_coverage}" : "", + ].join(' ').trim() + } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/genotyping/freebayes/data/" }, mode: params.publish_dir_mode, pattern: '*.vcf.gz', @@ -1738,12 +1654,11 @@ process { } withName: BCFTOOLS_INDEX_FREEBAYES { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.args = "--tbi" //tbi indices for consistency with GATK HC + tag = { "${meta.reference}|${meta.sample_id}" } + ext.args = "--tbi" ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/genotyping/freebayes/data/" }, mode: params.publish_dir_mode, pattern: '*.vcf.gz.tbi', @@ -1752,11 +1667,10 @@ process { } withName: BCFTOOLS_STATS_GENOTYPING { - tag = { "${meta.reference}|${meta.sample_id}" } + tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ [ - // stats path: { "${params.outdir}/genotyping/bcftools/stats/" }, mode: params.publish_dir_mode, pattern: '*.txt', @@ -1765,12 +1679,12 @@ process { } withName: ANGSD_GL { - tag = { "${meta.reference}" } - ext.args = { + tag = { "${meta.reference}" } + ext.args = { gl_model = params.genotyping_angsd_glmodel 
== 'samtools' ? 1 : params.genotyping_angsd_glmodel == 'gatk' ? 2 : params.genotyping_angsd_glmodel == 'soapsnp' ? 3 : 4 gl_format = params.genotyping_angsd_glformat == 'binary' ? 1 : params.genotyping_angsd_glformat == 'beagle_binary' ? 2 : params.genotyping_angsd_glformat == 'binary_three' ? 3 : 4 [ - ( gl_format == 2 || gl_format == 3 ) ? '-doMajorMinor 1': '', + gl_format == 2 || gl_format == 3 ? '-doMajorMinor 1' : '', "-GL ${gl_model}", "-doGlf ${gl_format}", ].join(' ').trim() @@ -1778,7 +1692,6 @@ process { ext.prefix = { "angsd_${meta.reference}" } publishDir = [ [ - // data path: { "${params.outdir}/genotyping/angsd/data/" }, mode: params.publish_dir_mode, pattern: '*.{glf,beagle}.gz', diff --git a/conf/test_microbial.config b/conf/test_microbial.config index dd9eec06..4c45e5db 100644 --- a/conf/test_microbial.config +++ b/conf/test_microbial.config @@ -14,7 +14,7 @@ process { resourceLimits = [ cpus: 4, memory: '15.GB', - time: '1.h' + time: '1.h', ] } @@ -29,7 +29,7 @@ params { fasta_sheet = params.pipelines_testdata_base_path + 'eager/reference/reference_sheet_multiref.csv' // Host Removal - run_host_removal = true + run_host_removal = true // Preprocessing sequencing_qc_tool = 'falco' @@ -50,16 +50,11 @@ params { metagenomics_profiling_database = params.pipelines_testdata_base_path + 'eager/databases/krakenuniq/testdb-krakenuniq.tar.gz' run_host_removal = true - // Manipulate Damage - run_mapdamage_rescaling = true - // Bedtools coverage run_bedtools_coverage = true // Genotyping run_genotyping = true - genotyping_source = 'rescaled' + genotyping_source = 'raw' genotyping_tool = 'freebayes' - - } diff --git a/subworkflows/local/manipulate_damage.nf b/subworkflows/local/manipulate_damage.nf index bc8d96cb..4b8b8e7d 100644 --- a/subworkflows/local/manipulate_damage.nf +++ b/subworkflows/local/manipulate_damage.nf @@ -2,177 +2,171 @@ // Calculate PMD scores, trim, or rescale DNA damage from mapped reads. 
// -include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' +include { addNewMetaFromAttributes } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' -include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/bedtools/maskfasta/main' -include { MAPDAMAGE2 } from '../../modules/nf-core/mapdamage2/main' -include { PMDTOOLS_FILTER } from '../../modules/nf-core/pmdtools/filter/main' -include { BAMUTIL_TRIMBAM } from '../../modules/nf-core/bamutil/trimbam/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DAMAGE_RESCALED } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DAMAGE_FILTERED } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DAMAGE_TRIMMED } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED } from '../../modules/nf-core/samtools/flagstat/main' +include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/bedtools/maskfasta/main' +include { MAPDAMAGE2 } from '../../modules/nf-core/mapdamage2/main' +include { PMDTOOLS_FILTER } from '../../modules/nf-core/pmdtools/filter/main' +include { BAMUTIL_TRIMBAM } from '../../modules/nf-core/bamutil/trimbam/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DAMAGE_RESCALED } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DAMAGE_FILTERED } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_DAMAGE_TRIMMED } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED } from '../../modules/nf-core/samtools/flagstat/main' // TODO: Add required channels and channel manipulations for reference-dependent bed masking before pmdtools. Requires multi-ref support before implementation. 
workflow MANIPULATE_DAMAGE { take: - ch_bam_bai // [ [ meta ], bam , bai ] - ch_fasta // [ [ meta ], fasta ] - ch_pmd_masking // [ [ meta ], masked_fasta, bed_for_masking ] + ch_bam_bai // [ [ meta ], bam , bai ] + ch_fasta // [ [ meta ], fasta ] + ch_fasta_elongated // [ [ meta ], fasta ] + ch_pmd_masking // [ [ meta ], masked_fasta, bed_for_masking ] main: - ch_versions = Channel.empty() - ch_rescaled_bams = Channel.empty() - ch_pmd_filtered_bams = Channel.empty() - ch_trimmed_bams = Channel.empty() - ch_pmd_filtered_flagstat = Channel.empty() // Only run flagstat on pmd filtered bam, since rescaling and trimming does not change the number of reads + ch_versions = Channel.empty() + ch_rescaled_bams = Channel.empty() + ch_pmd_filtered_bams = Channel.empty() + ch_trimmed_bams = Channel.empty() + ch_pmd_filtered_flagstat = Channel.empty() + // Only run flagstat on pmd filtered bam, since rescaling and trimming does not change the number of reads // Ensure correct reference is associated with each bam_bai pair - ch_refs = ch_fasta - .map { + if (params.run_mapdamage_rescaling) { + ch_refs = ch_fasta_elongated.map { // Prepend a new meta that contains the meta.id value as the new_meta.reference attribute - addNewMetaFromAttributes( it, "id" , "reference" , false ) + addNewMetaFromAttributes(it, "id", "reference", false) } + } + else { + ch_refs = ch_fasta.map { + // Prepend a new meta that contains the meta.id value as the new_meta.reference attribute + addNewMetaFromAttributes(it, "id", "reference", false) + } + } + ch_input_for_damage_manipulation = ch_bam_bai .map { // Prepend a new meta that contains the meta.reference value as the new_meta.reference attribute - addNewMetaFromAttributes( it, "reference" , "reference" , false ) + addNewMetaFromAttributes(it, "reference", "reference", false) } - .combine( ch_refs, by: 0 ) // [ [combine_meta], [meta], bam, bai, [ref_meta], fasta ] - - if ( params.run_mapdamage_rescaling ) { - ch_mapdamage_prep = 
ch_input_for_damage_manipulation - .branch { - ignore_me, meta, bam, bai, ref_meta, fasta -> - skip: meta.damage_treatment == 'full' - no_skip: true - } + .combine(ch_refs, by: 0) + // [ [combine_meta], [meta], bam, bai, [ref_meta], fasta ] - ch_skip_rescale = ch_mapdamage_prep.skip - .map { - ignore_me, meta, bam, bai, ref_meta, fasta -> - [ meta, bam, bai ] - } + if (params.run_mapdamage_rescaling) { + ch_mapdamage_prep = ch_input_for_damage_manipulation.branch { ignore_me, meta, bam, bai, ref_meta, fasta -> + skip: meta.damage_treatment == 'full' + no_skip: true + } - ch_mapdamage_input = ch_mapdamage_prep.no_skip - .multiMap { - ignore_me, meta, bam, bai, ref_meta, fasta -> - bam: [ meta, bam ] - fasta: fasta - } + ch_skip_rescale = ch_mapdamage_prep.skip.map { ignore_me, meta, bam, bai, ref_meta, fasta -> + [meta, bam, bai] + } + + ch_mapdamage_input = ch_mapdamage_prep.no_skip.multiMap { ignore_me, meta, bam, bai, ref_meta, fasta -> + bam: [meta, bam] + fasta: fasta + } - MAPDAMAGE2( ch_mapdamage_input.bam, ch_mapdamage_input.fasta ) - ch_versions = ch_versions.mix( MAPDAMAGE2.out.versions.first() ) + MAPDAMAGE2(ch_mapdamage_input.bam, ch_mapdamage_input.fasta) + ch_versions = ch_versions.mix(MAPDAMAGE2.out.versions.first()) - SAMTOOLS_INDEX_DAMAGE_RESCALED( MAPDAMAGE2.out.rescaled ) - ch_versions = ch_versions.mix( SAMTOOLS_INDEX_DAMAGE_RESCALED.out.versions.first() ) + SAMTOOLS_INDEX_DAMAGE_RESCALED(MAPDAMAGE2.out.rescaled) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_DAMAGE_RESCALED.out.versions.first()) ch_rescaled_index = params.fasta_largeref ? SAMTOOLS_INDEX_DAMAGE_RESCALED.out.csi : SAMTOOLS_INDEX_DAMAGE_RESCALED.out.bai // TODO When adding library-level data merging pre-genotyping, make sure that rescaled bams are not merged in any way as the underlying damage model could differ between libraries - ch_rescaled_bams = MAPDAMAGE2.out.rescaled.join(ch_rescaled_index) - .mix(ch_skip_rescale) // Should these be mixed actually, or excluded? 
Might not make sense to take rescaled and non-rescaled bams togetehr for anything downstream. + ch_rescaled_bams = MAPDAMAGE2.out.rescaled + .join(ch_rescaled_index) + .mix(ch_skip_rescale) } - if ( params.run_pmd_filtering ) { + if (params.run_pmd_filtering) { ch_masking_prep = ch_pmd_masking - .combine( ch_fasta, by: 0 ) // [ [meta], masked_fasta, bed, fasta ] - .branch { - meta, masked_fasta, bed, fasta -> - alreadymasked: masked_fasta != "" - tobemasked: masked_fasta == "" && bed != "" - nomasking: masked_fasta == "" && bed == "" - } - - ch_masking_input = ch_masking_prep.tobemasked - .multiMap{ - meta, masked_fasta, bed, fasta -> - bed: [ meta, bed ] - fasta: fasta - } - - BEDTOOLS_MASKFASTA( ch_masking_input.bed, ch_masking_input.fasta ) + .combine(ch_fasta, by: 0) + .branch { meta, masked_fasta, bed, fasta -> + alreadymasked: masked_fasta != "" + tobemasked: masked_fasta == "" && bed != "" + nomasking: masked_fasta == "" && bed == "" + } + + ch_masking_input = ch_masking_prep.tobemasked.multiMap { meta, masked_fasta, bed, fasta -> + bed: [meta, bed] + fasta: fasta + } + + BEDTOOLS_MASKFASTA(ch_masking_input.bed, ch_masking_input.fasta) ch_masking_output = BEDTOOLS_MASKFASTA.out.fasta - ch_versions = ch_versions.mix( BEDTOOLS_MASKFASTA.out.versions.first() ) - - ch_already_masked = ch_masking_prep.alreadymasked - .map { - meta, masked_fasta, bed, fasta -> - [ meta, masked_fasta ] - } - - ch_no_masking = ch_masking_prep.nomasking - .map { - meta, masked_fasta, bed, fasta -> - [ meta, fasta ] - } - - ch_pmd_fastas = ch_masking_output.mix( ch_already_masked, ch_no_masking ) - .map { - // Prepend a new meta that contains the meta.id value as the new_meta.reference attribute - addNewMetaFromAttributes( it, "id" , "reference" , false ) - } + ch_versions = ch_versions.mix(BEDTOOLS_MASKFASTA.out.versions.first()) + + ch_already_masked = ch_masking_prep.alreadymasked.map { meta, masked_fasta, bed, fasta -> + [meta, masked_fasta] + } + + ch_no_masking = 
ch_masking_prep.nomasking.map { meta, masked_fasta, bed, fasta -> + [meta, fasta] + } + + ch_pmd_fastas = ch_masking_output + .mix(ch_already_masked, ch_no_masking) + .map { + // Prepend a new meta that contains the meta.id value as the new_meta.reference attribute + addNewMetaFromAttributes(it, "id", "reference", false) + } ch_pmdtools_input = ch_bam_bai - .map { addNewMetaFromAttributes( it, "reference" , "reference" , false ) } - .combine( ch_pmd_fastas, by: 0 ) // [ [combine_meta], [meta], bam, bai, [ref_meta] fasta ] - .multiMap { - combine_meta, meta, bam, bai, ref_meta, fasta -> - bam: [ meta, bam, bai ] - fasta: fasta - } - - PMDTOOLS_FILTER( ch_pmdtools_input.bam, params.damage_manipulation_pmdtools_threshold, ch_pmdtools_input.fasta ) - ch_versions = ch_versions.mix( PMDTOOLS_FILTER.out.versions.first() ) - - SAMTOOLS_INDEX_DAMAGE_FILTERED( PMDTOOLS_FILTER.out.bam ) - ch_versions = ch_versions.mix( SAMTOOLS_INDEX_DAMAGE_FILTERED.out.versions.first() ) + .map { addNewMetaFromAttributes(it, "reference", "reference", false) } + .combine(ch_pmd_fastas, by: 0) + .multiMap { combine_meta, meta, bam, bai, ref_meta, fasta -> + bam: [meta, bam, bai] + fasta: fasta + } + + PMDTOOLS_FILTER(ch_pmdtools_input.bam, params.damage_manipulation_pmdtools_threshold, ch_pmdtools_input.fasta) + ch_versions = ch_versions.mix(PMDTOOLS_FILTER.out.versions.first()) + + SAMTOOLS_INDEX_DAMAGE_FILTERED(PMDTOOLS_FILTER.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_DAMAGE_FILTERED.out.versions.first()) ch_filtered_index = params.fasta_largeref ? 
SAMTOOLS_INDEX_DAMAGE_FILTERED.out.csi : SAMTOOLS_INDEX_DAMAGE_FILTERED.out.bai - ch_pmd_filtered_bams = PMDTOOLS_FILTER.out.bam.join( ch_filtered_index ) + ch_pmd_filtered_bams = PMDTOOLS_FILTER.out.bam.join(ch_filtered_index) - SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED( ch_pmd_filtered_bams ) + SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED(ch_pmd_filtered_bams) ch_pmd_filtered_flagstat = SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED.out.flagstat - ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED.out.versions.first() ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED.out.versions.first()) } - if ( params.run_trim_bam ) { - if ( params.run_pmd_filtering ) { - ch_to_trim = ch_pmd_filtered_bams - .map{ - meta, bam, bai -> - [ meta, bam ] - } - } else { - ch_to_trim = ch_bam_bai - .map { - meta, bam, bai -> - [ meta, bam ] - } + if (params.run_trim_bam) { + if (params.run_pmd_filtering) { + ch_to_trim = ch_pmd_filtered_bams.map { meta, bam, bai -> + [meta, bam] + } } - - ch_trimbam_input = ch_to_trim - .map { - meta, bam -> - trim_left = meta.strandedness == 'single' ? ( meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_single_stranded_none_udg_left : meta.damage_treatment == 'half' ? params.damage_manipulation_bamutils_trim_single_stranded_half_udg_left : 0 ) : ( meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_double_stranded_none_udg_left : meta.damage_treatment == 'half' ? params.damage_manipulation_bamutils_trim_double_stranded_half_udg_left : 0 ) - trim_right = meta.strandedness == 'single' ? ( meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_single_stranded_none_udg_right : meta.damage_treatment == 'half' ? params.damage_manipulation_bamutils_trim_single_stranded_half_udg_right : 0 ) : ( meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_double_stranded_none_udg_right : meta.damage_treatment == 'half' ? 
params.damage_manipulation_bamutils_trim_double_stranded_half_udg_right : 0 ) - [ meta, bam, trim_left, trim_right ] + else { + ch_to_trim = ch_bam_bai.map { meta, bam, bai -> + [meta, bam] } + } + + ch_trimbam_input = ch_to_trim.map { meta, bam -> + def trim_left = meta.strandedness == 'single' ? (meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_single_stranded_none_udg_left : meta.damage_treatment == 'half' ? params.damage_manipulation_bamutils_trim_single_stranded_half_udg_left : 0) : (meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_double_stranded_none_udg_left : meta.damage_treatment == 'half' ? params.damage_manipulation_bamutils_trim_double_stranded_half_udg_left : 0) + def trim_right = meta.strandedness == 'single' ? (meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_single_stranded_none_udg_right : meta.damage_treatment == 'half' ? params.damage_manipulation_bamutils_trim_single_stranded_half_udg_right : 0) : (meta.damage_treatment == 'none' ? params.damage_manipulation_bamutils_trim_double_stranded_none_udg_right : meta.damage_treatment == 'half' ? params.damage_manipulation_bamutils_trim_double_stranded_half_udg_right : 0) + [meta, bam, trim_left, trim_right] + } - BAMUTIL_TRIMBAM( ch_trimbam_input ) - ch_versions = ch_versions.mix( BAMUTIL_TRIMBAM.out.versions.first() ) + BAMUTIL_TRIMBAM(ch_trimbam_input) + ch_versions = ch_versions.mix(BAMUTIL_TRIMBAM.out.versions.first()) - SAMTOOLS_INDEX_DAMAGE_TRIMMED( BAMUTIL_TRIMBAM.out.bam ) - ch_versions = ch_versions.mix( SAMTOOLS_INDEX_DAMAGE_TRIMMED.out.versions.first() ) + SAMTOOLS_INDEX_DAMAGE_TRIMMED(BAMUTIL_TRIMBAM.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_DAMAGE_TRIMMED.out.versions.first()) ch_trimmed_index = params.fasta_largeref ? 
SAMTOOLS_INDEX_DAMAGE_TRIMMED.out.csi : SAMTOOLS_INDEX_DAMAGE_TRIMMED.out.bai - ch_trimmed_bams = BAMUTIL_TRIMBAM.out.bam.join( ch_trimmed_index ) + ch_trimmed_bams = BAMUTIL_TRIMBAM.out.bam.join(ch_trimmed_index) } emit: - rescaled = ch_rescaled_bams // [ meta, bam, bai ] - filtered = ch_pmd_filtered_bams // [ meta, bam, bai ] - trimmed = ch_trimmed_bams // [ meta, bam, bai ] + rescaled = ch_rescaled_bams // [ meta, bam, bai ] + filtered = ch_pmd_filtered_bams // [ meta, bam, bai ] + trimmed = ch_trimmed_bams // [ meta, bam, bai ] flagstat = ch_pmd_filtered_flagstat // [ meta, flagstat ] versions = ch_versions } diff --git a/workflows/eager.nf b/workflows/eager.nf index dd98a790..104e4057 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -433,9 +433,9 @@ workflow EAGER { ch_versions = ch_versions.mix(PRESEQ_CCURVE.out.versions) } else if (!params.mapstats_skip_preseq && params.mapstats_preseq_mode == 'lc_extrap') { - PRESEQ_LCEXTRAP(ch_reads_for_deduplication.map { [it[0], it[1]] }) - ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.lc_extrap.collect { it[1] }.ifEmpty([])) - ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions) + PRESEQ_LCEXTRAP(ch_reads_for_deduplication.map { [it[0], it[1]] }) + ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.lc_extrap.collect { it[1] }.ifEmpty([])) + ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions) } // @@ -525,7 +525,7 @@ workflow EAGER { // if (params.run_mapdamage_rescaling || params.run_pmd_filtering || params.run_trim_bam) { - MANIPULATE_DAMAGE(ch_dedupped_bams, ch_fasta_for_deduplication.fasta, REFERENCE_INDEXING.out.pmd_masking) + MANIPULATE_DAMAGE(ch_dedupped_bams, ch_fasta_for_deduplication.fasta, REFERENCE_INDEXING.out.reference, REFERENCE_INDEXING.out.pmd_masking) ch_multiqc_files = ch_multiqc_files.mix(MANIPULATE_DAMAGE.out.flagstat.collect { it[1] }.ifEmpty([])) ch_versions = ch_versions.mix(MANIPULATE_DAMAGE.out.versions) ch_bams_for_library_merge = 
params.genotyping_source == 'rescaled' ? MANIPULATE_DAMAGE.out.rescaled : params.genotyping_source == 'pmd' ? MANIPULATE_DAMAGE.out.filtered : params.genotyping_source == 'trimmed' ? MANIPULATE_DAMAGE.out.trimmed : ch_merged_dedup_bams @@ -552,7 +552,7 @@ workflow EAGER { ch_bams_for_genotyping, ch_reference_for_genotyping, REFERENCE_INDEXING.out.pileupcaller_bed_snp.ifEmpty([[], [], []]), - REFERENCE_INDEXING.out.dbsnp.ifEmpty([[], []]) + REFERENCE_INDEXING.out.dbsnp.ifEmpty([[], []]), ) ch_versions = ch_versions.mix(GENOTYPE.out.versions) @@ -565,9 +565,9 @@ workflow EAGER { softwareVersionsToYAML(ch_versions) .collectFile( storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'eager_software_' + 'mqc_' + 'versions.yml', + name: 'nf_core_' + 'eager_software_' + 'mqc_' + 'versions.yml', sort: true, - newLine: true + newLine: true, ) .set { ch_collated_versions } @@ -605,7 +605,7 @@ workflow EAGER { ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) @@ -619,7 +619,7 @@ workflow EAGER { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) emit: From 89262a279ecb20f104f886ba7a94882cbd122374 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 28 Mar 2025 10:25:51 +0100 Subject: [PATCH 21/24] Update conf/modules.config Co-authored-by: Ian Light-Maka <86308592+ilight1542@users.noreply.github.com> --- conf/modules.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index c102e4b9..4cce66c0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1619,6 +1619,8 @@ process { ext.args = { [ "--sample-ploidy ${meta2.ploidy}", + " ref_meta.ploidy ? 
"--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", + "-stand-call-conf ${params.genotyping_gatk_call_conf}", "--output-mode ${params.genotyping_gatk_hc_out_mode}", "--emit-ref-confidence ${params.genotyping_gatk_hc_emitrefconf}", From 04c75a174cb1fe63013060a9641099f12d0550e4 Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 28 Mar 2025 10:25:58 +0100 Subject: [PATCH 22/24] Update conf/modules.config Co-authored-by: Ian Light-Maka <86308592+ilight1542@users.noreply.github.com> --- conf/modules.config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 4cce66c0..3e91fe13 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1583,7 +1583,9 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.args = { [ - "--sample_ploidy ${meta2.ploidy}", + "--sample-ploidy ${meta2.ploidy}", + " ref_meta.ploidy ? "--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", + "-stand_call_conf ${params.genotyping_gatk_call_conf}", "-dcov ${params.genotyping_gatk_ug_downsample}", "--output_mode ${params.genotyping_gatk_ug_out_mode}", From 13bc21a2c213b540d6d75d8ebd9cae17113abfcc Mon Sep 17 00:00:00 2001 From: Judith Ballesteros <judith.vballesteros@gmail.com> Date: Fri, 28 Mar 2025 11:32:32 +0100 Subject: [PATCH 23/24] Update conf/modules.config Co-authored-by: James A. Fellows Yates <jfy133@gmail.com> --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index cc96d02f..21a34e65 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1617,7 +1617,7 @@ process { ext.args = { [ "--sample-ploidy ${meta2.ploidy}", - " ref_meta.ploidy ? "--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", + ref_meta.ploidy ? 
"--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", "-stand_call_conf ${params.genotyping_gatk_call_conf}", "-dcov ${params.genotyping_gatk_ug_downsample}", From 396fd258ec32aada46cec4b16df5d38b695dd532 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" <jfy133@gmail.com> Date: Fri, 28 Mar 2025 10:49:42 +0000 Subject: [PATCH 24/24] And for haplotypecaller --- conf/modules.config | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cc96d02f..e9d1b05f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -331,17 +331,17 @@ process { // BAM INPUT // - + withName: SAMTOOLS_INDEX_BAM_INPUT { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ enabled: false ] } withName: SAMTOOLS_MERGE_LANES_BAMINPUT { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.run_fastq_sharding ? "-c -p" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.run_fastq_sharding ? "-c -p" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ enabled: false @@ -349,36 +349,33 @@ process { } withName: SAMTOOLS_SORT_MERGED_LANES_BAMINPUT { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - // data path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bam}' + pattern: '*.{bam}', ] } withName: SAMTOOLS_INDEX_MERGED_LANES_BAMINPUT { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? 
"-c" : "" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - // data path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" }, mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + pattern: '*.{bai,csi}', ] } withName: SAMTOOLS_FLAGSTAT_MERGED_LANES_BAMINPUT { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - // stats path: { "${params.outdir}/mapping/bam_input/stats/" }, mode: params.publish_dir_mode, - pattern: '*.flagstat' + pattern: '*.flagstat', ] } @@ -1617,8 +1614,7 @@ process { ext.args = { [ "--sample-ploidy ${meta2.ploidy}", - " ref_meta.ploidy ? "--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", - + ref_meta.ploidy ? "--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", "-stand_call_conf ${params.genotyping_gatk_call_conf}", "-dcov ${params.genotyping_gatk_ug_downsample}", "--output_mode ${params.genotyping_gatk_ug_out_mode}", @@ -1654,8 +1650,7 @@ process { ext.args = { [ "--sample-ploidy ${meta2.ploidy}", - " ref_meta.ploidy ? "--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", - + ref_meta.ploidy ? "--sample-ploidy ${ref_meta.ploidy}" : "--sample-ploidy 2", "-stand-call-conf ${params.genotyping_gatk_call_conf}", "--output-mode ${params.genotyping_gatk_hc_out_mode}", "--emit-ref-confidence ${params.genotyping_gatk_hc_emitrefconf}",