From 7102af43231abcca40d30e0c4968d7b927976277 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 14 Oct 2024 15:18:40 +0200 Subject: [PATCH] Fix config for modules nanoq and filtlong in modules.config --- conf/modules.config | 47 +++++++++++++++++++- nextflow.config | 7 ++- nextflow_schema.json | 19 +++++++- subworkflows/local/longread_preprocessing.nf | 20 +++++---- 4 files changed, 79 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b226ba01..c3fb6a44 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -192,17 +192,40 @@ process { "--min_length ${params.longreads_min_length}", "--keep_percent ${params.longreads_keep_percent}", "--trim", - "--length_weight ${params.longreads_length_weight}" + "--length_weight ${params.longreads_length_weight}", + params.longreads_min_quality ? "--min_mean_q ${params.longreads_min_quality}" : '', ].join(' ').trim() publishDir = [ path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, pattern: "*_filtlong.fastq.gz", - enabled: params.save_filtlong_reads + enabled: params.save_filtered_reads ] ext.prefix = { "${meta.id}_run${meta.run}_filtlong" } } + withName: NANOQ { + ext.args = [ + "--min-len ${params.longreads_min_length}", + params.longreads_min_quality ? "--min-qual ${params.longreads_min_quality}": '', + "-vv" + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/QC_longreads/Nanoq" }, + mode: params.publish_dir_mode, + pattern: "*_nanoq_filtered.fastq.gz", + enabled: params.save_filtered_reads + ], + [ + path: { "${params.outdir}/QC_longreads/Nanoq" }, + mode: params.publish_dir_mode, + pattern: "*_nanoq_filtered.stats" + ] + ] + ext.prefix = { "${meta.id}_run${meta.run}_nanoq_filtered" } + } + withName: NANOLYSE { publishDir = [ [ @@ -220,6 +243,26 @@ process { ext.prefix = { "${meta.id}_run${meta.run}_lambdafiltered" } } + withName: CHOPPER { + ext.args2 = [ + "--contam ${params.lambda_reference}" + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/QC_longreads/Chopper" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/QC_longreads/Chopper" }, + mode: params.publish_dir_mode, + pattern: "*_chopper.fastq.gz", + enabled: params.save_lambdaremoved_reads + ] + ] + ext.prefix = { "${meta.id}_run${meta.run}_chopper" } + } + withName: NANOPLOT_RAW { ext.prefix = 'raw' ext.args = { diff --git a/nextflow.config b/nextflow.config index b6d281d0..822656b3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,9 @@ params { adapterremoval_trim_quality_stretch = false keep_phix = false // long read preprocessing options - longread_adaptertrimming_tool = "porechop_abi" + longread_adaptertrimming_tool = "porechop_abi" + longread_phageremoval_tool = "chopper" + longread_filtering_tool = "filtlong" // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" save_phixremoved_reads = false @@ -102,6 +104,7 @@ params { // long read preprocessing options skip_adapter_trimming = false keep_lambda = false + longreads_min_quality = null longreads_min_length = 1000 longreads_keep_percent = 90 longreads_length_weight = 10 @@ -109,7 +112,7 @@ params { lambda_reference = "${baseDir}/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz" save_lambdaremoved_reads = false save_porechop_reads = false - save_filtlong_reads = false + save_filtered_reads = false // binning options skip_metabat2 = false diff --git a/nextflow_schema.json b/nextflow_schema.json index b4809d15..6f81582e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -453,6 +453,11 @@ "default": 1000, "description": "Discard any read which is shorter than this value." }, + "longreads_min_quality": { + "type": "integer", + "default": null, + "description": "Discard any read which has a mean quality score lower than this value." + }, "longreads_keep_percent": { "type": "integer", "default": 90, @@ -482,7 +487,7 @@ "type": "boolean", "description": "Specify to save the resulting clipped FASTQ files to --outdir." }, - "save_filtlong_reads": { + "save_filtered_reads": { "type": "boolean", "description": "Specify to save the resulting length filtered FASTQ files to --outdir." }, @@ -491,6 +496,18 @@ "description": "Specify which long read adapter trimming tool to use.", "enum": ["porechop", "porechop_abi"], "default": "porechop_abi" + }, + "longread_phageremoval_tool": { + "type": "string", + "description": "Specify which long read phage removal tool to use.", + "enum": ["nanolyse", "chopper"], + "default": "chopper" + }, + "longread_filtering_tool": { + "type": "string", + "description": "Specify which long read filtering tool to use.", + "enum": ["filtlong", "nanoq"], + "default": "filtlong" } } }, diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 76c0fa45..9e4cc0c1 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -71,16 +71,16 @@ workflow LONGREAD_PREPROCESSING { } - // join long and short reads by sample name - ch_short_reads_tmp = ch_short_reads - .map { meta, sr -> [ meta.id, meta, sr ] } + if (params.longread_filtering_tool == 'filtlong') { + // join long and short reads by sample name + ch_short_reads_tmp = ch_short_reads + .map { meta, sr -> [ meta.id, meta, sr ] } - ch_short_and_long_reads = ch_long_reads - .map { meta, lr -> [ meta.id, meta, lr ] } - .join(ch_short_reads_tmp, by: 0) - .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + ch_short_and_long_reads = ch_long_reads + .map { meta, lr -> [ meta.id, meta, lr ] } + .join(ch_short_reads_tmp, by: 0) + .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end - if (params.longread_filtering_tool == 'filtlong') { FILTLONG ( ch_short_and_long_reads ) @@ -89,10 +89,12 @@ workflow LONGREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) } else if (params.longread_filtering_tool == 'nanoq') { NANOQ ( - ch_long_reads + ch_long_reads, + 'fastq.gz' ) ch_long_reads = NANOQ.out.reads ch_versions = ch_versions.mix(NANOQ.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(NANOQ.out.stats) } NANOPLOT_FILTERED (