From 08445cf674a212cda10becbe81cbdccba2d6e997 Mon Sep 17 00:00:00 2001 From: adro Date: Fri, 20 Sep 2024 08:25:31 +0200 Subject: [PATCH 01/33] Added modules porechop/abi and chopper for QC of long reads --- modules.json | 10 ++ modules/nf-core/chopper/environment.yml | 5 + modules/nf-core/chopper/main.nf | 42 +++++++++ modules/nf-core/chopper/meta.yml | 53 +++++++++++ modules/nf-core/chopper/tests/main.nf.test | 45 +++++++++ .../nf-core/chopper/tests/main.nf.test.snap | 16 ++++ modules/nf-core/chopper/tests/tags.yml | 2 + modules/nf-core/porechop/abi/environment.yml | 7 ++ modules/nf-core/porechop/abi/main.nf | 50 ++++++++++ modules/nf-core/porechop/abi/meta.yml | 48 ++++++++++ .../nf-core/porechop/abi/tests/main.nf.test | 59 ++++++++++++ .../porechop/abi/tests/main.nf.test.snap | 94 +++++++++++++++++++ 12 files changed, 431 insertions(+) create mode 100644 modules/nf-core/chopper/environment.yml create mode 100644 modules/nf-core/chopper/main.nf create mode 100644 modules/nf-core/chopper/meta.yml create mode 100644 modules/nf-core/chopper/tests/main.nf.test create mode 100644 modules/nf-core/chopper/tests/main.nf.test.snap create mode 100644 modules/nf-core/chopper/tests/tags.yml create mode 100644 modules/nf-core/porechop/abi/environment.yml create mode 100644 modules/nf-core/porechop/abi/main.nf create mode 100644 modules/nf-core/porechop/abi/meta.yml create mode 100644 modules/nf-core/porechop/abi/tests/main.nf.test create mode 100644 modules/nf-core/porechop/abi/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 0cab4e4e..f69d0a28 100644 --- a/modules.json +++ b/modules.json @@ -62,6 +62,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "chopper": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "concoct/concoct": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", @@ -202,6 +207,11 @@ "git_sha": 
"3135090b46f308a260fc9d5991d7d2f9c0785309", "installed_by": ["modules"] }, + "porechop/abi": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "porechop/porechop": { "branch": "master", "git_sha": "1d68c7f248d1a480c5959548a9234602b771199e", diff --git a/modules/nf-core/chopper/environment.yml b/modules/nf-core/chopper/environment.yml new file mode 100644 index 00000000..e80840e1 --- /dev/null +++ b/modules/nf-core/chopper/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::chopper=0.3.0 diff --git a/modules/nf-core/chopper/main.nf b/modules/nf-core/chopper/main.nf new file mode 100644 index 00000000..06f79849 --- /dev/null +++ b/modules/nf-core/chopper/main.nf @@ -0,0 +1,42 @@ +process CHOPPER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/chopper:0.3.0--hd03093a_0': + 'biocontainers/chopper:0.3.0--hd03093a_0' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.fastq.gz") , emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if ("$fastq" == "${prefix}.fastq.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + zcat \\ + $args \\ + $fastq | \\ + chopper \\ + --threads $task.cpus \\ + $args2 | \\ + gzip \\ + $args3 > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/chopper/meta.yml b/modules/nf-core/chopper/meta.yml new file mode 100644 index 00000000..9d8093d6 --- /dev/null +++ b/modules/nf-core/chopper/meta.yml @@ -0,0 +1,53 @@ +name: "chopper" +description: Filter and trim long read data. +keywords: + - filter + - trimming + - fastq + - nanopore + - qc +tools: + - "zcat": + description: "zcat uncompresses either a list of files on the command line or its standard input and writes the uncompressed data on standard output." + documentation: "https://linux.die.net/man/1/zcat" + args_id: "$args" + - "chopper": + description: "A rust command line for filtering and trimming long reads." + homepage: "https://github.com/wdecoster/chopper" + documentation: "https://github.com/wdecoster/chopper" + tool_dev_url: "https://github.com/wdecoster/chopper" + doi: "10.1093/bioinformatics/bty149" + licence: ["MIT"] + args_id: "$args2" + - "gzip": + description: "Gzip reduces the size of the named files using Lempel-Ziv coding (LZ77)." + documentation: "https://linux.die.net/man/1/gzip" + args_id: "$args3" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: FastQ with reads from long read sequencing e.g. PacBio or ONT + pattern: "*.{fastq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Filtered and trimmed FastQ file + pattern: "*.{fastq.gz}" +authors: + - "@FynnFreyer" +maintainers: + - "@FynnFreyer" diff --git a/modules/nf-core/chopper/tests/main.nf.test b/modules/nf-core/chopper/tests/main.nf.test new file mode 100644 index 00000000..ee195b5f --- /dev/null +++ b/modules/nf-core/chopper/tests/main.nf.test @@ -0,0 +1,45 @@ +nextflow_process { + + name "Test Process CHOPPER" + script "../main.nf" + process "CHOPPER" + tag "chopper" + tag "modules" + tag "modules_nfcore" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id:'test_out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + + def fastq_content = path(process.out.fastq.get(0).get(1)).linesGzip + + assertAll( + { assert process.success }, + // original pytest checks + { assert process.out.fastq.get(0).get(1) ==~ ".*/test_out.fastq.gz" }, + { assert fastq_content.contains("@2109d790-67ec-4fd1-8931-6c7e61908ff3 runid=97ca62ca093ff43533aa34c38a10b1d6325e7e7b read=52274 ch=243 start_time=2021-02-05T23:27:30Z flow_cell_id=FAP51364 protocol_group_id=data sample_id=RN20097 barcode=barcode01 barcode_alias=barcode01")}, + // additional nf-test checks + // Order of reads is not deterministic, so only assess whether the number of reads is correct + { assert snapshot(fastq_content.size()).match("number_of_lines") }, + { assert snapshot(process.out.versions).match("versions") } + + ) + } + + } + +} diff --git a/modules/nf-core/chopper/tests/main.nf.test.snap b/modules/nf-core/chopper/tests/main.nf.test.snap new file mode 100644 index 00000000..d2587e66 --- /dev/null +++ b/modules/nf-core/chopper/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + 
"versions": { + "content": [ + [ + "versions.yml:md5,5fe28ea455482c9fe88603ddcc461881" + ] + ], + "timestamp": "2023-10-20T08:27:24.592662298" + }, + "number_of_lines": { + "content": [ + 400 + ], + "timestamp": "2023-10-20T08:27:24.581289647" + } +} \ No newline at end of file diff --git a/modules/nf-core/chopper/tests/tags.yml b/modules/nf-core/chopper/tests/tags.yml new file mode 100644 index 00000000..89b6233b --- /dev/null +++ b/modules/nf-core/chopper/tests/tags.yml @@ -0,0 +1,2 @@ +chopper: + - modules/nf-core/chopper/** diff --git a/modules/nf-core/porechop/abi/environment.yml b/modules/nf-core/porechop/abi/environment.yml new file mode 100644 index 00000000..dabb4921 --- /dev/null +++ b/modules/nf-core/porechop/abi/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::porechop_abi=0.5.0 diff --git a/modules/nf-core/porechop/abi/main.nf b/modules/nf-core/porechop/abi/main.nf new file mode 100644 index 00000000..88ec5bd0 --- /dev/null +++ b/modules/nf-core/porechop/abi/main.nf @@ -0,0 +1,50 @@ +process PORECHOP_ABI { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/porechop_abi:0.5.0--py310h590eda1_0': + 'biocontainers/porechop_abi:0.5.0--py310h590eda1_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz") , emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + if ("$reads" == "${prefix}.fastq.gz") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + porechop_abi \\ + --input $reads \\ + --threads $task.cpus \\ + $args \\ + --output ${prefix}.fastq.gz \\ + | tee ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + """ + echo "" | gzip > ${prefix}.fastq.gz + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/porechop/abi/meta.yml b/modules/nf-core/porechop/abi/meta.yml new file mode 100644 index 00000000..a856ffbe --- /dev/null +++ b/modules/nf-core/porechop/abi/meta.yml @@ -0,0 +1,48 @@ +name: "porechop_abi" +description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads. +keywords: + - porechop_abi + - adapter + - nanopore +tools: + - "porechop_abi": + description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads. 
+ homepage: "https://github.com/bonsai-team/Porechop_ABI" + documentation: "https://github.com/bonsai-team/Porechop_ABI" + tool_dev_url: "https://github.com/bonsai-team/Porechop_ABI" + doi: "10.1101/2022.07.07.499093" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Adapter-trimmed fastq.gz file + pattern: "*.fastq.gz" + - log: + type: file + description: Log file containing stdout information + pattern: "*.log" +authors: + - "@sofstam" + - "LilyAnderssonLee" +maintainers: + - "@sofstam" + - "LilyAnderssonLee" diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test b/modules/nf-core/porechop/abi/tests/main.nf.test new file mode 100644 index 00000000..b5a29f90 --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process PORECHOP_ABI" + script "../main.nf" + process "PORECHOP_ABI" + tag "modules" + tag "modules_nfcore" + tag "porechop" + tag "porechop/abi" + + test("sarscov2-nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log.get(0).get(1)).readLines()[20..40], + process.out.versions).match() + } + ) + } + } + + test("sarscov2-nanopore - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = [ + [ id:'test'], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test.snap b/modules/nf-core/porechop/abi/tests/main.nf.test.snap new file mode 100644 index 00000000..ad63f4ed --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test.snap @@ -0,0 +1,94 @@ +{ + "sarscov2-nanopore": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e" + ] + ], + [ + " Best \u001b[0m", + " read Best \u001b[0m", + " start read end\u001b[0m", + " \u001b[4mSet %ID %ID \u001b[0m", + " \u001b[32mSQK-NSK007 100.0 73.1\u001b[0m", + " Rapid 40.4 0.0", + " RBK004_upstream 77.5 0.0", + " SQK-MAP006 75.8 72.7", + " SQK-MAP006 short 65.5 66.7", + " PCR adapters 1 73.9 69.6", + " PCR adapters 2 80.0 72.7", + " PCR adapters 3 70.8 69.6", + " 1D^2 part 1 71.4 70.0", + " 1D^2 part 2 84.8 75.8", + " cDNA SSP 63.0 61.7", + " \u001b[32mBarcode 1 (reverse) 100.0 100.0\u001b[0m", + " Barcode 2 (reverse) 70.8 69.2", + " Barcode 3 (reverse) 76.0 70.4", + " Barcode 4 (reverse) 74.1 71.4", + " Barcode 5 (reverse) 77.8 80.8", + " Barcode 6 (reverse) 73.1 70.8" + ], + [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-29T13:50:49.318599" + }, + "sarscov2-nanopore - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ], + "log": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + 
"id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-29T13:50:54.425389" + } +} \ No newline at end of file From 5aa3049e8f7c91547e574c32f34c8bb77c663a4f Mon Sep 17 00:00:00 2001 From: adro Date: Fri, 20 Sep 2024 14:24:03 +0200 Subject: [PATCH 02/33] Move long read preprocessing into subworkflow, and swapping porechop module to porechop/abi --- subworkflows/local/lr_preprocessing.nf | 77 ++++++++++++++++++++++++++ workflows/mag.nf | 55 +++--------------- 2 files changed, 85 insertions(+), 47 deletions(-) create mode 100644 subworkflows/local/lr_preprocessing.nf diff --git a/subworkflows/local/lr_preprocessing.nf b/subworkflows/local/lr_preprocessing.nf new file mode 100644 index 00000000..a6f7e663 --- /dev/null +++ b/subworkflows/local/lr_preprocessing.nf @@ -0,0 +1,77 @@ +/* + * LR_PREPROCESSING: Preprocessing and QC for long reads + */ + +include { NANOPLOT as NANOPLOT_RAW } from '../../modules/nf-core/nanoplot/main' +include { NANOPLOT as NANOPLOT_FILTERED } from '../../modules/nf-core/nanoplot/main' +include { NANOLYSE } from '../../modules/nf-core/nanolyse/main' +include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main' +include { FILTLONG } from '../../modules/local/filtlong' + + +workflow LR_PREPROCESSING { + take: + ch_raw_long_reads // [ [meta] , fastq] (mandatory) + ch_short_reads // [ [meta] , fastq1, fastq2] (mandatory) + ch_nanolyse_db // [fasta] + + main: + + ch_versions = Channel.empty() + + NANOPLOT_RAW ( + ch_raw_long_reads + ) + + ch_versions = ch_versions.mix(NANOPLOT_RAW.out.versions.first()) + + ch_long_reads = ch_raw_long_reads + .map { + meta, reads -> + def meta_new = meta - meta.subMap('run') + [ meta_new, reads ] + } + + if ( !params.assembly_input ) { + if (!params.skip_adapter_trimming) { + PORECHOP_ABI ( + 
ch_raw_long_reads + ) + ch_long_reads = PORECHOP_ABI.out.reads + ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) + } + + if (!params.keep_lambda) { + NANOLYSE ( + ch_long_reads, + ch_nanolyse_db + ) + ch_long_reads = NANOLYSE.out.fastq + ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) + } + + // join long and short reads by sample name + ch_short_reads_tmp = ch_short_reads + .map { meta, sr -> [ meta.id, meta, sr ] } + + ch_short_and_long_reads = ch_long_reads + .map { meta, lr -> [ meta.id, meta, lr ] } + .join(ch_short_reads_tmp, by: 0) + .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, lr, sr[0], sr[1] ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + + FILTLONG ( + ch_short_and_long_reads + ) + ch_long_reads = FILTLONG.out.reads + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + + NANOPLOT_FILTERED ( + ch_long_reads + ) + } + + // Do not re-initialise ch_versions here: it must carry the tool versions mixed in above to the caller + emit: + long_reads = ch_long_reads + versions = ch_versions +} diff --git a/workflows/mag.nf b/workflows/mag.nf index f71d4218..1846b8a4 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -24,6 +24,7 @@ include { GTDBTK } from '../subworkflows/local/gtdbtk' include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' include { DEPTHS } from '../subworkflows/local/depths' +include { LR_PREPROCESSING } from '../subworkflows/local/lr_preprocessing' // // MODULE: Installed directly from nf-core/modules @@ -355,55 +356,15 @@ workflow MAG { Preprocessing and QC for long reads ================================================================================ */ - NANOPLOT_RAW ( - ch_raw_long_reads - ) - ch_versions = ch_versions.mix(NANOPLOT_RAW.out.versions.first()) - - ch_long_reads = ch_raw_long_reads - .map { - meta, reads -> - def meta_new = meta - meta.subMap('run') - [ meta_new, reads ] - } - if ( 
!params.assembly_input ) { - if (!params.skip_adapter_trimming) { - PORECHOP_PORECHOP ( - ch_raw_long_reads - ) - ch_long_reads = PORECHOP_PORECHOP.out.reads - ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) - } - - if (!params.keep_lambda) { - NANOLYSE ( - ch_long_reads, - ch_nanolyse_db - ) - ch_long_reads = NANOLYSE.out.fastq - ch_versions = ch_versions.mix(NANOLYSE.out.versions.first()) - } - - // join long and short reads by sample name - ch_short_reads_tmp = ch_short_reads - .map { meta, sr -> [ meta.id, meta, sr ] } - - ch_short_and_long_reads = ch_long_reads - .map { meta, lr -> [ meta.id, meta, lr ] } - .join(ch_short_reads_tmp, by: 0) - .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, lr, sr[0], sr[1] ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end - - FILTLONG ( - ch_short_and_long_reads - ) - ch_long_reads = FILTLONG.out.reads - ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + LR_PREPROCESSING ( + ch_raw_long_reads, + ch_short_reads, + ch_nanolyse_db + ) - NANOPLOT_FILTERED ( - ch_long_reads - ) - } + ch_versions = ch_versions.mix(LR_PREPROCESSING.out.versions) + ch_long_reads = LR_PREPROCESSING.out.long_reads /* ================================================================================ From 451612afe32a6a7f4d84524a0b122cc9099f598f Mon Sep 17 00:00:00 2001 From: adro Date: Fri, 20 Sep 2024 14:40:01 +0200 Subject: [PATCH 03/33] Remove module import from main workflow, and add PORECHOP_ABI in conf/modules.config --- conf/modules.config | 2 +- workflows/mag.nf | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 81df5bc8..d4e445eb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -167,7 +167,7 @@ process { ] } - withName: PORECHOP_PORECHOP { + withName: PORECHOP_ABI { publishDir = [ path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, diff --git a/workflows/mag.nf 
b/workflows/mag.nf index 1846b8a4..0ba7c654 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -33,10 +33,6 @@ include { ARIA2 as ARIA2_UNTAR } from '../modul include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' -include { PORECHOP_PORECHOP } from '../modules/nf-core/porechop/porechop/main' -include { NANOPLOT as NANOPLOT_RAW } from '../modules/nf-core/nanoplot/main' -include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/nf-core/nanoplot/main' -include { NANOLYSE } from '../modules/nf-core/nanolyse/main' include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' include { FASTP } from '../modules/nf-core/fastp/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' @@ -61,7 +57,6 @@ include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../modules include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../modules/local/bowtie2_removal_build' include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../modules/local/bowtie2_removal_align' -include { FILTLONG } from '../modules/local/filtlong' include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' From 59ef702dec27ec6dcbd9e34e59e13ed55fe2752b Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Tue, 24 Sep 2024 21:16:04 +0200 Subject: [PATCH 04/33] Exchange local filtlong module to nf-core/filtlong --- conf/modules.config | 8 +- modules.json | 5 + modules/local/filtlong.nf | 33 ------ modules/nf-core/filtlong/environment.yml | 5 + modules/nf-core/filtlong/main.nf | 40 
+++++++ modules/nf-core/filtlong/meta.yml | 65 +++++++++++ modules/nf-core/filtlong/tests/main.nf.test | 108 ++++++++++++++++++ .../nf-core/filtlong/tests/main.nf.test.snap | 65 +++++++++++ .../nf-core/filtlong/tests/nextflow.config | 4 + subworkflows/local/lr_preprocessing.nf | 8 +- 10 files changed, 303 insertions(+), 38 deletions(-) delete mode 100644 modules/local/filtlong.nf create mode 100644 modules/nf-core/filtlong/environment.yml create mode 100644 modules/nf-core/filtlong/main.nf create mode 100644 modules/nf-core/filtlong/meta.yml create mode 100644 modules/nf-core/filtlong/tests/main.nf.test create mode 100644 modules/nf-core/filtlong/tests/main.nf.test.snap create mode 100644 modules/nf-core/filtlong/tests/nextflow.config diff --git a/conf/modules.config b/conf/modules.config index d4e445eb..c350a831 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -178,13 +178,19 @@ process { } withName: FILTLONG { + ext.args = [ + "--min_length ${params.longreads_min_length}", + "--keep_percent ${params.longreads_keep_percent}", + "--trim", + "--length_weight ${params.longreads_length_weight}" + ].join(' ').trim() publishDir = [ path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, pattern: "*_lr_filtlong.fastq.gz", enabled: params.save_filtlong_reads ] - ext.prefix = { "${meta.id}_run${meta.run}_lengthfiltered" } + ext.prefix = { "${meta.id}_run${meta.run}_lr_filtlong" } } withName: NANOLYSE { diff --git a/modules.json b/modules.json index f69d0a28..de51125e 100644 --- a/modules.json +++ b/modules.json @@ -112,6 +112,11 @@ "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, + "filtlong": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "freebayes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/filtlong.nf b/modules/local/filtlong.nf deleted file mode 100644 index 
5410c1cb..00000000 --- a/modules/local/filtlong.nf +++ /dev/null @@ -1,33 +0,0 @@ -process FILTLONG { - tag "$meta.id" - - conda "bioconda::filtlong=0.2.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/filtlong:0.2.0--he513fc3_3' : - 'biocontainers/filtlong:0.2.0--he513fc3_3' }" - - input: - tuple val(meta), path(long_reads), path(short_reads_1), path(short_reads_2) - - output: - tuple val(meta), path("${meta.id}_lr_filtlong.fastq.gz"), emit: reads - path "versions.yml" , emit: versions - - script: - """ - filtlong \ - -1 ${short_reads_1} \ - -2 ${short_reads_2} \ - --min_length ${params.longreads_min_length} \ - --keep_percent ${params.longreads_keep_percent} \ - --trim \ - --length_weight ${params.longreads_length_weight} \ - ${long_reads} | gzip > ${meta.id}_lr_filtlong.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - filtlong: \$(filtlong --version | sed -e "s/Filtlong v//g") - END_VERSIONS - """ -} - diff --git a/modules/nf-core/filtlong/environment.yml b/modules/nf-core/filtlong/environment.yml new file mode 100644 index 00000000..746c83a4 --- /dev/null +++ b/modules/nf-core/filtlong/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::filtlong=0.2.1 diff --git a/modules/nf-core/filtlong/main.nf b/modules/nf-core/filtlong/main.nf new file mode 100644 index 00000000..1c158439 --- /dev/null +++ b/modules/nf-core/filtlong/main.nf @@ -0,0 +1,40 @@ +process FILTLONG { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' : + 'biocontainers/filtlong:0.2.1--h9a82719_0' }" + + input: + tuple val(meta), path(shortreads), path(longreads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" + if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!" + """ + filtlong \\ + $short_reads \\ + $args \\ + $longreads \\ + 2> >(tee ${prefix}.log >&2) \\ + | gzip -n > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" ) + END_VERSIONS + """ +} + diff --git a/modules/nf-core/filtlong/meta.yml b/modules/nf-core/filtlong/meta.yml new file mode 100644 index 00000000..804c1b0d --- /dev/null +++ b/modules/nf-core/filtlong/meta.yml @@ -0,0 +1,65 @@ +name: filtlong +description: Filtlong filters long reads based on quality measures or short read data. +keywords: + - nanopore + - quality control + - QC + - filtering + - long reads + - short reads +tools: + - filtlong: + description: Filtlong is a tool for filtering long reads. It can take a set of + long reads and produce a smaller, better subset. It uses both read length (longer + is better) and read identity (higher is better) when choosing which reads pass + the filter. + homepage: https://anaconda.org/bioconda/filtlong + tool_dev_url: https://github.com/rrwick/Filtlong + licence: ["GPL v3"] + identifier: biotools:filtlong +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - shortreads: + type: file + description: fastq file + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + - longreads: + type: file + description: fastq file + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Filtered (compressed) fastq file + pattern: "*.fastq.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Standard error logging file containing summary statistics + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@d4straub" + - "@sofstam" +maintainers: + - "@d4straub" + - "@sofstam" diff --git a/modules/nf-core/filtlong/tests/main.nf.test b/modules/nf-core/filtlong/tests/main.nf.test new file mode 100644 index 00000000..d54ce39c --- /dev/null +++ b/modules/nf-core/filtlong/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process FILTLONG" + script "../main.nf" + process "FILTLONG" + config "./nextflow.config" + tag "filtlong" + tag "modules" + tag "modules_nfcore" + + test("sarscov2 nanopore [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).readLines().contains("Scoring long reads")}, + { assert snapshot( + process.out.reads, + process.out.versions + ).match() + } + ) + } + + } + + + test("sarscov2 nanopore [fastq] + Illumina single-end [fastq]") { + + when { + params { + outdir = 
"$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).readLines().contains("Scoring long reads")}, + { assert snapshot( + process.out.reads, + process.out.versions + ).match() + } + ) + } + + } + + + test("sarscov2 nanopore [fastq] + Illumina paired-end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).readLines().contains("Scoring long reads")}, + { assert snapshot( + process.out.reads, + process.out.versions + ).match() + } + ) + } + + } +} diff --git a/modules/nf-core/filtlong/tests/main.nf.test.snap b/modules/nf-core/filtlong/tests/main.nf.test.snap new file mode 100644 index 00000000..1a25c3fc --- /dev/null +++ b/modules/nf-core/filtlong/tests/main.nf.test.snap @@ -0,0 +1,65 @@ +{ + "sarscov2 nanopore [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lr.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + [ + "versions.yml:md5,af5988f30157282acdb0ac50ebb4c8cc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-08-06T10:51:29.197603" + }, + "sarscov2 nanopore [fastq] + Illumina paired-end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lr.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + [ + "versions.yml:md5,af5988f30157282acdb0ac50ebb4c8cc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-06T10:51:39.68464" + }, + "sarscov2 nanopore [fastq] + Illumina single-end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_lr.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + [ + "versions.yml:md5,af5988f30157282acdb0ac50ebb4c8cc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-06T10:51:34.404022" + } +} \ No newline at end of file diff --git a/modules/nf-core/filtlong/tests/nextflow.config b/modules/nf-core/filtlong/tests/nextflow.config new file mode 100644 index 00000000..d366b4c3 --- /dev/null +++ b/modules/nf-core/filtlong/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = "--min_length 10" + ext.prefix = "test_lr" +} diff --git a/subworkflows/local/lr_preprocessing.nf b/subworkflows/local/lr_preprocessing.nf index a6f7e663..5042f3d2 100644 --- a/subworkflows/local/lr_preprocessing.nf +++ b/subworkflows/local/lr_preprocessing.nf @@ -6,8 +6,7 @@ include { NANOPLOT as NANOPLOT_RAW } from '../../mo include { NANOPLOT as NANOPLOT_FILTERED } from '../../modules/nf-core/nanoplot/main' include { NANOLYSE } from '../../modules/nf-core/nanolyse/main' include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main' -include { FILTLONG } from '../../modules/local/filtlong' - +include { FILTLONG } from '../../modules/nf-core/filtlong' workflow LR_PREPROCESSING { take: @@ -57,7 +56,7 @@ workflow LR_PREPROCESSING { ch_short_and_long_reads = ch_long_reads .map { meta, lr -> [ meta.id, meta, lr ] } .join(ch_short_reads_tmp, by: 0) - .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, 
lr, sr[0], sr[1] ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end + .map { id, meta_lr, lr, meta_sr, sr -> [ meta_lr, sr, lr ] } // should not occur for single-end, since SPAdes (hybrid) does not support single-end FILTLONG ( ch_short_and_long_reads @@ -68,9 +67,10 @@ workflow LR_PREPROCESSING { NANOPLOT_FILTERED ( ch_long_reads ) + + ch_versions = ch_versions.mix(NANOPLOT_FILTERED.out.versions.first()) } - ch_versions = Channel.empty() emit: long_reads = ch_long_reads versions = ch_versions From cd5ce2fada6f478eda8f0b2d308627634a81defe Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Tue, 24 Sep 2024 22:38:48 +0200 Subject: [PATCH 05/33] Add filtlong and porechop logs to multiqc --- assets/multiqc_config.yml | 3 +++ subworkflows/local/lr_preprocessing.nf | 4 ++++ workflows/mag.nf | 1 + 3 files changed, 8 insertions(+) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 10c24150..b79e667b 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -25,6 +25,8 @@ run_modules: - quast - kraken - prokka + - porechop + - filtlong ## Module order top_modules: @@ -35,6 +37,7 @@ top_modules: - "fastp" - "adapterRemoval" - "porechop" + - "filtlong" - "fastqc": name: "FastQC: after preprocessing" info: "After trimming and, if requested, contamination removal." 
diff --git a/subworkflows/local/lr_preprocessing.nf b/subworkflows/local/lr_preprocessing.nf index 5042f3d2..9e8d9eca 100644 --- a/subworkflows/local/lr_preprocessing.nf +++ b/subworkflows/local/lr_preprocessing.nf @@ -17,6 +17,7 @@ workflow LR_PREPROCESSING { main: ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() NANOPLOT_RAW ( ch_raw_long_reads @@ -38,6 +39,7 @@ workflow LR_PREPROCESSING { ) ch_long_reads = PORECHOP_ABI.out.reads ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_ABI.out.log ) } if (!params.keep_lambda) { @@ -63,6 +65,7 @@ workflow LR_PREPROCESSING { ) ch_long_reads = FILTLONG.out.reads ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix( FILTLONG.out.log ) NANOPLOT_FILTERED ( ch_long_reads @@ -74,4 +77,5 @@ workflow LR_PREPROCESSING { emit: long_reads = ch_long_reads versions = ch_versions + multiqc_files = ch_multiqc_files } diff --git a/workflows/mag.nf b/workflows/mag.nf index 0ba7c654..c869ce14 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -996,6 +996,7 @@ workflow MAG { ) ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix( LR_PREPROCESSING.out.multiqc_files.collect{it[1]}.ifEmpty([]) ) if (!params.assembly_input) { From 5820fb4ac0696ffe635b92b0c73c1b172902a896 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Tue, 24 Sep 2024 23:27:38 +0200 Subject: [PATCH 06/33] Added --longread_preprocessing_tools parameters, to let user specify long read preprocessing tools. 
Currently only has the option to specify porechop_abi, but I decided this is a nice solution for future tools that the user might want to choose among --- conf/modules.config | 10 ++++++++++ nextflow.config | 2 ++ nextflow_schema.json | 6 ++++++ subworkflows/local/lr_preprocessing.nf | 25 +++++++++++++++++++------ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c350a831..c543165c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -167,6 +167,16 @@ process { ] } + withName: PORECHOP_PORECHOP { + publishDir = [ + path: { "${params.outdir}/QC_longreads/porechop" }, + mode: params.publish_dir_mode, + pattern: "*_trimmed.fastq", + enabled: params.save_porechop_reads + ] + ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } + } + withName: PORECHOP_ABI { publishDir = [ path: { "${params.outdir}/QC_longreads/porechop" }, diff --git a/nextflow.config b/nextflow.config index 9306ae99..3b125cbf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,8 @@ params { adapterremoval_adapter2 = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT' adapterremoval_trim_quality_stretch = false keep_phix = false + // long read preprocessing options + longread_preprocessing_tools = null // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" save_phixremoved_reads = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 35e85825..00f3ffa4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -480,6 +480,12 @@ "save_filtlong_reads": { "type": "boolean", "description": "Specify to save the resulting length filtered FASTQ files to --outdir." 
+ }, + "longread_preprocessing_tools": { + "type": "string", + "description": "Specify which long read preprocessing tools to use.", + "help_text": "multiple tools can be specified separated by a comma", + "pattern": "^((porechop_abi)?,?)*(? Date: Wed, 25 Sep 2024 08:46:24 +0200 Subject: [PATCH 07/33] Update modules/nf-core/filtlong/main.nf Fix linting problem --- modules/nf-core/filtlong/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nf-core/filtlong/main.nf b/modules/nf-core/filtlong/main.nf index 1c158439..627247fe 100644 --- a/modules/nf-core/filtlong/main.nf +++ b/modules/nf-core/filtlong/main.nf @@ -37,4 +37,3 @@ process FILTLONG { END_VERSIONS """ } - From c11601aea64b09eb84c159a74e1cf499c50a001e Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 30 Sep 2024 08:12:56 +0200 Subject: [PATCH 08/33] make subworkflow name more verbose --- .../{lr_preprocessing.nf => longread_preprocessing.nf} | 4 ++-- workflows/mag.nf | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) rename subworkflows/local/{lr_preprocessing.nf => longread_preprocessing.nf} (97%) diff --git a/subworkflows/local/lr_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf similarity index 97% rename from subworkflows/local/lr_preprocessing.nf rename to subworkflows/local/longread_preprocessing.nf index e18f4e6a..4748e9dd 100644 --- a/subworkflows/local/lr_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -1,5 +1,5 @@ /* - * LR_PREPROCESSING: Preprocessing and QC for long reads + * LONGREAD_PREPROCESSING: Preprocessing and QC for long reads */ include { NANOPLOT as NANOPLOT_RAW } from '../../modules/nf-core/nanoplot/main' @@ -9,7 +9,7 @@ include { PORECHOP_PORECHOP } from '../../mo include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main' include { FILTLONG } from '../../modules/nf-core/filtlong' -workflow LR_PREPROCESSING { +workflow LONGREAD_PREPROCESSING { take: ch_raw_long_reads // [ [meta] , fastq] (mandatory) 
ch_short_reads // [ [meta] , fastq1, fastq2] (mandatory) diff --git a/workflows/mag.nf b/workflows/mag.nf index 0ddba2bf..49314024 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -24,7 +24,7 @@ include { GTDBTK } from '../subworkflows/local/gtdbtk' include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' include { DEPTHS } from '../subworkflows/local/depths' -include { LR_PREPROCESSING } from '../subworkflows/local/lr_preprocessing' +include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' // // MODULE: Installed directly from nf-core/modules @@ -358,14 +358,14 @@ workflow MAG { ================================================================================ */ - LR_PREPROCESSING ( + LONGREAD_PREPROCESSING ( ch_raw_long_reads, ch_short_reads, ch_nanolyse_db ) - ch_versions = ch_versions.mix(LR_PREPROCESSING.out.versions) - ch_long_reads = LR_PREPROCESSING.out.long_reads + ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions) + ch_long_reads = LONGREAD_PREPROCESSING.out.long_reads /* ================================================================================ @@ -1002,7 +1002,7 @@ workflow MAG { ) ch_multiqc_files = ch_multiqc_files.mix(FASTQC_RAW.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix( LR_PREPROCESSING.out.multiqc_files.collect{it[1]}.ifEmpty([]) ) + ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.multiqc_files.collect{it[1]}.ifEmpty([]) ) if (!params.assembly_input) { From 31deb5ca25935d88c078e00ee9030f2e907bfb59 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 30 Sep 2024 09:09:24 +0200 Subject: [PATCH 09/33] make --longread_adaptertrimming_tool as enum porechop or porechop_abi --- nextflow.config | 2 +- nextflow_schema.json | 9 +++++---- subworkflows/local/longread_preprocessing.nf | 8 +++----- 3 files changed, 9 insertions(+), 10 
deletions(-) diff --git a/nextflow.config b/nextflow.config index fae028e5..3737cf8f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { adapterremoval_trim_quality_stretch = false keep_phix = false // long read preprocessing options - longread_preprocessing_tools = null + longread_adaptertrimming_tool = "porechop" // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" save_phixremoved_reads = false diff --git a/nextflow_schema.json b/nextflow_schema.json index f32ec92b..8e80c06e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -486,11 +486,12 @@ "type": "boolean", "description": "Specify to save the resulting length filtered FASTQ files to --outdir." }, - "longread_preprocessing_tools": { + "longread_adaptertrimming_tool": { "type": "string", - "description": "Specify which long read preprocessing tools to use.", - "help_text": "multiple tools can be specified separated by a comma", - "pattern": "^((porechop_abi)?,?)*(? 
Date: Mon, 30 Sep 2024 11:07:11 +0200 Subject: [PATCH 10/33] Make prefix for porechop/porechop-abi more verbose --- conf/modules.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 05e437c3..7a89f777 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -171,20 +171,20 @@ process { publishDir = [ path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, - pattern: "*_trimmed.fastq", + pattern: "*_porechop_trimmed.fastq", enabled: params.save_porechop_reads ] - ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } + ext.prefix = { "${meta.id}_run${meta.run}_porechop_trimmed" } } withName: PORECHOP_ABI { publishDir = [ path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, - pattern: "*_trimmed.fastq", + pattern: "*_porechop-abi_trimmed.fastq", enabled: params.save_porechop_reads ] - ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } + ext.prefix = { "${meta.id}_run${meta.run}_porechop-abi_trimmed" } } withName: FILTLONG { From 8955e319f679c3a6b5cee97c9d27fd3db85c876f Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Mon, 30 Sep 2024 11:25:18 +0200 Subject: [PATCH 11/33] lint --- CHANGELOG.md | 4 ++++ CITATIONS.md | 6 ++++++ README.md | 1 + 3 files changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76ac357c..eebcd549 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) - [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_useram to run GTDBTk in memory mode rather than write to disk (reported by @harper357, fixed by @jfy133) +- Added optional use of porechop-abi, instead of porechop, for long read adapter trimming. 
### `Changed` @@ -20,6 +21,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#667](https://github.com/nf-core/mag/pull/667) - Fix pipeline crashing if only CONCOCT selected during binning (reported and fixed by @jfy133) - [#670](https://github.com/nf-core/mag/pull/670) - Re-add missing GTDBTk parameters into GTDBTk module (reported by harper357, fixed by @jfy133) - [#672](https://github.com/nf-core/mag/pull/673) - Fix GTDB-Tk per-sample TSV files not being published in output directory (reported by @jhayer, fix by @jfy133) +- Make longread preprocessing a subworkflow +- Add porechop and filtlong logs to multiqc +- Change local filtlong module to the official nf-core/filtlong module ### `Dependencies` diff --git a/CITATIONS.md b/CITATIONS.md index 560a103a..b5e2e091 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,6 +40,10 @@ > Parks, D. H., Imelfort, M., Skennerton, C. T., Hugenholtz, P., & Tyson, G. W. (2015). CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes. Genome Research, 25(7), 1043–1055. doi: 10.1101/gr.186072.114 +- [Chopper](https://github.com/wdecoster/chopper) + + > De Coster W, Rademakers R. NanoPack2: population-scale evaluation of long-read sequencing data. Bioinformatics. 2023 May 4;39(5):btad311. doi: 10.1093/bioinformatics/btad311. PMID: 37171891; PMCID: PMC10196664. + - [CONCOCT](https://doi.org/10.1038/nmeth.3103) > Alneberg, J., Bjarnason, B. S., de Bruijn, I., Schirmer, M., Quick, J., Ijaz, U. Z., Lahti, L., Loman, N. J., Andersson, A. F., & Quince, C. (2014). Binning metagenomic contigs by coverage and composition. Nature Methods, 11(11), 1144–1146. doi: 10.1038/nmeth.3103 @@ -116,6 +120,8 @@ - [Porechop](https://github.com/rrwick/Porechop) +- [Porechop-abi](https://github.com/bonsai-team/Porechop_ABI) + - [Prodigal](https://pubmed.ncbi.nlm.nih.gov/20211023/) > Hyatt D, Chen GL, Locascio PF, Land ML, Larimer FW, Hauser LJ. 
Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics. 2010 Mar 8;11:119. doi: 10.1186/1471-2105-11-119. PMID: 20211023; PMCID: PMC2848648. diff --git a/README.md b/README.md index 405e298a..1cf42b74 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,7 @@ Other code contributors include: - [Jim Downie](https://github.com/prototaxites) - [Phil Palmer](https://github.com/PhilPalmer) - [@willros](https://github.com/willros) +- [Adam Rosenbaum](https://github.com/muabnezor) Long read processing was inspired by [caspargross/HybridAssembly](https://github.com/caspargross/HybridAssembly) written by Caspar Gross [@caspargross](https://github.com/caspargross) From 644e2b1aaba9d12c75546b4bbd8289ed1b100c73 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Tue, 1 Oct 2024 09:42:50 +0200 Subject: [PATCH 12/33] Change default search pattern for filtlong.log files for the filtlongmodule in multiqc --- assets/multiqc_config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 78a0ccdc..fe8d388f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -112,6 +112,9 @@ sp: fn_re: ".*[kraken2|centrifuge].*report.txt" quast: fn_re: "report.*.tsv" + filtlong: + num_lines: 20 + fn_re: ".*_filtlong.log" ## File name cleaning extra_fn_clean_exts: From 3e15502b1a2947d3e27a4014a76580a73d2b37c9 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:35:39 +0200 Subject: [PATCH 13/33] Update CHANGELOG.md Co-authored-by: James A. 
Fellows Yates --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eebcd549..f384573d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) - [#670](https://github.com/nf-core/mag/pull/670) - Added --gtdbtk_pplacer_useram to run GTDBTk in memory mode rather than write to disk (reported by @harper357, fixed by @jfy133) -- Added optional use of porechop-abi, instead of porechop, for long read adapter trimming. +- [#674](https://github.com/nf-core/mag/pull/674/files) - Added optional use of porechop-abi, instead of porechop, for long read adapter trimming (added by @muabnezor) ### `Changed` From ec8538ce066f2a25e4fd8b4102a2ef7e28fe3dd6 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:37:03 +0200 Subject: [PATCH 14/33] Update CHANGELOG.md Co-authored-by: James A. 
Fellows Yates --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f384573d..005dc584 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,9 +21,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#667](https://github.com/nf-core/mag/pull/667) - Fix pipeline crashing if only CONCOCT selected during binning (reported and fixed by @jfy133) - [#670](https://github.com/nf-core/mag/pull/670) - Re-add missing GTDBTk parameters into GTDBTk module (reported by harper357, fixed by @jfy133) - [#672](https://github.com/nf-core/mag/pull/673) - Fix GTDB-Tk per-sample TSV files not being published in output directory (reported by @jhayer, fix by @jfy133) -- Make longread preprocessing a subworkflow -- Add porechop and filtlong logs to multiqc -- Change local filtlong module to the official nf-core/filtlong module +- [#674](https://github.com/nf-core/mag/pull/674/files) - Make longread preprocessing a subworkflow (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674/files) - Add porechop and filtlong logs to multiqc (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674/files) - Change local filtlong module to the official nf-core/filtlong module (added by @muabnezor) ### `Dependencies` From c6fb9b38990172cb39586fa1b4acdf1dd671d4b5 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:38:49 +0200 Subject: [PATCH 15/33] Update nextflow_schema.json Co-authored-by: James A. 
Fellows Yates --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 8e80c06e..fbf0b53b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -488,7 +488,7 @@ }, "longread_adaptertrimming_tool": { "type": "string", - "description": "Specify which long read adaptertrimming tool to use.", + "description": "Specify which long read adapter trimming tool to use.", "help_text": "porechop or porechop_abi", "enum": ["porechop", "porechop_abi"], "default": "porechop" From b24f52f2e17fe43cc055ffd263beca8f756b1f4b Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:39:38 +0200 Subject: [PATCH 16/33] Update conf/modules.config Co-authored-by: James A. Fellows Yates --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 7a89f777..f3f75199 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -171,7 +171,7 @@ process { publishDir = [ path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, - pattern: "*_porechop_trimmed.fastq", + pattern: "*_porechop_trimmed.fastq.gz", enabled: params.save_porechop_reads ] ext.prefix = { "${meta.id}_run${meta.run}_porechop_trimmed" } From fcf509cd5da3929d0fe390c15b33da3d8dfa1310 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:40:54 +0200 Subject: [PATCH 17/33] Update conf/modules.config Co-authored-by: James A. 
Fellows Yates --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index f3f75199..03d1769f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -197,7 +197,7 @@ process { publishDir = [ path: { "${params.outdir}/QC_longreads/Filtlong" }, mode: params.publish_dir_mode, - pattern: "*_lr_filtlong.fastq.gz", + pattern: "*_filtlong.fastq.gz", enabled: params.save_filtlong_reads ] ext.prefix = { "${meta.id}_run${meta.run}_lr_filtlong" } From 8f9cac6174613109e60942b6cd7f20ffc91aed34 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:41:24 +0200 Subject: [PATCH 18/33] Update conf/modules.config Co-authored-by: James A. Fellows Yates --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 03d1769f..62187329 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -200,7 +200,7 @@ process { pattern: "*_filtlong.fastq.gz", enabled: params.save_filtlong_reads ] - ext.prefix = { "${meta.id}_run${meta.run}_lr_filtlong" } + ext.prefix = { "${meta.id}_run${meta.run}_filtlong" } } withName: NANOLYSE { From 10ecdc50d6a4b4072a20ddb574ad6131fec522dc Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:42:01 +0200 Subject: [PATCH 19/33] Update nextflow_schema.json Co-authored-by: James A. 
Fellows Yates --- nextflow_schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index fbf0b53b..568f6936 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -489,7 +489,6 @@ "longread_adaptertrimming_tool": { "type": "string", "description": "Specify which long read adapter trimming tool to use.", - "help_text": "porechop or porechop_abi", "enum": ["porechop", "porechop_abi"], "default": "porechop" } From ea6f9a4c531cb48b2b19e0a5cddeb10e7e25488c Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:42:10 +0200 Subject: [PATCH 20/33] Update nextflow.config Co-authored-by: James A. Fellows Yates --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 3737cf8f..0a736ec6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { adapterremoval_trim_quality_stretch = false keep_phix = false // long read preprocessing options - longread_adaptertrimming_tool = "porechop" + longread_adaptertrimming_tool = "porechop_abi" // phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" phix_reference = "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" save_phixremoved_reads = false From b43b9f6623ac229cb4092ddf13bd2ab5fb3013ca Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:42:25 +0200 Subject: [PATCH 21/33] Update subworkflows/local/longread_preprocessing.nf Co-authored-by: James A. 
Fellows Yates --- subworkflows/local/longread_preprocessing.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 4c307715..1edaf34b 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING { ch_nanolyse_db // [fasta] main: - ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() From 25981a1fc02f5d8861faa56934b2421c8ed5ec39 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:42:33 +0200 Subject: [PATCH 22/33] Update subworkflows/local/longread_preprocessing.nf Co-authored-by: James A. Fellows Yates --- subworkflows/local/longread_preprocessing.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 1edaf34b..cf9603aa 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -22,7 +22,6 @@ workflow LONGREAD_PREPROCESSING { NANOPLOT_RAW ( ch_raw_long_reads ) - ch_versions = ch_versions.mix(NANOPLOT_RAW.out.versions.first()) ch_long_reads = ch_raw_long_reads From edf8dab81c4af99e3ae25865c641d60f224f7029 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 13:42:42 +0200 Subject: [PATCH 23/33] Update subworkflows/local/longread_preprocessing.nf Co-authored-by: James A. 
Fellows Yates --- subworkflows/local/longread_preprocessing.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index cf9603aa..ec434858 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -84,7 +84,7 @@ workflow LONGREAD_PREPROCESSING { } emit: - long_reads = ch_long_reads - versions = ch_versions + long_reads = ch_long_reads + versions = ch_versions multiqc_files = ch_multiqc_files } From 669a8547d06d1dcdc93314739ca8c29cf32052af Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 15:48:18 +0200 Subject: [PATCH 24/33] Linting fix --- modules.json | 5 -- modules/nf-core/chopper/environment.yml | 5 -- modules/nf-core/chopper/main.nf | 42 --------------- modules/nf-core/chopper/meta.yml | 53 ------------------- modules/nf-core/chopper/tests/main.nf.test | 45 ---------------- .../nf-core/chopper/tests/main.nf.test.snap | 16 ------ modules/nf-core/chopper/tests/tags.yml | 2 - 7 files changed, 168 deletions(-) delete mode 100644 modules/nf-core/chopper/environment.yml delete mode 100644 modules/nf-core/chopper/main.nf delete mode 100644 modules/nf-core/chopper/meta.yml delete mode 100644 modules/nf-core/chopper/tests/main.nf.test delete mode 100644 modules/nf-core/chopper/tests/main.nf.test.snap delete mode 100644 modules/nf-core/chopper/tests/tags.yml diff --git a/modules.json b/modules.json index a06a5679..a72556a7 100644 --- a/modules.json +++ b/modules.json @@ -62,11 +62,6 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, - "chopper": { - "branch": "master", - "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": ["modules"] - }, "concoct/concoct": { "branch": "master", "git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353", diff --git a/modules/nf-core/chopper/environment.yml b/modules/nf-core/chopper/environment.yml 
deleted file mode 100644 index e80840e1..00000000 --- a/modules/nf-core/chopper/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - bioconda::chopper=0.3.0 diff --git a/modules/nf-core/chopper/main.nf b/modules/nf-core/chopper/main.nf deleted file mode 100644 index 06f79849..00000000 --- a/modules/nf-core/chopper/main.nf +++ /dev/null @@ -1,42 +0,0 @@ -process CHOPPER { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/chopper:0.3.0--hd03093a_0': - 'biocontainers/chopper:0.3.0--hd03093a_0' }" - - input: - tuple val(meta), path(fastq) - - output: - tuple val(meta), path("*.fastq.gz") , emit: fastq - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - if ("$fastq" == "${prefix}.fastq.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!" - """ - zcat \\ - $args \\ - $fastq | \\ - chopper \\ - --threads $task.cpus \\ - $args2 | \\ - gzip \\ - $args3 > ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - chopper: \$(chopper --version 2>&1 | cut -d ' ' -f 2) - END_VERSIONS - """ -} diff --git a/modules/nf-core/chopper/meta.yml b/modules/nf-core/chopper/meta.yml deleted file mode 100644 index 9d8093d6..00000000 --- a/modules/nf-core/chopper/meta.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: "chopper" -description: Filter and trim long read data. 
-keywords: - - filter - - trimming - - fastq - - nanopore - - qc -tools: - - "zcat": - description: "zcat uncompresses either a list of files on the command line or its standard input and writes the uncompressed data on standard output." - documentation: "https://linux.die.net/man/1/zcat" - args_id: "$args" - - "chopper": - description: "A rust command line for filtering and trimming long reads." - homepage: "https://github.com/wdecoster/chopper" - documentation: "https://github.com/wdecoster/chopper" - tool_dev_url: "https://github.com/wdecoster/chopper" - doi: "10.1093/bioinformatics/bty149" - licence: ["MIT"] - args_id: "$args2" - - "gzip": - description: "Gzip reduces the size of the named files using Lempel-Ziv coding (LZ77)." - documentation: "https://linux.die.net/man/1/gzip" - args_id: "$args3" -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fastq: - type: file - description: FastQ with reads from long read sequencing e.g. PacBio or ONT - pattern: "*.{fastq.gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fastq: - type: file - description: Filtered and trimmed FastQ file - pattern: "*.{fastq.gz}" -authors: - - "@FynnFreyer" -maintainers: - - "@FynnFreyer" diff --git a/modules/nf-core/chopper/tests/main.nf.test b/modules/nf-core/chopper/tests/main.nf.test deleted file mode 100644 index ee195b5f..00000000 --- a/modules/nf-core/chopper/tests/main.nf.test +++ /dev/null @@ -1,45 +0,0 @@ -nextflow_process { - - name "Test Process CHOPPER" - script "../main.nf" - process "CHOPPER" - tag "chopper" - tag "modules" - tag "modules_nfcore" - - test("Should run without failures") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = [ - [id:'test_out' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) - ] - """ - } - } - - then { - - def fastq_content = path(process.out.fastq.get(0).get(1)).linesGzip - - assertAll( - { assert process.success }, - // original pytest checks - { assert process.out.fastq.get(0).get(1) ==~ ".*/test_out.fastq.gz" }, - { assert fastq_content.contains("@2109d790-67ec-4fd1-8931-6c7e61908ff3 runid=97ca62ca093ff43533aa34c38a10b1d6325e7e7b read=52274 ch=243 start_time=2021-02-05T23:27:30Z flow_cell_id=FAP51364 protocol_group_id=data sample_id=RN20097 barcode=barcode01 barcode_alias=barcode01")}, - // additional nf-test checks - // Order of reads is not deterministic, so only assess whether the number of reads is correct - { assert snapshot(fastq_content.size()).match("number_of_lines") }, - { assert snapshot(process.out.versions).match("versions") } - - ) - } - - } - -} diff --git a/modules/nf-core/chopper/tests/main.nf.test.snap b/modules/nf-core/chopper/tests/main.nf.test.snap deleted file mode 100644 index d2587e66..00000000 --- a/modules/nf-core/chopper/tests/main.nf.test.snap +++ /dev/null @@ -1,16 +0,0 @@ -{ - 
"versions": { - "content": [ - [ - "versions.yml:md5,5fe28ea455482c9fe88603ddcc461881" - ] - ], - "timestamp": "2023-10-20T08:27:24.592662298" - }, - "number_of_lines": { - "content": [ - 400 - ], - "timestamp": "2023-10-20T08:27:24.581289647" - } -} \ No newline at end of file diff --git a/modules/nf-core/chopper/tests/tags.yml b/modules/nf-core/chopper/tests/tags.yml deleted file mode 100644 index 89b6233b..00000000 --- a/modules/nf-core/chopper/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -chopper: - - modules/nf-core/chopper/** From a1098f313c3b2982f338be798046530fd7dbfd9e Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Thu, 3 Oct 2024 15:52:32 +0200 Subject: [PATCH 25/33] make porechop-abi default long read adapter trimming tool --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 14f6764b..b4809d15 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -490,7 +490,7 @@ "type": "string", "description": "Specify which long read adapter trimming tool to use.", "enum": ["porechop", "porechop_abi"], - "default": "porechop" + "default": "porechop_abi" } } }, From 17ba45c8f42056214a6beb9a537a70ea2d554540 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 4 Oct 2024 07:52:48 +0200 Subject: [PATCH 26/33] Fix changelog --- CHANGELOG.md | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9fa9e4d..ef8858fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,35 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## dev [unreleased] + +### `Added` + +- [#674](https://github.com/nf-core/mag/pull/674/files) - Added `--longread_adaptertrimming_tool` Where user can chose between porechop_abi (default) and porechop (added by @muabnezor) + +### `Changed` + +- [#674](https://github.com/nf-core/mag/pull/674/files) - Changed to porechop-abi as default adapter trimming tool for long reads. User can still use porechop if prefered. + +### `Fixed` + +- [#674](https://github.com/nf-core/mag/pull/674/files) - Make longread preprocessing a subworkflow (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674/files) - Add porechop and filtlong logs to multiqc (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674/files) - Change local filtlong module to the official nf-core/filtlong module (added by @muabnezor) + +### `Dependencies` + +| Tool | Previous version | New version | +| ------------ | ---------------- | ----------- | +| Porechop_ABI | | 0.5.0 | +| Filtlong | 0.2.0 | 0.2.1 | + ## 3.1.0 [2024-10-03] ### `Added` - [#665](https://github.com/nf-core/mag/pull/648) - Add support for supplying pre-made bowtie host reference index (requested by @simone-pignotti, added by @jfy133) - [#670](https://github.com/nf-core/mag/pull/670) - Added `--gtdbtk_pplacer_useram` to run GTDBTk in memory mode rather than write to disk (requested by @harper357, fixed by @jfy133) -- [#674](https://github.com/nf-core/mag/pull/674/files) - Added optional use of porechop-abi, instead of porechop, for long read adapter trimming (added by @muabnezor) - ### `Changed` @@ -22,9 +43,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#667](https://github.com/nf-core/mag/pull/667) - Fix pipeline crashing if only CONCOCT selected during binning (reported and fixed by @jfy133) - [#670](https://github.com/nf-core/mag/pull/670) - Re-add missing GTDBTk parameters into GTDBTk module (reported by harper357, fixed by @jfy133) - 
[#672](https://github.com/nf-core/mag/pull/673) - Fix GTDB-Tk per-sample TSV files not being published in output directory (reported by @jhayer, fix by @jfy133) -- [#674](https://github.com/nf-core/mag/pull/674/files) - Make longread preprocessing a subworkflow (added by @muabnezor) -- [#674](https://github.com/nf-core/mag/pull/674/files) - Add porechop and filtlong logs to multiqc (added by @muabnezor) -- [#674](https://github.com/nf-core/mag/pull/674/files) - Change local filtlong module to the official nf-core/filtlong module (added by @muabnezor) ### `Dependencies` From f54c9674021e3f0e3a871432761740ec322a903e Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 4 Oct 2024 07:56:21 +0200 Subject: [PATCH 27/33] Fix porechop_abi pattern in modules.config --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 62187329..b226ba01 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -181,7 +181,7 @@ process { publishDir = [ path: { "${params.outdir}/QC_longreads/porechop" }, mode: params.publish_dir_mode, - pattern: "*_porechop-abi_trimmed.fastq", + pattern: "*_porechop-abi_trimmed.fastq.gz", enabled: params.save_porechop_reads ] ext.prefix = { "${meta.id}_run${meta.run}_porechop-abi_trimmed" } From 3472c87b529359902e26eb4246facea1971af7c3 Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 4 Oct 2024 08:28:49 +0200 Subject: [PATCH 28/33] remove chopper citation for now --- CITATIONS.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index b5e2e091..e40e7e54 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,10 +40,6 @@ > Parks, D. H., Imelfort, M., Skennerton, C. T., Hugenholtz, P., & Tyson, G. W. (2015). CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes. Genome Research, 25(7), 1043–1055. 
doi: 10.1101/gr.186072.114 -- [Chopper](https://github.com/wdecoster/chopper) - - > De Coster W, Rademakers R. NanoPack2: population-scale evaluation of long-read sequencing data. Bioinformatics. 2023 May 4;39(5):btad311. doi: 10.1093/bioinformatics/btad311. PMID: 37171891; PMCID: PMC10196664. - - [CONCOCT](https://doi.org/10.1038/nmeth.3103) > Alneberg, J., Bjarnason, B. S., de Bruijn, I., Schirmer, M., Quick, J., Ijaz, U. Z., Lahti, L., Loman, N. J., Andersson, A. F., & Quince, C. (2014). Binning metagenomic contigs by coverage and composition. Nature Methods, 11(11), 1144–1146. doi: 10.1038/nmeth.3103 From ab5f482ccaf3710b053043ae47d23fbcaf22391b Mon Sep 17 00:00:00 2001 From: Adam Rosenbaum Date: Fri, 4 Oct 2024 09:46:47 +0200 Subject: [PATCH 29/33] retrigger checks From 7b31394ab82dafa00f70046f1010c5ea247b202f Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 11 Oct 2024 12:56:53 +0200 Subject: [PATCH 30/33] Apply suggestions from code review --- CHANGELOG.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f95e352d..43555b94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,17 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [#674](https://github.com/nf-core/mag/pull/674/files) - Added `--longread_adaptertrimming_tool` Where user can chose between porechop_abi (default) and porechop (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674) - Added `--longread_adaptertrimming_tool` Where user can chose between porechop_abi (default) and porechop (added by @muabnezor) ### `Changed` -- [#674](https://github.com/nf-core/mag/pull/674/files) - Changed to porechop-abi as default adapter trimming tool for long reads. User can still use porechop if prefered. +- [#674](https://github.com/nf-core/mag/pull/674) - Changed to porechop-abi as default adapter trimming tool for long reads. 
User can still use porechop if preferred. ### `Fixed` -- [#674](https://github.com/nf-core/mag/pull/674/files) - Make longread preprocessing a subworkflow (added by @muabnezor) -- [#674](https://github.com/nf-core/mag/pull/674/files) - Add porechop and filtlong logs to multiqc (added by @muabnezor) -- [#674](https://github.com/nf-core/mag/pull/674/files) - Change local filtlong module to the official nf-core/filtlong module (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674) - Make longread preprocessing a subworkflow (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674) - Add porechop and filtlong logs to multiqc (added by @muabnezor) +- [#674](https://github.com/nf-core/mag/pull/674) - Change local filtlong module to the official nf-core/filtlong module (added by @muabnezor) ### `Dependencies` From 78edf259c983a63c5a10599f72a5ef40a29f127f Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 11 Oct 2024 12:57:17 +0200 Subject: [PATCH 31/33] Apply suggestions from code review --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43555b94..61b8c9a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [#674](https://github.com/nf-core/mag/pull/674) - Changed to porechop-abi as default adapter trimming tool for long reads. User can still use porechop if preferred. +- [#674](https://github.com/nf-core/mag/pull/674) - Changed to porechop-abi as default adapter trimming tool for long reads. User can still use porechop if preferred (added by @muabnezor) ### `Fixed` From 3c3b46ab61f41f5e852c66334f957404d511274e Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Fri, 11 Oct 2024 13:03:10 +0200 Subject: [PATCH 32/33] Add previously undocumented output files to docs --- docs/output.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/output.md b/docs/output.md index 5f889056..d8d57846 100644 --- a/docs/output.md +++ b/docs/output.md @@ -113,6 +113,20 @@ The pipeline uses Nanolyse to map the reads against the Lambda phage and removes The pipeline uses filtlong and porechop to perform quality control of the long reads that are eventually provided with the TSV input file. + +<details markdown="1">
+<summary>Output files</summary> + +- `QC_longreads/porechop/` + - `[sample]_[run]_porechop_trimmed.fastq.gz`: If `--longread_adaptertrimming_tool 'porechop'`, the adapter trimmed FASTQ files from porechop + - `[sample]_[run]_porechop-abi_trimmed.fastq.gz`: If `--longread_adaptertrimming_tool 'porechop_abi'`, the adapter trimmed FASTQ files from porechop_ABI +- `QC_longreads/filtlong/` + - `[sample]_[run]_filtlong.fastq.gz`: The length and quality filtered reads in FASTQ from Filtlong + +</details>
+ +Trimmed and filtered FASTQ output directories and files will only exist if `--save_porechop_reads` and/or `--save_filtlong_reads` (respectively) are provided to the run command. + No direct host read removal is performed for long reads. However, since within this pipeline filtlong uses a read quality based on k-mer matches to the already filtered short reads, reads not overlapping those short reads might be discarded. The lower the parameter `--longreads_length_weight`, the higher the impact of the read qualities for filtering. From 7c7e9544da11483900992a0d009b4e027bf4bb53 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Fri, 11 Oct 2024 11:04:23 +0000 Subject: [PATCH 33/33] [automated] Fix code linting --- docs/output.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index d8d57846..7ca0ae13 100644 --- a/docs/output.md +++ b/docs/output.md @@ -113,7 +113,6 @@ The pipeline uses Nanolyse to map the reads against the Lambda phage and removes The pipeline uses filtlong and porechop to perform quality control of the long reads that are eventually provided with the TSV input file. - <details markdown="1">
<summary>Output files</summary>