diff --git a/modules.json b/modules.json
index 9350ace5..15b7dc89 100644
--- a/modules.json
+++ b/modules.json
@@ -46,7 +46,7 @@
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
-                        "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+                        "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
                         "installed_by": ["subworkflows"]
                     },
                     "utils_nfvalidation_plugin": {
diff --git a/modules/local/rename_fastq_cellranger.nf b/modules/local/rename_fastq_cellranger.nf
new file mode 100644
index 00000000..8d318188
--- /dev/null
+++ b/modules/local/rename_fastq_cellranger.nf
@@ -0,0 +1,24 @@
+// Rename a fastp-processed read pair after the original fastq file names (prefixed with `fastp_`),
+// so that the file names follow the cellranger standards again.
+process RENAME_FASTQ_CELLRANGER {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "conda-forge::python=3.8.0 conda-forge::biopython=1.74"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' :
+        'biocontainers/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' }"
+
+    input:
+    tuple val(meta), path(R1), path(R2) // read pair to be renamed (fastp output)
+    tuple val(meta_2), path(orig_r1), path(orig_r2) // original read pair of the SAME sample; only its file names are used
+
+    output:
+    tuple val(meta), path('*R1_001.fastq.gz'), path('*R2_001.fastq.gz') , emit: reads
+
+    script:
+    """
+    mv ${R1} fastp_${orig_r1}
+    mv ${R2} fastp_${orig_r2}
+    """
+}
diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf
index 735a8c10..a318594d 100644
--- a/subworkflows/local/sc_raw_input.nf
+++ b/subworkflows/local/sc_raw_input.nf
@@ -3,6 +3,9 @@ include { UNZIP_CELLRANGERDB } from '
 include { RENAME_FILE as RENAME_FILE_TSV } from '../../modules/local/rename_file'
 include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '../../modules/local/changeo/changeo_convertdb_fasta'
 include { FASTQ_INPUT_CHECK } from '../../subworkflows/local/fastq_input_check'
+include { FASTP } from '../../modules/nf-core/fastp/main'
+include { RENAME_FASTQ_CELLRANGER } from '../../modules/local/rename_fastq_cellranger'
+
 
 workflow SC_RAW_INPUT {
 
@@ -51,9 +54,37 @@ workflow SC_RAW_INPUT {
             error "The single-cell 10X genomics library generation method requires you to provide a reference file."
         }
 
+        // Fastp
+        save_merged = false
+        FASTP (
+            ch_reads,
+            [],
+            [],
+            save_merged
+        )
+        ch_versions = ch_versions.mix(FASTP.out.versions)
+
+        // Pair every fastp output with the original reads of the SAME sample by joining on meta.
+        // FASTP.out.reads is emitted in task-completion order, so passing it next to ch_reads as two
+        // independent queue channels would pair items by arrival order and could mix up samples.
+        ch_rename = FASTP.out.reads
+            .join(ch_reads)
+            .multiMap { meta, fastp_reads, orig_reads ->
+                fastp: [meta, fastp_reads[0], fastp_reads[1]]
+                original: [meta, orig_reads[0], orig_reads[1]]
+            }
+
+        // rename fastq files to follow cellranger standards again
+        RENAME_FASTQ_CELLRANGER(
+            ch_rename.fastp,
+            ch_rename.original
+        )
+
+        ch_reads_fastp = RENAME_FASTQ_CELLRANGER.out.reads.map{ meta, read1, read2 -> [meta, [read1, read2]] }
+
         // run cellranger vdj
         CELLRANGER_VDJ (
-            ch_reads,
+            ch_reads_fastp,
             ch_sc_reference.collect()
         )
         ch_versions = ch_versions.mix(CELLRANGER_VDJ.out.versions)
@@ -89,6 +120,9 @@ workflow SC_RAW_INPUT {
 
     emit:
     versions = ch_versions
+    // fastp
+    fastp_reads_json = FASTP.out.json.collect{ meta,json -> json }
+    fastp_reads_html = FASTP.out.html.collect{ meta,html -> html }
     // complete cellranger output
     outs = ch_cellranger_out
     // cellranger output in airr format
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
index a8b55d6f..14558c39 100644
--- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -65,9 +65,15 @@ def checkProfileProvided(nextflow_cli_args) {
 // Citation string for pipeline
 //
 def workflowCitation() {
+    def temp_doi_ref = ""
+    String[] manifest_doi = workflow.manifest.doi.tokenize(",")
+    // Using a loop to handle multiple DOIs
+    // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+    // Removing ` ` since the manifest.doi is a string and not a proper list
+    for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n"
     return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
         "* The pipeline\n" +
-        " ${workflow.manifest.doi}\n\n" +
+        temp_doi_ref + "\n" +
         "* The nf-core framework\n" +
         " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
         "* Software dependencies\n" +
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index bc6b7924..62d7ba47 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -103,8 +103,8 @@ workflow AIRRFLOW {
         ch_presto_assemblepairs_logs = Channel.empty()
         ch_presto_collapseseq_logs = Channel.empty()
         ch_presto_splitseq_logs = Channel.empty()
-        ch_fastp_html = Channel.empty()
-        ch_fastp_json = Channel.empty()
+        ch_fastp_html = SC_RAW_INPUT.out.fastp_reads_html
+        ch_fastp_json = SC_RAW_INPUT.out.fastp_reads_json
         ch_fastqc_postassembly_mqc = Channel.empty()
     } else {
         // Perform sequence assembly if input type is fastq from bulk sequencing data