adding fastqscreen database parameter, updating docs

nf-core · Oct 30, 2024 · e560181 · e560181
1 parent 4af6dd4
commit e560181
Show file tree

Hide file tree

Showing 8 changed files with 38 additions and 6 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c
 - [#20](https://github.com/nf-core/seqinspector/pull/20) Use tags to generate group reports
 - [#13](https://github.com/nf-core/seqinspector/pull/13) Generate reports per run, per project and per lane.
 - [#49](https://github.com/nf-core/seqinspector/pull/49) Merge with template 3.0.2.
+- [#53](https://github.com/nf-core/seqinspector/pull/53) Add FastQ-Screen database multiplexing
 
 ### `Fixed`
 

diff --git a/CITATIONS.md b/CITATIONS.md
@@ -14,6 +14,10 @@
 
 > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
 
+- [Fastqscreen](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/)
+
+> Wingett SW and Andrews S. FastQ Screen: A tool for multi-genome mapping and quality control [version 2; referees: 4 approved]. F1000Research 2018, 7:1338 (https://doi.org/10.12688/f1000research.15931.2)
+
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
 
 > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.

diff --git a/assets/schema_database.json b/assets/schema_database.json
@@ -1,8 +1,8 @@
 {
     "$schema": "https://json-schema.org/draft/2020-12/schema",
     "$id": "https://raw.githubusercontent.com/nf-core/seqinspector/master/assets/schema_database.json",
-    "title": "nf-core/seqinspector pipeline - params.databasesheet schema",
-    "description": "Schema for the file provided with params.databasesheet",
+    "title": "nf-core/seqinspector pipeline - params.fastqscreen_database_sheet schema",
+    "description": "Schema for the file provided with params.fastqscreen_database_sheet",
     "type": "array",
     "items": {
         "type": "object",

diff --git a/docs/output.md b/docs/output.md
@@ -11,6 +11,7 @@ The directories listed below will be created in the results directory after the
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
 
 - [FastQC](#fastqc) - Raw read QC
+- [Fastqscreen](#fastqscreen) - Mapping against a set of references for basic contamination QC
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
 
@@ -27,6 +28,22 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
 
 [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
 
+### FASTQSCREEN
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `fastqscreen/`
+  - `*_screen.html`: Interactive graphical fastqscreen report which summarises the mapping of your sequences against each of your libraries.
+  - `*_screen.pdf`: Static graphical fastqscreen report which summarises the mapping of your sequences against each of your libraries.
+  - `*_screen.txt`: Text-based fastqscreen report which summarises the mapping of your sequences against each of your libraries.
+
+</details>
+
+[Fastqscreen](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/) allows you to set up a standard set of libraries against which all of your sequences can be searched. Your search libraries might contain the genomes of all of the organisms you work on, along with PhiX, Vectors or other contaminants commonly seen in sequencing experiments.
+
+It requires the reference genomes (databases) to be supplied in a config file. In order to parallelize the mapping of the different samples, seqinspector generates a fastqscreen config file for every sample/reference combination.
+
 ### MultiQC
 
 nf-core/seqinspector will generate the following MultiQC reports:

diff --git a/main.nf b/main.nf
@@ -86,7 +86,7 @@ workflow {
     //
     NFCORE_SEQINSPECTOR (
         PIPELINE_INITIALISATION.out.samplesheet,
-        params.database_sheet,
+        params.fastqscreen_database_sheet,
     )
     //
     // SUBWORKFLOW: Run completion tasks

diff --git a/nextflow.config b/nextflow.config
@@ -20,6 +20,9 @@ params {
     config_fastq_screen        = "${projectDir}/conf"
     igenomes_ignore            = false
 
+    // Fastqscreen options
+    fastqscreen_database_sheet = './assets/databasesheet.csv'
+
     // MultiQC options
     multiqc_config             = null
     multiqc_title              = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -68,7 +68,8 @@
                 "config_fastq_screen": {
                     "type": "string",
                     "description": "path to directory with fastq_screen config (fastq_screen.conf)",
-                    "fa_icon": "fas fa-braille"
+                    "fa_icon": "fas fa-braille",
+                    "default": "/Users/franziska.franziska/workspace/seqinspector/conf"
                 },
                 "igenomes_ignore": {
                     "type": "boolean",
@@ -84,6 +85,12 @@
                     "fa_icon": "fas fa-ban",
                     "hidden": true,
                     "default": "s3://ngi-igenomes/igenomes/"
+                },
+                "fastqscreen_database_sheet": {
+                    "type": "string",
+                    "default": "./assets/databasesheet.csv",
+                    "fa_icon": "fas fa-search",
+                    "description": "Path to the CSV sheet listing the reference genomes (databases) to be mapped against in fastqscreen"
                 }
             }
         },

diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf
@@ -26,7 +26,7 @@ workflow SEQINSPECTOR {
 
     take:
     ch_samplesheet // channel: samplesheet read in from --input
-    ch_databasesheet // channel: database sheet read in from --database_sheet
+    ch_fastqscreen_databasesheet // channel: database sheet read in from --fastqscreen_database_sheet
 
     main:
 
@@ -48,7 +48,7 @@ workflow SEQINSPECTOR {
     // MODULE: Run FastQ Screen
     //
     ch_databases = Channel
-        .fromList(samplesheetToList(ch_databasesheet, "${projectDir}/assets/schema_database.json"))
+        .fromList(samplesheetToList(ch_fastqscreen_databasesheet, "${projectDir}/assets/schema_database.json"))
 
     FASTQSCREEN_FASTQSCREEN (
         ch_samplesheet.combine(ch_databases)