Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sfitz add fastqc and switch to process_afterscript for logs #60

Merged
merged 10 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
## [Unreleased]

### Added
- Add FastQC workflow
- Add per readgroup and per library functionality
- Add `process_afterscript`
- Add Nextflow version requirement to `README`

### Changed
- Update NFTest for FastQC
- Update repository/pipeline description
- Update Nextflow configuration test workflows

Expand Down
2 changes: 2 additions & 0 deletions config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ params {

// Docker images
pipeval_version = "4.0.0-rc.2"
fastqc_version = "0.12.1_samtools-1.20"
samtools_version = "1.18"
picard_version = "3.1.0"
qualimap_version = "2.3"
docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}"
docker_image_fastqc = "${-> params.docker_container_registry}/fastqc:${params.fastqc_version}"
docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}"
docker_image_picard = "${-> params.docker_container_registry}/picard:${params.picard_version}"
docker_image_qualimap = "${-> params.docker_container_registry}/qualimap:${params.qualimap_version}"
Expand Down
1 change: 1 addition & 0 deletions config/methods.config
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,6 @@ methods {
methods.set_output_dir()
methods.set_pipeline_logs()
methods.setup_docker_cpus()
methods.setup_process_afterscript()
}
}
8 changes: 8 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ algorithm:
required: false
help: 'List of QC algorithms'
choices:
- fastqc
- stats
- collectwgsmetrics
- bamqc
default:
- fastqc
- stats
- collectwgsmetrics
reference:
Expand All @@ -38,6 +40,12 @@ save_intermediate_files:
required: false
default: false
help: 'The option to save the intermediate files'
fastqc_additional_options:
type: 'String'
required: false
allow_empty: true
default: ''
help: 'Additional arguments for FastQC command'
samtools_remove_duplicates:
type: 'Bool'
required: false
Expand Down
5 changes: 4 additions & 1 deletion config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config"

// Inputs/parameters of the pipeline
params {
algorithm = ['stats', 'collectwgsmetrics'] // 'stats', 'collectwgsmetrics', 'bamqc'
algorithm = ['fastqc', 'stats', 'collectwgsmetrics'] // 'fastqc', 'stats', 'collectwgsmetrics', 'bamqc'
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
output_dir = '/path/to/output/directory'
blcds_registered_dataset = false // if you want the output to be registered
Expand All @@ -29,6 +29,9 @@ params {
bamqc_outformat = 'pdf' // 'html' or 'pdf'
bamqc_additional_options = ''

// FastQC options
fastqc_additional_options = ''

// Base resource allocation updater
// See README for adding parameters to update the base resource allocations
}
Expand Down
14 changes: 14 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ include { run_bamqc_Qualimap } from './module/bamqc_qualimap' addParams(
workflow_log_output_dir: "${params.log_output_dir}/process-log/Qualimap-${params.qualimap_version}"
)

include { assess_ReadQuality_FastQC } from './module/fastqc' addParams(
workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}",
workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}"
)

include { indexFile } from './external/pipeline-Nextflow-module/modules/common/indexFile/main.nf'

log.info """\
Expand Down Expand Up @@ -82,6 +87,10 @@ log.info """\
qualimap_version: ${params.qualimap_version}
bamqc_outformat: ${params.bamqc_outformat}
bamqc_additional_options: ${params.bamqc_additional_options}

- FastQC options:
fastqc_version: ${params.fastqc_version}
fastqc_additional_options: ${params.fastqc_additional_options}
"""

Channel
Expand Down Expand Up @@ -153,6 +162,11 @@ workflow {
samples_to_process_ch
)
}
if ('fastqc' in params.algorithm) {
assess_ReadQuality_FastQC(
readgroups_to_process_ch
)
}
if ('collectwgsmetrics' in params.algorithm) {
run_CollectWgsMetrics_Picard(
samples_to_process_ch,
Expand Down
5 changes: 1 addition & 4 deletions module/bamqc_qualimap.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@ process run_bamqc_Qualimap {
mode: "copy",
enabled: true

publishDir path: "${params.workflow_log_output_dir}",
pattern: ".command.*",
mode: "copy",
saveAs: { "${task.process.replace(':', '/')}-${sm_id}/log${file(it).getName()}" }
ext log_dir_suffix: { "-${sm_id}" }

input:
tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(unused)
Expand Down
5 changes: 1 addition & 4 deletions module/collectWgsMetrics_picard.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@ process run_CollectWgsMetrics_Picard {
mode: "copy",
enabled: true

publishDir path: "${params.workflow_log_output_dir}",
pattern: ".command.*",
mode: "copy",
saveAs: { "${task.process.replace(':', '/')}-${sm_id}/log${file(it).getName()}" }
ext log_dir_suffix: { "-${sm_id}" }

input:
tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(read_length)
Expand Down
45 changes: 45 additions & 0 deletions module/fastqc.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Nextflow module for running FASTQC
*
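* @input tuple(path, unused, sm_id, rg_arg, rg_id, unused, unused, unused) alignment file read with `samtools view`, plus the sample ID, read-group selection argument and read-group ID used to name the outputs
* @output output_filename dir directory named by generate_standard_filename() that is emitted and published as the FastQC output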
*/

include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf'

/*
 * Run FastQC on the reads selected from an alignment file.
 *
 * The input file is filtered with `samtools view` (flag filter 0x900 plus the
 * caller-supplied `rg_arg`), converted to FASTQ with `samtools fastq`, and
 * streamed to FastQC on stdin. The extracted report is written into the
 * directory `output_filename`, which is the declared output of the process.
 *
 * input:  tuple of (path, unused, sm_id, rg_arg, rg_id, unused, unused, unused)
 *           path   - file passed to `samtools view`
 *           sm_id  - sample ID, used in the output/log naming
 *           rg_arg - argument string inserted verbatim into `samtools view`
 *           rg_id  - read-group ID, used in the output/log naming
 * output: path("${output_filename}") - directory holding the FastQC results
 */
process assess_ReadQuality_FastQC {
    container params.docker_image_fastqc

    publishDir path: "${params.workflow_output_dir}/output",
        pattern: "${output_filename}",
        mode: "copy",
        enabled: true

    // Suffix for the per-task log directory; evaluated lazily because `target`
    // is only defined once the script block has run.
    ext log_dir_suffix: { "-${target}" }

    input:
    tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(unused), val(unused), val(unused)

    output:
    path("${output_filename}")

    script:
    target = "${sm_id}-${rg_id}"
    output_filename = generate_standard_filename("FastQC-${params.fastqc_version}",
        params.dataset_id,
        target,
        [:])

    // FastQC must write into the directory declared under `output:`. With
    // `--outdir "./"` the reports land beside it in the work directory and the
    // published directory stays empty.
    // `stdin:<name>` tells FastQC to read from stdin and sets the report label.
    """
    set -euo pipefail
    mkdir "${output_filename}"
    samtools view -F 0x900 -h ${rg_arg} ${path} | \
        samtools fastq | \
        fastqc \
        --outdir "${output_filename}" \
        --format fastq \
        --extract \
        --delete \
        ${params.fastqc_additional_options} \
        stdin:${output_filename}
    """
}
5 changes: 1 addition & 4 deletions module/stats_samtools.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@ process run_stats_SAMtools {
enabled: true,
saveAs: { "${outdir}/${file(it).getName()}" }

publishDir path: "${params.workflow_log_output_dir}",
pattern: ".command.*",
mode: "copy",
saveAs: { "${task.process.replace(':', '/')}-${log_suffix}/log${file(it).getName()}" }
ext log_dir_suffix: { "-${log_suffix}" }

input:
tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(unused), val(unused)
Expand Down
29 changes: 29 additions & 0 deletions nftest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ cases:
skip: false
verbose: true
asserts:
- actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt
script: test/assert_txt.sh
- actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt
script: test/assert_txt.sh
- actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt
method: md5
Expand All @@ -40,6 +46,15 @@ cases:
skip: false
verbose: true
asserts:
- actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt
script: test/assert_txt.sh
- actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt
script: test/assert_txt.sh
- actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5.n1/S2-v1.1.5.n1_SMadjust_fastqc/fastqc_data.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1/S2-v1.1.5.n1_SMadjust_fastqc/fastqc_data.txt
script: test/assert_txt.sh
- actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt
method: md5
Expand Down Expand Up @@ -67,6 +82,20 @@ cases:
- actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt
method: md5
- name: a_mini-fastqc
message: test fastqc
nf_script: main.nf
nf_config: test/config/fastqc.config
params_file: test/yaml/a_mini.yaml
skip: true
verbose: true
asserts:
- actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt
script: test/assert_txt.sh
- actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt
expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt
script: test/assert_txt.sh
- name: a_mini-stats
message: test samtools stats
nf_script: main.nf
Expand Down
5 changes: 4 additions & 1 deletion test/config/all-tools.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config"

// Inputs/parameters of the pipeline
params {
algorithm = ['stats', 'collectwgsmetrics', 'bamqc'] // 'stats', 'collectwgsmetrics', 'bamqc'
algorithm = ['fastqc', 'stats', 'collectwgsmetrics', 'bamqc'] // 'fastqc', 'stats', 'collectwgsmetrics', 'bamqc'
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
blcds_registered_dataset = false // if you want the output to be registered
save_intermediate_files = true
Expand All @@ -28,6 +28,9 @@ params {
bamqc_outformat = 'pdf'
bamqc_additional_options = ''

// FastQC options
fastqc_additional_options = ''

// Base resource allocation updater
// See README for adding parameters to update the base resource allocations
}
Expand Down
25 changes: 25 additions & 0 deletions test/config/fastqc.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// EXECUTION SETTINGS AND GLOBAL DEFAULTS

// External config files import. DO NOT MODIFY THESE LINES!
includeConfig "${projectDir}/config/default.config"
includeConfig "${projectDir}/config/methods.config"
includeConfig "${projectDir}/nextflow.config"


// Inputs/parameters of the pipeline
// Parameters for the FastQC-only regression test case.
params {
    // Run only the FastQC workflow.
    algorithm = ['fastqc']
    reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
    blcds_registered_dataset = false // if you want the output to be registered
    save_intermediate_files = true

    // SAMtools stats options
    samtools_remove_duplicates = false
    samtools_stats_additional_options = ''

    // FastQC options
    // Declared explicitly, as in the template and all-tools configs;
    // '' adds no extra arguments to the FastQC command.
    fastqc_additional_options = ''

    // Base resource allocation updater
    // See README for adding parameters to update the base resource allocations
}

// Setup the pipeline config. DO NOT REMOVE THIS LINE!
methods.setup()