From 5cd663b64f0d8d6bba86394586e622c63c4ad97f Mon Sep 17 00:00:00 2001 From: Graeme Grimes Date: Thu, 25 Apr 2024 14:00:59 +0000 Subject: [PATCH 1/2] update ep4 running a python script --- episodes/04-processes-part1.md | 57 ++++++++++++++++--- .../scripts/process/process_python_script.nf | 14 +++++ .../files/scripts/process/process_reads.py | 17 ++++++ 3 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 episodes/files/scripts/process/process_python_script.nf create mode 100644 episodes/files/scripts/process/process_reads.py diff --git a/episodes/04-processes-part1.md b/episodes/04-processes-part1.md index 8dcdb4ea..3a724ca2 100644 --- a/episodes/04-processes-part1.md +++ b/episodes/04-processes-part1.md @@ -251,8 +251,9 @@ Number of sequences for chromosome A:118 ::::::::::::::::::::::::::::::::::::: instructor -The following section on python and R scripts is not meant to be run by the instructor or learners. +The following section on Python is meant to be run by the instructor, not the learners. It is meant to be a demonstration of the different ways to run a process. +This section can be skipped if time is short. ::::::::::::::::::::::::::::::::::::::::::::::::: @@ -262,7 +263,7 @@ By default the process command is interpreted as a **Bash** script. However, any //process_python.nf nextflow.enable.dsl=2 -process PYSTUFF { +process PROCESS_READS { script: """ #!/usr/bin/env python @@ -285,7 +286,7 @@ process PYSTUFF { } workflow { - PYSTUFF() + PROCESS_READS() } ``` @@ -309,27 +310,69 @@ workflow { This allows the use of a different programming languages which may better fit a particular job. However, for large chunks of code it is suggested to save them into separate files and invoke them from the process script. +## Associated scripts + +Scripts such as the one in the example below, `process_reads.py`, can be stored in a `bin` folder at the same directory level as the Nextflow workflow script that invokes them, and given execute permission. 
Nextflow will automatically add this folder to the `PATH` environment variable. To invoke the script in a Nextflow process, simply use its filename on its own rather than invoking the interpreter e.g. `process_reads.py` instead of `python process_reads.py`. +**Note** The script `process_reads.py` must be executable to run. + +```bash +mkdir bin +mv process_reads.py bin +chmod 755 bin/process_reads.py +``` + +```python +#!/usr/bin/env python +# process_reads.py +import gzip +import sys +reads = 0 +bases = 0 + + +with gzip.open(sys.argv[1], 'rb') as read: + for id in read: + seq = next(read) + reads += 1 + bases += len(seq.strip()) + next(read) + next(read) + +print("reads", reads) +print("bases", bases) +``` + ```groovy +//process_python_script.nf nextflow.enable.dsl=2 -process PYSTUFF { +process PROCESS_READS { script: """ - myscript.py + process_reads.py ${projectDir}/data/yeast/reads/ref1_1.fq.gz """ } workflow { - PYSTUFF() + PROCESS_READS() } ``` +```output +N E X T F L O W ~ version 23.10.1 +Launching `process_python_script.nf` [kickass_legentil] DSL2 - revision: 3b9eee1d47 +executor > local (1) +[88/759311] process > PROCESS_READS [100%] 1 of 1 ✔ +reads 14677 +bases 1482377 +``` + ::::::::::::::::::::::::::::::::::::::::: callout ## Associated scripts -Scripts such as the one in the example above, `myscript.py`, can be stored in a `bin` folder at the same directory level as the Nextflow workflow script that invokes them, and given execute permission. Nextflow will automatically add this folder to the `PATH` environment variable. +Scripts such as the one in the example above, `process_reads.py`, can be stored in a `bin` folder at the same directory level as the Nextflow workflow script that invokes them, and given execute permission. Nextflow will automatically add this folder to the `PATH` environment variable. 
To invoke the script in a Nextflow process, simply use its filename on its own rather than invoking the interpreter e.g. `process_reads.py` instead of `python process_reads.py`. :::::::::::::::::::::::::::::::::::::::::::::::::: diff --git a/episodes/files/scripts/process/process_python_script.nf b/episodes/files/scripts/process/process_python_script.nf new file mode 100644 index 00000000..34dd4041 --- /dev/null +++ b/episodes/files/scripts/process/process_python_script.nf @@ -0,0 +1,14 @@ +//process_python_script.nf +nextflow.enable.dsl=2 + +process PROCESS_READS { + + script: + """ + process_reads.py ${projectDir}/data/yeast/reads/ref1_1.fq.gz + """ +} + +workflow { + PROCESS_READS() +} \ No newline at end of file diff --git a/episodes/files/scripts/process/process_reads.py b/episodes/files/scripts/process/process_reads.py new file mode 100644 index 00000000..c1d5fe8e --- /dev/null +++ b/episodes/files/scripts/process/process_reads.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +import gzip +import sys +reads = 0 +bases = 0 + +# Read gzipped fastq file +with gzip.open(sys.argv[1], 'rb') as read: + for id in read: + seq = next(read) + reads += 1 + bases += len(seq.strip()) + next(read) + next(read) + +print("reads", reads) +print("bases", bases) \ No newline at end of file From bd6fe109b458599578d28e224f279ae9faab8f81 Mon Sep 17 00:00:00 2001 From: Graeme Grimes Date: Thu, 25 Apr 2024 14:01:37 +0000 Subject: [PATCH 2/2] update ep4 removed R materials --- episodes/04-processes-part1.md | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/episodes/04-processes-part1.md b/episodes/04-processes-part1.md index 3a724ca2..88c8bf3e 100644 --- a/episodes/04-processes-part1.md +++ b/episodes/04-processes-part1.md @@ -290,24 +290,6 @@ workflow { } ``` -```groovy -//process_rscript.nf -nextflow.enable.dsl=2 - -process RSTUFF { - script: - """ - #!/usr/bin/env Rscript - library("ShortRead") - countFastq(dirPath="data/yeast/reads/ref1_1.fq.gz") - """ -} - -workflow 
{ - RSTUFF() -} -``` - This allows the use of a different programming languages which may better fit a particular job. However, for large chunks of code it is suggested to save them into separate files and invoke them from the process script. ## Associated scripts