Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Timstof support #41

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
06eb8ea
initial commit on the branch
jspaezp Jun 26, 2023
c4c1591
added params to the encyclopedia run
jspaezp Jun 27, 2023
6a19f13
(wip) addition of skyline
jspaezp Jul 21, 2023
96a8b3e
added branching to the skyline quant step
jspaezp Jul 21, 2023
0a76604
(wip) improvement on skyline job passing
jspaezp Jul 21, 2023
84dc60a
made blib conversion 2 step
jspaezp Jul 24, 2023
fa5bf4b
Update test.yml
jspaezp Jul 24, 2023
d9706ce
minor addition to tests cicd
jspaezp Jul 24, 2023
df8e1eb
updated docker image
jspaezp Jul 24, 2023
2df5d7a
moved skyline logic out of agg
jspaezp Jul 24, 2023
f0559fa
added skyline template asset
jspaezp Jul 24, 2023
f5b0d7f
handled no-merge use case
jspaezp Jul 24, 2023
a1768ba
added asset
jspaezp Jul 24, 2023
0659a9c
Update test.yml
jspaezp Jul 25, 2023
2b9ea7c
bumped ims conversion version
jspaezp Jul 25, 2023
5ff3335
Merge branch 'feature/timstof_data' of github.com:TalusBio/nf-encyclo…
jspaezp Jul 25, 2023
5f74e08
updated msstats tests for resolutoin
jspaezp Jul 25, 2023
e6f5832
fixed input piping to ims
jspaezp Jul 25, 2023
61c6b0e
changed log handling in skyline steps
jspaezp Jul 25, 2023
427ff63
changed log handling in skylines first step
jspaezp Jul 25, 2023
17c34a6
updated position in skyline adds
jspaezp Jul 25, 2023
f9baf4a
handled decompression
jspaezp Jul 25, 2023
777f65f
handled decompression on grouping stage
jspaezp Jul 25, 2023
9529b39
escaped bash variables
jspaezp Jul 25, 2023
ea29937
bugfix minor typo on file type
jspaezp Jul 25, 2023
88b9d49
bugfix minor typo on file type
jspaezp Jul 25, 2023
1241330
changed way to list files in bash
jspaezp Jul 26, 2023
ca9ecc7
fixed yet another bug in the bash side of the workflow
jspaezp Jul 26, 2023
585b568
changed val to path in skyline step
jspaezp Jul 26, 2023
4c7d79f
added debugging prints
jspaezp Jul 26, 2023
efd04ef
yet another fix
jspaezp Jul 26, 2023
829081b
added more debugging info and added handling of raw files
jspaezp Jul 26, 2023
5b76bfe
handled empty else clause
jspaezp Jul 26, 2023
362dadb
added a lot of extra params
jspaezp Jul 26, 2023
b5cd217
added unused parameter
jspaezp Jul 26, 2023
39bfc35
fixed name in a parameter
jspaezp Jul 26, 2023
f70505b
fixed name in a parameter
jspaezp Jul 26, 2023
698b31d
fixed name in a parameter
jspaezp Jul 26, 2023
091a088
moved parameters from termplate to import
jspaezp Jul 26, 2023
d30a1d0
added rt filter
jspaezp Jul 26, 2023
f433d2b
moved ims params to sky template creation
jspaezp Jul 28, 2023
ddae2d6
added mem logging and decoys
jspaezp Jul 28, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Create latest tag
uses: Actions-R-Us/actions-tagger@latest
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ jobs:
- name: Run Unit Tests
run: |
REMOTE=/app/nf-encyclopedia
docker run -v $(pwd):${REMOTE} -w ${REMOTE} nf-encyclopedia:latest \
pytest tests/unit_tests
docker run -v $(pwd):${REMOTE} nf-encyclopedia:latest \
pytest ${REMOTE}/tests/unit_tests

- name: Run System Tests
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.nextflow
venv
.Rhistory
.idea/**
.gradle
Expand Down
69 changes: 40 additions & 29 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,66 +1,77 @@
FROM --platform=linux/amd64 mambaorg/micromamba:latest as micromamba
# First Stage of the build, gets the jar for encyclopedia
FROM --platform=linux/amd64 openjdk:8-jre
# FROM --platform=linux/amd64 ibmjava:11
# FROM --platform=linux/amd64 amazoncorretto:11-al2023-headless # Uses yum for package management.
FROM --platform=linux/amd64 nextflow/nextflow:23.04.2

ARG VERSION=2.12.30
ENV VERSION ${VERSION}
LABEL authors="[email protected]" \
description="Docker image for most of nf-encyclopedia"


# Install procps so that Nextflow can poll CPU usage and
# deep clean the apt cache to reduce image/layer size
# RUN apt-get install -y procps sqlite3 libgomp1 \
# && apt-get clean -y && rm -rf /var/lib/apt/lists/*
RUN yum install -y wget

RUN apt-get update && \
apt-get -y upgrade && \
apt-get -y install libgomp1 && \
apt-get clean

WORKDIR /code
# First Stage of the build, gets the jar for encyclopedia
RUN wget https://bitbucket.org/searleb/encyclopedia/downloads/encyclopedia-${VERSION}-executable.jar
WORKDIR /app

LABEL authors="[email protected]" \
description="Docker image for most of nf-encyclopedia"
# # Install nextflow
# RUN wget -qO- https://get.nextflow.io | bash
# RUN chmod +x nextflow
# RUN mv nextflow /usr/local/bin/.

# Install procps so that Nextflow can poll CPU usage and
# deep clean the apt cache to reduce image/layer size
RUN apt-get update \
&& apt-get install -y procps sqlite3 \
&& apt-get clean -y && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Setup micromamba:
ARG MAMBA_USER=mamba
ARG MAMBA_USER=root
ARG MAMBA_USER_ID=1000
ARG MAMBA_USER_GID=1000
ARG MAMBA_DOCKERFILE_ACTIVATE=1
ENV MAMBA_USER=$MAMBA_USER
ENV MAMBA_ROOT_PREFIX="/opt/conda"
ENV MAMBA_EXE="/bin/micromamba"
RUN mkdir -p ${MAMBA_ROOT_PREFIX}

COPY --from=micromamba "$MAMBA_EXE" "$MAMBA_EXE"
COPY --from=micromamba /usr/local/bin/_activate_current_env.sh /usr/local/bin/_activate_current_env.sh
COPY --from=micromamba /usr/local/bin/_dockerfile_shell.sh /usr/local/bin/_dockerfile_shell.sh
COPY --from=micromamba /usr/local/bin/_entrypoint.sh /usr/local/bin/_entrypoint.sh
COPY --from=micromamba /usr/local/bin/_entrypoint.sh /usr/local/bin/mamba_entrypoint.sh
COPY --from=micromamba /usr/local/bin/_activate_current_env.sh /usr/local/bin/_activate_current_env.sh
COPY --from=micromamba /usr/local/bin/_dockerfile_initialize_user_accounts.sh /usr/local/bin/_dockerfile_initialize_user_accounts.sh
COPY --from=micromamba /usr/local/bin/_dockerfile_setup_root_prefix.sh /usr/local/bin/_dockerfile_setup_root_prefix.sh

RUN /usr/local/bin/_dockerfile_initialize_user_accounts.sh && \
/usr/local/bin/_dockerfile_setup_root_prefix.sh
# No need to set up accounts if we will run as root ...
# RUN /usr/local/bin/_dockerfile_initialize_user_accounts.sh &&

RUN /usr/local/bin/_dockerfile_setup_root_prefix.sh

# Setup the environment
USER root
USER $MAMBA_USER
COPY environment.yml /tmp/environment.yml

# Instruct R processes to use these empty files instead of
# clashing with a local one
RUN touch .Rprofile .Renviron

# Create the environment
RUN micromamba install -y -n base -f /tmp/environment.yml && \
micromamba clean --all --yes
# Set the path. NextFlow seems to circumvent the conda environment
# We also need to set options for the JRE here.
ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH:/bin" _JAVA_OPTIONS="-Djava.awt.headless=true" VERSION=$VERSION

# Setup the EncyclopeDIA executable:
RUN ln -s /code/encyclopedia-$VERSION-executable.jar /code/encyclopedia.jar

# Set the path. NextFlow seems to circumvent the conda environment
# We also need to set options for the JRE here.
ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH" _JAVA_OPTIONS="-Djava.awt.headless=true" VERSION=$VERSION

# Create the entrypoint:
SHELL ["/usr/local/bin/_dockerfile_shell.sh"]
ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD []
# SHELL ["/usr/local/bin/_dockerfile_shell.sh"]
# ENTRYPOINT ["/usr/local/bin/mamba_entrypoint.sh", "/usr/local/bin/entry.sh"]

# Create the environment
RUN micromamba install -y -n base -f /tmp/environment.yml && \
micromamba clean --all --yes

CMD ["/bin/bash"]

Binary file added assets/template.sky.zip
Binary file not shown.
12 changes: 6 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ dependencies:
- pytest
- numpy
- pandas
- bioconductor-msstats=4.2.0
- r-rlang
- r-dplyr
- r-tidyr
- r-magrittr
- r-stringr
- bioconda::bioconductor-msstats
- conda-forge::r-rlang
- conda-forge::r-dplyr
- conda-forge::r-tidyr
- r::r-magrittr
- r::r-stringr
52 changes: 50 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
nextflow.enable.dsl = 2

// Subworkflows
include { CONVERT_TO_MZML } from "./subworkflows/msconvert"
include { CONVERT_TO_MZML } from "./subworkflows/convert"
include {
BUILD_CHROMATOGRAM_LIBRARY;
PERFORM_QUANT;
Expand All @@ -12,6 +12,12 @@ include {

// Modules
include { MSSTATS } from "./modules/msstats"
include { ADD_IMS_INFO } from "./modules/ims"
include {
SKYLINE_ADD_LIB;
SKYLINE_IMPORT_DATA;
SKYLINE_MERGE_RESULTS
} from "./modules/skyline"


//
Expand Down Expand Up @@ -49,6 +55,7 @@ workflow {
input = file(params.input, checkIfExists: true)
fasta = file(params.fasta, checkIfExists: true)
dlib = file(params.dlib, checkIfExists: true)
skyline_empty_template = file("$baseDir/assets/template.sky.zip", checkIfExists: true)

// Optional contrasts arg:
if ( params.contrasts != null ) {
Expand All @@ -74,6 +81,16 @@ workflow {
error "No MS data files were provided. Nothing to do."
}

// Raw Mass Spec files (raw including .raw or .d/.tar)
// These files will be used later to quant using skyline.
// This also filters out files that are chromatogram libraries
ms_files.runs
| join(ms_files.meta)
| filter { !it[1] }
| map { it[0] }
| filter( ~/^.*((.raw)|(.d.tar))$/ )
| set { raw_quant_files }

// Convert raw files to gzipped mzML and group them by experiment.
// The chrlib and quant channels take the following form:
// [[file_ids], [mzml_gz_files], is_chrlib, group]
Expand Down Expand Up @@ -107,13 +124,42 @@ workflow {
PERFORM_QUANT(quant_files, dlib, fasta, params.aggregate)
| set { quant_results }

quant_results.local
| map { it[0] }
| set { groups }

// Add IMS info to the blib
ADD_IMS_INFO(groups, quant_results.blib)
| set { blib }

SKYLINE_ADD_LIB(skyline_empty_template, fasta, blib)
| set { skyline_template_zipfile }

// This will generate a skyd for every raw data file
SKYLINE_IMPORT_DATA(
skyline_template_zipfile.skyline_zipfile,
raw_quant_files,
)
| set { skyline_import_results }

SKYLINE_MERGE_RESULTS(
skyline_template_zipfile.skyline_zipfile,
skyline_import_results.skyd_file.collect(),
raw_quant_files.collect(),
)
| set { skyline_merge_results }

skyline_merge_results.final_skyline_zipfile.view()

// Perform an aggregate analysis on all files if needed:
if ( params.aggregate ) {
// Aggregate quantitative runs with EncyclopeDIA.
// The output has one channel:
// global -> [agg_name, peptides, proteins] or null
// global -> [agg_name, peptides_txt, proteins_txt] or null
// lib -> blib
PERFORM_AGGREGATE_QUANT(quant_results.local, dlib, fasta)
| set { enc_results }

} else {
quant_results | set{ enc_results }
}
Expand All @@ -122,6 +168,8 @@ workflow {
if ( params.msstats.enabled ) {
MSSTATS(enc_results.global, input, contrasts)
}

//
}


Expand Down
61 changes: 61 additions & 0 deletions modules/convert.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Convert a vendor raw MS file into a gzipped mzML using ProteoWizard's
// msconvert, run under wine (msconvert is a Windows binary). Peak picking
// is applied to all MS levels ("1-"); demultiplexing is added only when
// params.msconvert.demultiplex is set.
process MSCONVERT {
// Publish the converted file under the per-run output directory.
publishDir "${params.mzml_dir}/${outputDir}", failOnError: true
label 'process_low_constant'
// Conversion is retried on failure (transient wine/msconvert errors).
label 'error_retry'

input:
// file_id   - identifier carried through to downstream channels
// raw_input - the vendor raw file to convert
// outputDir - subdirectory of params.mzml_dir to publish into
tuple val(file_id), path(raw_input), val(outputDir)

output:
// Gzipped mzML named after the input file's stem.
tuple val(file_id), path("${raw_input.baseName}.mzML.gz")

script:
"""
wine msconvert \\
-v \\
--gzip \\
--mzML \\
--64 \\
--zlib \\
--filter "peakPicking true 1-" \\
${params.msconvert.demultiplex ? '--filter "demultiplex optimization=overlap_only"' : ''} \\
${raw_input}
"""

// Stub for -stub-run: create an empty placeholder with the expected name.
stub:
"""
touch ${raw_input.baseName}.mzML.gz
"""
}


// Convert a tarred Bruker timsTOF directory (*.d.tar) into a gzipped mzML
// using the tdf2mzml container. The archive is unpacked first, then the
// resulting *.d directory is converted and the mzML is renamed/compressed.
process TDF2MZML {
// Only the compressed mzML is published, not the unpacked .d contents.
publishDir "${params.mzml_dir}/${outputDir}", pattern: "*.mzML.gz", failOnError: true
// NOTE(review): pinned to :latest because no stable tag is known — consider
// pinning a digest for reproducibility.
container 'mfreitas/tdf2mzml:latest' // I don't know which stable tag to use...
label 'process_single'
label 'error_retry'

input:
// tdf_input - a .d.tar archive; outputDir - publish subdirectory
tuple val(file_id), path(tdf_input), val(outputDir)

output:
// file(tdf_input.baseName).baseName strips both extensions
// ("run.d.tar" -> "run"), so the output is "run.mzML.gz".
tuple val(file_id), path("${file(tdf_input.baseName).baseName}.mzML.gz")

script:
"""
echo "Unpacking..."
tar -xvf ${tdf_input}

echo "Converting..."
tdf2mzml.py -i *.d # --ms1_type "centroid"

echo "Compressing..."
mv *.mzml ${file(tdf_input.baseName).baseName}.mzML
gzip ${file(tdf_input.baseName).baseName}.mzML
"""

// Stub for -stub-run: placeholder file with the expected double-stripped name.
stub:
"""
touch ${file(tdf_input.baseName).baseName}.mzML.gz
"""
}
8 changes: 7 additions & 1 deletion modules/encyclopedia.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def stem(suffix) {
process ENCYCLOPEDIA_SEARCH {
publishDir "${params.result_dir}/${group}/elib", pattern: '*.elib', failOnError: true
publishDir "${params.result_dir}/${group}/logs", pattern: '*.log', failOnError: true
label 'process_medium'
label 'process_high'

input:
tuple val(group), path(mzml_gz_file)
Expand Down Expand Up @@ -70,6 +70,7 @@ process ENCYCLOPEDIA_SEARCH {

process ENCYCLOPEDIA_AGGREGATE {
publishDir "${params.result_dir}/${group}/elib", pattern: '*.elib', failOnError: true
publishDir "${params.result_dir}/${group}/blib", pattern: '*.blib', failOnError: true
publishDir "${params.result_dir}/${group}/logs", pattern: '*.log', failOnError: true
publishDir "${params.result_dir}/${group}/results", pattern: '*.txt', failOnError: true
publishDir "${params.result_dir}/${group}/reports", pattern: '*.csv', failOnError: true
Expand All @@ -92,6 +93,7 @@ process ENCYCLOPEDIA_AGGREGATE {
tuple(
val(group),
path("${stem(output_suffix)}.elib"),
path("${stem(output_suffix)}.blib"),
path("${stem(output_suffix)}.global.log"),
path("${output_suffix}_detection_summary.csv"),
emit: "lib"
Expand Down Expand Up @@ -122,6 +124,9 @@ process ENCYCLOPEDIA_AGGREGATE {
-a ${align} \\
| tee ${stem(output_suffix)}.global.log

${execEncyclopedia(task.memory)} \\
-convert -libtoblib -i ${stem(output_suffix)}.elib

# Better file names:
if [ "${align}" = true ]; then
mv ${stem(output_suffix)}.elib.peptides.txt ${stem(output_suffix)}.peptides.txt
Expand All @@ -140,6 +145,7 @@ process ENCYCLOPEDIA_AGGREGATE {
stub:
"""
touch ${stem(output_suffix)}.elib
touch ${stem(output_suffix)}.blib

if [ "${align}" = true ]; then
touch ${stem(output_suffix)}.peptides.txt
Expand Down
24 changes: 24 additions & 0 deletions modules/ims.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

// Add ion-mobility (IMS) values to a BiblioSpec library (.blib) with
// flimsay, producing a *.ims.blib consumed by the Skyline steps.
process ADD_IMS_INFO {
// 'group' is only used here, to pick the publish directory for the library.
publishDir "${params.result_dir}/${group}/blib", pattern: '*.ims.blib', failOnError: true
label 'process_medium'
// flimsay ships in its own image rather than the main pipeline image.
container 'ghcr.io/talusbio/flimsay:v0.4.0'

input:
// group - experiment group name (publish path only)
// blib  - the input BiblioSpec library to annotate
val group
path blib

output:
// The IMS-annotated library (always written as blib.ims.blib).
path("*.ims.blib"), emit: blib

script:
"""
flimsay fill_blib ${blib} blib.ims.blib
"""

// Stub for -stub-run: emit an empty library with the expected name.
stub:
"""
echo "${blib}"
touch blib.ims.blib
"""
}
Loading
Loading