Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions dockerfiles/samtools-cloud-virtual-env/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ARG UBUNTU_RELEASE="22.04"
ARG CONDA_RELEASE="4.7.12" # releases listed here: https://github.com/conda/conda/releases
ARG CONDA_RELEASE="py39_25.3.1-1" # releases listed here: https://repo.anaconda.com/miniconda/
ARG PYTHON_RELEASE="3.10.4" # get available releases from conda search python
ARG CONDA_INSTALL_DIR=/opt/conda
ARG CONDA_ENV_NAME="gatk-sv"
Expand All @@ -16,7 +16,7 @@ RUN apt-get -qqy update --fix-missing && \
# install conda
ARG CONDA_RELEASE
ARG CONDA_INSTALL_DIR
RUN wget --quiet https://repo.continuum.io/miniconda/Miniconda3-${CONDA_RELEASE}-Linux-x86_64.sh -O /tmp/miniconda.sh && \
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_RELEASE}-Linux-x86_64.sh -O /tmp/miniconda.sh && \
/bin/bash /tmp/miniconda.sh -b -p ${CONDA_INSTALL_DIR}

# update python to desired version, and install mamba for faster package management
Expand All @@ -28,9 +28,9 @@ ENV PATH=$CONDA_ENV_BIN:$CONDA_BIN:$PATH
# # create conda environment
ARG CONDA_ENV_NAME
# install the desired python version, and just the packages needed by google-cloud-cli
RUN conda install -qy -c conda-forge mamba=0.22.1 && \
RUN conda install -qy -c conda-forge mamba=2.1.1 && \
mamba create -n $CONDA_ENV_NAME -qy -c conda-forge python=$PYTHON_RELEASE \
pip=22.0.4 setuptools=62.1.0 wheel=0.37.1 crcmod=1.7
pip=25.1.1 setuptools=80.1.0 wheel=0.45.1 crcmod=1.7

# at this stage, we only need python for google-cloud-cli, however, we will want to use conda to install more packages
# later, so copy the whole conda install over, not just the environment
Expand Down
8 changes: 4 additions & 4 deletions dockerfiles/samtools-cloud/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
ARG MINIBASE_IMAGE=sv-base-mini:latest
ARG VIRTUAL_ENV_IMAGE=samtoolscloud-virtual-env:latest
# available gsutil versions here: https://cloud.google.com/sdk/docs/release-notes
ARG CLOUD_SDK_VERSION=414.0.0-0
ARG CLOUD_SDK_VERSION=518.0.0-0
ARG CONDA_ENV_NAME="gatk-sv"
ARG CONDA_INSTALL_DIR=/opt/conda
################## install google-cloud-cli into a copy of the virtual image
Expand All @@ -22,12 +22,12 @@ RUN apt-get -qqy update --fix-missing && \
# https://cloud.google.com/sdk/docs/install#deb
ARG CLOUD_SDK_VERSION

RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" \
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
| tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
| apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
| gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
apt-get update -y && \
apt-get install google-cloud-sdk=$CLOUD_SDK_VERSION -y && \
apt-get install google-cloud-cli=$CLOUD_SDK_VERSION -y && \
gcloud config set core/disable_usage_reporting true && \
gcloud config set component_manager/disable_update_check true && \
gcloud config set metrics/environment github_docker_image
Expand Down
22 changes: 18 additions & 4 deletions dockerfiles/sv-base-mini/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,23 @@
# This is the barebones image for the GATK SV pipeline that includes only
# some basic bioinformatics utilities.
ARG UBUNTU_RELEASE="22.04"
ARG HTSLIB_VERSION="1.15.1"
ARG HTSLIB_VERSION="1.21"
ARG BEDTOOLS_VERSION="2.31.0"
ARG VCFTOOLS_VERSION="0.1.16"
ARG BWA_COMMIT="139f68fc4c3747813783a488aef2adc86626b01b"

# You may speed up the compilation of htslib, bcftools, and vcftools by
# parallelizing the build process. You may dynamically choose thread count
# at runtime using: -j $(nproc --all)
# However, that occasionally fails (possibly due to a race condition) with
# the following error message:
# > collect2: error: ld returned 1 exit status
#
# Running with a single thread is slower, but the process is more
# reproducible and reliable. You may override the following argument,
# but be mindful of the possible error described above.
ARG THREAD_COUNT="1"
Comment on lines +9 to +19
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my experience, -j often leads to problems except in the simplest of projects

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly, I was running into those issues, so I set the thread count to 1. But we can also remove that option altogether if we're ok with disabling parallel make.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah just take it out


ARG APT_REQUIRED_PACKAGES="/opt/apt-required-packages.list"

############### stage 0: download and build tools
Expand All @@ -24,23 +36,25 @@ RUN wget -q https://github.com/samtools/samtools/releases/download/${HTSLIB_VERS
tar xjf samtools-${HTSLIB_VERSION}.tar.bz2 && \
cd samtools-${HTSLIB_VERSION} && \
./configure --quiet --enable-libcurl --prefix=/opt/samtools && \
make -s -j $(nproc --all) all all-htslib && \
make -s -j ${THREAD_COUNT} all all-htslib && \
make -s install install-htslib

# install bcftools
RUN wget -q https://github.com/samtools/bcftools/releases/download/${HTSLIB_VERSION}/bcftools-${HTSLIB_VERSION}.tar.bz2 && \
tar xjf bcftools-${HTSLIB_VERSION}.tar.bz2 && \
cd bcftools-${HTSLIB_VERSION} && \
./configure --quiet --prefix=/opt/bcftools && \
make -s -j $(nproc --all) && \
make -s -j ${THREAD_COUNT} && \
make -s install

# install vcftools
ARG VCFTOOLS_VERSION
RUN wget -q https://github.com/vcftools/vcftools/releases/download/v$VCFTOOLS_VERSION/vcftools-${VCFTOOLS_VERSION}.tar.gz && \
tar zxf vcftools-${VCFTOOLS_VERSION}.tar.gz && \
cd vcftools-${VCFTOOLS_VERSION} && \
export PERL5LIB=$(pwd)/src/perl/ && \
./configure --quiet --prefix=/opt/vcftools && \
make -s -j $(nproc --all) && \
make -s -j ${THREAD_COUNT} && \
make -s install

# install bedtools
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/sv-base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This is the base dockerfile for the GATK SV pipeline that adds R, a few R packages, and GATK
ARG SAMTOOLS_CLOUD_IMAGE=samtools-cloud:latest
ARG VIRTUAL_ENV_IMAGE=sv-base-virtual-env:latest
ARG GATK_COMMIT="64348bc9750ebf6cc473ecb8c1ced3fc66f05488"
ARG GATK_COMMIT="657b4eeb2374d1d865b1faf862b9b55902aca041"
ARG GATK_JAR="/opt/gatk.jar"
ARG R_INSTALL_PATH=/opt/R

Expand Down
23 changes: 12 additions & 11 deletions dockerfiles/sv-pipeline-virtual-env/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ FROM $SV_BASE_MINI_IMAGE as sv_base_mini
FROM $PYTHON_VIRTUAL_ENV_IMAGE as python_virtual_env

# install any build dependencies
ARG BUILD_DEPS="g++ make apt-transport-https ca-certificates wget libcurl4-openssl-dev"
ARG BUILD_DEPS="g++ make apt-transport-https ca-certificates wget libcurl4-openssl-dev zlib1g-dev"
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get -qqy update --fix-missing && \
apt-get -qqy install --no-install-recommends $BUILD_DEPS

# install conda packages
# NOTE: need to use scipy=1.7.3 instead of scipy=1.8.0 because it makes hail angry
ARG CONDA_PKGS="cython=0.29.28 numpy=1.22.3 pandas=1.4.2 scipy=1.7.3 scikit-learn=1.0.2 intervaltree=3.1.0 \
matplotlib=3.5.1 natsort=8.1.0 google-cloud-dataproc=4.0.2 seaborn=0.12.2"
matplotlib=3.5.1 natsort=8.1.0 google-cloud-dataproc=5.18.1 seaborn=0.12.2"
RUN mamba install -qy --freeze-installed -n $CONDA_ENV_NAME -c conda-forge -c bioconda $CONDA_PKGS

# copy in HTSLIB install so that pysam uses same version as is available in pipeline
Expand All @@ -37,15 +37,16 @@ RUN mamba install -qy --freeze-installed -n $CONDA_ENV_NAME -c conda-forge -c bi
# duplicate libraries. The older pysam that needs to be installed for BND issues has problems with cython during pip
# install, so must be built from source. One last pysam wrinkle: its install can't handle the newest versions of
# setuptools, so we're going to downgrade then re-upgrade them just for pysam
ARG PYSAM_VERSION=0.15.4
RUN export SETUPTOOLS_VERSION=$(python -c 'import setuptools; print(setuptools.__version__)') && \
pip install setuptools==57.5.0 && \
wget -q https://github.com/pysam-developers/pysam/archive/refs/tags/v$PYSAM_VERSION.tar.gz && \
tar -xzf v$PYSAM_VERSION.tar.gz && \
cd pysam-$PYSAM_VERSION && \
python setup.py build && \
python setup.py install && \
pip install setuptools==$SETUPTOOLS_VERSION
ARG PYSAM_VERSION=0.23.0
# RUN export SETUPTOOLS_VERSION=$(python -c 'import setuptools; print(setuptools.__version__)') && \
# pip install setuptools==80.1.0 && \
# wget -q https://github.com/pysam-developers/pysam/archive/refs/tags/v$PYSAM_VERSION.tar.gz && \
# tar -xzf v$PYSAM_VERSION.tar.gz && \
# cd pysam-$PYSAM_VERSION && \
# python setup.py build && \
# python setup.py install && \
# pip install setuptools==$SETUPTOOLS_VERSION
RUN pip install pysam==$PYSAM_VERSION

# pybedtools needs to be installed via pip because it doesn't like the updated python
# Run cache purge in case the base is out of date.
Expand Down
19 changes: 10 additions & 9 deletions dockerfiles/sv-utils-env/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@ RUN mamba install -qy --freeze-installed -n $CONDA_ENV_NAME -c conda-forge -c bi
# duplicate libraries. The older pysam that needs to be installed for BND issues has problems with cython during pip
# install, so must be built from source. One last pysam wrinkle: its install can't handle the newest versions of
# setuptools, so we're going to downgrade then re-upgrade them just for pysam
ARG PYSAM_VERSION=0.15.4
RUN export SETUPTOOLS_VERSION=$(python -c 'import setuptools; print(setuptools.__version__)') && \
pip install setuptools==57.5.0 && \
wget -q https://github.com/pysam-developers/pysam/archive/refs/tags/v$PYSAM_VERSION.tar.gz && \
tar -xzf v$PYSAM_VERSION.tar.gz && \
cd pysam-$PYSAM_VERSION && \
python setup.py build && \
python setup.py install && \
pip install setuptools==$SETUPTOOLS_VERSION
ARG PYSAM_VERSION=0.23.0
# RUN export SETUPTOOLS_VERSION=$(python -c 'import setuptools; print(setuptools.__version__)') && \
# pip install setuptools==80.1.0 && \
# wget -q https://github.com/pysam-developers/pysam/archive/refs/tags/v$PYSAM_VERSION.tar.gz && \
# tar -xzf v$PYSAM_VERSION.tar.gz && \
# cd pysam-$PYSAM_VERSION && \
# python setup.py build && \
# python setup.py install && \
# pip install setuptools==$SETUPTOOLS_VERSION
RUN pip install pysam==$PYSAM_VERSION

# clean unneeded stuff
RUN conda clean -ay --force-pkgs-dirs
Expand Down
2 changes: 1 addition & 1 deletion scripts/docker/build_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ class to track dependencies, control build and push of entire job
)
}
non_public_images = frozenset({"melt"})
images_built_by_all = frozenset(dependencies.keys()).difference({"melt"})
images_built_by_all = frozenset(dependencies.keys()).difference({"melt", "str"})
accepted_target_values = frozenset(dependencies.keys()).union({"all"})
latest_tag = "latest"
local_reg_name = "local"
Expand Down
8 changes: 8 additions & 0 deletions src/svtk/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[build-system]
requires = [
"setuptools>=80.1.0",
"wheel",
"cython>=0.29.28",
"pysam>=0.23.0"
]
build-backend = "setuptools.build_meta"
Loading