From ea902a6b9e521bde0bda536df164c2cc41b7256d Mon Sep 17 00:00:00 2001 From: Henry Webel Date: Mon, 10 Jun 2024 10:58:40 +0200 Subject: [PATCH] :bug: make all imports explicit w.r.t. pkg (#68) - make all imports explicit - let workflow run as a last resort with one job at a time (although it wasn't necessary for the merge run) * :bug: make all imports explicit w.r.t. pkg * :memo: update M1 installation instructions - still does not seem to work * :bug: deactivating njab makes it run! - committed from marc's M1 laptop * :rewind: use njab after fix in downstream pkg underlying case is polars import in mrmr-selection, see: https://github.com/RasmussenLab/njab/pull/13 * :bug: try to allow parallel installations for R - ubuntu runs are regularly failing as lock can cause issues - alternatively one job at a time could be run in the retry * :bug: try to manually add missing dependency: gmm * :zap::memo: make ci more robust to installation issues, update README for Mac M1 chips --------- Co-authored-by: mpielies --- .github/workflows/ci.yaml | 12 ++++++-- .github/workflows/workflow_website.yaml | 6 ++++ README.md | 17 ++++++++---- project/01_1_train_NAGuideR_methods.R | 6 ++++ project/01_1_train_NAGuideR_methods.ipynb | 6 ++++ vaep/__init__.py | 3 -- vaep/analyzers/__init__.py | 2 +- vaep/data_handling.py | 2 -- vaep/filter.py | 2 -- vaep/models/__init__.py | 3 +- vaep/models/ae.py | 2 +- vaep/models/collab.py | 3 +- vaep/pandas/__init__.py | 34 +++++++++++++++++++---- vaep/plotting/__init__.py | 7 ++--- 14 files changed, 74 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f46fec693..60149e4b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -69,17 +69,25 @@ jobs: mkdir runs papermill 04_1_train_pimms_models.ipynb runs/04_1_train_pimms_models.ipynb papermill 04_1_train_pimms_models.ipynb runs/04_1_train_pimms_models_no_val.ipynb -p sample_splits False - - name: Run demo workflow 
(integration test) + - name: Dry-Run demo workflow (integration test) continue-on-error: true run: | cd project snakemake -p -c1 --configfile config/single_dev_dataset/example/config.yaml -n + - name: Run demo workflow (integration test) + continue-on-error: true + run: | + cd project snakemake -p -c4 -k --configfile config/single_dev_dataset/example/config.yaml - name: Run demo workflow again (in case of installation issues) + continue-on-error: true run: | cd project - snakemake -p -c1 -n --configfile config/single_dev_dataset/example/config.yaml snakemake -p -c4 -k --configfile config/single_dev_dataset/example/config.yaml + - name: Run demo workflow again (in case of installation issues) - one thread + run: | + cd project + snakemake -p -c1 --configfile config/single_dev_dataset/example/config.yaml - name: Archive results # https://github.com/actions/upload-artifact uses: actions/upload-artifact@v4 diff --git a/.github/workflows/workflow_website.yaml b/.github/workflows/workflow_website.yaml index be97836db..d4f4e845a 100644 --- a/.github/workflows/workflow_website.yaml +++ b/.github/workflows/workflow_website.yaml @@ -39,9 +39,15 @@ jobs: cd project snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c4 -k - name: Run demo workflow again (in case of installation issues) + continue-on-error: true run: | cd project snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c4 -k + - name: Run demo workflow again (in case of installation issues) with one thread + continue-on-error: true + run: | + cd project + snakemake -s workflow/Snakefile_v2.smk --configfile config/alzheimer_study/config.yaml -p -c1 -k - name: Run differential analysis workflow run: | cd project diff --git a/README.md b/README.md index aedf54e82..b11a02b64 100644 --- a/README.md +++ b/README.md @@ -127,17 +127,24 @@ mamba env create -n pimms -f environment.yml # faster, less then 5mins If on Mac M1, M2 or having otherwise 
issue using your accelerator (e.g. GPUs): Install the pytorch dependencies first, then the rest of the environment: -### Install pytorch first (M-chips) +### Install pytorch first + +> :warning: We currently see issues with some installations on M1 chips. A dependency +> for one workflow is polars, which causes the issue. This should be [fixed now](https://github.com/RasmussenLab/njab/pull/13) +> for general use by delayed import +> of `mrmr-selection` in `njab`. If you encounter issues, please open an issue. Check how to install pytorch for your system [here](https://pytorch.org/get-started). - select the version compatible with your cuda version if you have an nvidia gpu or a Mac M-chip. ```bash -conda create -n vaep python=3.9 pip -conda activate vaep -# Follow instructions on https://pytorch.org/get-started -# conda env update -f environment.yml -n vaep # should not install the rest. +conda create -n pimms python=3.9 pip +conda activate pimms +# Follow instructions on https://pytorch.org/get-started: +# CUDA is not available on MacOS, please use default package +# pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +conda install pytorch::pytorch torchvision torchaudio fastai -c pytorch -c fastai -y pip install pimms-learn pip install jupyterlab papermill # use run notebook interactively or as a script diff --git a/project/01_1_train_NAGuideR_methods.R b/project/01_1_train_NAGuideR_methods.R index 13997aa17..3918a663c 100644 --- a/project/01_1_train_NAGuideR_methods.R +++ b/project/01_1_train_NAGuideR_methods.R @@ -20,6 +20,8 @@ # - BiocManager could be moved to methods who are installed from BioConductor # + tags=["hide-input"] vscode={"languageId": "r"} +# options("install.lock"=FALSE) + packages_base_R <- c("BiocManager", "reshape2", "data.table", "readr", "tibble") @@ -130,6 +132,7 @@ nafunctions <- function(x, method = "zero") { else if (method == "qrilc") { install_bioconductor("impute") 
install_bioconductor("pcaMethods") + install_rpackage('gmm') install_rpackage('imputeLCMD') xxm <- t(df1) data_zero1 <- @@ -139,6 +142,7 @@ nafunctions <- function(x, method = "zero") { else if (method == "mindet") { install_bioconductor("impute") install_bioconductor("pcaMethods") + install_rpackage('gmm') install_rpackage('imputeLCMD') xxm <- as.matrix(df1) df <- imputeLCMD::impute.MinDet(xxm, q = 0.01) @@ -146,6 +150,7 @@ nafunctions <- function(x, method = "zero") { else if (method == "minprob") { install_bioconductor("impute") install_bioconductor("pcaMethods") + install_rpackage('gmm') install_rpackage('imputeLCMD') xxm <- as.matrix(df1) df <- @@ -278,6 +283,7 @@ nafunctions <- function(x, method = "zero") { install_bioconductor("impute") install_bioconductor("pcaMethods") + install_rpackage('gmm') install_rpackage('imputeLCMD') install_rpackage("magrittr") install_rpackage("glmnet") diff --git a/project/01_1_train_NAGuideR_methods.ipynb b/project/01_1_train_NAGuideR_methods.ipynb index 23fae4bd3..f6a7a6a1b 100644 --- a/project/01_1_train_NAGuideR_methods.ipynb +++ b/project/01_1_train_NAGuideR_methods.ipynb @@ -26,6 +26,8 @@ }, "outputs": [], "source": [ + "# options(\"install.lock\"=FALSE)\n", + "\n", "packages_base_R <-\n", " c(\"BiocManager\", \"reshape2\", \"data.table\", \"readr\", \"tibble\")\n", "\n", @@ -160,6 +162,7 @@ " else if (method == \"qrilc\") {\n", " install_bioconductor(\"impute\")\n", " install_bioconductor(\"pcaMethods\")\n", + " install_rpackage('gmm')\n", " install_rpackage('imputeLCMD')\n", " xxm <- t(df1)\n", " data_zero1 <-\n", @@ -169,6 +172,7 @@ " else if (method == \"mindet\") {\n", " install_bioconductor(\"impute\")\n", " install_bioconductor(\"pcaMethods\")\n", + " install_rpackage('gmm')\n", " install_rpackage('imputeLCMD')\n", " xxm <- as.matrix(df1)\n", " df <- imputeLCMD::impute.MinDet(xxm, q = 0.01)\n", @@ -176,6 +180,7 @@ " else if (method == \"minprob\") {\n", " install_bioconductor(\"impute\")\n", " 
install_bioconductor(\"pcaMethods\")\n", + " install_rpackage('gmm')\n", " install_rpackage('imputeLCMD')\n", " xxm <- as.matrix(df1)\n", " df <-\n", @@ -308,6 +313,7 @@ " \n", " install_bioconductor(\"impute\")\n", " install_bioconductor(\"pcaMethods\")\n", + " install_rpackage('gmm')\n", " install_rpackage('imputeLCMD')\n", " install_rpackage(\"magrittr\")\n", " install_rpackage(\"glmnet\")\n", diff --git a/vaep/__init__.py b/vaep/__init__.py index 98460455a..059ccf970 100644 --- a/vaep/__init__.py +++ b/vaep/__init__.py @@ -9,10 +9,7 @@ from importlib import metadata import njab -import pandas as pd -import pandas.io.formats.format as pf -# from . import logging, nb, pandas, plotting import vaep.logging import vaep.nb import vaep.pandas diff --git a/vaep/analyzers/__init__.py b/vaep/analyzers/__init__.py index 9856dafb9..6d16805a6 100644 --- a/vaep/analyzers/__init__.py +++ b/vaep/analyzers/__init__.py @@ -2,7 +2,7 @@ """ from types import SimpleNamespace -from . import compare_predictions, diff_analysis +from vaep.analyzers import compare_predictions, diff_analysis __all__ = ['diff_analysis', 'compare_predictions', 'Analysis'] diff --git a/vaep/data_handling.py b/vaep/data_handling.py index 1f0bc5404..41be078ac 100644 --- a/vaep/data_handling.py +++ b/vaep/data_handling.py @@ -4,8 +4,6 @@ import numpy as np import pandas as pd -# coverage - def coverage(X: pd.DataFrame, coverage_col: float, coverage_row: float): """Select proteins by column depending on their coverage. diff --git a/vaep/filter.py b/vaep/filter.py index 2c85999a9..2d26c4806 100644 --- a/vaep/filter.py +++ b/vaep/filter.py @@ -4,8 +4,6 @@ logger = logging.getLogger(__name__) -# ! 
use in data selection and tutorial - def select_features(df: pd.DataFrame, feat_prevalence: float = .2, diff --git a/vaep/models/__init__.py b/vaep/models/__init__.py index 3be35408b..2ae111d44 100644 --- a/vaep/models/__init__.py +++ b/vaep/models/__init__.py @@ -16,8 +16,7 @@ from fastcore.foundation import L import vaep - -from . import ae, analysis, collab, vae +from vaep.models import ae, analysis, collab, vae logger = logging.getLogger(__name__) diff --git a/vaep/models/ae.py b/vaep/models/ae.py index fd5d081a1..8295c8560 100644 --- a/vaep/models/ae.py +++ b/vaep/models/ae.py @@ -21,7 +21,7 @@ import vaep.models import vaep.transform -from . import analysis +from vaep.models import analysis logger = logging.getLogger(__name__) diff --git a/vaep/models/collab.py b/vaep/models/collab.py index 6e1403213..f54ab6df2 100644 --- a/vaep/models/collab.py +++ b/vaep/models/collab.py @@ -11,8 +11,7 @@ import vaep.io.dataloaders import vaep.io.datasplits - -from . import analysis +from vaep.models import analysis logger = logging.getLogger(__name__) diff --git a/vaep/pandas/__init__.py b/vaep/pandas/__init__.py index 97520bb02..5f82204b1 100644 --- a/vaep/pandas/__init__.py +++ b/vaep/pandas/__init__.py @@ -7,7 +7,30 @@ import omegaconf import pandas as pd -from .calc_errors import calc_errors_per_feat, get_absolute_error +from vaep.pandas.calc_errors import calc_errors_per_feat, get_absolute_error + +__all__ = [ + 'calc_errors_per_feat', + 'get_absolute_error', + 'unique_cols', + 'get_unique_non_unique_columns', + 'prop_unique_index', + 'replace_with', + 'index_to_dict', + 'get_columns_accessor', + 'get_columns_accessor_from_iterable', + 'select_max_by', + 'get_columns_namedtuple', + 'highlight_min', + '_add_indices', + 'interpolate', + 'flatten_dict_of_dicts', + 'key_map', + 'parse_query_expression', + 'length', + 'get_last_index_matching_proportion', + 'get_lower_whiskers', + 'get_counts_per_bin'] def unique_cols(s: pd.Series) -> bool: @@ -285,16 +308,15 @@ def 
get_lower_whiskers(df: pd.DataFrame, factor: float = 1.5) -> pd.Series: return ret -def get_counts_per_bin(df: pd.DataFrame, bins: range, columns: Optional[List[str]] = None) -> pd.DataFrame: +def get_counts_per_bin(df: pd.DataFrame, + bins: range, + columns: Optional[List[str]] = None) -> pd.DataFrame: """Return counts per bin for selected columns in DataFrame.""" counts_per_bin = dict() if columns is None: columns = df.columns.to_list() for col in columns: - _series = (pd.cut(df[col], bins=bins) - .to_frame() - .groupby(col) - .size()) + _series = (pd.cut(df[col], bins=bins).to_frame().groupby(col).size()) _series.index.name = 'bin' counts_per_bin[col] = _series counts_per_bin = pd.DataFrame(counts_per_bin) diff --git a/vaep/plotting/__init__.py b/vaep/plotting/__init__.py index 17cc86ced..105c183f8 100644 --- a/vaep/plotting/__init__.py +++ b/vaep/plotting/__init__.py @@ -11,11 +11,8 @@ import seaborn import vaep.pandas - -from . import data, defaults, errors, plotly -from .errors import plot_rolling_error - -# from . defaults import order_categories, labels_dict, IDX_ORDER +from vaep.plotting import data, defaults, errors, plotly +from vaep.plotting.errors import plot_rolling_error seaborn.set_style("whitegrid") # seaborn.set_theme()