Skip to content

Commit

Permalink
Merge pull request #56 from loucerac/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
loucerac authored Jun 27, 2023
2 parents ba30cfe + a1d3cf4 commit 266e73b
Show file tree
Hide file tree
Showing 34 changed files with 1,593 additions and 26,105 deletions.
26 changes: 17 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,18 @@ To install the development version use `@develop` instead of `@master`.

## Run

To run the program for a disease map that uses circuits from the preprocessed `KEGG` pathways and the `KDT` standard list, construct an environment file (e.g. `disease.env`) using the following template:
To run the program for a disease map that uses circuits from the preprocessed `KEGG` pathways and the `KDT` standard list, construct an environment file (e.g. `disease.env`):

- using the following template if you have a set of seed genes (comma-separated):

```
seed_genes=2175,2176,2189
```

- using the following template if you know which circuits to include (the disease map):

```
gene_exp=$default$
pathvals=$default$
circuits=circuits.tsv.gz
circuits_column=in_disease
genes=$default$
genes_column=approved_targets
```

The `TSV` file `circuits.tsv` has the following format (tab delimited):
Expand All @@ -67,10 +70,14 @@ P-hsa03320-28 1

where:
* `index`: Hipathia circuit id
* `in_disease`: boolean if a given circuit is part of the disease
* `in_disease`: (boolean) True/1 if a given circuit is part of the disease

Note that in all cases you can restrict the circuits to the physiological list by setting `use_physio=true` in the `env` file.

To run the experiment using 10 CPU cores and 0 GPUs, run the following command within an activated environment:

```
conda run -n drexml drexml run --n-gpus 0 --n-cpus 10 $DISEASE_PATH
drexml run --n-gpus 0 --n-cpus 10 $DISEASE_PATH
```

where:
Expand All @@ -84,11 +91,12 @@ Note that the first time that the full program is run, it will take longer as it

https://doi.org/10.5281/zenodo.6020480


## Contribute to development

The recommended setup is:
- set up `pipx`
- set up `miniconda`
- set up `miniforge`
- use `pipx` to install `poetry`
- use `pipx` to install `nox` and inject `nox-poetry` into `nox`
- run `make`, if you want to use a CUDA enabled GPU, use `make gpu=1`
Expand Down
37 changes: 17 additions & 20 deletions drexml/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@
"ignore", module="shap", category=NumbaPendingDeprecationWarning
)

from drexml.datasets import get_data
from drexml.plotting import plot_metrics
from drexml.utils import (
get_data,
check_gputree_availability,
get_number_cuda_devices,
get_out_path,
get_version,
rename_results,
)

FNAME_DICT = {
Expand Down Expand Up @@ -158,8 +158,11 @@ def build_ctx(ctx, step=None):
ctx_new["mode"] = "final"

if "n_gpus" in ctx_new.keys():
if ctx_new["n_gpus"] < 0:
ctx_new["n_gpus"] = get_number_cuda_devices()
if check_gputree_availability():
if ctx_new["n_gpus"] < 0: # pragma: no cover
ctx_new["n_gpus"] = get_number_cuda_devices()
else:
ctx_new["n_gpus"] = 0 # pragma: no cover
if "n_cpus" in ctx_new.keys():
if ctx_new["n_cpus"] < 0:
ctx_new["n_cpus"] = multiprocessing.cpu_count()
Expand Down Expand Up @@ -218,11 +221,12 @@ def main():
def orchestrate(**kwargs):
"""Orchestrate the drexml procedure. Entry point for multi-disease workflows."""

print(f"running drexml explainer v {get_version()}")
click.echo(f"running drexml explainer v {get_version()}")
ctx = build_ctx(kwargs)

# Load data
gene_xpr, pathvals, _, _ = get_data(ctx["disease_path"], ctx["debug"])
click.echo(ctx["data_folder"].joinpath("features.jbl"))
joblib.dump(gene_xpr, ctx["data_folder"].joinpath("features.jbl"))
joblib.dump(pathvals, ctx["data_folder"].joinpath("target.jbl"))

Expand All @@ -248,8 +252,6 @@ def stability(**kwargs):
current_step = "stab-explain"
elif kwargs["mode"].lower() == "score":
current_step = "stab-score"
else:
sys.exit("Unknown stability analysis step.")

click.echo(f"Running drexml {current_step} v {get_version()}")

Expand All @@ -258,7 +260,7 @@ def stability(**kwargs):
run_cmd(ctx)

if ctx["mode"].lower() == "score":
fnames = ["stability_results.tsv"]
fnames = ["stability_results.tsv", "stability_results_symbol.tsv"]
copy_files(ctx, fnames)


Expand All @@ -277,7 +279,12 @@ def explain(**kwargs):

run_cmd(ctx)

fnames = ["shap_selection.tsv", "shap_summary.tsv"]
fnames = [
"shap_selection.tsv",
"shap_summary.tsv",
"shap_selection_symbol.tsv",
"shap_summary_symbol.tsv",
]
copy_files(ctx, fnames)


Expand Down Expand Up @@ -312,15 +319,5 @@ def plot(ctx, stab_path):
plot_metrics(stab_path)


@main.command()
@click.argument("results-folder", type=click.Path(exists=True))
@click.version_option(get_version())
@click.pass_context
def rename(ctx, results_folder):
"""Plot the stability results"""

rename_results(results_folder)


if __name__ == "__main__":
main()
main() # pragma: no cover
24 changes: 23 additions & 1 deletion drexml/cli/stab_explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from drexml.explain import compute_shap_fs, compute_shap_relevance, compute_shap_values_
from drexml.models import get_model
from drexml.utils import parse_stab
from drexml.utils import convert_names, parse_stab

if __name__ == "__main__":
import sys
Expand Down Expand Up @@ -165,8 +165,30 @@ def runner(model, bkg, new, check_add, use_gpu):
shap_relevances.to_csv(shap_summary_fpath, sep="\t")
print(f"Shap summary results saved to: {shap_summary_fpath}")

shap_summary_renamed = convert_names(
shap_relevances.set_index(shap_relevances.columns[0]),
["circuits", "genes"],
axis=[0, 1],
)
shap_summary_renamed.to_csv(
shap_summary_fpath.absolute().parent.joinpath(
f"{shap_summary_fpath.stem}_symbol.tsv"
),
sep="\t",
index_label="circuit_name",
)

# Save results
fs_fname = "shap_selection.tsv"
fs_fpath = data_folder.joinpath(fs_fname)
(filt_i * 1).to_csv(fs_fpath, sep="\t")
print(f"Shap selection results saved to: {fs_fpath}")

fs_renamed = convert_names(
filt_i.set_index(filt_i.columns[0]), ["circuits", "genes"], axis=[0, 1]
)
fs_renamed.to_csv(
fs_fpath.absolute().parent.joinpath(f"{fs_fpath.stem}_symbol.tsv"),
sep="\t",
index_label="circuit_name",
)
12 changes: 11 additions & 1 deletion drexml/cli/stab_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from drexml.explain import build_stability_dict
from drexml.pystab import nogueria_test
from drexml.utils import get_stab, parse_stab
from drexml.utils import convert_names, get_stab, parse_stab

if __name__ == "__main__":
import sys
Expand Down Expand Up @@ -130,3 +130,13 @@ def stab_i(estimator, X, Y, split_id, this_split):
stability_results_df.to_csv(
data_folder.joinpath("stability_results.tsv"), sep="\t", index_label="name"
)

stability_results_renamed_df = convert_names(
stability_results_df, ["circuits"], axis=[0]
)

stability_results_renamed_df.to_csv(
data_folder.joinpath("stability_results_symbol.tsv"),
sep="\t",
index_label="name",
)
34 changes: 34 additions & 0 deletions drexml/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""
Config module.
"""


# Default value for every configuration key. Presumably these are the
# fallbacks applied when a key is missing from a disease ``env`` file --
# confirm against the code that reads this mapping.
DEFAULT_DICT = {
    # Disease definition.
    "seed_genes": None,
    "use_physio": "true",  # kept as a string, not a bool
    # Data sources, each paired with a ``*_zenodo`` flag.
    "gene_exp": None,
    "gene_exp_zenodo": False,
    "pathvals": None,
    "pathvals_zenodo": False,
    "circuits": None,
    "circuits_zenodo": False,
    "genes": None,
    "genes_zenodo": False,
    # Column names.
    "circuits_column": "in_disease",
    "genes_column": "drugbank_approved_targets",
    # Version tags.
    "GTEX_VERSION": "v8",
    "MYGENE_VERSION": "v20230220",
    "DRUGBANK_VERSION": "v050110",
    "HIPATHIA_VERSION": "v2-14-0",
    "EDGER_VERSION": "v3-40-0",
}


# Maps each ``*_VERSION`` key to a list of version strings; every list
# currently holds a single entry. Presumably these are the accepted values
# for each key -- confirm against the code that consumes this mapping.
VERSION_DICT = {
    "GTEX_VERSION": ["v8"],
    "MYGENE_VERSION": ["v20230220"],
    "DRUGBANK_VERSION": ["v050110"],
    "HIPATHIA_VERSION": ["v2-14-0"],
    "EDGER_VERSION": ["v3-40-0"],
}
Loading

0 comments on commit 266e73b

Please sign in to comment.