Skip to content

Commit

Permalink
Merge pull request #57 from loucerac/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
loucerac authored Jun 30, 2023
2 parents 266e73b + f51c7fb commit 8320571
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 41 deletions.
5 changes: 2 additions & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@ and signaling circuits.
Installation
=============

You can install drexml from PyPI using pip:
You can install drexml using:

.. code::
pip install git+ssh://git@github.com:loucerac/drexml.git
pip install git+https://github.com/loucerac/drexml.git@master
Getting started
Expand Down
43 changes: 22 additions & 21 deletions drexml/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,30 @@ def load_physiological_circuits():
return circuit_names.index[circuit_names["is_physiological"]].tolist()


def fetch_file(disease, key, env, version="latest"):
"""
Retrieve data.
Parameters:
def fetch_file(key, env, version="latest"):
"""Retrieve file from the environment.
- disease (str): The name of the disease.
- key (str): The key associated with the data.
- env (Union[str, pathlib.Path]): The environment variable or path containing the data.
- version (str, optional): The version of the data to retrieve (default: "latest").
- debug (bool, optional): Whether to enable debug mode (default: False).
Returns:
- frame (np.ndarray): The preprocessed data frame.
Parameters
----------
key : str
Key of the file to retrieve.
env : dict
Environment.
version : str
Version of the file to retrieve.
Raises:
- ConnectTimeout: If a connection timeout occurs during retrieval.
Returns
-------
pathlib.Path
Path to the file.
Raises
------
NotImplementedError
Not implemented yet.
"""

print(f"Retrieving {key}")
experiment_env_path = pathlib.Path(disease)
env = read_disease_config(experiment_env_path)
if env[key + "_zenodo"]: # pragma: no cover
if version == "latest":
try:
Expand Down Expand Up @@ -352,10 +353,10 @@ def get_disease_data(disease):
experiment_env_path = pathlib.Path(disease)
env = read_disease_config(experiment_env_path)

gene_exp = fetch_file(disease, key="gene_exp", env=env, version="latest")
pathvals = fetch_file(disease, key="pathvals", env=env, version="latest")
circuits = fetch_file(disease, key="circuits", env=env, version="latest")
genes = fetch_file(disease, key="genes", env=env, version="latest")
gene_exp = fetch_file(key="gene_exp", env=env, version="latest")
pathvals = fetch_file(key="pathvals", env=env, version="latest")
circuits = fetch_file(key="circuits", env=env, version="latest")
genes = fetch_file(key="genes", env=env, version="latest")

# gene_exp = gene_exp[genes.index[genes[genes_column]]]

Expand Down
34 changes: 19 additions & 15 deletions drexml/explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,39 @@
from drexml.pystab import nogueria_test


def matcorr(O, P):
def matcorr(features, targets):
"""Fast correlation matrix computation.
Parameters
----------
O : ndarray
features : ndarray [n_samples, n_features]
A matrix of observations.
P : ndarray
targets : ndarray [n_samples, n_tasks]
A matrix of predictions.
Returns
-------
ndarray
The cross-correlation matrix.
The correlation matrix.
"""
n = O.shape[0]
n = features.shape[0]

DO = O - (
np.einsum("nt->t", O, optimize="optimal") / np.double(n)
) # compute O - mean(O)
DP = P - (
np.einsum("nm->m", P, optimize="optimal") / np.double(n)
) # compute P - mean(P)
features_center = features - (
np.einsum("nt->t", features, optimize="optimal") / np.double(n)
)
targets_center = targets - (
np.einsum("nm->m", targets, optimize="optimal") / np.double(n)
)

cov = np.einsum("nm,nt->mt", DP, DO, optimize="optimal")
cov = np.einsum("nm,nt->mt", targets_center, features_center, optimize="optimal")

varP = np.einsum("nm,nm->m", DP, DP, optimize="optimal")
varO = np.einsum("nt,nt->t", DO, DO, optimize="optimal")
tmp = np.einsum("m,t->mt", varP, varO, optimize="optimal")
targets_var = np.einsum(
"nm,nm->m", targets_center, targets_center, optimize="optimal"
)
features_var = np.einsum(
"nt,nt->t", features_center, features_center, optimize="optimal"
)
tmp = np.einsum("m,t->mt", targets_var, features_var, optimize="optimal")

return cov / np.sqrt(tmp)

Expand Down
2 changes: 1 addition & 1 deletion drexml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def check_cli_arg_is_bool(arg):
elif arg in ["false", "False", "FALSE", "0"]:
arg = False
else:
raise ValueError("debug must be a boolean")
raise ValueError(f"argument {arg} must be a boolean")

return arg

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "drexml"
version = "0.9.11"
version = "0.9.12"
description = "(DRExM³L) Drug REpurposing using and eXplainable Machine Learning and Mechanistic Models of signal transduction\""
authors = [
"Carlos Loucera <[email protected]>",
Expand Down

0 comments on commit 8320571

Please sign in to comment.