diff --git a/docs/source/index.rst b/docs/source/index.rst index 922fe19f..454b388a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,12 +12,11 @@ and signaling circuits. Installation ============= -You can install drexml from PyPI using pip: +You can install drexml using: .. code:: - pip install git+ssh://git@github.com:loucerac/drexml.git - + pip install git+https://github.com/loucerac/drexml.git@master Getting started diff --git a/drexml/datasets.py b/drexml/datasets.py index 8cc4abbc..bd2fd3ad 100644 --- a/drexml/datasets.py +++ b/drexml/datasets.py @@ -23,29 +23,30 @@ def load_physiological_circuits(): return circuit_names.index[circuit_names["is_physiological"]].tolist() -def fetch_file(disease, key, env, version="latest"): - """ - Retrieve data. - - Parameters: +def fetch_file(key, env, version="latest"): + """Retrieve file from the environment. - - disease (str): The name of the disease. - - key (str): The key associated with the data. - - env (Union[str, pathlib.Path]): The environment variable or path containing the data. - - version (str, optional): The version of the data to retrieve (default: "latest"). - - debug (bool, optional): Whether to enable debug mode (default: False). - - Returns: - - frame (np.ndarray): The preprocessed data frame. + Parameters + ---------- + key : str + Key of the file to retrieve. + env : dict + Environment. + version : str + Version of the file to retrieve. - Raises: - - ConnectTimeout: If a connection timeout occurs during retrieval. + Returns + ------- + pathlib.Path + Path to the file. + Raises + ------ + NotImplementedError + Not implemented yet. """ print(f"Retrieving {key}") - experiment_env_path = pathlib.Path(disease) - env = read_disease_config(experiment_env_path) if env[key + "_zenodo"]: # pragma: no cover if version == "latest": try: @@ -352,10 +353,10 @@ def get_disease_data(disease): experiment_env_path = pathlib.Path(disease) env = read_disease_config(experiment_env_path) - gene_exp = fetch_file(disease, key="gene_exp", env=env, version="latest") - pathvals = fetch_file(disease, key="pathvals", env=env, version="latest") - circuits = fetch_file(disease, key="circuits", env=env, version="latest") - genes = fetch_file(disease, key="genes", env=env, version="latest") + gene_exp = fetch_file(key="gene_exp", env=env, version="latest") + pathvals = fetch_file(key="pathvals", env=env, version="latest") + circuits = fetch_file(key="circuits", env=env, version="latest") + genes = fetch_file(key="genes", env=env, version="latest") # gene_exp = gene_exp[genes.index[genes[genes_column]]] diff --git a/drexml/explain.py b/drexml/explain.py index ca128207..2c14ee16 100644 --- a/drexml/explain.py +++ b/drexml/explain.py @@ -12,35 +12,39 @@ from drexml.pystab import nogueria_test -def matcorr(O, P): +def matcorr(features, targets): """Fast correlation matrix computation. Parameters ---------- - O : ndarray + features : ndarray [n_samples, n_features] A matrix of observations. - P : ndarray + targets : ndarray [n_samples, n_tasks] A matrix of predictions. Returns ------- ndarray - The cross-correlation matrix. + The correlation matrix. """ - n = O.shape[0] + n = features.shape[0] - DO = O - ( - np.einsum("nt->t", O, optimize="optimal") / np.double(n) - ) # compute O - mean(O) - DP = P - ( - np.einsum("nm->m", P, optimize="optimal") / np.double(n) - ) # compute P - mean(P) + features_center = features - ( + np.einsum("nt->t", features, optimize="optimal") / np.double(n) + ) + targets_center = targets - ( + np.einsum("nm->m", targets, optimize="optimal") / np.double(n) + ) - cov = np.einsum("nm,nt->mt", DP, DO, optimize="optimal") + cov = np.einsum("nm,nt->mt", targets_center, features_center, optimize="optimal") - varP = np.einsum("nm,nm->m", DP, DP, optimize="optimal") - varO = np.einsum("nt,nt->t", DO, DO, optimize="optimal") - tmp = np.einsum("m,t->mt", varP, varO, optimize="optimal") + targets_var = np.einsum( + "nm,nm->m", targets_center, targets_center, optimize="optimal" + ) + features_var = np.einsum( + "nt,nt->t", features_center, features_center, optimize="optimal" + ) + tmp = np.einsum("m,t->mt", targets_var, features_var, optimize="optimal") return cov / np.sqrt(tmp) diff --git a/drexml/utils.py b/drexml/utils.py index 09da8e77..aba2fa48 100644 --- a/drexml/utils.py +++ b/drexml/utils.py @@ -50,7 +50,7 @@ def check_cli_arg_is_bool(arg): elif arg in ["false", "False", "FALSE", "0"]: arg = False else: - raise ValueError("debug must be a boolean") + raise ValueError(f"argument {arg} must be a boolean") return arg diff --git a/pyproject.toml b/pyproject.toml index caa605be..ee34af71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "drexml" -version = "0.9.11" +version = "0.9.12" description = "(DRExM³L) Drug REpurposing using and eXplainable Machine Learning and Mechanistic Models of signal transduction\"" authors = [ "Carlos Loucera ",