diff --git a/docker/Dockerfile b/docker/Dockerfile index 6bdfa9d6f..838c1f734 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -39,6 +39,8 @@ RUN python -m pip install --upgrade pip && \ python -m pip install --no-dependencies "lxml>=4.6.4" && \ python -m pip install --no-dependencies --ignore-requires-python sbmlmath +RUN python -m mira.dkg.generate_obo_graphs + # Copy the example json for reconstructing the ode semantics RUN wget -O /sw/sir_flux_span.json https://raw.githubusercontent.com/gyorilab/mira/main/tests/sir_flux_span.json diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 44eb37b13..322f6c534 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -41,10 +41,8 @@ from pydantic import BaseModel, Field from pyobo.struct import part_of, is_a from pyobo.sources import ontology_resolver -from pyobo.getters import _ensure_ontology_path from pyobo.api.utils import get_version from pyobo.utils.path import prefix_directory_join -from obonet import read_obo from tabulate import tabulate from tqdm.auto import tqdm from typing_extensions import Literal @@ -63,6 +61,7 @@ from mira.dkg.resources.geonames import get_geonames_terms from mira.dkg.resources.extract_eiffel_ontology import get_eiffel_ontology_terms from mira.dkg.resources.uat import get_uat +from mira.dkg.generate_obo_graphs import download_convert_ncbitaxon_obo_to_graph MODULE = pystow.module("mira") DEMO_MODULE = MODULE.module("demo", "import") @@ -434,14 +433,11 @@ def extract_ontology_subtree(curie: str, add_subtree: bool = False): under the corresponding entry's subtree in its respective ontology. Relation information is also extracted with this option. - Running this method for the first time for each specific resource will - take a long time (minutes) as the obo resource file has to be downloaded, - converted to a networkx graph, have their node indices normalized, and - pickled. 
from pyobo.api.utils import get_version
from pyobo.getters import _ensure_ontology_path
from pyobo.utils.path import prefix_directory_join
from obonet import read_obo
import contextlib
import networkx
import os
import pickle


def download_convert_ncbitaxon_obo_to_graph(
    resource_prefix: str = "ncbitaxon",
) -> None:
    """Build and cache the relabeled OBO graph for an ontology resource.

    If the pickled graph for the current version of the resource is not
    cached yet, the resource's OBO file is obtained through pyobo's
    ``_ensure_ontology_path``, parsed into a networkx graph with
    ``obonet.read_obo``, its node identifiers are lower-cased, and the
    result is pickled to ``relabeled_obo_graph.pkl`` inside the
    versioned pyobo directory of the resource. If the cache file already
    exists, nothing is done.

    The pickle is first written to a temporary sibling file and then moved
    into place, so an interrupted run (e.g. a killed Docker build) never
    leaves a truncated file that a later ``exists()`` check would mistake
    for a valid cache.

    Parameters
    ----------
    resource_prefix :
        Prefix of the ontology resource to process. Defaults to
        ``"ncbitaxon"``, which is what the function name refers to and
        what is generated when this module is run as a script.
    """
    version = get_version(resource_prefix)

    # Location of the pickled graph; its presence is the cache signal
    cached_relabeled_obo_graph_path = prefix_directory_join(
        resource_prefix,
        name="relabeled_obo_graph.pkl",
        version=version,
    )
    if cached_relabeled_obo_graph_path.exists():
        return

    _, obo_path = _ensure_ontology_path(
        resource_prefix, force=False, version=version
    )
    obo_graph = read_obo(obo_path)

    # Normalize node indices
    relabeled_graph = networkx.relabel_nodes(
        obo_graph, lambda node_index: node_index.lower()
    )

    # Write next to the final location (same filesystem) so that
    # os.replace is a single rename; the pid keeps concurrent writers
    # from clobbering each other's temporary file.
    final_path = os.fspath(cached_relabeled_obo_graph_path)
    temporary_path = f"{final_path}.{os.getpid()}.tmp"
    try:
        with open(temporary_path, "wb") as relabeled_graph_file:
            pickle.dump(relabeled_graph, relabeled_graph_file)
        os.replace(temporary_path, final_path)
    finally:
        # After a successful replace the temporary file is gone; after a
        # failure this removes the partial file.
        with contextlib.suppress(FileNotFoundError):
            os.remove(temporary_path)


if __name__ == "__main__":
    download_convert_ncbitaxon_obo_to_graph()