From d158f35910870cd1cd8d3e61a1ce5920429ef715 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 6 Aug 2024 09:50:33 -0400 Subject: [PATCH 1/7] Prioritize loaded up env var version pin dict when retrieving resource version --- src/pyobo/api/utils.py | 5 +++++ src/pyobo/constants.py | 19 ++++++++++++++++++- src/pyobo/sources/antibodyregistry.py | 4 ++-- src/pyobo/sources/biogrid.py | 6 +++--- src/pyobo/sources/hgnc.py | 5 +++-- src/pyobo/sources/mesh.py | 5 ++--- src/pyobo/sources/pubchem.py | 7 ++++--- src/pyobo/sources/rhea.py | 4 ++-- src/pyobo/sources/uniprot/uniprot.py | 4 ++-- src/pyobo/struct/struct.py | 5 ++--- src/pyobo/xrefdb/sources/chembl.py | 10 +++++----- src/pyobo/xrefdb/sources/pubchem.py | 4 ++-- 12 files changed, 50 insertions(+), 28 deletions(-) diff --git a/src/pyobo/api/utils.py b/src/pyobo/api/utils.py index 0db22f73..2a84dda5 100644 --- a/src/pyobo/api/utils.py +++ b/src/pyobo/api/utils.py @@ -8,6 +8,7 @@ import bioversions from ..utils.path import prefix_directory_join +from ..constants import VERSION_PINS __all__ = [ "get_version", @@ -25,6 +26,10 @@ def get_version(prefix: str) -> Optional[str]: :param prefix: the resource name :return: The version if available else None """ + # Prioritize loaded environmental variable VERSION_PINS dictionary + version = VERSION_PINS.get(prefix) + if version: + return version try: version = bioversions.get_version(prefix) except KeyError: diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index 3fd8279d..7413ed49 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -4,6 +4,8 @@ import logging import re +import os +import json import pystow @@ -11,6 +13,7 @@ "RAW_DIRECTORY", "DATABASE_DIRECTORY", "SPECIES_REMAPPING", + "VERSION_PINS" ] logger = logging.getLogger(__name__) @@ -80,7 +83,6 @@ SPECIES_RECORD = "5334738" SPECIES_FILE = "species.tsv.gz" - NCBITAXON_PREFIX = "NCBITaxon" DATE_FORMAT = "%d:%m:%Y %H:%M" PROVENANCE_PREFIXES = { @@ -99,3 +101,18 @@ "isbn", "issn", } + +# Load version pin dictionary from the environmental variable VERSION_PINS +try: + VERSION_PINS_STR = os.getenv("VERSION_PINS") + if not VERSION_PINS_STR: + VERSION_PINS = {} + else: + VERSION_PINS = json.loads(VERSION_PINS_STR) + for k, v in VERSION_PINS.items(): + if not isinstance(k, str) or not isinstance(v, str): + raise ValueError("The prefix and version name must both be " + "strings") +except Exception as e: + raise ValueError("The value for the environment variable VERSION_PINS" + " must be a valid JSON string") from e diff --git a/src/pyobo/sources/antibodyregistry.py b/src/pyobo/sources/antibodyregistry.py index df757590..3f1e9f4a 100644 --- a/src/pyobo/sources/antibodyregistry.py +++ b/src/pyobo/sources/antibodyregistry.py @@ -5,13 +5,13 @@ import logging from typing import Iterable, Mapping, Optional -import bioversions import pandas as pd from bioregistry.utils import removeprefix from tqdm.auto import tqdm from pyobo import Obo, Term from pyobo.utils.path import ensure_df +from pyobo.api.utils import get_version __all__ = [ "AntibodyRegistryGetter", @@ -27,7 +27,7 @@ def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame: """Get the BioGRID identifiers mapping dataframe.""" if version is None: - version = bioversions.get_version(PREFIX) + version = get_version(PREFIX) df = ensure_df( PREFIX, url=URL, diff --git a/src/pyobo/sources/biogrid.py b/src/pyobo/sources/biogrid.py index 48440085..30076f58 100644 --- a/src/pyobo/sources/biogrid.py +++ b/src/pyobo/sources/biogrid.py @@ -5,12 +5,12 @@ from functools import partial from typing import Mapping, Optional -import bioversions import pandas as pd from pyobo.resources.ncbitaxon import get_ncbitaxon_id from pyobo.utils.cache import cached_mapping from pyobo.utils.path import ensure_df, prefix_directory_join +from pyobo.api.utils import get_version PREFIX = "biogrid" BASE_URL = "https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive" @@ -52,7 +52,7 @@ def _lookup(name: str) -> Optional[str]: def get_df() -> pd.DataFrame: """Get the BioGRID identifiers mapping dataframe.""" - version = bioversions.get_version("biogrid") + version = get_version("biogrid") url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip" df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version) df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup) @@ -65,7 +65,7 @@ def get_df() -> pd.DataFrame: "cache", "xrefs", name="ncbigene.tsv", - version=partial(bioversions.get_version, PREFIX), + version=partial(get_version, PREFIX), ), header=["biogrid_id", "ncbigene_id"], ) diff --git a/src/pyobo/sources/hgnc.py b/src/pyobo/sources/hgnc.py index 0e0fab5c..0451fd13 100644 --- a/src/pyobo/sources/hgnc.py +++ b/src/pyobo/sources/hgnc.py @@ -10,7 +10,7 @@ from operator import attrgetter from typing import DefaultDict, Dict, Iterable, Optional -import bioversions + from tabulate import tabulate from tqdm.auto import tqdm @@ -29,6 +29,7 @@ ) from pyobo.struct.typedef import exact_match from pyobo.utils.path import ensure_path, prefix_directory_join +from pyobo.api.utils import get_version __all__ = [ "HGNCGetter", @@ -241,7 +242,7 @@ def get_obo(*, force: bool = False) -> Obo: def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: # noqa:C901 """Get HGNC terms.""" if version is None: - version = bioversions.get_version("hgnc") + version = get_version("hgnc") unhandled_entry_keys: typing.Counter[str] = Counter() unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict) path = ensure_path( diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py index 0368e36a..e85ca5ba 100644 --- a/src/pyobo/sources/mesh.py +++ b/src/pyobo/sources/mesh.py @@ -16,6 +16,7 @@ from pyobo.utils.cache import cached_json, cached_mapping from pyobo.utils.io import parse_xml_gz from pyobo.utils.path import ensure_path, prefix_directory_join +from pyobo.api.utils import get_version __all__ = [ "MeSHGetter", @@ -331,9 +332,7 @@ def get_mesh_category_curies( .. seealso:: https://meshb.nlm.nih.gov/treeView """ if version is None: - import bioversions - - version = bioversions.get_version("mesh") + version = get_version("mesh") tree_to_mesh = get_tree_to_mesh_id(version=version) rv = [] for i in range(1, 100): diff --git a/src/pyobo/sources/pubchem.py b/src/pyobo/sources/pubchem.py index fa82ff06..18a99031 100644 --- a/src/pyobo/sources/pubchem.py +++ b/src/pyobo/sources/pubchem.py @@ -5,7 +5,7 @@ import logging from typing import Iterable, Mapping, Optional -import bioversions + import pandas as pd from bioregistry.utils import removeprefix from tqdm.auto import tqdm @@ -14,6 +14,7 @@ from ..struct import Obo, Reference, Synonym, Term from ..utils.iter import iterate_gzips_together from ..utils.path import ensure_df, ensure_path +from ..api.utils import get_version __all__ = [ "PubChemCompoundGetter", @@ -26,7 +27,7 @@ def _get_pubchem_extras_url(version: Optional[str], end: str) -> str: if version is None: - version = bioversions.get_version("pubchem") + version = get_version("pubchem") return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}" @@ -100,7 +101,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]: def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str: if version is None: - version = bioversions.get_version("pubchem") + version = get_version("pubchem") # 2 tab-separated columns: compound_id, name cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz") cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force) diff --git a/src/pyobo/sources/rhea.py b/src/pyobo/sources/rhea.py index 7459005f..7587a79f 100644 --- a/src/pyobo/sources/rhea.py +++ b/src/pyobo/sources/rhea.py @@ -5,7 +5,6 @@ import logging from typing import TYPE_CHECKING, Dict, Iterable, Optional -import bioversions import pystow from pyobo.struct import Obo, Reference, Term @@ -21,6 +20,7 @@ reaction_enabled_by_molecular_function, ) from pyobo.utils.path import ensure_df +from pyobo.api.utils import get_version if TYPE_CHECKING: import rdflib @@ -63,7 +63,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl """Get the Rhea RDF graph.""" # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf if version is None: - version = bioversions.get_version(PREFIX) + version = get_version(PREFIX) return pystow.ensure_rdf( "pyobo", "raw", diff --git a/src/pyobo/sources/uniprot/uniprot.py b/src/pyobo/sources/uniprot/uniprot.py index 79a2e1f2..1cc91c9d 100644 --- a/src/pyobo/sources/uniprot/uniprot.py +++ b/src/pyobo/sources/uniprot/uniprot.py @@ -6,7 +6,6 @@ from pathlib import Path from typing import Iterable, List, Optional, cast -import bioversions from tqdm.auto import tqdm from pyobo import Obo, Reference @@ -15,6 +14,7 @@ from pyobo.struct import Term, derives_from, enables, from_species, participates_in from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with from pyobo.utils.io import open_reader +from pyobo.api.utils import get_version PREFIX = "uniprot" BASE_URL = "https://rest.uniprot.org/uniprotkb/stream" @@ -166,7 +166,7 @@ def _parse_go(go_terms) -> List[Reference]: def ensure(version: Optional[str] = None, force: bool = False) -> Path: """Ensure the reviewed uniprot names are available.""" if version is None: - version = bioversions.get_version("uniprot") + version = get_version("uniprot") return RAW_MODULE.ensure( PREFIX, version, diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index 9b82baa1..0b9eb681 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -68,6 +68,7 @@ from ..utils.io import multidict, write_iterable_tsv from ..utils.misc import obo_to_owl from ..utils.path import get_prefix_obo_path, prefix_directory_join +from ..api.utils import get_version __all__ = [ "Synonym", @@ -583,10 +584,8 @@ def __post_init__(self): def _get_version(self) -> Optional[str]: if self.bioversions_key: - import bioversions - try: - return bioversions.get_version(self.bioversions_key) + return get_version(self.bioversions_key) except KeyError: logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this resource ") except IOError: diff --git a/src/pyobo/xrefdb/sources/chembl.py b/src/pyobo/xrefdb/sources/chembl.py index 6d98a917..56304760 100644 --- a/src/pyobo/xrefdb/sources/chembl.py +++ b/src/pyobo/xrefdb/sources/chembl.py @@ -4,7 +4,6 @@ from typing import Optional -import bioversions import pandas as pd from pyobo.constants import ( @@ -16,6 +15,7 @@ XREF_COLUMNS, ) from pyobo.utils.path import ensure_df +from pyobo.api.utils import get_version CHEMBL_COMPOUND_PREFIX = "chembl.compound" CHEMBL_TARGET_PREFIX = "chembl.target" @@ -26,7 +26,7 @@ def get_chembl_compound_equivalences_raw( ) -> pd.DataFrame: """Get the chemical representations raw dataframe.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") base_url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}" url = f"{base_url}/chembl_{version}_chemreps.txt.gz" @@ -36,7 +36,7 @@ def get_chembl_compound_equivalences_raw( def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFrame: """Get ChEMBL chemical equivalences.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") df = get_chembl_compound_equivalences_raw(version=version) rows = [] @@ -55,7 +55,7 @@ def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFr def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFrame: """Get ChEMBL protein equivalences.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt" df = ensure_df( @@ -75,7 +75,7 @@ def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFra def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame: """Get all ChEBML equivalences.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") return pd.concat( [ diff --git a/src/pyobo/xrefdb/sources/pubchem.py b/src/pyobo/xrefdb/sources/pubchem.py index 6482a37f..7bbfb7b6 100644 --- a/src/pyobo/xrefdb/sources/pubchem.py +++ b/src/pyobo/xrefdb/sources/pubchem.py @@ -4,11 +4,11 @@ from typing import Optional -import bioversions import pandas as pd from ...constants import XREF_COLUMNS from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id +from ...api.utils import get_version __all__ = [ "get_pubchem_mesh_df", @@ -18,7 +18,7 @@ def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame: """Get PubChem Compound-MeSH xrefs.""" if version is None: - version = bioversions.get_version("pubchem") + version = get_version("pubchem") cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH") return pd.DataFrame( [ From e5765da71e0e54e699d772139f6f038c030f084d Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 6 Aug 2024 13:18:26 -0400 Subject: [PATCH 2/7] Print information about VERSION_PINS --- src/pyobo/constants.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index 7413ed49..852f3754 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -6,6 +6,7 @@ import re import os import json +import click import pystow @@ -116,3 +117,10 @@ except Exception as e: raise ValueError("The value for the environment variable VERSION_PINS" " must be a valid JSON string") from e + +click.echo(f"These are the resource versions that are pinned.\n{VERSION_PINS}. " + f"\nPyobo will download the latest version of a resource if it's " + f"not pinned.\nIf you want to use a specific version of a " + f"resource, edit your VERSION_PINS environmental " + f"variable which is a JSON string to include a prefix and version " + f"name.") From 8ba10b9baed4d0210c95d674bda73021ab51372b Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 6 Aug 2024 15:49:19 -0400 Subject: [PATCH 3/7] Add conditional to extract curie version if not provided --- src/pyobo/api/names.py | 2 ++ src/pyobo/cli/lookup.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pyobo/api/names.py b/src/pyobo/api/names.py index 18319fd5..1959ce25 100644 --- a/src/pyobo/api/names.py +++ b/src/pyobo/api/names.py @@ -32,6 +32,8 @@ def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]: """Get the name for a CURIE, if possible.""" + if version is None: + version = get_version(curie.split(":")[0]) prefix, identifier = normalize_curie(curie) if prefix and identifier: return get_name(prefix, identifier, version=version) diff --git a/src/pyobo/cli/lookup.py b/src/pyobo/cli/lookup.py index 7d4746f0..06ff62e9 100644 --- a/src/pyobo/cli/lookup.py +++ b/src/pyobo/cli/lookup.py @@ -282,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str]) """Look up ancestors.""" curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version) for curie in sorted(curies or []): - click.echo(f"{curie}\t{get_name_by_curie(curie)}") + click.echo(f"{curie}\t{get_name_by_curie(curie, version)}") @lookup.command() @@ -295,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str """Look up descendants.""" curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version) for curie in sorted(curies or []): - click.echo(f"{curie}\t{get_name_by_curie(curie)}") + click.echo(f"{curie}\t{get_name_by_curie(curie, version)}") @lookup.command() From 8dc1918ff47ef6f5614403309992f886b9c42b40 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Mon, 12 Aug 2024 14:15:02 -0400 Subject: [PATCH 4/7] Log error rather than raising value error and reset VERSION_PINS to an empty dictionary --- src/pyobo/constants.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index 852f3754..a0444853 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -112,11 +112,14 @@ VERSION_PINS = json.loads(VERSION_PINS_STR) for k, v in VERSION_PINS.items(): if not isinstance(k, str) or not isinstance(v, str): - raise ValueError("The prefix and version name must both be " + logger.error("The prefix and version name must both be " "strings") -except Exception as e: - raise ValueError("The value for the environment variable VERSION_PINS" - " must be a valid JSON string") from e + VERSION_PINS = {} + break +except ValueError as e: + logger.error("The value for the environment variable VERSION_PINS" + " must be a valid JSON string") + VERSION_PINS = {} click.echo(f"These are the resource versions that are pinned.\n{VERSION_PINS}. " f"\nPyobo will download the latest version of a resource if it's " From 616f162e9bbf3344b486db6d08ead95bbc9e0e33 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Mon, 12 Aug 2024 14:15:58 -0400 Subject: [PATCH 5/7] Run linting changes (isort and black) to pass Github linting test --- src/pyobo/api/utils.py | 2 +- src/pyobo/constants.py | 35 ++++++++++++--------------- src/pyobo/sources/antibodyregistry.py | 2 +- src/pyobo/sources/biogrid.py | 2 +- src/pyobo/sources/hgnc.py | 3 +-- src/pyobo/sources/mesh.py | 2 +- src/pyobo/sources/pubchem.py | 3 +-- src/pyobo/sources/rhea.py | 2 +- src/pyobo/sources/uniprot/uniprot.py | 2 +- src/pyobo/struct/struct.py | 2 +- src/pyobo/xrefdb/sources/chembl.py | 2 +- src/pyobo/xrefdb/sources/pubchem.py | 2 +- 12 files changed, 27 insertions(+), 32 deletions(-) diff --git a/src/pyobo/api/utils.py b/src/pyobo/api/utils.py index 2a84dda5..2d683006 100644 --- a/src/pyobo/api/utils.py +++ b/src/pyobo/api/utils.py @@ -7,8 +7,8 @@ import bioversions -from ..utils.path import prefix_directory_join from ..constants import VERSION_PINS +from ..utils.path import prefix_directory_join __all__ = [ "get_version", diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index a0444853..87550f35 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -2,20 +2,15 @@ """Constants for PyOBO.""" +import json import logging -import re import os -import json -import click +import re +import click import pystow -__all__ = [ - "RAW_DIRECTORY", - "DATABASE_DIRECTORY", - "SPECIES_REMAPPING", - "VERSION_PINS" -] +__all__ = ["RAW_DIRECTORY", "DATABASE_DIRECTORY", "SPECIES_REMAPPING", "VERSION_PINS"] logger = logging.getLogger(__name__) @@ -112,18 +107,20 @@ VERSION_PINS = json.loads(VERSION_PINS_STR) for k, v in VERSION_PINS.items(): if not isinstance(k, str) or not isinstance(v, str): - logger.error("The prefix and version name must both be " - "strings") + logger.error("The prefix and version name must both be " "strings") VERSION_PINS = {} break except ValueError as e: - logger.error("The value for the environment variable VERSION_PINS" - " must be a valid JSON string") + logger.error( + "The value for the environment variable VERSION_PINS" " must be a valid JSON string" + ) VERSION_PINS = {} -click.echo(f"These are the resource versions that are pinned.\n{VERSION_PINS}. " - f"\nPyobo will download the latest version of a resource if it's " - f"not pinned.\nIf you want to use a specific version of a " - f"resource, edit your VERSION_PINS environmental " - f"variable which is a JSON string to include a prefix and version " - f"name.") +click.echo( + f"These are the resource versions that are pinned.\n{VERSION_PINS}. " + f"\nPyobo will download the latest version of a resource if it's " + f"not pinned.\nIf you want to use a specific version of a " + f"resource, edit your VERSION_PINS environmental " + f"variable which is a JSON string to include a prefix and version " + f"name." +) diff --git a/src/pyobo/sources/antibodyregistry.py b/src/pyobo/sources/antibodyregistry.py index 3f1e9f4a..20b8b229 100644 --- a/src/pyobo/sources/antibodyregistry.py +++ b/src/pyobo/sources/antibodyregistry.py @@ -10,8 +10,8 @@ from tqdm.auto import tqdm from pyobo import Obo, Term -from pyobo.utils.path import ensure_df from pyobo.api.utils import get_version +from pyobo.utils.path import ensure_df __all__ = [ "AntibodyRegistryGetter", diff --git a/src/pyobo/sources/biogrid.py b/src/pyobo/sources/biogrid.py index 30076f58..2843fbd3 100644 --- a/src/pyobo/sources/biogrid.py +++ b/src/pyobo/sources/biogrid.py @@ -7,10 +7,10 @@ import pandas as pd +from pyobo.api.utils import get_version from pyobo.resources.ncbitaxon import get_ncbitaxon_id from pyobo.utils.cache import cached_mapping from pyobo.utils.path import ensure_df, prefix_directory_join -from pyobo.api.utils import get_version PREFIX = "biogrid" BASE_URL = "https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive" diff --git a/src/pyobo/sources/hgnc.py b/src/pyobo/sources/hgnc.py index 0451fd13..d27430f2 100644 --- a/src/pyobo/sources/hgnc.py +++ b/src/pyobo/sources/hgnc.py @@ -10,10 +10,10 @@ from operator import attrgetter from typing import DefaultDict, Dict, Iterable, Optional - from tabulate import tabulate from tqdm.auto import tqdm +from pyobo.api.utils import get_version from pyobo.struct import ( Obo, Reference, @@ -29,7 +29,6 @@ ) from pyobo.struct.typedef import exact_match from pyobo.utils.path import ensure_path, prefix_directory_join -from pyobo.api.utils import get_version __all__ = [ "HGNCGetter", diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py index e85ca5ba..b7c0ddae 100644 --- a/src/pyobo/sources/mesh.py +++ b/src/pyobo/sources/mesh.py @@ -11,12 +11,12 @@ from tqdm.auto import tqdm +from pyobo.api.utils import get_version from pyobo.identifier_utils import standardize_ec from pyobo.struct import Obo, Reference, Synonym, Term from pyobo.utils.cache import cached_json, cached_mapping from pyobo.utils.io import parse_xml_gz from pyobo.utils.path import ensure_path, prefix_directory_join -from pyobo.api.utils import get_version __all__ = [ "MeSHGetter", diff --git a/src/pyobo/sources/pubchem.py b/src/pyobo/sources/pubchem.py index 18a99031..6c91ca08 100644 --- a/src/pyobo/sources/pubchem.py +++ b/src/pyobo/sources/pubchem.py @@ -5,16 +5,15 @@ import logging from typing import Iterable, Mapping, Optional - import pandas as pd from bioregistry.utils import removeprefix from tqdm.auto import tqdm from ..api import get_name_id_mapping +from ..api.utils import get_version from ..struct import Obo, Reference, Synonym, Term from ..utils.iter import iterate_gzips_together from ..utils.path import ensure_df, ensure_path -from ..api.utils import get_version __all__ = [ "PubChemCompoundGetter", diff --git a/src/pyobo/sources/rhea.py b/src/pyobo/sources/rhea.py index 7587a79f..412ef8c2 100644 --- a/src/pyobo/sources/rhea.py +++ b/src/pyobo/sources/rhea.py @@ -7,6 +7,7 @@ import pystow +from pyobo.api.utils import get_version from pyobo.struct import Obo, Reference, Term from pyobo.struct.typedef import ( TypeDef, @@ -20,7 +21,6 @@ reaction_enabled_by_molecular_function, ) from pyobo.utils.path import ensure_df -from pyobo.api.utils import get_version if TYPE_CHECKING: import rdflib diff --git a/src/pyobo/sources/uniprot/uniprot.py b/src/pyobo/sources/uniprot/uniprot.py index 1cc91c9d..6b1a639d 100644 --- a/src/pyobo/sources/uniprot/uniprot.py +++ b/src/pyobo/sources/uniprot/uniprot.py @@ -9,12 +9,12 @@ from tqdm.auto import tqdm from pyobo import Obo, Reference +from pyobo.api.utils import get_version from pyobo.constants import RAW_MODULE from pyobo.identifier_utils import standardize_ec from pyobo.struct import Term, derives_from, enables, from_species, participates_in from pyobo.struct.typedef import gene_product_of, located_in, molecularly_interacts_with from pyobo.utils.io import open_reader -from pyobo.api.utils import get_version PREFIX = "uniprot" BASE_URL = "https://rest.uniprot.org/uniprotkb/stream" diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index 0b9eb681..d30a93ff 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -56,6 +56,7 @@ term_replaced_by, ) from .utils import comma_separate, obo_escape_slim +from ..api.utils import get_version from ..constants import ( DATE_FORMAT, NCBITAXON_PREFIX, @@ -68,7 +69,6 @@ from ..utils.io import multidict, write_iterable_tsv from ..utils.misc import obo_to_owl from ..utils.path import get_prefix_obo_path, prefix_directory_join -from ..api.utils import get_version __all__ = [ "Synonym", diff --git a/src/pyobo/xrefdb/sources/chembl.py b/src/pyobo/xrefdb/sources/chembl.py index 56304760..a3b04b6c 100644 --- a/src/pyobo/xrefdb/sources/chembl.py +++ b/src/pyobo/xrefdb/sources/chembl.py @@ -6,6 +6,7 @@ import pandas as pd +from pyobo.api.utils import get_version from pyobo.constants import ( PROVENANCE, SOURCE_ID, @@ -15,7 +16,6 @@ XREF_COLUMNS, ) from pyobo.utils.path import ensure_df -from pyobo.api.utils import get_version CHEMBL_COMPOUND_PREFIX = "chembl.compound" CHEMBL_TARGET_PREFIX = "chembl.target" diff --git a/src/pyobo/xrefdb/sources/pubchem.py b/src/pyobo/xrefdb/sources/pubchem.py index 7bbfb7b6..673858c0 100644 --- a/src/pyobo/xrefdb/sources/pubchem.py +++ b/src/pyobo/xrefdb/sources/pubchem.py @@ -6,9 +6,9 @@ import pandas as pd +from ...api.utils import get_version from ...constants import XREF_COLUMNS from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id -from ...api.utils import get_version __all__ = [ "get_pubchem_mesh_df", From 5d93d6c5472584d3d814dacfd84163e88a4a5827 Mon Sep 17 00:00:00 2001 From: "Benjamin M. Gyori" Date: Tue, 13 Aug 2024 15:10:43 +0200 Subject: [PATCH 6/7] Add error to log --- src/pyobo/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index 87550f35..7fb2e3d3 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -112,7 +112,7 @@ break except ValueError as e: logger.error( - "The value for the environment variable VERSION_PINS" " must be a valid JSON string" + "The value for the environment variable VERSION_PINS must be a valid JSON string: %s" % e ) VERSION_PINS = {} From cd412ca9cd68842eb2a43b4a1a23e8437a9ca187 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 13 Aug 2024 10:20:35 -0400 Subject: [PATCH 7/7] Add assertions for version to fix mypy linting issues --- src/pyobo/cli/lookup.py | 4 ++-- src/pyobo/sources/mesh.py | 1 + src/pyobo/utils/path.py | 3 ++- src/pyobo/xrefdb/sources/pubchem.py | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pyobo/cli/lookup.py b/src/pyobo/cli/lookup.py index 06ff62e9..cf2f2b10 100644 --- a/src/pyobo/cli/lookup.py +++ b/src/pyobo/cli/lookup.py @@ -282,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str]) """Look up ancestors.""" curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version) for curie in sorted(curies or []): - click.echo(f"{curie}\t{get_name_by_curie(curie, version)}") + click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}") @lookup.command() @@ -295,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str """Look up descendants.""" curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version) for curie in sorted(curies or []): - click.echo(f"{curie}\t{get_name_by_curie(curie, version)}") + click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}") @lookup.command() diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py index b7c0ddae..7d5b81a2 100644 --- a/src/pyobo/sources/mesh.py +++ b/src/pyobo/sources/mesh.py @@ -333,6 +333,7 @@ def get_mesh_category_curies( """ if version is None: version = get_version("mesh") + assert version is not None tree_to_mesh = get_tree_to_mesh_id(version=version) rv = [] for i in range(1, 100): diff --git a/src/pyobo/utils/path.py b/src/pyobo/utils/path.py index b5e27971..4fac7643 100644 --- a/src/pyobo/utils/path.py +++ b/src/pyobo/utils/path.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -VersionHint = Union[None, str, Callable[[], str]] +VersionHint = Union[None, str, Callable[[], Optional[str]]] requests_ftp.monkeypatch_session() @@ -46,6 +46,7 @@ def prefix_directory_join( logger.info("[%s] got version %s", prefix, version) elif not isinstance(version, str): raise TypeError(f"Invalid type: {version} ({type(version)})") + assert version is not None version = cleanup_version(version, prefix=prefix) if version is not None and "/" in version: raise ValueError(f"[{prefix}] Can not have slash in version: {version}") diff --git a/src/pyobo/xrefdb/sources/pubchem.py b/src/pyobo/xrefdb/sources/pubchem.py index 673858c0..09262a48 100644 --- a/src/pyobo/xrefdb/sources/pubchem.py +++ b/src/pyobo/xrefdb/sources/pubchem.py @@ -19,6 +19,7 @@ def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame: """Get PubChem Compound-MeSH xrefs.""" if version is None: version = get_version("pubchem") + assert version is not None cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH") return pd.DataFrame( [