diff --git a/src/pyobo/api/names.py b/src/pyobo/api/names.py index 18319fd5..1959ce25 100644 --- a/src/pyobo/api/names.py +++ b/src/pyobo/api/names.py @@ -32,6 +32,9 @@ def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]: """Get the name for a CURIE, if possible.""" prefix, identifier = normalize_curie(curie) if prefix and identifier: + # Look up the default version with the normalized prefix, not the raw text before ":" + if version is None: + version = get_version(prefix) return get_name(prefix, identifier, version=version) diff --git a/src/pyobo/api/utils.py b/src/pyobo/api/utils.py index 0db22f73..2d683006 100644 --- a/src/pyobo/api/utils.py +++ b/src/pyobo/api/utils.py @@ -7,6 +7,7 @@ import bioversions +from ..constants import VERSION_PINS from ..utils.path import prefix_directory_join __all__ = [ @@ -25,6 +26,10 @@ def get_version(prefix: str) -> Optional[str]: :param prefix: the resource name :return: The version if available else None """ + # A version pinned through the VERSION_PINS environment variable takes priority over bioversions + version = VERSION_PINS.get(prefix) + if version: + return version try: version = bioversions.get_version(prefix) except KeyError: diff --git a/src/pyobo/cli/lookup.py b/src/pyobo/cli/lookup.py index 7d4746f0..cf2f2b10 100644 --- a/src/pyobo/cli/lookup.py +++ b/src/pyobo/cli/lookup.py @@ -282,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str]) """Look up ancestors.""" curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version) for curie in sorted(curies or []): - click.echo(f"{curie}\t{get_name_by_curie(curie)}") + click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}") @lookup.command() @@ -295,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str """Look up descendants.""" curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version) for curie in sorted(curies or []): - click.echo(f"{curie}\t{get_name_by_curie(curie)}") +
click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}") @lookup.command() diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index 3fd8279d..7fb2e3d3 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -2,16 +2,15 @@ """Constants for PyOBO.""" +import json import logging +import os import re +import click import pystow -__all__ = [ - "RAW_DIRECTORY", - "DATABASE_DIRECTORY", - "SPECIES_REMAPPING", -] +__all__ = ["RAW_DIRECTORY", "DATABASE_DIRECTORY", "SPECIES_REMAPPING", "VERSION_PINS"] logger = logging.getLogger(__name__) @@ -80,7 +79,6 @@ SPECIES_RECORD = "5334738" SPECIES_FILE = "species.tsv.gz" - NCBITAXON_PREFIX = "NCBITaxon" DATE_FORMAT = "%d:%m:%Y %H:%M" PROVENANCE_PREFIXES = { @@ -99,3 +97,31 @@ "isbn", "issn", } + +# Load the prefix-to-version pins from the VERSION_PINS environment variable (a JSON object) +try: + VERSION_PINS_STR = os.getenv("VERSION_PINS") + if not VERSION_PINS_STR: + VERSION_PINS = {} + else: + VERSION_PINS = json.loads(VERSION_PINS_STR) + # Valid JSON that is not an object (list, number, string) has no .items(); reject it as well + if not isinstance(VERSION_PINS, dict) or not all( + isinstance(v, str) for v in VERSION_PINS.values() + ): + logger.error("The prefix and version name must both be " "strings") + VERSION_PINS = {} +except ValueError as e: + logger.error( + "The value for the environment variable VERSION_PINS must be a valid JSON string: %s", e + ) + VERSION_PINS = {} + +click.echo( + f"These are the resource versions that are pinned.\n{VERSION_PINS}. " + f"\nPyobo will download the latest version of a resource if it's " + f"not pinned.\nIf you want to use a specific version of a " + f"resource, edit your VERSION_PINS environmental " + f"variable which is a JSON string to include a prefix and version " + f"name."
+) diff --git a/src/pyobo/sources/antibodyregistry.py b/src/pyobo/sources/antibodyregistry.py index df757590..20b8b229 100644 --- a/src/pyobo/sources/antibodyregistry.py +++ b/src/pyobo/sources/antibodyregistry.py @@ -5,12 +5,12 @@ import logging from typing import Iterable, Mapping, Optional -import bioversions import pandas as pd from bioregistry.utils import removeprefix from tqdm.auto import tqdm from pyobo import Obo, Term +from pyobo.api.utils import get_version from pyobo.utils.path import ensure_df __all__ = [ @@ -27,7 +27,7 @@ def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame: """Get the BioGRID identifiers mapping dataframe.""" if version is None: - version = bioversions.get_version(PREFIX) + version = get_version(PREFIX) df = ensure_df( PREFIX, url=URL, diff --git a/src/pyobo/sources/biogrid.py b/src/pyobo/sources/biogrid.py index 48440085..2843fbd3 100644 --- a/src/pyobo/sources/biogrid.py +++ b/src/pyobo/sources/biogrid.py @@ -5,9 +5,9 @@ from functools import partial from typing import Mapping, Optional -import bioversions import pandas as pd +from pyobo.api.utils import get_version from pyobo.resources.ncbitaxon import get_ncbitaxon_id from pyobo.utils.cache import cached_mapping from pyobo.utils.path import ensure_df, prefix_directory_join @@ -52,7 +52,7 @@ def _lookup(name: str) -> Optional[str]: def get_df() -> pd.DataFrame: """Get the BioGRID identifiers mapping dataframe.""" - version = bioversions.get_version("biogrid") + version = get_version("biogrid") url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip" df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version) df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup) @@ -65,7 +65,7 @@ def get_df() -> pd.DataFrame: "cache", "xrefs", name="ncbigene.tsv", - version=partial(bioversions.get_version, PREFIX), + version=partial(get_version, PREFIX), ), header=["biogrid_id", "ncbigene_id"], ) diff --git 
a/src/pyobo/sources/hgnc.py b/src/pyobo/sources/hgnc.py index 0e0fab5c..d27430f2 100644 --- a/src/pyobo/sources/hgnc.py +++ b/src/pyobo/sources/hgnc.py @@ -10,10 +10,10 @@ from operator import attrgetter from typing import DefaultDict, Dict, Iterable, Optional -import bioversions from tabulate import tabulate from tqdm.auto import tqdm +from pyobo.api.utils import get_version from pyobo.struct import ( Obo, Reference, @@ -241,7 +241,7 @@ def get_obo(*, force: bool = False) -> Obo: def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: # noqa:C901 """Get HGNC terms.""" if version is None: - version = bioversions.get_version("hgnc") + version = get_version("hgnc") unhandled_entry_keys: typing.Counter[str] = Counter() unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict) path = ensure_path( diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py index 0368e36a..7d5b81a2 100644 --- a/src/pyobo/sources/mesh.py +++ b/src/pyobo/sources/mesh.py @@ -11,6 +11,7 @@ from tqdm.auto import tqdm +from pyobo.api.utils import get_version from pyobo.identifier_utils import standardize_ec from pyobo.struct import Obo, Reference, Synonym, Term from pyobo.utils.cache import cached_json, cached_mapping @@ -331,9 +332,8 @@ def get_mesh_category_curies( .. 
seealso:: https://meshb.nlm.nih.gov/treeView """ if version is None: - import bioversions - - version = bioversions.get_version("mesh") + version = get_version("mesh") + assert version is not None tree_to_mesh = get_tree_to_mesh_id(version=version) rv = [] for i in range(1, 100): diff --git a/src/pyobo/sources/pubchem.py b/src/pyobo/sources/pubchem.py index fa82ff06..6c91ca08 100644 --- a/src/pyobo/sources/pubchem.py +++ b/src/pyobo/sources/pubchem.py @@ -5,12 +5,12 @@ import logging from typing import Iterable, Mapping, Optional -import bioversions import pandas as pd from bioregistry.utils import removeprefix from tqdm.auto import tqdm from ..api import get_name_id_mapping +from ..api.utils import get_version from ..struct import Obo, Reference, Synonym, Term from ..utils.iter import iterate_gzips_together from ..utils.path import ensure_df, ensure_path @@ -26,7 +26,7 @@ def _get_pubchem_extras_url(version: Optional[str], end: str) -> str: if version is None: - version = bioversions.get_version("pubchem") + version = get_version("pubchem") return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}" @@ -100,7 +100,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]: def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str: if version is None: - version = bioversions.get_version("pubchem") + version = get_version("pubchem") # 2 tab-separated columns: compound_id, name cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz") cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force) diff --git a/src/pyobo/sources/rhea.py b/src/pyobo/sources/rhea.py index 7459005f..412ef8c2 100644 --- a/src/pyobo/sources/rhea.py +++ b/src/pyobo/sources/rhea.py @@ -5,9 +5,9 @@ import logging from typing import TYPE_CHECKING, Dict, Iterable, Optional -import bioversions import pystow +from pyobo.api.utils import get_version from pyobo.struct import Obo, Reference, Term from 
pyobo.struct.typedef import ( TypeDef, @@ -63,7 +63,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl """Get the Rhea RDF graph.""" # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf if version is None: - version = bioversions.get_version(PREFIX) + version = get_version(PREFIX) return pystow.ensure_rdf( "pyobo", "raw", diff --git a/src/pyobo/sources/uniprot/uniprot.py b/src/pyobo/sources/uniprot/uniprot.py index 79a2e1f2..6b1a639d 100644 --- a/src/pyobo/sources/uniprot/uniprot.py +++ b/src/pyobo/sources/uniprot/uniprot.py @@ -6,10 +6,10 @@ from pathlib import Path from typing import Iterable, List, Optional, cast -import bioversions from tqdm.auto import tqdm from pyobo import Obo, Reference +from pyobo.api.utils import get_version from pyobo.constants import RAW_MODULE from pyobo.identifier_utils import standardize_ec from pyobo.struct import Term, derives_from, enables, from_species, participates_in @@ -166,7 +166,7 @@ def _parse_go(go_terms) -> List[Reference]: def ensure(version: Optional[str] = None, force: bool = False) -> Path: """Ensure the reviewed uniprot names are available.""" if version is None: - version = bioversions.get_version("uniprot") + version = get_version("uniprot") return RAW_MODULE.ensure( PREFIX, version, diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index 9b82baa1..d30a93ff 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -56,6 +56,7 @@ term_replaced_by, ) from .utils import comma_separate, obo_escape_slim +from ..api.utils import get_version from ..constants import ( DATE_FORMAT, NCBITAXON_PREFIX, @@ -583,10 +584,8 @@ def __post_init__(self): def _get_version(self) -> Optional[str]: if self.bioversions_key: - import bioversions - try: - return bioversions.get_version(self.bioversions_key) + return get_version(self.bioversions_key) except KeyError: logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this 
resource ") except IOError: diff --git a/src/pyobo/utils/path.py b/src/pyobo/utils/path.py index b5e27971..4fac7643 100644 --- a/src/pyobo/utils/path.py +++ b/src/pyobo/utils/path.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -VersionHint = Union[None, str, Callable[[], str]] +VersionHint = Union[None, str, Callable[[], Optional[str]]] requests_ftp.monkeypatch_session() @@ -46,6 +46,7 @@ def prefix_directory_join( logger.info("[%s] got version %s", prefix, version) elif not isinstance(version, str): raise TypeError(f"Invalid type: {version} ({type(version)})") + assert version is not None version = cleanup_version(version, prefix=prefix) if version is not None and "/" in version: raise ValueError(f"[{prefix}] Can not have slash in version: {version}") diff --git a/src/pyobo/xrefdb/sources/chembl.py b/src/pyobo/xrefdb/sources/chembl.py index 6d98a917..a3b04b6c 100644 --- a/src/pyobo/xrefdb/sources/chembl.py +++ b/src/pyobo/xrefdb/sources/chembl.py @@ -4,9 +4,9 @@ from typing import Optional -import bioversions import pandas as pd +from pyobo.api.utils import get_version from pyobo.constants import ( PROVENANCE, SOURCE_ID, @@ -26,7 +26,7 @@ def get_chembl_compound_equivalences_raw( ) -> pd.DataFrame: """Get the chemical representations raw dataframe.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") base_url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}" url = f"{base_url}/chembl_{version}_chemreps.txt.gz" @@ -36,7 +36,7 @@ def get_chembl_compound_equivalences_raw( def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFrame: """Get ChEMBL chemical equivalences.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") df = get_chembl_compound_equivalences_raw(version=version) rows = [] @@ -55,7 +55,7 @@ def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFr def 
get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFrame: """Get ChEMBL protein equivalences.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt" df = ensure_df( @@ -75,7 +75,7 @@ def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFra def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame: """Get all ChEBML equivalences.""" if version is None: - version = bioversions.get_version("chembl") + version = get_version("chembl") return pd.concat( [ diff --git a/src/pyobo/xrefdb/sources/pubchem.py b/src/pyobo/xrefdb/sources/pubchem.py index 6482a37f..09262a48 100644 --- a/src/pyobo/xrefdb/sources/pubchem.py +++ b/src/pyobo/xrefdb/sources/pubchem.py @@ -4,9 +4,9 @@ from typing import Optional -import bioversions import pandas as pd +from ...api.utils import get_version from ...constants import XREF_COLUMNS from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id @@ -18,7 +18,8 @@ def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame: """Get PubChem Compound-MeSH xrefs.""" if version is None: - version = bioversions.get_version("pubchem") + version = get_version("pubchem") + assert version is not None cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH") return pd.DataFrame( [