biopragmatics · bgyori · Aug 13, 2024 · Aug 6, 2024 · Aug 6, 2024 · Aug 6, 2024
diff --git a/src/pyobo/api/names.py b/src/pyobo/api/names.py
@@ -32,6 +32,9 @@
 
 def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
     """Get the name for a CURIE, if possible."""
     prefix, identifier = normalize_curie(curie)
     if prefix and identifier:
+        if version is None:
+            # Use the prefix returned by normalize_curie (the one passed to get_name), not the raw text before ":"
+            version = get_version(prefix)
         return get_name(prefix, identifier, version=version)

diff --git a/src/pyobo/api/utils.py b/src/pyobo/api/utils.py
@@ -7,6 +7,7 @@
 
 import bioversions
 
+from ..constants import VERSION_PINS
 from ..utils.path import prefix_directory_join
 
 __all__ = [
@@ -25,6 +26,10 @@ def get_version(prefix: str) -> Optional[str]:
     :param prefix: the resource name
     :return: The version if available else None
     """
+    # Prioritize loaded environmental variable VERSION_PINS dictionary
+    version = VERSION_PINS.get(prefix)
+    if version:
+        return version
     try:
         version = bioversions.get_version(prefix)
     except KeyError:

diff --git a/src/pyobo/cli/lookup.py b/src/pyobo/cli/lookup.py
@@ -282,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str])
     """Look up ancestors."""
     curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version)
     for curie in sorted(curies or []):
-        click.echo(f"{curie}\t{get_name_by_curie(curie)}")
+        click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
 
 
 @lookup.command()
@@ -295,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str
     """Look up descendants."""
     curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version)
     for curie in sorted(curies or []):
-        click.echo(f"{curie}\t{get_name_by_curie(curie)}")
+        click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")
 
 
 @lookup.command()

diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py
@@ -2,16 +2,15 @@
 
 """Constants for PyOBO."""
 
+import json
 import logging
+import os
 import re
 
+import click
 import pystow
 
-__all__ = [
-    "RAW_DIRECTORY",
-    "DATABASE_DIRECTORY",
-    "SPECIES_REMAPPING",
-]
+__all__ = ["RAW_DIRECTORY", "DATABASE_DIRECTORY", "SPECIES_REMAPPING", "VERSION_PINS"]
 
 logger = logging.getLogger(__name__)
 
@@ -80,7 +79,6 @@
 SPECIES_RECORD = "5334738"
 SPECIES_FILE = "species.tsv.gz"
 
-
 NCBITAXON_PREFIX = "NCBITaxon"
 DATE_FORMAT = "%d:%m:%Y %H:%M"
 PROVENANCE_PREFIXES = {
@@ -99,3 +97,41 @@
     "isbn",
     "issn",
 }
+
+# Load the version pin dictionary from the environment variable VERSION_PINS.
+# The value must be a JSON object whose keys (prefixes) and values (version
+# names) are all strings; anything else is logged as an error and no versions
+# are pinned (VERSION_PINS is left as an empty dictionary).
+try:
+    VERSION_PINS_STR = os.getenv("VERSION_PINS")
+    if not VERSION_PINS_STR:
+        VERSION_PINS = {}
+    else:
+        VERSION_PINS = json.loads(VERSION_PINS_STR)
+        if not isinstance(VERSION_PINS, dict):
+            # Valid JSON that is not an object (e.g. a list) has no .items()
+            logger.error("VERSION_PINS must be a JSON object mapping prefixes to versions")
+            VERSION_PINS = {}
+        for k, v in VERSION_PINS.items():
+            if not isinstance(k, str) or not isinstance(v, str):
+                logger.error("The prefix and version name must both be " "strings")
+                # Discard the pins only when an invalid entry was actually found
+                VERSION_PINS = {}
+                break
+except ValueError as e:
+    logger.error(
+        "The value for the environment variable VERSION_PINS must be a valid JSON string: %s" % e
+    )
+    VERSION_PINS = {}
+
+# NOTE(review): this banner is written with click.echo when the module is
+# imported, so any program importing it gets the text mixed into its own
+# output -- consider logging it or writing it to stderr instead.
+click.echo(
+    f"These are the resource versions that are pinned.\n{VERSION_PINS}. "
+    f"\nPyobo will download the latest version of a resource if it's "
+    f"not pinned.\nIf you want to use a specific version of a "
+    f"resource, edit your VERSION_PINS environmental "
+    f"variable which is a JSON string to include a prefix and version "
+    f"name."
+)
diff --git a/src/pyobo/sources/antibodyregistry.py b/src/pyobo/sources/antibodyregistry.py
@@ -5,12 +5,12 @@
 import logging
 from typing import Iterable, Mapping, Optional
 
-import bioversions
 import pandas as pd
 from bioregistry.utils import removeprefix
 from tqdm.auto import tqdm
 
 from pyobo import Obo, Term
+from pyobo.api.utils import get_version
 from pyobo.utils.path import ensure_df
 
 __all__ = [
@@ -27,7 +27,7 @@
 def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
     """Get the BioGRID identifiers mapping dataframe."""
     if version is None:
-        version = bioversions.get_version(PREFIX)
+        version = get_version(PREFIX)
     df = ensure_df(
         PREFIX,
         url=URL,

diff --git a/src/pyobo/sources/biogrid.py b/src/pyobo/sources/biogrid.py
@@ -5,9 +5,9 @@
 from functools import partial
 from typing import Mapping, Optional
 
-import bioversions
 import pandas as pd
 
+from pyobo.api.utils import get_version
 from pyobo.resources.ncbitaxon import get_ncbitaxon_id
 from pyobo.utils.cache import cached_mapping
 from pyobo.utils.path import ensure_df, prefix_directory_join
@@ -52,7 +52,7 @@ def _lookup(name: str) -> Optional[str]:
 
 def get_df() -> pd.DataFrame:
     """Get the BioGRID identifiers mapping dataframe."""
-    version = bioversions.get_version("biogrid")
+    version = get_version("biogrid")
     url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip"
     df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version)
     df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup)
@@ -65,7 +65,7 @@ def get_df() -> pd.DataFrame:
         "cache",
         "xrefs",
         name="ncbigene.tsv",
-        version=partial(bioversions.get_version, PREFIX),
+        version=partial(get_version, PREFIX),
     ),
     header=["biogrid_id", "ncbigene_id"],
 )

diff --git a/src/pyobo/sources/hgnc.py b/src/pyobo/sources/hgnc.py
@@ -10,10 +10,10 @@
 from operator import attrgetter
 from typing import DefaultDict, Dict, Iterable, Optional
 
-import bioversions
 from tabulate import tabulate
 from tqdm.auto import tqdm
 
+from pyobo.api.utils import get_version
 from pyobo.struct import (
     Obo,
     Reference,
@@ -241,7 +241,7 @@ def get_obo(*, force: bool = False) -> Obo:
 def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:  # noqa:C901
     """Get HGNC terms."""
     if version is None:
-        version = bioversions.get_version("hgnc")
+        version = get_version("hgnc")
     unhandled_entry_keys: typing.Counter[str] = Counter()
     unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict)
     path = ensure_path(

diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py
@@ -11,6 +11,7 @@
 
 from tqdm.auto import tqdm
 
+from pyobo.api.utils import get_version
 from pyobo.identifier_utils import standardize_ec
 from pyobo.struct import Obo, Reference, Synonym, Term
 from pyobo.utils.cache import cached_json, cached_mapping
@@ -331,9 +332,8 @@ def get_mesh_category_curies(
     .. seealso:: https://meshb.nlm.nih.gov/treeView
     """
     if version is None:
-        import bioversions
-
-        version = bioversions.get_version("mesh")
+        version = get_version("mesh")
+        assert version is not None
     tree_to_mesh = get_tree_to_mesh_id(version=version)
     rv = []
     for i in range(1, 100):

diff --git a/src/pyobo/sources/pubchem.py b/src/pyobo/sources/pubchem.py
@@ -5,12 +5,12 @@
 import logging
 from typing import Iterable, Mapping, Optional
 
-import bioversions
 import pandas as pd
 from bioregistry.utils import removeprefix
 from tqdm.auto import tqdm
 
 from ..api import get_name_id_mapping
+from ..api.utils import get_version
 from ..struct import Obo, Reference, Synonym, Term
 from ..utils.iter import iterate_gzips_together
 from ..utils.path import ensure_df, ensure_path
@@ -26,7 +26,7 @@
 
 def _get_pubchem_extras_url(version: Optional[str], end: str) -> str:
     if version is None:
-        version = bioversions.get_version("pubchem")
+        version = get_version("pubchem")
     return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}"
 
 
@@ -100,7 +100,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]:
 
 def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
     if version is None:
-        version = bioversions.get_version("pubchem")
+        version = get_version("pubchem")
     # 2 tab-separated columns: compound_id, name
     cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz")
     cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force)

diff --git a/src/pyobo/sources/rhea.py b/src/pyobo/sources/rhea.py
@@ -5,9 +5,9 @@
 import logging
 from typing import TYPE_CHECKING, Dict, Iterable, Optional
 
-import bioversions
 import pystow
 
+from pyobo.api.utils import get_version
 from pyobo.struct import Obo, Reference, Term
 from pyobo.struct.typedef import (
     TypeDef,
@@ -63,7 +63,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
     """Get the Rhea RDF graph."""
     # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
     if version is None:
-        version = bioversions.get_version(PREFIX)
+        version = get_version(PREFIX)
     return pystow.ensure_rdf(
         "pyobo",
         "raw",

diff --git a/src/pyobo/sources/uniprot/uniprot.py b/src/pyobo/sources/uniprot/uniprot.py
@@ -6,10 +6,10 @@
 from pathlib import Path
 from typing import Iterable, List, Optional, cast
 
-import bioversions
 from tqdm.auto import tqdm
 
 from pyobo import Obo, Reference
+from pyobo.api.utils import get_version
 from pyobo.constants import RAW_MODULE
 from pyobo.identifier_utils import standardize_ec
 from pyobo.struct import Term, derives_from, enables, from_species, participates_in
@@ -166,7 +166,7 @@ def _parse_go(go_terms) -> List[Reference]:
 def ensure(version: Optional[str] = None, force: bool = False) -> Path:
     """Ensure the reviewed uniprot names are available."""
     if version is None:
-        version = bioversions.get_version("uniprot")
+        version = get_version("uniprot")
     return RAW_MODULE.ensure(
         PREFIX,
         version,

diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py
@@ -56,6 +56,7 @@
     term_replaced_by,
 )
 from .utils import comma_separate, obo_escape_slim
+from ..api.utils import get_version
 from ..constants import (
     DATE_FORMAT,
     NCBITAXON_PREFIX,
@@ -583,10 +584,8 @@ def __post_init__(self):
 
     def _get_version(self) -> Optional[str]:
         if self.bioversions_key:
-            import bioversions
-
             try:
-                return bioversions.get_version(self.bioversions_key)
+                return get_version(self.bioversions_key)
             except KeyError:
                 logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this resource ")
             except IOError:

diff --git a/src/pyobo/utils/path.py b/src/pyobo/utils/path.py
@@ -25,7 +25,7 @@
 
 logger = logging.getLogger(__name__)
 
-VersionHint = Union[None, str, Callable[[], str]]
+VersionHint = Union[None, str, Callable[[], Optional[str]]]
 
 requests_ftp.monkeypatch_session()
 
@@ -46,6 +46,7 @@ def prefix_directory_join(
         logger.info("[%s] got version %s", prefix, version)
     elif not isinstance(version, str):
         raise TypeError(f"Invalid type: {version} ({type(version)})")
+    assert version is not None
     version = cleanup_version(version, prefix=prefix)
     if version is not None and "/" in version:
         raise ValueError(f"[{prefix}] Can not have slash in version: {version}")

diff --git a/src/pyobo/xrefdb/sources/chembl.py b/src/pyobo/xrefdb/sources/chembl.py
@@ -4,9 +4,9 @@
 
 from typing import Optional
 
-import bioversions
 import pandas as pd
 
+from pyobo.api.utils import get_version
 from pyobo.constants import (
     PROVENANCE,
     SOURCE_ID,
@@ -26,7 +26,7 @@ def get_chembl_compound_equivalences_raw(
 ) -> pd.DataFrame:
     """Get the chemical representations raw dataframe."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     base_url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}"
     url = f"{base_url}/chembl_{version}_chemreps.txt.gz"
@@ -36,7 +36,7 @@ def get_chembl_compound_equivalences_raw(
 def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFrame:
     """Get ChEMBL chemical equivalences."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     df = get_chembl_compound_equivalences_raw(version=version)
     rows = []
@@ -55,7 +55,7 @@ def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFr
 def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFrame:
     """Get ChEMBL protein equivalences."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
     df = ensure_df(
@@ -75,7 +75,7 @@ def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFra
 def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame:
     """Get all ChEBML equivalences."""
     if version is None:
-        version = bioversions.get_version("chembl")
+        version = get_version("chembl")
 
     return pd.concat(
         [

diff --git a/src/pyobo/xrefdb/sources/pubchem.py b/src/pyobo/xrefdb/sources/pubchem.py
@@ -4,9 +4,9 @@
 
 from typing import Optional
 
-import bioversions
 import pandas as pd
 
+from ...api.utils import get_version
 from ...constants import XREF_COLUMNS
 from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id
 
@@ -18,7 +18,8 @@
 def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame:
     """Get PubChem Compound-MeSH xrefs."""
     if version is None:
-        version = bioversions.get_version("pubchem")
+        version = get_version("pubchem")
+        assert version is not None
     cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH")
     return pd.DataFrame(
         [