More version getter cleanup

biopragmatics · Apr 18, 2024 · commit 61fa850 · 1 parent 207bc5f
Show file tree

Hide file tree

Showing 5 changed files with 25 additions and 27 deletions.
diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py
@@ -4,17 +4,13 @@
 
 import logging
 import re
-from functools import partial
-from typing import Callable
 
-import bioversions
 import pystow
 
 __all__ = [
     "RAW_DIRECTORY",
     "DATABASE_DIRECTORY",
     "SPECIES_REMAPPING",
-    "version_getter",
 ]
 
 logger = logging.getLogger(__name__)
@@ -85,11 +81,6 @@
 SPECIES_FILE = "species.tsv.gz"
 
 
-def version_getter(name: str) -> Callable[[], str]:
-    """Make a function appropriate for getting versions."""
-    return partial(bioversions.get_version, name)
-
-
 NCBITAXON_PREFIX = "NCBITaxon"
 DATE_FORMAT = "%d:%m:%Y %H:%M"
 PROVENANCE_PREFIXES = {

diff --git a/src/pyobo/sources/antibodyregistry.py b/src/pyobo/sources/antibodyregistry.py
@@ -24,9 +24,10 @@
 CHUNKSIZE = 20_000
 
 
-def get_chunks(force: bool = False) -> pd.DataFrame:
+def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
     """Get the BioGRID identifiers mapping dataframe."""
-    version = bioversions.get_version(PREFIX)
+    if version is None:
+        version = bioversions.get_version(PREFIX)
     df = ensure_df(
         PREFIX,
         url=URL,
@@ -47,7 +48,7 @@ class AntibodyRegistryGetter(Obo):
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-        return iter_terms(force=force)
+        return iter_terms(force=force, version=self._version_or_raise)
 
 
 def get_obo(*, force: bool = False) -> Obo:
@@ -74,9 +75,9 @@ def get_obo(*, force: bool = False) -> Obo:
 }
 
 
-def iter_terms(force: bool = False) -> Iterable[Term]:
+def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
     """Iterate over antibodies."""
-    chunks = get_chunks(force=force)
+    chunks = get_chunks(force=force, version=version)
     needs_curating = set()
     # df['vendor'] = df['vendor'].map(bioregistry.normalize_prefix)
     it = tqdm(chunks, desc=f"{PREFIX}, chunkssize={CHUNKSIZE}")

diff --git a/src/pyobo/sources/biogrid.py b/src/pyobo/sources/biogrid.py
@@ -2,12 +2,12 @@
 
 """Extract and convert BioGRID identifiers."""
 
+from functools import partial
 from typing import Mapping, Optional
 
 import bioversions
 import pandas as pd
 
-from pyobo.constants import version_getter
 from pyobo.resources.ncbitaxon import get_ncbitaxon_id
 from pyobo.utils.cache import cached_mapping
 from pyobo.utils.path import ensure_df, prefix_directory_join
@@ -61,7 +61,11 @@ def get_df() -> pd.DataFrame:
 
 @cached_mapping(
     path=prefix_directory_join(
-        PREFIX, "cache", "xrefs", name="ncbigene.tsv", version=version_getter(PREFIX)
+        PREFIX,
+        "cache",
+        "xrefs",
+        name="ncbigene.tsv",
+        version=partial(bioversions.get_version, PREFIX),
     ),
     header=["biogrid_id", "ncbigene_id"],
 )

diff --git a/src/pyobo/sources/ccle.py b/src/pyobo/sources/ccle.py
@@ -50,37 +50,37 @@ def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[T
         yield term
 
 
-def get_version() -> str:
+def get_ccle_static_version() -> str:
     """Get the default version of CCLE's cell lines."""
     return "2019"
 
 
 def get_url(version: Optional[str] = None) -> str:
     """Get the cBioPortal URL for the given version of CCLE's cell lines."""
     if version is None:
-        version = get_version()
+        version = get_ccle_static_version()
     return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz"
 
 
 def get_inner(version: Optional[str] = None) -> str:
     """Get the inner tarfile path."""
     if version is None:
-        version = get_version()
+        version = get_ccle_static_version()
     return f"ccle_broad_{version}/data_clinical_sample.txt"
 
 
 def ensure(version: Optional[str] = None, **kwargs) -> Path:
     """Ensure the given version is downloaded."""
     if version is None:
-        version = get_version()
+        version = get_ccle_static_version()
     url = get_url(version=version)
     return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs)
 
 
 def ensure_df(version: Optional[str] = None, force: bool = False) -> pd.DataFrame:
     """Get the CCLE clinical sample dataframe."""
     if version is None:
-        version = get_version()
+        version = get_ccle_static_version()
     path = ensure(version=version, force=force)
     inner_path = get_inner(version=version)
     with tarfile.open(path) as tf:

diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py
@@ -318,21 +318,23 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
     ]
 
 
-def get_mesh_category_curies(letter: str, skip: Optional[Collection[str]] = None) -> List[str]:
+def get_mesh_category_curies(
+    letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None
+) -> List[str]:
     """Get the MeSH LUIDs for a category, by letter (e.g., "A").
 
     :param letter: The MeSH tree, A for anatomy, C for disease, etc.
     :param skip: An optional collection of MeSH tree codes to skip, such as "A03"
+    :param version: The MeSH version to use. Defaults to latest
     :returns: A list of MeSH CURIE strings for the top level of each MeSH tree.
 
     .. seealso:: https://meshb.nlm.nih.gov/treeView
     """
-    import bioversions
+    if version is None:
+        import bioversions
 
-    mesh_version = bioversions.get_version("mesh")
-    if mesh_version is None:
-        raise ValueError
-    tree_to_mesh = get_tree_to_mesh_id(mesh_version)
+        version = bioversions.get_version("mesh")
+    tree_to_mesh = get_tree_to_mesh_id(version=version)
     rv = []
     for i in range(1, 100):
         key = f"{letter}{i:02}"