From 61fa850d8ed64b916fc9f726f3a4249a379673f6 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 18 Apr 2024 11:53:33 +0200 Subject: [PATCH] More version getter cleanup --- src/pyobo/constants.py | 9 --------- src/pyobo/sources/antibodyregistry.py | 11 ++++++----- src/pyobo/sources/biogrid.py | 8 ++++++-- src/pyobo/sources/ccle.py | 10 +++++----- src/pyobo/sources/mesh.py | 14 ++++++++------ 5 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index 6bb939dc..3fd8279d 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -4,17 +4,13 @@ import logging import re -from functools import partial -from typing import Callable -import bioversions import pystow __all__ = [ "RAW_DIRECTORY", "DATABASE_DIRECTORY", "SPECIES_REMAPPING", - "version_getter", ] logger = logging.getLogger(__name__) @@ -85,11 +81,6 @@ SPECIES_FILE = "species.tsv.gz" -def version_getter(name: str) -> Callable[[], str]: - """Make a function appropriate for getting versions.""" - return partial(bioversions.get_version, name) - - NCBITAXON_PREFIX = "NCBITaxon" DATE_FORMAT = "%d:%m:%Y %H:%M" PROVENANCE_PREFIXES = { diff --git a/src/pyobo/sources/antibodyregistry.py b/src/pyobo/sources/antibodyregistry.py index eb269317..df757590 100644 --- a/src/pyobo/sources/antibodyregistry.py +++ b/src/pyobo/sources/antibodyregistry.py @@ -24,9 +24,10 @@ CHUNKSIZE = 20_000 -def get_chunks(force: bool = False) -> pd.DataFrame: +def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame: """Get the BioGRID identifiers mapping dataframe.""" - version = bioversions.get_version(PREFIX) + if version is None: + version = bioversions.get_version(PREFIX) df = ensure_df( PREFIX, url=URL, @@ -47,7 +48,7 @@ class AntibodyRegistryGetter(Obo): def iter_terms(self, force: bool = False) -> Iterable[Term]: """Iterate over terms in the ontology.""" - return iter_terms(force=force) + return iter_terms(force=force, version=self._version_or_raise) def get_obo(*, force: bool = False) -> Obo: @@ -74,9 +75,9 @@ def get_obo(*, force: bool = False) -> Obo: } -def iter_terms(force: bool = False) -> Iterable[Term]: +def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterable[Term]: """Iterate over antibodies.""" - chunks = get_chunks(force=force) + chunks = get_chunks(force=force, version=version) needs_curating = set() # df['vendor'] = df['vendor'].map(bioregistry.normalize_prefix) it = tqdm(chunks, desc=f"{PREFIX}, chunkssize={CHUNKSIZE}") diff --git a/src/pyobo/sources/biogrid.py b/src/pyobo/sources/biogrid.py index 5c869742..48440085 100644 --- a/src/pyobo/sources/biogrid.py +++ b/src/pyobo/sources/biogrid.py @@ -2,12 +2,12 @@ """Extract and convert BioGRID identifiers.""" +from functools import partial from typing import Mapping, Optional import bioversions import pandas as pd -from pyobo.constants import version_getter from pyobo.resources.ncbitaxon import get_ncbitaxon_id from pyobo.utils.cache import cached_mapping from pyobo.utils.path import ensure_df, prefix_directory_join @@ -61,7 +61,11 @@ def get_df() -> pd.DataFrame: @cached_mapping( path=prefix_directory_join( - PREFIX, "cache", "xrefs", name="ncbigene.tsv", version=version_getter(PREFIX) + PREFIX, + "cache", + "xrefs", + name="ncbigene.tsv", + version=partial(bioversions.get_version, PREFIX), ), header=["biogrid_id", "ncbigene_id"], ) diff --git a/src/pyobo/sources/ccle.py b/src/pyobo/sources/ccle.py index 066cd8ae..1c0612d7 100644 --- a/src/pyobo/sources/ccle.py +++ b/src/pyobo/sources/ccle.py @@ -50,7 +50,7 @@ def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[T yield term -def get_version() -> str: +def get_ccle_static_version() -> str: """Get the default version of CCLE's cell lines.""" return "2019" @@ -58,21 +58,21 @@ def get_version() -> str: def get_url(version: Optional[str] = None) -> str: """Get the cBioPortal URL for the given version of CCLE's cell lines.""" if version is None: - version = get_version() + version = get_ccle_static_version() return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz" def get_inner(version: Optional[str] = None) -> str: """Get the inner tarfile path.""" if version is None: - version = get_version() + version = get_ccle_static_version() return f"ccle_broad_{version}/data_clinical_sample.txt" def ensure(version: Optional[str] = None, **kwargs) -> Path: """Ensure the given version is downloaded.""" if version is None: - version = get_version() + version = get_ccle_static_version() url = get_url(version=version) return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs) @@ -80,7 +80,7 @@ def ensure(version: Optional[str] = None, **kwargs) -> Path: def ensure_df(version: Optional[str] = None, force: bool = False) -> pd.DataFrame: """Get the CCLE clinical sample dataframe.""" if version is None: - version = get_version() + version = get_ccle_static_version() path = ensure(version=version, force=force) inner_path = get_inner(version=version) with tarfile.open(path) as tf: diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py index 5478ec30..0368e36a 100644 --- a/src/pyobo/sources/mesh.py +++ b/src/pyobo/sources/mesh.py @@ -318,21 +318,23 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]: ] -def get_mesh_category_curies(letter: str, skip: Optional[Collection[str]] = None) -> List[str]: +def get_mesh_category_curies( + letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None +) -> List[str]: """Get the MeSH LUIDs for a category, by letter (e.g., "A"). :param letter: The MeSH tree, A for anatomy, C for disease, etc. :param skip: An optional collection of MeSH tree codes to skip, such as "A03" + :param version: The MeSH version to use. Defaults to latest :returns: A list of MeSH CURIE strings for the top level of each MeSH tree. .. seealso:: https://meshb.nlm.nih.gov/treeView """ - import bioversions + if version is None: + import bioversions - mesh_version = bioversions.get_version("mesh") - if mesh_version is None: - raise ValueError - tree_to_mesh = get_tree_to_mesh_id(mesh_version) + version = bioversions.get_version("mesh") + tree_to_mesh = get_tree_to_mesh_id(version=version) rv = [] for i in range(1, 100): key = f"{letter}{i:02}"