Skip to content

Commit

Permalink
More version getter cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Apr 18, 2024
1 parent 207bc5f commit 61fa850
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 27 deletions.
9 changes: 0 additions & 9 deletions src/pyobo/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,13 @@

import logging
import re
from functools import partial
from typing import Callable

import bioversions
import pystow

__all__ = [
"RAW_DIRECTORY",
"DATABASE_DIRECTORY",
"SPECIES_REMAPPING",
"version_getter",
]

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -85,11 +81,6 @@
SPECIES_FILE = "species.tsv.gz"


def version_getter(name: str) -> Callable[[], str]:
    """Build a zero-argument callable that looks up the version of a resource.

    :param name: The resource name that is passed on to
        :func:`bioversions.get_version`.
    :returns: A callable taking no arguments. The lookup is deferred: nothing
        is fetched until the returned callable is invoked, at which point it
        calls ``bioversions.get_version(name)`` and returns the result.
    """
    # Bind the name now, but leave the actual version lookup for call time.
    deferred_lookup = partial(bioversions.get_version, name)
    return deferred_lookup


NCBITAXON_PREFIX = "NCBITaxon"
DATE_FORMAT = "%d:%m:%Y %H:%M"
PROVENANCE_PREFIXES = {
Expand Down
11 changes: 6 additions & 5 deletions src/pyobo/sources/antibodyregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
CHUNKSIZE = 20_000


def get_chunks(force: bool = False) -> pd.DataFrame:
def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
"""Get the Antibody Registry dataframe chunks."""
version = bioversions.get_version(PREFIX)
if version is None:
version = bioversions.get_version(PREFIX)
df = ensure_df(
PREFIX,
url=URL,
Expand All @@ -47,7 +48,7 @@ class AntibodyRegistryGetter(Obo):

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
return iter_terms(force=force)
return iter_terms(force=force, version=self._version_or_raise)


def get_obo(*, force: bool = False) -> Obo:
Expand All @@ -74,9 +75,9 @@ def get_obo(*, force: bool = False) -> Obo:
}


def iter_terms(force: bool = False) -> Iterable[Term]:
def iter_terms(*, force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
"""Iterate over antibodies."""
chunks = get_chunks(force=force)
chunks = get_chunks(force=force, version=version)
needs_curating = set()
# df['vendor'] = df['vendor'].map(bioregistry.normalize_prefix)
it = tqdm(chunks, desc=f"{PREFIX}, chunkssize={CHUNKSIZE}")
Expand Down
8 changes: 6 additions & 2 deletions src/pyobo/sources/biogrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

"""Extract and convert BioGRID identifiers."""

from functools import partial
from typing import Mapping, Optional

import bioversions
import pandas as pd

from pyobo.constants import version_getter
from pyobo.resources.ncbitaxon import get_ncbitaxon_id
from pyobo.utils.cache import cached_mapping
from pyobo.utils.path import ensure_df, prefix_directory_join
Expand Down Expand Up @@ -61,7 +61,11 @@ def get_df() -> pd.DataFrame:

@cached_mapping(
path=prefix_directory_join(
PREFIX, "cache", "xrefs", name="ncbigene.tsv", version=version_getter(PREFIX)
PREFIX,
"cache",
"xrefs",
name="ncbigene.tsv",
version=partial(bioversions.get_version, PREFIX),
),
header=["biogrid_id", "ncbigene_id"],
)
Expand Down
10 changes: 5 additions & 5 deletions src/pyobo/sources/ccle.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,37 +50,37 @@ def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[T
yield term


def get_version() -> str:
def get_ccle_static_version() -> str:
"""Get the default version of CCLE's cell lines."""
return "2019"


def get_url(version: Optional[str] = None) -> str:
"""Get the cBioPortal URL for the given version of CCLE's cell lines."""
if version is None:
version = get_version()
version = get_ccle_static_version()
return f"https://cbioportal-datahub.s3.amazonaws.com/ccle_broad_{version}.tar.gz"


def get_inner(version: Optional[str] = None) -> str:
"""Get the inner tarfile path."""
if version is None:
version = get_version()
version = get_ccle_static_version()
return f"ccle_broad_{version}/data_clinical_sample.txt"


def ensure(version: Optional[str] = None, **kwargs) -> Path:
"""Ensure the given version is downloaded."""
if version is None:
version = get_version()
version = get_ccle_static_version()
url = get_url(version=version)
return pystow.ensure("pyobo", "raw", PREFIX, version, url=url, **kwargs)


def ensure_df(version: Optional[str] = None, force: bool = False) -> pd.DataFrame:
"""Get the CCLE clinical sample dataframe."""
if version is None:
version = get_version()
version = get_ccle_static_version()
path = ensure(version=version, force=force)
inner_path = get_inner(version=version)
with tarfile.open(path) as tf:
Expand Down
14 changes: 8 additions & 6 deletions src/pyobo/sources/mesh.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,21 +318,23 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
]


def get_mesh_category_curies(letter: str, skip: Optional[Collection[str]] = None) -> List[str]:
def get_mesh_category_curies(
letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None
) -> List[str]:
"""Get the MeSH CURIEs for a category, by letter (e.g., "A").
:param letter: The MeSH tree, A for anatomy, C for disease, etc.
:param skip: An optional collection of MeSH tree codes to skip, such as "A03"
:param version: The MeSH version to use. If not given, the latest version is looked up with :mod:`bioversions`.
:returns: A list of MeSH CURIE strings for the top level of each MeSH tree.
.. seealso:: https://meshb.nlm.nih.gov/treeView
"""
import bioversions
if version is None:
import bioversions

mesh_version = bioversions.get_version("mesh")
if mesh_version is None:
raise ValueError
tree_to_mesh = get_tree_to_mesh_id(mesh_version)
version = bioversions.get_version("mesh")
tree_to_mesh = get_tree_to_mesh_id(version=version)
rv = []
for i in range(1, 100):
key = f"{letter}{i:02}"
Expand Down

0 comments on commit 61fa850

Please sign in to comment.