Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable use of environmental variable version pin dictionary #189

Merged
merged 7 commits into from
Aug 13, 2024
2 changes: 2 additions & 0 deletions src/pyobo/api/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@

def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the name for a CURIE, if possible."""
if version is None:
version = get_version(curie.split(":")[0])
prefix, identifier = normalize_curie(curie)
if prefix and identifier:
return get_name(prefix, identifier, version=version)
Expand Down
5 changes: 5 additions & 0 deletions src/pyobo/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import bioversions

from ..constants import VERSION_PINS
from ..utils.path import prefix_directory_join

__all__ = [
Expand All @@ -25,6 +26,10 @@ def get_version(prefix: str) -> Optional[str]:
:param prefix: the resource name
:return: The version if available else None
"""
# Prioritize loaded environmental variable VERSION_PINS dictionary
version = VERSION_PINS.get(prefix)
if version:
return version
try:
version = bioversions.get_version(prefix)
except KeyError:
Expand Down
4 changes: 2 additions & 2 deletions src/pyobo/cli/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def ancestors(prefix: str, identifier: str, force: bool, version: Optional[str])
"""Look up ancestors."""
curies = get_ancestors(prefix=prefix, identifier=identifier, force=force, version=version)
for curie in sorted(curies or []):
click.echo(f"{curie}\t{get_name_by_curie(curie)}")
click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")


@lookup.command()
Expand All @@ -295,7 +295,7 @@ def descendants(prefix: str, identifier: str, force: bool, version: Optional[str
"""Look up descendants."""
curies = get_descendants(prefix=prefix, identifier=identifier, force=force, version=version)
for curie in sorted(curies or []):
click.echo(f"{curie}\t{get_name_by_curie(curie)}")
click.echo(f"{curie}\t{get_name_by_curie(curie, version=version)}")


@lookup.command()
Expand Down
37 changes: 31 additions & 6 deletions src/pyobo/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,15 @@

"""Constants for PyOBO."""

import json
import logging
import os
import re

import click
import pystow

__all__ = [
"RAW_DIRECTORY",
"DATABASE_DIRECTORY",
"SPECIES_REMAPPING",
]
__all__ = ["RAW_DIRECTORY", "DATABASE_DIRECTORY", "SPECIES_REMAPPING", "VERSION_PINS"]

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -80,7 +79,6 @@
SPECIES_RECORD = "5334738"
SPECIES_FILE = "species.tsv.gz"


NCBITAXON_PREFIX = "NCBITaxon"
DATE_FORMAT = "%d:%m:%Y %H:%M"
PROVENANCE_PREFIXES = {
Expand All @@ -99,3 +97,30 @@
"isbn",
"issn",
}

# Load the version pin dictionary from the VERSION_PINS environment variable.
#
# The variable is expected to hold a JSON object that maps resource prefixes
# to version names. Any malformed value is logged and replaced by an empty
# dictionary so that importing this module never fails because of it.
VERSION_PINS_STR = os.getenv("VERSION_PINS")
VERSION_PINS = {}
if VERSION_PINS_STR:
    try:
        # Keep the try body minimal: only the parse itself can raise ValueError
        # (json.JSONDecodeError is a subclass of ValueError).
        _version_pins = json.loads(VERSION_PINS_STR)
    except ValueError as e:
        logger.error(
            "The value for the environment variable VERSION_PINS must be a valid JSON string: %s",
            e,
        )
    else:
        if not isinstance(_version_pins, dict):
            # Valid JSON that is not an object (list, string, number, ...) has
            # no .items(); reject it explicitly instead of raising
            # AttributeError at import time.
            logger.error(
                "The value for the environment variable VERSION_PINS must be a JSON "
                "object mapping prefixes to version names, got %s",
                type(_version_pins).__name__,
            )
        elif all(
            isinstance(k, str) and isinstance(v, str) for k, v in _version_pins.items()
        ):
            VERSION_PINS = _version_pins
        else:
            # A single non-string prefix or version invalidates the whole mapping.
            logger.error("The prefix and version name must both be strings")

# NOTE: this banner is printed whenever the module is imported.
click.echo(
    f"These are the resource versions that are pinned.\n{VERSION_PINS}. "
    f"\nPyobo will download the latest version of a resource if it's "
    f"not pinned.\nIf you want to use a specific version of a "
    f"resource, edit your VERSION_PINS environmental "
    f"variable which is a JSON string to include a prefix and version "
    f"name."
)
4 changes: 2 additions & 2 deletions src/pyobo/sources/antibodyregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import logging
from typing import Iterable, Mapping, Optional

import bioversions
import pandas as pd
from bioregistry.utils import removeprefix
from tqdm.auto import tqdm

from pyobo import Obo, Term
from pyobo.api.utils import get_version
from pyobo.utils.path import ensure_df

__all__ = [
Expand All @@ -27,7 +27,7 @@
def get_chunks(*, force: bool = False, version: Optional[str] = None) -> pd.DataFrame:
"""Get the BioGRID identifiers mapping dataframe."""
if version is None:
version = bioversions.get_version(PREFIX)
version = get_version(PREFIX)
df = ensure_df(
PREFIX,
url=URL,
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/biogrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from functools import partial
from typing import Mapping, Optional

import bioversions
import pandas as pd

from pyobo.api.utils import get_version
from pyobo.resources.ncbitaxon import get_ncbitaxon_id
from pyobo.utils.cache import cached_mapping
from pyobo.utils.path import ensure_df, prefix_directory_join
Expand Down Expand Up @@ -52,7 +52,7 @@ def _lookup(name: str) -> Optional[str]:

def get_df() -> pd.DataFrame:
"""Get the BioGRID identifiers mapping dataframe."""
version = bioversions.get_version("biogrid")
version = get_version("biogrid")
url = f"{BASE_URL}/BIOGRID-{version}/BIOGRID-IDENTIFIERS-{version}.tab.zip"
df = ensure_df(PREFIX, url=url, skiprows=28, dtype=str, version=version)
df["taxonomy_id"] = df["ORGANISM_OFFICIAL_NAME"].map(_lookup)
Expand All @@ -65,7 +65,7 @@ def get_df() -> pd.DataFrame:
"cache",
"xrefs",
name="ncbigene.tsv",
version=partial(bioversions.get_version, PREFIX),
version=partial(get_version, PREFIX),
),
header=["biogrid_id", "ncbigene_id"],
)
Expand Down
4 changes: 2 additions & 2 deletions src/pyobo/sources/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from operator import attrgetter
from typing import DefaultDict, Dict, Iterable, Optional

import bioversions
from tabulate import tabulate
from tqdm.auto import tqdm

from pyobo.api.utils import get_version
from pyobo.struct import (
Obo,
Reference,
Expand Down Expand Up @@ -241,7 +241,7 @@ def get_obo(*, force: bool = False) -> Obo:
def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: # noqa:C901
"""Get HGNC terms."""
if version is None:
version = bioversions.get_version("hgnc")
version = get_version("hgnc")
unhandled_entry_keys: typing.Counter[str] = Counter()
unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict)
path = ensure_path(
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/mesh.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from tqdm.auto import tqdm

from pyobo.api.utils import get_version
from pyobo.identifier_utils import standardize_ec
from pyobo.struct import Obo, Reference, Synonym, Term
from pyobo.utils.cache import cached_json, cached_mapping
Expand Down Expand Up @@ -331,9 +332,8 @@ def get_mesh_category_curies(
.. seealso:: https://meshb.nlm.nih.gov/treeView
"""
if version is None:
import bioversions

version = bioversions.get_version("mesh")
version = get_version("mesh")
assert version is not None
tree_to_mesh = get_tree_to_mesh_id(version=version)
rv = []
for i in range(1, 100):
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/pubchem.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import logging
from typing import Iterable, Mapping, Optional

import bioversions
import pandas as pd
from bioregistry.utils import removeprefix
from tqdm.auto import tqdm

from ..api import get_name_id_mapping
from ..api.utils import get_version
from ..struct import Obo, Reference, Synonym, Term
from ..utils.iter import iterate_gzips_together
from ..utils.path import ensure_df, ensure_path
Expand All @@ -26,7 +26,7 @@

def _get_pubchem_extras_url(version: Optional[str], end: str) -> str:
if version is None:
version = bioversions.get_version("pubchem")
version = get_version("pubchem")
return f"ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/Monthly/{version}/Extras/{end}"


Expand Down Expand Up @@ -100,7 +100,7 @@ def get_pubchem_id_to_mesh_id(version: str) -> Mapping[str, str]:

def _ensure_cid_name_path(*, version: Optional[str] = None, force: bool = False) -> str:
if version is None:
version = bioversions.get_version("pubchem")
version = get_version("pubchem")
# 2 tab-separated columns: compound_id, name
cid_name_url = _get_pubchem_extras_url(version, "CID-Title.gz")
cid_name_path = ensure_path(PREFIX, url=cid_name_url, version=version, force=force)
Expand Down
4 changes: 2 additions & 2 deletions src/pyobo/sources/rhea.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import logging
from typing import TYPE_CHECKING, Dict, Iterable, Optional

import bioversions
import pystow

from pyobo.api.utils import get_version
from pyobo.struct import Obo, Reference, Term
from pyobo.struct.typedef import (
TypeDef,
Expand Down Expand Up @@ -63,7 +63,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl
"""Get the Rhea RDF graph."""
# see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
if version is None:
version = bioversions.get_version(PREFIX)
version = get_version(PREFIX)
return pystow.ensure_rdf(
"pyobo",
"raw",
Expand Down
4 changes: 2 additions & 2 deletions src/pyobo/sources/uniprot/uniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from pathlib import Path
from typing import Iterable, List, Optional, cast

import bioversions
from tqdm.auto import tqdm

from pyobo import Obo, Reference
from pyobo.api.utils import get_version
from pyobo.constants import RAW_MODULE
from pyobo.identifier_utils import standardize_ec
from pyobo.struct import Term, derives_from, enables, from_species, participates_in
Expand Down Expand Up @@ -166,7 +166,7 @@ def _parse_go(go_terms) -> List[Reference]:
def ensure(version: Optional[str] = None, force: bool = False) -> Path:
"""Ensure the reviewed uniprot names are available."""
if version is None:
version = bioversions.get_version("uniprot")
version = get_version("uniprot")
return RAW_MODULE.ensure(
PREFIX,
version,
Expand Down
5 changes: 2 additions & 3 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
term_replaced_by,
)
from .utils import comma_separate, obo_escape_slim
from ..api.utils import get_version
from ..constants import (
DATE_FORMAT,
NCBITAXON_PREFIX,
Expand Down Expand Up @@ -583,10 +584,8 @@ def __post_init__(self):

def _get_version(self) -> Optional[str]:
if self.bioversions_key:
import bioversions

try:
return bioversions.get_version(self.bioversions_key)
return get_version(self.bioversions_key)
except KeyError:
logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this resource ")
except IOError:
Expand Down
3 changes: 2 additions & 1 deletion src/pyobo/utils/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

logger = logging.getLogger(__name__)

VersionHint = Union[None, str, Callable[[], str]]
VersionHint = Union[None, str, Callable[[], Optional[str]]]

requests_ftp.monkeypatch_session()

Expand All @@ -46,6 +46,7 @@ def prefix_directory_join(
logger.info("[%s] got version %s", prefix, version)
elif not isinstance(version, str):
raise TypeError(f"Invalid type: {version} ({type(version)})")
assert version is not None
version = cleanup_version(version, prefix=prefix)
if version is not None and "/" in version:
raise ValueError(f"[{prefix}] Can not have slash in version: {version}")
Expand Down
10 changes: 5 additions & 5 deletions src/pyobo/xrefdb/sources/chembl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from typing import Optional

import bioversions
import pandas as pd

from pyobo.api.utils import get_version
from pyobo.constants import (
PROVENANCE,
SOURCE_ID,
Expand All @@ -26,7 +26,7 @@ def get_chembl_compound_equivalences_raw(
) -> pd.DataFrame:
"""Get the chemical representations raw dataframe."""
if version is None:
version = bioversions.get_version("chembl")
version = get_version("chembl")

base_url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}"
url = f"{base_url}/chembl_{version}_chemreps.txt.gz"
Expand All @@ -36,7 +36,7 @@ def get_chembl_compound_equivalences_raw(
def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFrame:
"""Get ChEMBL chemical equivalences."""
if version is None:
version = bioversions.get_version("chembl")
version = get_version("chembl")

df = get_chembl_compound_equivalences_raw(version=version)
rows = []
Expand All @@ -55,7 +55,7 @@ def get_chembl_compound_equivalences(version: Optional[str] = None) -> pd.DataFr
def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFrame:
"""Get ChEMBL protein equivalences."""
if version is None:
version = bioversions.get_version("chembl")
version = get_version("chembl")

url = f"ftp://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBLdb/releases/chembl_{version}/chembl_uniprot_mapping.txt"
df = ensure_df(
Expand All @@ -75,7 +75,7 @@ def get_chembl_protein_equivalences(version: Optional[str] = None) -> pd.DataFra
def get_chembl_xrefs_df(version: Optional[str] = None) -> pd.DataFrame:
"""Get all ChEBML equivalences."""
if version is None:
version = bioversions.get_version("chembl")
version = get_version("chembl")

return pd.concat(
[
Expand Down
5 changes: 3 additions & 2 deletions src/pyobo/xrefdb/sources/pubchem.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from typing import Optional

import bioversions
import pandas as pd

from ...api.utils import get_version
from ...constants import XREF_COLUMNS
from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id

Expand All @@ -18,7 +18,8 @@
def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame:
"""Get PubChem Compound-MeSH xrefs."""
if version is None:
version = bioversions.get_version("pubchem")
version = get_version("pubchem")
assert version is not None
cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH")
return pd.DataFrame(
[
Expand Down
Loading