Add version keyword to all lookup functions (#184)
* Add version keyword to all lookup functions

Closes #183

* More version getter cleanup
cthoyt authored Apr 18, 2024
1 parent abf2d4d commit 4c09578
Showing 15 changed files with 106 additions and 69 deletions.
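Every hunk below applies the same pattern: each lookup gains a keyword-only version parameter defaulting to None, and get_version(prefix) becomes a fallback that only runs when the caller has not pinned a version. A minimal, self-contained sketch of that control flow; the function names here are illustrative stand-ins, not pyobo's actual helpers:

from typing import Optional


def lookup_latest_version(prefix: str) -> str:
    """Stand-in for pyobo's get_version(); returns a dummy tag for illustration."""
    return "v1"


def resolve_version(prefix: str, *, version: Optional[str] = None) -> str:
    """Resolve the version the same way the updated lookup functions do."""
    if version is None:
        # Before this commit the lookup ran unconditionally; now it is only a fallback.
        version = lookup_latest_version(prefix)
    return version


print(resolve_version("go"))                        # v1 (fallback)
print(resolve_version("go", version="2024-01-01"))  # 2024-01-01 (pinned)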
21 changes: 13 additions & 8 deletions src/pyobo/api/alts.py
@@ -28,12 +28,15 @@

@lru_cache()
@wrap_norm_prefix
def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]:
def get_id_to_alts(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, List[str]]:
"""Get alternate identifiers."""
if prefix in NO_ALTS:
return {}

version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
header = [f"{prefix}_id", "alt_id"]

@@ -51,26 +54,28 @@ def _get_mapping() -> Mapping[str, List[str]]:

@lru_cache()
@wrap_norm_prefix
def get_alts_to_id(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_alts_to_id(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get alternative id to primary id mapping."""
return {
alt: primary
for primary, alts in get_id_to_alts(prefix, force=force).items()
for primary, alts in get_id_to_alts(prefix, force=force, version=version).items()
for alt in alts
}


def get_primary_curie(curie: str) -> Optional[str]:
def get_primary_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the primary curie for an entity."""
prefix, identifier = normalize_curie(curie)
primary_identifier = get_primary_identifier(prefix, identifier)
primary_identifier = get_primary_identifier(prefix, identifier, version=version)
if primary_identifier is not None:
return f"{prefix}:{primary_identifier}"
return None


@wrap_norm_prefix
def get_primary_identifier(prefix: str, identifier: str) -> str:
def get_primary_identifier(prefix: str, identifier: str, *, version: Optional[str] = None) -> str:
"""Get the primary identifier for an entity.
:param prefix: The name of the resource
@@ -82,7 +87,7 @@ def get_primary_identifier(prefix: str, identifier: str) -> str:
if prefix in NO_ALTS: # TODO later expand list to other namespaces with no alts
return identifier

alts_to_id = get_alts_to_id(prefix)
alts_to_id = get_alts_to_id(prefix, version=version)
if alts_to_id and identifier in alts_to_id:
return alts_to_id[identifier]
return identifier
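A hedged usage sketch of the updated alts API, importing from the module shown above (pyobo.api.alts); the version value is a placeholder, not a known GO release:

from pyobo.api.alts import get_id_to_alts, get_primary_identifier

# Pin an explicit resource version instead of falling back to get_version("go").
alts = get_id_to_alts("go", version="2024-01-01")
primary = get_primary_identifier("go", "0006915", version="2024-01-01")
print(len(alts), primary)  # primary identifiers are returned unchanged, so this prints "0006915"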
14 changes: 9 additions & 5 deletions src/pyobo/api/hierarchy.py
@@ -13,6 +13,7 @@
from .relations import get_filtered_relations_df
from ..identifier_utils import wrap_norm_prefix
from ..struct import TypeDef, has_member, is_a, part_of
from ..struct.reference import Reference

__all__ = [
"get_hierarchy",
@@ -24,7 +25,6 @@
"get_children",
]

from ..struct.reference import Reference

logger = logging.getLogger(__name__)

@@ -154,14 +154,16 @@ def _get_hierarchy_helper(
return rv


def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
def is_descendent(
prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
) -> bool:
"""Check that the first identifier has the second as a descendent.
Check that go:0070246 ! natural killer cell apoptotic process is a
descendant of go:0006915 ! apoptotic process::
>>> assert is_descendent('go', '0070246', 'go', '0006915')
"""
descendants = get_descendants(ancestor_prefix, ancestor_identifier)
descendants = get_descendants(ancestor_prefix, ancestor_identifier, version=version)
return descendants is not None and f"{prefix}:{identifier}" in descendants


@@ -224,13 +226,15 @@ def get_children(
return set(hierarchy.predecessors(curie))


def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
def has_ancestor(
prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
) -> bool:
"""Check that the first identifier has the second as an ancestor.
Check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process::
>>> assert has_ancestor('go', '0006915', 'go', '0008219')
"""
ancestors = get_ancestors(prefix, identifier)
ancestors = get_ancestors(prefix, identifier, version=version)
return ancestors is not None and f"{ancestor_prefix}:{ancestor_identifier}" in ancestors


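The hierarchy helpers thread the keyword through to get_descendants()/get_ancestors(), so ancestry checks can be pinned to a single release; a sketch reusing the GO terms from the docstrings above (the version value is a placeholder):

from pyobo.api.hierarchy import has_ancestor, is_descendent

version = "2024-01-01"  # hypothetical GO release tag; omit to fall back to get_version("go")
# Same GO relationships as the docstring examples above, now pinned to one release.
print(is_descendent("go", "0070246", "go", "0006915", version=version))
print(has_ancestor("go", "0006915", "go", "0008219", version=version))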
9 changes: 6 additions & 3 deletions src/pyobo/api/metadata.py
@@ -4,7 +4,7 @@

import logging
from functools import lru_cache
from typing import Mapping
from typing import Mapping, Optional

from .utils import get_version
from ..getters import get_ontology
@@ -21,9 +21,12 @@

@lru_cache()
@wrap_norm_prefix
def get_metadata(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_metadata(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get metadata for the ontology."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="metadata.json", version=version)

@cached_json(path=path, force=force)
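get_metadata() gains the same fallback, so the metadata.json of a specific release can be requested directly; a small sketch mirroring what the CLI command further below does with the result (the version value is a placeholder):

import json

from pyobo.api.metadata import get_metadata

# Omit version= to fall back to get_version("go").
metadata = get_metadata("go", version="2024-01-01")
print(json.dumps(metadata, indent=2))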
14 changes: 8 additions & 6 deletions src/pyobo/api/names.py
@@ -69,7 +69,7 @@ def _help_get(
NO_BUILD_PREFIXES.add(prefix)
return None

primary_id = get_primary_identifier(prefix, identifier)
primary_id = get_primary_identifier(prefix, identifier, version=version)
return mapping.get(primary_id)


@@ -82,7 +82,7 @@ def get_name(prefix: str, identifier: str, *, version: Optional[str] = None) ->
@lru_cache()
@wrap_norm_prefix
def get_ids(
prefix: str, force: bool = False, strict: bool = False, version: Optional[str] = None
prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None
) -> Set[str]:
"""Get the set of identifiers for this prefix."""
if prefix == "ncbigene":
@@ -150,16 +150,18 @@ def _get_id_name_mapping() -> Mapping[str, str]:

@lru_cache()
@wrap_norm_prefix
def get_name_id_mapping(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_name_id_mapping(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get a name to identifier mapping for the OBO file."""
id_name = get_id_name_mapping(prefix=prefix, force=force)
id_name = get_id_name_mapping(prefix=prefix, force=force, version=version)
return {v: k for k, v in id_name.items()}


@wrap_norm_prefix
def get_definition(prefix: str, identifier: str) -> Optional[str]:
def get_definition(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the definition for an entity."""
return _help_get(get_id_definition_mapping, prefix, identifier)
return _help_get(get_id_definition_mapping, prefix, identifier, version=version)


def get_id_definition_mapping(
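On the names side this makes the most common lookups version-aware; a sketch using the go:0008219 ! cell death pairing that already appears in the hierarchy docstrings (the version value is a placeholder):

from pyobo.api.names import get_definition, get_name, get_name_id_mapping

version = "2024-01-01"  # hypothetical GO release tag
print(get_name("go", "0008219", version=version))  # "cell death", per the hierarchy docstrings
print(get_definition("go", "0008219", version=version))
name_to_id = get_name_id_mapping("go", version=version)
print(name_to_id.get("cell death"))  # "0008219"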
14 changes: 11 additions & 3 deletions src/pyobo/api/relations.py
@@ -48,9 +48,11 @@ def get_relations_df(
force: bool = False,
wide: bool = False,
strict: bool = True,
version: Optional[str] = None,
) -> pd.DataFrame:
"""Get all relations from the OBO."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="relations.tsv", version=version)

@cached_df(path=path, dtype=str, force=force)
@@ -118,9 +120,11 @@ def get_id_multirelations_mapping(
*,
use_tqdm: bool = False,
force: bool = False,
version: Optional[str] = None,
) -> Mapping[str, List[Reference]]:
"""Get the OBO file and output a synonym dictionary."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
ontology = get_ontology(prefix, force=force, version=version)
return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)

@@ -134,6 +138,7 @@ def get_relation_mapping(
*,
use_tqdm: bool = False,
force: bool = False,
version: Optional[str] = None,
) -> Mapping[str, str]:
"""Get relations from identifiers in the source prefix to target prefix with the given relation.
@@ -147,7 +152,8 @@
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi')
>>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
"""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
ontology = get_ontology(prefix, force=force, version=version)
return ontology.get_relation_mapping(
relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
@@ -163,6 +169,7 @@ def get_relation(
*,
use_tqdm: bool = False,
force: bool = False,
**kwargs,
) -> Optional[str]:
"""Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
@@ -181,6 +188,7 @@
target_prefix=target_prefix,
use_tqdm=use_tqdm,
force=force,
**kwargs,
)
return relation_mapping.get(source_identifier)

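A version-pinned variant of the orthology lookup already shown in the get_relation_mapping docstring; everything except the version value comes from that docstring:

import pyobo

# ro:HOM0000017 is the orthology relation used in the docstring example above;
# the version string is a hypothetical HGNC release tag.
hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping(
    "hgnc", "ro:HOM0000017", "mgi", version="2024-01-01"
)
print(len(hgnc_mgi_orthology_mapping))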
6 changes: 3 additions & 3 deletions src/pyobo/api/species.py
@@ -22,13 +22,13 @@


@wrap_norm_prefix
def get_species(prefix: str, identifier: str) -> Optional[str]:
def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the species."""
if prefix == "uniprot":
raise NotImplementedError

try:
id_species = get_id_species_mapping(prefix)
id_species = get_id_species_mapping(prefix, version=version)
except NoBuild:
logger.warning("unable to look up species for prefix %s", prefix)
return None
@@ -37,7 +37,7 @@ def get_species(prefix: str, identifier: str) -> Optional[str]:
logger.warning("no results produced for prefix %s", prefix)
return None

primary_id = get_primary_identifier(prefix, identifier)
primary_id = get_primary_identifier(prefix, identifier, version=version)
return id_species.get(primary_id)


8 changes: 6 additions & 2 deletions src/pyobo/api/typedefs.py
@@ -4,6 +4,7 @@

import logging
from functools import lru_cache
from typing import Optional

import pandas as pd

@@ -22,9 +23,12 @@

@lru_cache()
@wrap_norm_prefix
def get_typedef_df(prefix: str, force: bool = False) -> pd.DataFrame:
def get_typedef_df(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> pd.DataFrame:
"""Get an identifier to name mapping for the typedefs in an OBO file."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)

@cached_df(path=path, dtype=str, force=force)
13 changes: 10 additions & 3 deletions src/pyobo/api/xrefs.py
@@ -30,9 +30,16 @@


@wrap_norm_prefix
def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False) -> Optional[str]:
def get_xref(
prefix: str,
identifier: str,
new_prefix: str,
*,
flip: bool = False,
version: Optional[str] = None,
) -> Optional[str]:
"""Get the xref with the new prefix if a direct path exists."""
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip)
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version)
return filtered_xrefs.get(identifier)


@@ -41,8 +48,8 @@ def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False)
def get_filtered_xrefs(
prefix: str,
xref_prefix: str,
flip: bool = False,
*,
flip: bool = False,
use_tqdm: bool = False,
force: bool = False,
strict: bool = False,
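Cross-reference lookups get the same treatment; a hedged sketch in which the prefixes are real resources but the identifier and version are illustrative placeholders:

from pyobo.api.xrefs import get_filtered_xrefs, get_xref

version = "2024-01-01"  # placeholder release tag for the source resource
chebi_to_mesh = get_filtered_xrefs("chebi", "mesh", version=version)
print(len(chebi_to_mesh))
# The mapping may not exist in a given release, in which case this returns None.
print(get_xref("chebi", "28901", "mesh", version=version))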
19 changes: 12 additions & 7 deletions src/pyobo/aws.py
@@ -77,14 +77,19 @@ def upload_artifacts(
upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client)


def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
def upload_artifacts_for_prefix(
*, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None
):
"""Upload compiled parts for the given prefix to AWS."""
if s3_client is None:
s3_client = boto3.client("s3")

if version is None:
version = get_version(prefix)

logger.info("[%s] getting id->name mapping", prefix)
get_id_name_mapping(prefix)
id_name_path = prefix_cache_join(prefix, name="names.tsv", version=get_version(prefix))
id_name_path = prefix_cache_join(prefix, name="names.tsv", version=version)
if not id_name_path.exists():
raise FileNotFoundError
id_name_key = os.path.join(prefix, "cache", "names.tsv")
@@ -93,7 +98,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting id->synonyms mapping", prefix)
get_id_synonyms_mapping(prefix)
id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=get_version(prefix))
id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=version)
if not id_synonyms_path.exists():
raise FileNotFoundError
id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv")
@@ -102,7 +107,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting xrefs", prefix)
get_xrefs_df(prefix)
xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=get_version(prefix))
xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
if not xrefs_path.exists():
raise FileNotFoundError
xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv")
@@ -111,7 +116,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting relations", prefix)
get_relations_df(prefix)
relations_path = prefix_cache_join(prefix, name="relations.tsv", version=get_version(prefix))
relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
if not relations_path.exists():
raise FileNotFoundError
relations_key = os.path.join(prefix, "cache", "relations.tsv")
@@ -120,7 +125,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting properties", prefix)
get_properties_df(prefix)
properties_path = prefix_cache_join(prefix, name="properties.tsv", version=get_version(prefix))
properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
if not properties_path.exists():
raise FileNotFoundError
properties_key = os.path.join(prefix, "cache", "properties.tsv")
@@ -129,7 +134,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting alternative identifiers", prefix)
get_id_to_alts(prefix)
alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=get_version(prefix))
alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
if not alts_path.exists():
raise FileNotFoundError
alts_key = os.path.join(prefix, "cache", "alt_ids.tsv")
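upload_artifacts_for_prefix() now resolves the version once up front instead of calling get_version() before each cache path; a sketch of a pinned-version call (the bucket name and version are placeholders):

from pyobo.aws import upload_artifacts_for_prefix

# With version omitted, the function falls back to get_version(prefix) and reuses
# that single value for every cache path it uploads.
upload_artifacts_for_prefix(
    prefix="go",
    bucket="my-obo-bucket",
    version="2024-01-01",
)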
5 changes: 3 additions & 2 deletions src/pyobo/cli/lookup.py
@@ -76,9 +76,10 @@ def xrefs(prefix: str, target: str, force: bool, no_strict: bool, version: Optio
@prefix_argument
@verbose_option
@force_option
def metadata(prefix: str, force: bool):
@version_option
def metadata(prefix: str, force: bool, version: Optional[str]):
"""Print the metadata for the given namespace."""
metadata = get_metadata(prefix, force=force)
metadata = get_metadata(prefix, force=force, version=version)
click.echo(json.dumps(metadata, indent=2))

