Add version keyword to all lookup functions (#184)
* Add version keyword to all lookup functions

Closes #183

* More version getter cleanup
cthoyt authored Apr 18, 2024
1 parent abf2d4d commit 4c09578
Showing 15 changed files with 106 additions and 69 deletions.
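Every hunk below applies the same pattern: each lookup gains a keyword-only version parameter defaulting to None, and get_version(prefix) becomes a fallback that only runs when the caller has not pinned a version. A minimal, self-contained sketch of that control flow; the function names here are illustrative stand-ins, not pyobo's actual helpers:

from typing import Optional


def lookup_latest_version(prefix: str) -> str:
    """Stand-in for pyobo's get_version(); returns a dummy tag for illustration."""
    return "v1"


def resolve_version(prefix: str, *, version: Optional[str] = None) -> str:
    """Resolve the version the same way the updated lookup functions do."""
    if version is None:
        # Before this commit the lookup ran unconditionally; now it is only a fallback.
        version = lookup_latest_version(prefix)
    return version


print(resolve_version("go"))                        # v1 (fallback)
print(resolve_version("go", version="2024-01-01"))  # 2024-01-01 (pinned)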
21 changes: 13 additions & 8 deletions src/pyobo/api/alts.py
@@ -28,12 +28,15 @@

@lru_cache()
@wrap_norm_prefix
def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]:
def get_id_to_alts(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, List[str]]:
"""Get alternate identifiers."""
if prefix in NO_ALTS:
return {}

version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
header = [f"{prefix}_id", "alt_id"]

@@ -51,26 +54,28 @@ def _get_mapping() -> Mapping[str, List[str]]:

@lru_cache()
@wrap_norm_prefix
def get_alts_to_id(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_alts_to_id(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get alternative id to primary id mapping."""
return {
alt: primary
for primary, alts in get_id_to_alts(prefix, force=force).items()
for primary, alts in get_id_to_alts(prefix, force=force, version=version).items()
for alt in alts
}


def get_primary_curie(curie: str) -> Optional[str]:
def get_primary_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the primary curie for an entity."""
prefix, identifier = normalize_curie(curie)
primary_identifier = get_primary_identifier(prefix, identifier)
primary_identifier = get_primary_identifier(prefix, identifier, version=version)
if primary_identifier is not None:
return f"{prefix}:{primary_identifier}"
return None


@wrap_norm_prefix
def get_primary_identifier(prefix: str, identifier: str) -> str:
def get_primary_identifier(prefix: str, identifier: str, *, version: Optional[str] = None) -> str:
"""Get the primary identifier for an entity.
:param prefix: The name of the resource
@@ -82,7 +87,7 @@ def get_primary_identifier(prefix: str, identifier: str) -> str:
if prefix in NO_ALTS: # TODO later expand list to other namespaces with no alts
return identifier

alts_to_id = get_alts_to_id(prefix)
alts_to_id = get_alts_to_id(prefix, version=version)
if alts_to_id and identifier in alts_to_id:
return alts_to_id[identifier]
return identifier
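A hedged usage sketch of the updated alts API, importing from the module shown above (pyobo.api.alts); the version value is a placeholder, not a known GO release:

from pyobo.api.alts import get_id_to_alts, get_primary_identifier

# Pin an explicit resource version instead of falling back to get_version("go").
alts = get_id_to_alts("go", version="2024-01-01")
primary = get_primary_identifier("go", "0006915", version="2024-01-01")
print(len(alts), primary)  # primary identifiers are returned unchanged, so this prints "0006915"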
14 changes: 9 additions & 5 deletions src/pyobo/api/hierarchy.py
@@ -13,6 +13,7 @@
from .relations import get_filtered_relations_df
from ..identifier_utils import wrap_norm_prefix
from ..struct import TypeDef, has_member, is_a, part_of
from ..struct.reference import Reference

__all__ = [
"get_hierarchy",
@@ -24,7 +25,6 @@
"get_children",
]

from ..struct.reference import Reference

logger = logging.getLogger(__name__)

@@ -154,14 +154,16 @@ def _get_hierarchy_helper(
return rv


def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
def is_descendent(
prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
) -> bool:
"""Check that the first identifier has the second as a descendent.
Check that go:0070246 ! natural killer cell apoptotic process is a
descendant of go:0006915 ! apoptotic process::
>>> assert is_descendent('go', '0070246', 'go', '0006915')
"""
descendants = get_descendants(ancestor_prefix, ancestor_identifier)
descendants = get_descendants(ancestor_prefix, ancestor_identifier, version=version)
return descendants is not None and f"{prefix}:{identifier}" in descendants


@@ -224,13 +226,15 @@ def get_children(
return set(hierarchy.predecessors(curie))


def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
def has_ancestor(
prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
) -> bool:
"""Check that the first identifier has the second as an ancestor.
Check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process::
>>> assert has_ancestor('go', '0006915', 'go', '0008219')
"""
ancestors = get_ancestors(prefix, identifier)
ancestors = get_ancestors(prefix, identifier, version=version)
return ancestors is not None and f"{ancestor_prefix}:{ancestor_identifier}" in ancestors


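The hierarchy helpers thread the keyword through to get_descendants()/get_ancestors(), so ancestry checks can be pinned to a single release; a sketch reusing the GO terms from the docstrings above (the version value is a placeholder):

from pyobo.api.hierarchy import has_ancestor, is_descendent

version = "2024-01-01"  # hypothetical GO release tag; omit to fall back to get_version("go")
# Same GO relationships as the docstring examples above, now pinned to one release.
print(is_descendent("go", "0070246", "go", "0006915", version=version))
print(has_ancestor("go", "0006915", "go", "0008219", version=version))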
9 changes: 6 additions & 3 deletions src/pyobo/api/metadata.py
@@ -4,7 +4,7 @@

import logging
from functools import lru_cache
from typing import Mapping
from typing import Mapping, Optional

from .utils import get_version
from ..getters import get_ontology
@@ -21,9 +21,12 @@

@lru_cache()
@wrap_norm_prefix
def get_metadata(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_metadata(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get metadata for the ontology."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="metadata.json", version=version)

@cached_json(path=path, force=force)
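get_metadata() gains the same fallback, so the metadata.json of a specific release can be requested directly; a small sketch mirroring what the CLI command further below does with the result (the version value is a placeholder):

import json

from pyobo.api.metadata import get_metadata

# Omit version= to fall back to get_version("go").
metadata = get_metadata("go", version="2024-01-01")
print(json.dumps(metadata, indent=2))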
14 changes: 8 additions & 6 deletions src/pyobo/api/names.py
@@ -69,7 +69,7 @@ def _help_get(
NO_BUILD_PREFIXES.add(prefix)
return None

primary_id = get_primary_identifier(prefix, identifier)
primary_id = get_primary_identifier(prefix, identifier, version=version)
return mapping.get(primary_id)


@@ -82,7 +82,7 @@ def get_name(prefix: str, identifier: str, *, version: Optional[str] = None) ->
@lru_cache()
@wrap_norm_prefix
def get_ids(
prefix: str, force: bool = False, strict: bool = False, version: Optional[str] = None
prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None
) -> Set[str]:
"""Get the set of identifiers for this prefix."""
if prefix == "ncbigene":
@@ -150,16 +150,18 @@ def _get_id_name_mapping() -> Mapping[str, str]:

@lru_cache()
@wrap_norm_prefix
def get_name_id_mapping(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_name_id_mapping(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get a name to identifier mapping for the OBO file."""
id_name = get_id_name_mapping(prefix=prefix, force=force)
id_name = get_id_name_mapping(prefix=prefix, force=force, version=version)
return {v: k for k, v in id_name.items()}


@wrap_norm_prefix
def get_definition(prefix: str, identifier: str) -> Optional[str]:
def get_definition(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the definition for an entity."""
return _help_get(get_id_definition_mapping, prefix, identifier)
return _help_get(get_id_definition_mapping, prefix, identifier, version=version)


def get_id_definition_mapping(
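On the names side this makes the most common lookups version-aware; a sketch using the go:0008219 ! cell death pairing that already appears in the hierarchy docstrings (the version value is a placeholder):

from pyobo.api.names import get_definition, get_name, get_name_id_mapping

version = "2024-01-01"  # hypothetical GO release tag
print(get_name("go", "0008219", version=version))  # "cell death", per the hierarchy docstrings
print(get_definition("go", "0008219", version=version))
name_to_id = get_name_id_mapping("go", version=version)
print(name_to_id.get("cell death"))  # "0008219"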
14 changes: 11 additions & 3 deletions src/pyobo/api/relations.py
@@ -48,9 +48,11 @@ def get_relations_df(
force: bool = False,
wide: bool = False,
strict: bool = True,
version: Optional[str] = None,
) -> pd.DataFrame:
"""Get all relations from the OBO."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="relations.tsv", version=version)

@cached_df(path=path, dtype=str, force=force)
@@ -118,9 +120,11 @@ def get_id_multirelations_mapping(
*,
use_tqdm: bool = False,
force: bool = False,
version: Optional[str] = None,
) -> Mapping[str, List[Reference]]:
"""Get the OBO file and output a synonym dictionary."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
ontology = get_ontology(prefix, force=force, version=version)
return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)

@@ -134,6 +138,7 @@ def get_relation_mapping(
*,
use_tqdm: bool = False,
force: bool = False,
version: Optional[str] = None,
) -> Mapping[str, str]:
"""Get relations from identifiers in the source prefix to target prefix with the given relation.
@@ -147,7 +152,8 @@
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi')
>>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
"""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
ontology = get_ontology(prefix, force=force, version=version)
return ontology.get_relation_mapping(
relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
@@ -163,6 +169,7 @@ def get_relation(
*,
use_tqdm: bool = False,
force: bool = False,
**kwargs,
) -> Optional[str]:
"""Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
@@ -181,6 +188,7 @@
target_prefix=target_prefix,
use_tqdm=use_tqdm,
force=force,
**kwargs,
)
return relation_mapping.get(source_identifier)

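A version-pinned variant of the orthology lookup already shown in the get_relation_mapping docstring; everything except the version value comes from that docstring:

import pyobo

# ro:HOM0000017 is the orthology relation used in the docstring example above;
# the version string is a hypothetical HGNC release tag.
hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping(
    "hgnc", "ro:HOM0000017", "mgi", version="2024-01-01"
)
print(len(hgnc_mgi_orthology_mapping))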
6 changes: 3 additions & 3 deletions src/pyobo/api/species.py
@@ -22,13 +22,13 @@


@wrap_norm_prefix
def get_species(prefix: str, identifier: str) -> Optional[str]:
def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the species."""
if prefix == "uniprot":
raise NotImplementedError

try:
id_species = get_id_species_mapping(prefix)
id_species = get_id_species_mapping(prefix, version=version)
except NoBuild:
logger.warning("unable to look up species for prefix %s", prefix)
return None
@@ -37,7 +37,7 @@ def get_species(prefix: str, identifier: str) -> Optional[str]:
logger.warning("no results produced for prefix %s", prefix)
return None

primary_id = get_primary_identifier(prefix, identifier)
primary_id = get_primary_identifier(prefix, identifier, version=version)
return id_species.get(primary_id)


8 changes: 6 additions & 2 deletions src/pyobo/api/typedefs.py
@@ -4,6 +4,7 @@

import logging
from functools import lru_cache
from typing import Optional

import pandas as pd

@@ -22,9 +23,12 @@

@lru_cache()
@wrap_norm_prefix
def get_typedef_df(prefix: str, force: bool = False) -> pd.DataFrame:
def get_typedef_df(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> pd.DataFrame:
"""Get an identifier to name mapping for the typedefs in an OBO file."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)

@cached_df(path=path, dtype=str, force=force)
13 changes: 10 additions & 3 deletions src/pyobo/api/xrefs.py
@@ -30,9 +30,16 @@


@wrap_norm_prefix
def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False) -> Optional[str]:
def get_xref(
prefix: str,
identifier: str,
new_prefix: str,
*,
flip: bool = False,
version: Optional[str] = None,
) -> Optional[str]:
"""Get the xref with the new prefix if a direct path exists."""
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip)
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version)
return filtered_xrefs.get(identifier)


@@ -41,8 +48,8 @@ def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False)
def get_filtered_xrefs(
prefix: str,
xref_prefix: str,
flip: bool = False,
*,
flip: bool = False,
use_tqdm: bool = False,
force: bool = False,
strict: bool = False,
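Cross-reference lookups get the same treatment; a hedged sketch in which the prefixes are real resources but the identifier and version are illustrative placeholders:

from pyobo.api.xrefs import get_filtered_xrefs, get_xref

version = "2024-01-01"  # placeholder release tag for the source resource
chebi_to_mesh = get_filtered_xrefs("chebi", "mesh", version=version)
print(len(chebi_to_mesh))
# The mapping may not exist in a given release, in which case this returns None.
print(get_xref("chebi", "28901", "mesh", version=version))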
19 changes: 12 additions & 7 deletions src/pyobo/aws.py
@@ -77,14 +77,19 @@ def upload_artifacts(
upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client)


def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
def upload_artifacts_for_prefix(
*, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None
):
"""Upload compiled parts for the given prefix to AWS."""
if s3_client is None:
s3_client = boto3.client("s3")

if version is None:
version = get_version(prefix)

logger.info("[%s] getting id->name mapping", prefix)
get_id_name_mapping(prefix)
id_name_path = prefix_cache_join(prefix, name="names.tsv", version=get_version(prefix))
id_name_path = prefix_cache_join(prefix, name="names.tsv", version=version)
if not id_name_path.exists():
raise FileNotFoundError
id_name_key = os.path.join(prefix, "cache", "names.tsv")
@@ -93,7 +98,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting id->synonyms mapping", prefix)
get_id_synonyms_mapping(prefix)
id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=get_version(prefix))
id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=version)
if not id_synonyms_path.exists():
raise FileNotFoundError
id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv")
@@ -102,7 +107,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting xrefs", prefix)
get_xrefs_df(prefix)
xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=get_version(prefix))
xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
if not xrefs_path.exists():
raise FileNotFoundError
xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv")
@@ -111,7 +116,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting relations", prefix)
get_relations_df(prefix)
relations_path = prefix_cache_join(prefix, name="relations.tsv", version=get_version(prefix))
relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
if not relations_path.exists():
raise FileNotFoundError
relations_key = os.path.join(prefix, "cache", "relations.tsv")
@@ -120,7 +125,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting properties", prefix)
get_properties_df(prefix)
properties_path = prefix_cache_join(prefix, name="properties.tsv", version=get_version(prefix))
properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
if not properties_path.exists():
raise FileNotFoundError
properties_key = os.path.join(prefix, "cache", "properties.tsv")
@@ -129,7 +134,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting alternative identifiers", prefix)
get_id_to_alts(prefix)
alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=get_version(prefix))
alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
if not alts_path.exists():
raise FileNotFoundError
alts_key = os.path.join(prefix, "cache", "alt_ids.tsv")
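upload_artifacts_for_prefix() now resolves the version once up front instead of calling get_version() before each cache path; a sketch of a pinned-version call (the bucket name and version are placeholders):

from pyobo.aws import upload_artifacts_for_prefix

# With version omitted, the function falls back to get_version(prefix) and reuses
# that single value for every cache path it uploads.
upload_artifacts_for_prefix(
    prefix="go",
    bucket="my-obo-bucket",
    version="2024-01-01",
)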
5 changes: 3 additions & 2 deletions src/pyobo/cli/lookup.py
@@ -76,9 +76,10 @@ def xrefs(prefix: str, target: str, force: bool, no_strict: bool, version: Optio
@prefix_argument
@verbose_option
@force_option
def metadata(prefix: str, force: bool):
@version_option
def metadata(prefix: str, force: bool, version: Optional[str]):
"""Print the metadata for the given namespace."""
metadata = get_metadata(prefix, force=force)
metadata = get_metadata(prefix, force=force, version=version)
click.echo(json.dumps(metadata, indent=2))

