diff --git a/src/pyobo/api/alts.py b/src/pyobo/api/alts.py index 512e21e8..6122c7cf 100644 --- a/src/pyobo/api/alts.py +++ b/src/pyobo/api/alts.py @@ -28,12 +28,15 @@ @lru_cache() @wrap_norm_prefix -def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]: +def get_id_to_alts( + prefix: str, *, force: bool = False, version: Optional[str] = None +) -> Mapping[str, List[str]]: """Get alternate identifiers.""" if prefix in NO_ALTS: return {} - version = get_version(prefix) + if version is None: + version = get_version(prefix) path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version) header = [f"{prefix}_id", "alt_id"] @@ -51,26 +54,28 @@ def _get_mapping() -> Mapping[str, List[str]]: @lru_cache() @wrap_norm_prefix -def get_alts_to_id(prefix: str, force: bool = False) -> Mapping[str, str]: +def get_alts_to_id( + prefix: str, *, force: bool = False, version: Optional[str] = None +) -> Mapping[str, str]: """Get alternative id to primary id mapping.""" return { alt: primary - for primary, alts in get_id_to_alts(prefix, force=force).items() + for primary, alts in get_id_to_alts(prefix, force=force, version=version).items() for alt in alts } -def get_primary_curie(curie: str) -> Optional[str]: +def get_primary_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]: """Get the primary curie for an entity.""" prefix, identifier = normalize_curie(curie) - primary_identifier = get_primary_identifier(prefix, identifier) + primary_identifier = get_primary_identifier(prefix, identifier, version=version) if primary_identifier is not None: return f"{prefix}:{primary_identifier}" return None @wrap_norm_prefix -def get_primary_identifier(prefix: str, identifier: str) -> str: +def get_primary_identifier(prefix: str, identifier: str, *, version: Optional[str] = None) -> str: """Get the primary identifier for an entity. :param prefix: The name of the resource @@ -82,7 +87,7 @@ def get_primary_identifier(prefix: str, identifier: str) -> str: if prefix in NO_ALTS: # TODO later expand list to other namespaces with no alts return identifier - alts_to_id = get_alts_to_id(prefix) + alts_to_id = get_alts_to_id(prefix, version=version) if alts_to_id and identifier in alts_to_id: return alts_to_id[identifier] return identifier diff --git a/src/pyobo/api/hierarchy.py b/src/pyobo/api/hierarchy.py index 8f7bd57f..095cfbec 100644 --- a/src/pyobo/api/hierarchy.py +++ b/src/pyobo/api/hierarchy.py @@ -13,6 +13,7 @@ from .relations import get_filtered_relations_df from ..identifier_utils import wrap_norm_prefix from ..struct import TypeDef, has_member, is_a, part_of +from ..struct.reference import Reference __all__ = [ "get_hierarchy", @@ -24,7 +25,6 @@ "get_children", ] -from ..struct.reference import Reference logger = logging.getLogger(__name__) @@ -154,14 +154,16 @@ def _get_hierarchy_helper( return rv -def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool: +def is_descendent( + prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None +) -> bool: """Check that the first identifier has the second as a descendent. Check that go:0070246 ! natural killer cell apoptotic process is a descendant of go:0006915 ! apoptotic process:: >>> assert is_descendent('go', '0070246', 'go', '0006915') """ - descendants = get_descendants(ancestor_prefix, ancestor_identifier) + descendants = get_descendants(ancestor_prefix, ancestor_identifier, version=version) return descendants is not None and f"{prefix}:{identifier}" in descendants @@ -224,13 +226,15 @@ def get_children( return set(hierarchy.predecessors(curie)) -def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool: +def has_ancestor( + prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None +) -> bool: """Check that the first identifier has the second as an ancestor. Check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process:: >>> assert has_ancestor('go', '0006915', 'go', '0008219') """ - ancestors = get_ancestors(prefix, identifier) + ancestors = get_ancestors(prefix, identifier, version=version) return ancestors is not None and f"{ancestor_prefix}:{ancestor_identifier}" in ancestors diff --git a/src/pyobo/api/metadata.py b/src/pyobo/api/metadata.py index d31c1618..641f0b02 100644 --- a/src/pyobo/api/metadata.py +++ b/src/pyobo/api/metadata.py @@ -4,7 +4,7 @@ import logging from functools import lru_cache -from typing import Mapping +from typing import Mapping, Optional from .utils import get_version from ..getters import get_ontology @@ -21,9 +21,12 @@ @lru_cache() @wrap_norm_prefix -def get_metadata(prefix: str, force: bool = False) -> Mapping[str, str]: +def get_metadata( + prefix: str, *, force: bool = False, version: Optional[str] = None +) -> Mapping[str, str]: """Get metadata for the ontology.""" - version = get_version(prefix) + if version is None: + version = get_version(prefix) path = prefix_cache_join(prefix, name="metadata.json", version=version) @cached_json(path=path, force=force) diff --git a/src/pyobo/api/names.py b/src/pyobo/api/names.py index 2233dca1..4c268a77 100644 --- a/src/pyobo/api/names.py +++ b/src/pyobo/api/names.py @@ -69,7 +69,7 @@ def _help_get( NO_BUILD_PREFIXES.add(prefix) return None - primary_id = get_primary_identifier(prefix, identifier) + primary_id = get_primary_identifier(prefix, identifier, version=version) return mapping.get(primary_id) @@ -82,7 +82,7 @@ def get_name(prefix: str, identifier: str, *, version: Optional[str] = None) -> @lru_cache() @wrap_norm_prefix def get_ids( - prefix: str, force: bool = False, strict: bool = False, version: Optional[str] = None + prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None ) -> Set[str]: """Get the set of identifiers for this prefix.""" if prefix == "ncbigene": @@ -150,16 +150,18 @@ def _get_id_name_mapping() -> Mapping[str, str]: @lru_cache() @wrap_norm_prefix -def get_name_id_mapping(prefix: str, force: bool = False) -> Mapping[str, str]: +def get_name_id_mapping( + prefix: str, *, force: bool = False, version: Optional[str] = None +) -> Mapping[str, str]: """Get a name to identifier mapping for the OBO file.""" - id_name = get_id_name_mapping(prefix=prefix, force=force) + id_name = get_id_name_mapping(prefix=prefix, force=force, version=version) return {v: k for k, v in id_name.items()} @wrap_norm_prefix -def get_definition(prefix: str, identifier: str) -> Optional[str]: +def get_definition(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]: """Get the definition for an entity.""" - return _help_get(get_id_definition_mapping, prefix, identifier) + return _help_get(get_id_definition_mapping, prefix, identifier, version=version) def get_id_definition_mapping( diff --git a/src/pyobo/api/relations.py b/src/pyobo/api/relations.py index 1932592d..8472748e 100644 --- a/src/pyobo/api/relations.py +++ b/src/pyobo/api/relations.py @@ -48,9 +48,11 @@ def get_relations_df( force: bool = False, wide: bool = False, strict: bool = True, + version: Optional[str] = None, ) -> pd.DataFrame: """Get all relations from the OBO.""" - version = get_version(prefix) + if version is None: + version = get_version(prefix) path = prefix_cache_join(prefix, name="relations.tsv", version=version) @cached_df(path=path, dtype=str, force=force) @@ -118,9 +120,11 @@ def get_id_multirelations_mapping( *, use_tqdm: bool = False, force: bool = False, + version: Optional[str] = None, ) -> Mapping[str, List[Reference]]: """Get the OBO file and output a synonym dictionary.""" - version = get_version(prefix) + if version is None: + version = get_version(prefix) ontology = get_ontology(prefix, force=force, version=version) return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm) @@ -134,6 +138,7 @@ def get_relation_mapping( *, use_tqdm: bool = False, force: bool = False, + version: Optional[str] = None, ) -> Mapping[str, str]: """Get relations from identifiers in the source prefix to target prefix with the given relation. @@ -147,7 +152,8 @@ def get_relation_mapping( >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi') >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id] """ - version = get_version(prefix) + if version is None: + version = get_version(prefix) ontology = get_ontology(prefix, force=force, version=version) return ontology.get_relation_mapping( relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm @@ -163,6 +169,7 @@ def get_relation( *, use_tqdm: bool = False, force: bool = False, + **kwargs, ) -> Optional[str]: """Get the target identifier corresponding to the given relationship from the source prefix/identifier pair. @@ -181,6 +188,7 @@ def get_relation( target_prefix=target_prefix, use_tqdm=use_tqdm, force=force, + **kwargs, ) return relation_mapping.get(source_identifier) diff --git a/src/pyobo/api/species.py b/src/pyobo/api/species.py index e0ffbf1d..5c2d2222 100644 --- a/src/pyobo/api/species.py +++ b/src/pyobo/api/species.py @@ -22,13 +22,13 @@ @wrap_norm_prefix -def get_species(prefix: str, identifier: str) -> Optional[str]: +def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]: """Get the species.""" if prefix == "uniprot": raise NotImplementedError try: - id_species = get_id_species_mapping(prefix) + id_species = get_id_species_mapping(prefix, version=version) except NoBuild: logger.warning("unable to look up species for prefix %s", prefix) return None @@ -37,7 +37,7 @@ def get_species(prefix: str, identifier: str) -> Optional[str]: logger.warning("no results produced for prefix %s", prefix) return None - primary_id = get_primary_identifier(prefix, identifier) + primary_id = get_primary_identifier(prefix, identifier, version=version) return id_species.get(primary_id) diff --git a/src/pyobo/api/typedefs.py b/src/pyobo/api/typedefs.py index 39421ff2..6d390672 100644 --- a/src/pyobo/api/typedefs.py +++ b/src/pyobo/api/typedefs.py @@ -4,6 +4,7 @@ import logging from functools import lru_cache +from typing import Optional import pandas as pd @@ -22,9 +23,12 @@ @lru_cache() @wrap_norm_prefix -def get_typedef_df(prefix: str, force: bool = False) -> pd.DataFrame: +def get_typedef_df( + prefix: str, *, force: bool = False, version: Optional[str] = None +) -> pd.DataFrame: """Get an identifier to name mapping for the typedefs in an OBO file.""" - version = get_version(prefix) + if version is None: + version = get_version(prefix) path = prefix_cache_join(prefix, name="typedefs.tsv", version=version) @cached_df(path=path, dtype=str, force=force) diff --git a/src/pyobo/api/xrefs.py b/src/pyobo/api/xrefs.py index e2cb2bec..9bfcd2e0 100644 --- a/src/pyobo/api/xrefs.py +++ b/src/pyobo/api/xrefs.py @@ -30,9 +30,16 @@ @wrap_norm_prefix -def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False) -> Optional[str]: +def get_xref( + prefix: str, + identifier: str, + new_prefix: str, + *, + flip: bool = False, + version: Optional[str] = None, +) -> Optional[str]: """Get the xref with the new prefix if a direct path exists.""" - filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip) + filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version) return filtered_xrefs.get(identifier) @@ -41,8 +48,8 @@ def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False) def get_filtered_xrefs( prefix: str, xref_prefix: str, - flip: bool = False, *, + flip: bool = False, use_tqdm: bool = False, force: bool = False, strict: bool = False, diff --git a/src/pyobo/aws.py b/src/pyobo/aws.py index d552782c..ac9471fc 100644 --- a/src/pyobo/aws.py +++ b/src/pyobo/aws.py @@ -77,14 +77,19 @@ def upload_artifacts( upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client) -def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None): +def upload_artifacts_for_prefix( + *, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None +): """Upload compiled parts for the given prefix to AWS.""" if s3_client is None: s3_client = boto3.client("s3") + if version is None: + version = get_version(prefix) + logger.info("[%s] getting id->name mapping", prefix) get_id_name_mapping(prefix) - id_name_path = prefix_cache_join(prefix, name="names.tsv", version=get_version(prefix)) + id_name_path = prefix_cache_join(prefix, name="names.tsv", version=version) if not id_name_path.exists(): raise FileNotFoundError id_name_key = os.path.join(prefix, "cache", "names.tsv") @@ -93,7 +98,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None): logger.info("[%s] getting id->synonyms mapping", prefix) get_id_synonyms_mapping(prefix) - id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=get_version(prefix)) + id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=version) if not id_synonyms_path.exists(): raise FileNotFoundError id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv") @@ -102,7 +107,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None): logger.info("[%s] getting xrefs", prefix) get_xrefs_df(prefix) - xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=get_version(prefix)) + xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version) if not xrefs_path.exists(): raise FileNotFoundError xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv") @@ -111,7 +116,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None): logger.info("[%s] getting relations", prefix) get_relations_df(prefix) - relations_path = prefix_cache_join(prefix, name="relations.tsv", version=get_version(prefix)) + relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version) if not relations_path.exists(): raise FileNotFoundError relations_key = os.path.join(prefix, "cache", "relations.tsv") @@ -120,7 +125,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None): logger.info("[%s] getting properties", prefix) get_properties_df(prefix) - properties_path = prefix_cache_join(prefix, name="properties.tsv", version=get_version(prefix)) + properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version) if not properties_path.exists(): raise FileNotFoundError properties_key = os.path.join(prefix, "cache", "properties.tsv") @@ -129,7 +134,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None): logger.info("[%s] getting alternative identifiers", prefix) get_id_to_alts(prefix) - alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=get_version(prefix)) + alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version) if not alts_path.exists(): raise FileNotFoundError alts_key = os.path.join(prefix, "cache", "alt_ids.tsv") diff --git a/src/pyobo/cli/lookup.py b/src/pyobo/cli/lookup.py index 876300cf..7d4746f0 100644 --- a/src/pyobo/cli/lookup.py +++ b/src/pyobo/cli/lookup.py @@ -76,9 +76,10 @@ def xrefs(prefix: str, target: str, force: bool, no_strict: bool, version: Optio @prefix_argument @verbose_option @force_option -def metadata(prefix: str, force: bool): +@version_option +def metadata(prefix: str, force: bool, version: Optional[str]): """Print the metadata for the given namespace.""" - metadata = get_metadata(prefix, force=force) + metadata = get_metadata(prefix, force=force, version=version) click.echo(json.dumps(metadata, indent=2))