Add version keyword to all lookup functions #184

Merged 2 commits on Apr 18, 2024
21 changes: 13 additions & 8 deletions src/pyobo/api/alts.py
@@ -28,12 +28,15 @@

@lru_cache()
@wrap_norm_prefix
def get_id_to_alts(prefix: str, force: bool = False) -> Mapping[str, List[str]]:
def get_id_to_alts(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, List[str]]:
"""Get alternate identifiers."""
if prefix in NO_ALTS:
return {}

version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
header = [f"{prefix}_id", "alt_id"]

@@ -51,26 +54,28 @@ def _get_mapping() -> Mapping[str, List[str]]:

@lru_cache()
@wrap_norm_prefix
def get_alts_to_id(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_alts_to_id(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get alternative id to primary id mapping."""
return {
alt: primary
for primary, alts in get_id_to_alts(prefix, force=force).items()
for primary, alts in get_id_to_alts(prefix, force=force, version=version).items()
for alt in alts
}


def get_primary_curie(curie: str) -> Optional[str]:
def get_primary_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the primary curie for an entity."""
prefix, identifier = normalize_curie(curie)
primary_identifier = get_primary_identifier(prefix, identifier)
primary_identifier = get_primary_identifier(prefix, identifier, version=version)
if primary_identifier is not None:
return f"{prefix}:{primary_identifier}"
return None


@wrap_norm_prefix
def get_primary_identifier(prefix: str, identifier: str) -> str:
def get_primary_identifier(prefix: str, identifier: str, *, version: Optional[str] = None) -> str:
"""Get the primary identifier for an entity.
:param prefix: The name of the resource
@@ -82,7 +87,7 @@ def get_primary_identifier(prefix: str, identifier: str) -> str:
if prefix in NO_ALTS: # TODO later expand list to other namespaces with no alts
return identifier

alts_to_id = get_alts_to_id(prefix)
alts_to_id = get_alts_to_id(prefix, version=version)
if alts_to_id and identifier in alts_to_id:
return alts_to_id[identifier]
return identifier
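
With this change, alternate-identifier resolution can be pinned to one cached release instead of whatever get_version(prefix) returns at call time. A minimal usage sketch; the prefix, identifier, and version strings are illustrative, not taken from this PR:

>>> from pyobo.api.alts import get_primary_curie, get_primary_identifier
>>> # resolve against one pinned release rather than the latest cached one
>>> get_primary_identifier("go", "0001071", version="2024-01-17")
>>> get_primary_curie("go:0001071", version="2024-01-17")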
14 changes: 9 additions & 5 deletions src/pyobo/api/hierarchy.py
@@ -13,6 +13,7 @@
from .relations import get_filtered_relations_df
from ..identifier_utils import wrap_norm_prefix
from ..struct import TypeDef, has_member, is_a, part_of
from ..struct.reference import Reference

__all__ = [
"get_hierarchy",
@@ -24,7 +25,6 @@
"get_children",
]

from ..struct.reference import Reference

logger = logging.getLogger(__name__)

@@ -154,14 +154,16 @@ def _get_hierarchy_helper(
return rv


def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
def is_descendent(
prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
) -> bool:
"""Check that the first identifier has the second as a descendent.
Check that go:0070246 ! natural killer cell apoptotic process is a
descendant of go:0006915 ! apoptotic process::
>>> assert is_descendent('go', '0070246', 'go', '0006915')
"""
descendants = get_descendants(ancestor_prefix, ancestor_identifier)
descendants = get_descendants(ancestor_prefix, ancestor_identifier, version=version)
return descendants is not None and f"{prefix}:{identifier}" in descendants


@@ -224,13 +226,15 @@ def get_children(
return set(hierarchy.predecessors(curie))


def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bool:
def has_ancestor(
prefix, identifier, ancestor_prefix, ancestor_identifier, *, version: Optional[str] = None
) -> bool:
"""Check that the first identifier has the second as an ancestor.
Check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process::
>>> assert has_ancestor('go', '0006915', 'go', '0008219')
"""
ancestors = get_ancestors(prefix, identifier)
ancestors = get_ancestors(prefix, identifier, version=version)
return ancestors is not None and f"{ancestor_prefix}:{ancestor_identifier}" in ancestors


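The same pinning applies to the hierarchy checks above. A hedged sketch that extends the existing doctests with an illustrative GO version string:

>>> from pyobo.api.hierarchy import has_ancestor, is_descendent
>>> # same assertions as the docstrings above, resolved against one pinned GO release
>>> assert is_descendent('go', '0070246', 'go', '0006915', version='2024-01-17')
>>> assert has_ancestor('go', '0006915', 'go', '0008219', version='2024-01-17')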
9 changes: 6 additions & 3 deletions src/pyobo/api/metadata.py
@@ -4,7 +4,7 @@

import logging
from functools import lru_cache
from typing import Mapping
from typing import Mapping, Optional

from .utils import get_version
from ..getters import get_ontology
@@ -21,9 +21,12 @@

@lru_cache()
@wrap_norm_prefix
def get_metadata(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_metadata(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get metadata for the ontology."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="metadata.json", version=version)

@cached_json(path=path, force=force)
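
A short usage sketch of the new fallback behavior; the ChEBI version string is illustrative and not taken from this diff:

>>> from pyobo.api.metadata import get_metadata
>>> get_metadata("chebi")                  # falls back to get_version("chebi")
>>> get_metadata("chebi", version="231")   # reads metadata.json for the pinned release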
14 changes: 8 additions & 6 deletions src/pyobo/api/names.py
@@ -69,7 +69,7 @@ def _help_get(
NO_BUILD_PREFIXES.add(prefix)
return None

primary_id = get_primary_identifier(prefix, identifier)
primary_id = get_primary_identifier(prefix, identifier, version=version)
return mapping.get(primary_id)


@@ -82,7 +82,7 @@ def get_name(prefix: str, identifier: str, *, version: Optional[str] = None) ->
@lru_cache()
@wrap_norm_prefix
def get_ids(
prefix: str, force: bool = False, strict: bool = False, version: Optional[str] = None
prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None
) -> Set[str]:
"""Get the set of identifiers for this prefix."""
if prefix == "ncbigene":
@@ -150,16 +150,18 @@ def _get_id_name_mapping() -> Mapping[str, str]:

@lru_cache()
@wrap_norm_prefix
def get_name_id_mapping(prefix: str, force: bool = False) -> Mapping[str, str]:
def get_name_id_mapping(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> Mapping[str, str]:
"""Get a name to identifier mapping for the OBO file."""
id_name = get_id_name_mapping(prefix=prefix, force=force)
id_name = get_id_name_mapping(prefix=prefix, force=force, version=version)
return {v: k for k, v in id_name.items()}


@wrap_norm_prefix
def get_definition(prefix: str, identifier: str) -> Optional[str]:
def get_definition(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the definition for an entity."""
return _help_get(get_id_definition_mapping, prefix, identifier)
return _help_get(get_id_definition_mapping, prefix, identifier, version=version)


def get_id_definition_mapping(
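
Name and definition lookups follow the same pattern; a sketch with illustrative arguments:

>>> from pyobo.api.names import get_definition, get_name_id_mapping
>>> get_definition("go", "0006915", version="2024-01-17")
>>> name_to_id = get_name_id_mapping("go", version="2024-01-17")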
14 changes: 11 additions & 3 deletions src/pyobo/api/relations.py
@@ -48,9 +48,11 @@ def get_relations_df(
force: bool = False,
wide: bool = False,
strict: bool = True,
version: Optional[str] = None,
) -> pd.DataFrame:
"""Get all relations from the OBO."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="relations.tsv", version=version)

@cached_df(path=path, dtype=str, force=force)
@@ -118,9 +120,11 @@ def get_id_multirelations_mapping(
*,
use_tqdm: bool = False,
force: bool = False,
version: Optional[str] = None,
) -> Mapping[str, List[Reference]]:
"""Get the OBO file and output a synonym dictionary."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
ontology = get_ontology(prefix, force=force, version=version)
return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm)

@@ -134,6 +138,7 @@ def get_relation_mapping(
*,
use_tqdm: bool = False,
force: bool = False,
version: Optional[str] = None,
) -> Mapping[str, str]:
"""Get relations from identifiers in the source prefix to target prefix with the given relation.
@@ -147,7 +152,8 @@
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi')
>>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
"""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
ontology = get_ontology(prefix, force=force, version=version)
return ontology.get_relation_mapping(
relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
@@ -163,6 +169,7 @@ def get_relation(
*,
use_tqdm: bool = False,
force: bool = False,
**kwargs,
) -> Optional[str]:
"""Get the target identifier corresponding to the given relationship from the source prefix/identifier pair.
Expand All @@ -181,6 +188,7 @@ def get_relation(
target_prefix=target_prefix,
use_tqdm=use_tqdm,
force=force,
**kwargs,
)
return relation_mapping.get(source_identifier)

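The orthology example from the docstring above can now be pinned as well. A hedged sketch; the HGNC version string is purely illustrative:

>>> import pyobo
>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping(
...     'hgnc', 'ro:HOM0000017', 'mgi', version='2024-04-01'
... )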
6 changes: 3 additions & 3 deletions src/pyobo/api/species.py
@@ -22,13 +22,13 @@


@wrap_norm_prefix
def get_species(prefix: str, identifier: str) -> Optional[str]:
def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) -> Optional[str]:
"""Get the species."""
if prefix == "uniprot":
raise NotImplementedError

try:
id_species = get_id_species_mapping(prefix)
id_species = get_id_species_mapping(prefix, version=version)
except NoBuild:
logger.warning("unable to look up species for prefix %s", prefix)
return None
@@ -37,7 +37,7 @@ def get_species(prefix: str, identifier: str) -> Optional[str]:
logger.warning("no results produced for prefix %s", prefix)
return None

primary_id = get_primary_identifier(prefix, identifier)
primary_id = get_primary_identifier(prefix, identifier, version=version)
return id_species.get(primary_id)


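A usage sketch for the species lookup; the identifier and version strings are illustrative:

>>> from pyobo.api.species import get_species
>>> # returns an NCBITaxon identifier, resolved against the pinned release
>>> get_species("hgnc", "6893", version="2024-04-01")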
8 changes: 6 additions & 2 deletions src/pyobo/api/typedefs.py
@@ -4,6 +4,7 @@

import logging
from functools import lru_cache
from typing import Optional

import pandas as pd

@@ -22,9 +23,12 @@

@lru_cache()
@wrap_norm_prefix
def get_typedef_df(prefix: str, force: bool = False) -> pd.DataFrame:
def get_typedef_df(
prefix: str, *, force: bool = False, version: Optional[str] = None
) -> pd.DataFrame:
"""Get an identifier to name mapping for the typedefs in an OBO file."""
version = get_version(prefix)
if version is None:
version = get_version(prefix)
path = prefix_cache_join(prefix, name="typedefs.tsv", version=version)

@cached_df(path=path, dtype=str, force=force)
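
A sketch of the typedef lookup with the new keyword; the GO version string is illustrative:

>>> from pyobo.api.typedefs import get_typedef_df
>>> # falls back to get_version("go") when version is omitted
>>> df = get_typedef_df("go", version="2024-01-17")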
13 changes: 10 additions & 3 deletions src/pyobo/api/xrefs.py
@@ -30,9 +30,16 @@


@wrap_norm_prefix
def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False) -> Optional[str]:
def get_xref(
prefix: str,
identifier: str,
new_prefix: str,
*,
flip: bool = False,
version: Optional[str] = None,
) -> Optional[str]:
"""Get the xref with the new prefix if a direct path exists."""
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip)
filtered_xrefs = get_filtered_xrefs(prefix, new_prefix, flip=flip, version=version)
return filtered_xrefs.get(identifier)


@@ -41,8 +48,8 @@ def get_xref(prefix: str, identifier: str, new_prefix: str, flip: bool = False)
def get_filtered_xrefs(
prefix: str,
xref_prefix: str,
flip: bool = False,
*,
flip: bool = False,
use_tqdm: bool = False,
force: bool = False,
strict: bool = False,
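
Cross-reference lookups thread the keyword through to get_filtered_xrefs; a sketch with illustrative prefixes, identifier, and version:

>>> from pyobo.api.xrefs import get_filtered_xrefs, get_xref
>>> get_xref("chebi", "28112", "mesh", version="231")
>>> chebi_to_mesh = get_filtered_xrefs("chebi", "mesh", version="231")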
19 changes: 12 additions & 7 deletions src/pyobo/aws.py
@@ -77,14 +77,19 @@ def upload_artifacts(
upload_artifacts_for_prefix(prefix=prefix, bucket=bucket, s3_client=s3_client)


def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):
def upload_artifacts_for_prefix(
*, prefix: str, bucket: str, s3_client=None, version: Optional[str] = None
):
"""Upload compiled parts for the given prefix to AWS."""
if s3_client is None:
s3_client = boto3.client("s3")

if version is None:
version = get_version(prefix)

logger.info("[%s] getting id->name mapping", prefix)
get_id_name_mapping(prefix)
id_name_path = prefix_cache_join(prefix, name="names.tsv", version=get_version(prefix))
id_name_path = prefix_cache_join(prefix, name="names.tsv", version=version)
if not id_name_path.exists():
raise FileNotFoundError
id_name_key = os.path.join(prefix, "cache", "names.tsv")
@@ -93,7 +98,7 @@ def upload_artifacts_for_prefix(*, prefix: str, bucket: str, s3_client=None):

logger.info("[%s] getting id->synonyms mapping", prefix)
get_id_synonyms_mapping(prefix)
id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=get_version(prefix))
id_synonyms_path = prefix_cache_join(prefix, name="synonyms.tsv", version=version)
if not id_synonyms_path.exists():
raise FileNotFoundError
id_synonyms_key = os.path.join(prefix, "cache", "synonyms.tsv")
@@ -102,7 +107,7 @@

logger.info("[%s] getting xrefs", prefix)
get_xrefs_df(prefix)
xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=get_version(prefix))
xrefs_path = prefix_cache_join(prefix, name="xrefs.tsv", version=version)
if not xrefs_path.exists():
raise FileNotFoundError
xrefs_key = os.path.join(prefix, "cache", "xrefs.tsv")
@@ -111,7 +116,7 @@

logger.info("[%s] getting relations", prefix)
get_relations_df(prefix)
relations_path = prefix_cache_join(prefix, name="relations.tsv", version=get_version(prefix))
relations_path = prefix_cache_join(prefix, name="relations.tsv", version=version)
if not relations_path.exists():
raise FileNotFoundError
relations_key = os.path.join(prefix, "cache", "relations.tsv")
@@ -120,7 +125,7 @@

logger.info("[%s] getting properties", prefix)
get_properties_df(prefix)
properties_path = prefix_cache_join(prefix, name="properties.tsv", version=get_version(prefix))
properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
if not properties_path.exists():
raise FileNotFoundError
properties_key = os.path.join(prefix, "cache", "properties.tsv")
@@ -129,7 +134,7 @@

logger.info("[%s] getting alternative identifiers", prefix)
get_id_to_alts(prefix)
alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=get_version(prefix))
alts_path = prefix_cache_join(prefix, name="alt_ids.tsv", version=version)
if not alts_path.exists():
raise FileNotFoundError
alts_key = os.path.join(prefix, "cache", "alt_ids.tsv")
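
With the new keyword, one upload run can be pinned so every cached artifact comes from the same release. A hedged sketch; the bucket name and version string are illustrative:

>>> from pyobo.aws import upload_artifacts_for_prefix
>>> upload_artifacts_for_prefix(prefix="doid", bucket="my-obo-bucket", version="2024-03-28")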
5 changes: 3 additions & 2 deletions src/pyobo/cli/lookup.py
@@ -76,9 +76,10 @@ def xrefs(prefix: str, target: str, force: bool, no_strict: bool, version: Optio
@prefix_argument
@verbose_option
@force_option
def metadata(prefix: str, force: bool):
@version_option
def metadata(prefix: str, force: bool, version: Optional[str]):
"""Print the metadata for the given namespace."""
metadata = get_metadata(prefix, force=force)
metadata = get_metadata(prefix, force=force, version=version)
click.echo(json.dumps(metadata, indent=2))


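The CLI command can be exercised the same way through click's test runner. A sketch that assumes version_option exposes a --version flag; the flag name and argument values are assumptions, since neither appears in this diff:

>>> from click.testing import CliRunner
>>> from pyobo.cli.lookup import metadata
>>> result = CliRunner().invoke(metadata, ["doid", "--version", "2024-03-28"])  # flag name assumed
>>> print(result.output)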