Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Calculate ortholog relationships in Reactome #38

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
25 changes: 25 additions & 0 deletions scripts/generate_reactome_orthologs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-

"""Generate orthologous relations between Reactome pathways."""

from collections import defaultdict
from typing import Iterable, List, Mapping

from biomappings.orthologs import iterate_orthologs
from biomappings.resources import MappingTuple, append_true_mapping_tuples


def _get_species_to_identifiers(names: Mapping[str, str]) -> Mapping[str, List[str]]:
    """Group the keys of ``names`` by the text after their final hyphen.

    :param names: A mapping whose keys are identifiers. Only the keys are read.
    :returns: A mapping from each key's last hyphen-delimited segment to the list
        of keys sharing that segment, in the iteration order of ``names``. A key
        that contains no hyphen is grouped under itself.
    """
    grouped = defaultdict(list)
    for key in names:
        # rpartition leaves the whole key in the tail slot when there is no hyphen,
        # which is the same result as taking the last element of a split on "-".
        _, _, suffix = key.rpartition("-")
        grouped[suffix].append(key)
    return grouped


def iterate_orthologous_lexical_matches() -> Iterable[MappingTuple]:
    """Yield the mapping tuples produced for the ``reactome`` prefix.

    This delegates entirely to ``iterate_orthologs``, passing
    ``_get_species_to_identifiers`` as the function that groups identifiers.
    """
    reactome_orthologs = iterate_orthologs("reactome", _get_species_to_identifiers)
    yield from reactome_orthologs


if __name__ == "__main__":
    # Only when run as a script: build the lazy stream of Reactome ortholog tuples
    # and hand it to append_true_mapping_tuples.
    reactome_ortholog_tuples = iterate_orthologous_lexical_matches()
    append_true_mapping_tuples(reactome_ortholog_tuples)
47 changes: 47 additions & 0 deletions src/biomappings/orthologs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-

"""Utilities for generating ortholog predictions."""

import itertools as itt
import logging
from typing import Callable, Iterable, List, Mapping

import pyobo
from tqdm import tqdm

from .resources import MappingTuple
from .utils import get_script_url

__all__ = [
"iterate_orthologs",
]

logger = logging.getLogger(__name__)


def iterate_orthologs(
    prefix: str,
    f: Callable[[Mapping[str, str]], Mapping[str, List[str]]],
) -> Iterable[MappingTuple]:
    """Yield a mapping tuple for every increasing pair of identifiers in each group.

    :param prefix: The prefix passed to ``pyobo.get_id_name_mapping`` to load the
        identifier-to-name mapping. It is also placed in both prefix positions of
        every yielded tuple.
    :param f: A function that receives the identifier-to-name mapping and returns
        a mapping whose values are lists of identifiers to be paired with one
        another. The keys of the returned mapping are not used.
    :yields: One ``MappingTuple`` per pair ``(source_id, target_id)`` taken from the
        same list with ``source_id < target_id``. Each tuple carries the names
        looked up for both identifiers, the literal strings ``"RO:HOM0000017"``
        and ``"calculated"``, and the URL returned by ``get_script_url`` for
        this module's file.

    The total number of yielded pairs is logged at INFO level once every group
    has been processed, i.e. only if the generator is consumed to the end.
    """
    provenance = get_script_url(__file__)
    id_to_name = pyobo.get_id_name_mapping(prefix)
    groups = f(id_to_name)
    count = 0
    for group in tqdm(groups.values()):
        for source_id, target_id in itt.product(group, repeat=2):
            # Keep only strictly increasing pairs so an identifier is never paired
            # with itself and a pair is not emitted in both orders.
            if source_id < target_id:
                count += 1
                mapping = MappingTuple(
                    prefix,
                    source_id,
                    id_to_name[source_id],
                    "RO:HOM0000017",
                    prefix,
                    target_id,
                    id_to_name[target_id],
                    "calculated",
                    provenance,
                )
                yield mapping
    logger.info(f"[{prefix}] Identified {count} orthologs")
Loading