Skip to content

Commit

Permalink
Add predicted WikiPathways orthologs (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored May 23, 2022
1 parent 38b417a commit 223227a
Show file tree
Hide file tree
Showing 2 changed files with 1,743 additions and 0 deletions.
57 changes: 57 additions & 0 deletions scripts/generate_wikipathways_orthologs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-

"""Generate orthologous relations between WikiPathways."""

import itertools as itt
from typing import Iterable

import pyobo
from gilda.process import normalize
from tqdm import tqdm

from biomappings.resources import PredictionTuple, append_prediction_tuples
from biomappings.utils import get_script_url


def _lexical_exact_match(name1: str, name2: str) -> bool:
    """Check whether two names are identical after Gilda normalization.

    :param name1: The first name to compare.
    :param name2: The second name to compare.
    :returns: True if both names normalize to the same string.
    """
    left = normalize(name1)
    right = normalize(name2)
    return left == right


def iterate_orthologous_lexical_matches(prefix: str = "wikipathways") -> Iterable[PredictionTuple]:
    """Generate orthologous relations between lexical matches from different species.

    Every unordered pair of entries in the resource is compared. A pair is
    emitted as a prediction when the two entries are annotated with different
    species and their names are equal after normalization.

    :param prefix: The prefix of the resource whose entries are compared
        against each other.
    :yields: One prediction tuple per cross-species pair with matching names,
        using the relation ``RO:HOM0000017``, the match type ``lexical``, and
        a confidence of 0.95.
    """
    names = pyobo.get_id_name_mapping(prefix)
    species = pyobo.get_id_species_mapping(prefix)
    provenance = get_script_url(__file__)

    # An entry without a species annotation can never be shown to come from a
    # different species than its partner, so drop it up front rather than
    # failing with a KeyError part-way through the scan.
    entries = sorted(
        (identifier, name) for identifier, name in names.items() if identifier in species
    )

    count = 0
    # combinations() over a sorted input of unique items already yields the
    # pairs in sorted order, so they are streamed lazily here. Sorting the
    # iterator again would hold every pair in memory and exhaust the progress
    # bar before any comparison had run.
    it = tqdm(
        itt.combinations(entries, 2),
        unit_scale=True,
        unit="pair",
        # n choose 2 is always a whole number; keep it an int.
        total=len(entries) * (len(entries) - 1) // 2,
    )
    for (source_id, source_name), (target_id, target_name) in it:
        # Only pairs that span two different species are candidates.
        if species[source_id] == species[target_id]:
            continue
        if not _lexical_exact_match(source_name, target_name):
            continue
        count += 1
        yield PredictionTuple(
            prefix,
            source_id,
            source_name,
            "RO:HOM0000017",
            prefix,
            target_id,
            target_name,
            "lexical",
            0.95,
            provenance,
        )
    # tqdm.write keeps the summary from clobbering the progress bar.
    tqdm.write(f"Identified {count:,} orthologs")


if __name__ == "__main__":
    # Run as a script: generate the predictions and append them to the
    # biomappings predictions resource.
    predictions = iterate_orthologous_lexical_matches()
    append_prediction_tuples(predictions)
Loading

0 comments on commit 223227a

Please sign in to comment.