Skip to content

Commit

Permalink
Add additional MeSH mappings
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Jul 3, 2023
1 parent 746fde2 commit b250ea2
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 8 deletions.
42 changes: 42 additions & 0 deletions scripts/generate_clo_mesh_mappings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-

"""Generate mappings to CLO from to MeSH."""

import click
from more_click import verbose_option
from semra.sources.clo import get_clo_mappings

from biomappings.gilda_utils import append_gilda_predictions
from biomappings.mapping_graph import get_filter_from_semra
from biomappings.utils import get_script_url


@click.command()
@verbose_option
def main():
    """Generate CLO-MeSH mappings."""
    # NOTE: the docstring above is also what click prints for ``--help``,
    # so it is kept short. Besides MeSH, predictions are also generated
    # against EFO, BTO, and Cellosaurus (see the target list below).
    #
    # The mappings shipped with CLO itself are converted into a custom
    # filter and handed to the prediction routine -- presumably so that
    # pairs CLO already asserts are not predicted again (confirm against
    # ``append_gilda_predictions``).
    append_gilda_predictions(
        "clo",
        ["mesh", "efo", "bto", "cellosaurus"],
        # Link back to this script so each prediction records where it came from
        provenance=get_script_url(__file__),
        custom_filter=get_filter_from_semra(get_clo_mappings()),
    )


if __name__ == "__main__":
    main()
5 changes: 2 additions & 3 deletions src/biomappings/gilda_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import logging
from collections import defaultdict
from typing import Iterable, Mapping, Optional, Tuple, Union
from typing import Iterable, Optional, Tuple, Union

import bioregistry
import pyobo
Expand All @@ -13,11 +13,10 @@
from pyobo.gilda_utils import get_grounder, iter_gilda_prediction_tuples

from biomappings.resources import PredictionTuple, append_prediction_tuples
from biomappings.utils import CMapping

logger = logging.getLogger(__name__)

CMapping = Mapping[str, Mapping[str, Mapping[str, str]]]


def append_gilda_predictions(
prefix: str,
Expand Down
25 changes: 21 additions & 4 deletions src/biomappings/mapping_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,32 @@

import itertools as itt
from collections import defaultdict
from typing import DefaultDict, Dict, Iterable, Mapping, Optional
from typing import TYPE_CHECKING, DefaultDict, Dict, Iterable, List, Optional

import networkx as nx
import pyobo

from biomappings.utils import CMapping

def get_custom_filter(
prefix: str, targets: Iterable[str]
) -> Mapping[str, Mapping[str, Mapping[str, str]]]:
if TYPE_CHECKING:
import semra

__all__ = [
"get_custom_filter",
"get_filter_from_semra",
"mutual_mapping_graph",
]


def get_filter_from_semra(mappings: List["semra.Mapping"]) -> CMapping:
    """Index SeMRA mappings as a nested custom filter dictionary.

    :param mappings: Mappings whose subject (``s``) and object (``o``)
        each expose a ``prefix`` and an ``identifier``.
    :returns: A nested dictionary keyed by subject prefix, then object
        prefix, then subject identifier, whose values are object
        identifiers. The containers are ``defaultdict`` instances, so
        indexing a missing prefix creates an empty entry instead of raising.

    If the same subject identifier occurs more than once for a given pair
    of prefixes, the mapping appearing last in ``mappings`` wins.
    """
    nested = defaultdict(lambda: defaultdict(dict))
    for entry in mappings:
        subject, target = entry.s, entry.o
        by_identifier = nested[subject.prefix][target.prefix]
        by_identifier[subject.identifier] = target.identifier
    return nested


def get_custom_filter(prefix: str, targets: Iterable[str]) -> CMapping:
"""Get a custom filter dictionary induced over the mutual mapping graph with all target prefixes.
:param prefix: The source prefix
Expand Down
4 changes: 4 additions & 0 deletions src/biomappings/resources/incorrect.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,12 @@ cl CL:2000004 pituitary gland cell skos:exactMatch mesh D010902 Pituitary Gland
cl CL:2000021 sebaceous gland cell skos:exactMatch mesh D012627 Sebaceous Glands semapv:ManualMappingCuration orcid:0000-0001-9439-5346
cl CL:2000022 cardiac septum cell skos:exactMatch mesh D006346 Heart Septum semapv:ManualMappingCuration orcid:0000-0001-9439-5346
cl CL:2000030 hypothalamus cell skos:exactMatch mesh D007031 Hypothalamus semapv:ManualMappingCuration orcid:0000-0001-9439-5346
clo 0001922 BE2 cell skos:exactMatch mesh D016175 B-Lymphocyte Subsets semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.502
clo 0002596 COS-1 cell skos:exactMatch mesh D019556 COS Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0002597 COS-7 cell skos:exactMatch mesh D019556 COS Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0002941 EPI cell skos:exactMatch mesh D015251 Epirubicin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.556
clo 0003413 G cell skos:exactMatch mesh D019863 Gastrin-Secreting Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0009279 TC-1 cell skos:exactMatch mesh D013602 T-Lymphocytes, Cytotoxic semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0037163 Ishikawa cell skos:exactMatch cellosaurus CVCL_D199 Ishikawa 3-H-12 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0037237 293-derived cell skos:exactMatch cellosaurus CVCL_0045 HEK293 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0037261 3T3-derived cell skos:exactMatch mesh D016475 3T3 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
Expand Down
14 changes: 14 additions & 0 deletions src/biomappings/resources/mappings.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -3011,11 +3011,25 @@ cl CL:0008002 skeletal muscle fiber skos:exactMatch mesh D018485 Muscle Fibers,
cl CL:0010003 epithelial cell of alveolus of lung skos:exactMatch mesh D056809 Alveolar Epithelial Cells semapv:ManualMappingCuration orcid:0000-0001-9439-5346
cl CL:0010017 zygote skos:exactMatch mesh D015053 Zygote semapv:ManualMappingCuration orcid:0000-0001-9439-5346
cl CL:0010021 cardiac myoblast skos:exactMatch mesh D032386 Myoblasts, Cardiac semapv:ManualMappingCuration orcid:0000-0001-9439-5346
clo 0000031 cell line skos:exactMatch mesh D002460 Cell Line semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.762
clo 0001230 HEK293 skos:exactMatch cellosaurus CVCL_0045 HEK293 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0001345 3T3 cell skos:exactMatch mesh D016475 3T3 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0001601 A549 cell skos:exactMatch mesh D000072283 A549 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0002585 COR-L23 cell skos:exactMatch efo 0002142 CORL23 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0003704 Hep G2 cell skos:exactMatch mesh D056945 Hep G2 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0007606 MCF7 cell skos:exactMatch mesh D061986 MCF-7 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0007634 MDA-MB-231 cell skos:exactMatch efo 0001209 MDAMB231 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0007634 MDA-MB-231 cell skos:exactMatch mesh D000092302 MDA-MB-231 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0007646 MDCK cell skos:exactMatch mesh D061985 Madin Darby Canine Kidney Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0008395 PC-3 cell skos:exactMatch mesh D000078722 PC-3 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0008753 RAW 264.7 cell skos:exactMatch mesh D000067996 RAW 264.7 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0009348 THP-1 cell skos:exactMatch mesh D000074084 THP-1 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0036932 Hybridoma skos:exactMatch mesh D006825 Hybridomas semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.556
clo 0036936 Somatic cell hybrid skos:exactMatch mesh D006822 Hybrid Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0037230 Ishikawa 3-H-12 cell skos:exactMatch cellosaurus CVCL_D199 Ishikawa 3-H-12 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0037291 MDAMB231 cell skos:exactMatch mesh D000092302 MDA-MB-231 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0037300 BALL-1 cell skos:exactMatch cellosaurus CVCL_1075 BALL-1 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0037339 tissue donor skos:exactMatch mesh D014019 Tissue Donors semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.54
doid DOID:0040002 aspirin allergy skos:exactMatch umls C0004058 Allergy to aspirin semapv:ManualMappingCuration orcid:0000-0003-4423-4370
doid DOID:0040004 amoxicillin allergy skos:exactMatch umls C0571417 Allergy to amoxicillin semapv:ManualMappingCuration orcid:0000-0003-4423-4370
doid DOID:0040005 ceftriaxone allergy skos:exactMatch umls C0571463 Allergy to ceftriaxone semapv:ManualMappingCuration orcid:0000-0003-4423-4370
Expand Down
6 changes: 5 additions & 1 deletion src/biomappings/resources/predictions.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -9459,7 +9459,11 @@ chebi CHEBI:9954 Verbenalin skos:exactMatch mesh C000511 cornin iridoid semapv:L
chebi CHEBI:9955 (R)-(+)-verbenone skos:exactMatch mesh C052875 verbenone semapv:LexicalMatching 0.95 generate_chebi_mesh_mappings.py
clo 0007050 K 562 cell skos:exactMatch cellosaurus CVCL_0004 K-562 semapv:UnspecifiedMatching 0.8 clo
clo 0007059 K-562 cell skos:exactMatch cellosaurus CVCL_0004 K-562 semapv:UnspecifiedMatching 0.8 clo
clo 0007634 MDA-MB-231 cell skos:exactMatch efo 0001209 MDAMB231 semapv:UnspecifiedMatching 0.8 clo
clo 0007219 L929 cell skos:exactMatch mesh D007739 L Cells semapv:LexicalMatching 0.549 https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py
clo 0007220 L-929 cell skos:exactMatch mesh D007739 L Cells semapv:LexicalMatching 0.549 https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py
clo 0008987 SF-21 cell skos:exactMatch mesh D061987 Sf9 Cells semapv:LexicalMatching 0.549 https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py
clo 0008988 SF-9 cell skos:exactMatch mesh D061987 Sf9 Cells semapv:LexicalMatching 0.549 https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py
clo 0008989 Sf9 cell skos:exactMatch mesh D061987 Sf9 Cells semapv:LexicalMatching 0.549 https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py
clo 0009034 SK-BR-3 cell skos:exactMatch efo 0001236 SKBR3 semapv:UnspecifiedMatching 0.8 clo
clo 0009040 SK-MEL-1 cell skos:exactMatch efo 0002332 SKMEL1 semapv:UnspecifiedMatching 0.8 clo
clo 0037291 MDAMB231 cell skos:exactMatch efo 0001209 MDAMB231 semapv:UnspecifiedMatching 0.8 clo
Expand Down
4 changes: 4 additions & 0 deletions src/biomappings/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,7 @@ def get_curie(prefix: str, identifier: str) -> str:
if prefix_norm is None or identifier_norm is None:
raise ValueError(f"could not normalize {prefix}:{identifier}")
return f"{prefix_norm}:{identifier_norm}"


#: A filter 3-dictionary of source prefix to target prefix to source identifier to target identifier,
#: e.g., ``{"clo": {"mesh": {"0001601": "D000072283"}}}``
CMapping = Mapping[str, Mapping[str, Mapping[str, str]]]

0 comments on commit b250ea2

Please sign in to comment.