Skip to content

Commit

Permalink
Add broad/narrow curation to interface (#158)
Browse files Browse the repository at this point in the history
- Add buttons for broad/narrow curation
- Regenerate VO-MeSH mappings
- Curate some of them
  • Loading branch information
cthoyt authored Sep 23, 2023
1 parent 28ac41e commit e738472
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 38 deletions.
69 changes: 34 additions & 35 deletions scripts/generate_vo_mesh_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import bioontologies
import gilda
import pyobo
import pyobo.gilda_utils
from bioontologies.obograph import Node
from tqdm import tqdm

from biomappings import PredictionTuple
Expand All @@ -14,39 +16,38 @@

def main():
"""Generate mappings from between VO and MeSH."""
mesh_grounder = pyobo.gilda_utils.get_grounder("mesh")
provenance = get_script_url(__file__)
graph = (
bioontologies.get_obograph_by_prefix(
"vo", check=False, json_path="/Users/cthoyt/Desktop/vo.json"
)
.guess("vo")
.standardize()
)
graph = bioontologies.get_obograph_by_prefix("vo", check=False).guess("vo").standardize()
rows = []
extracted_mesh = 0
for node in tqdm(graph.nodes, unit="node", unit_scale=True):
if not node.lbl or node.prefix != "vo":
if not node.name or node.prefix != "vo":
continue
if node.meta:
found_mesh = False
for p in node.meta.basicPropertyValues or []:
if p.pred_prefix == "rdfs" and p.pred_identifier == "seeAlso":
values = [value.strip().replace(" ", "") for value in p.val.strip().split(";")]
for p in node.meta.properties or []:
if not p.predicate:
continue
if p.predicate.curie == "rdfs:seeAlso":
values = [
value.strip().replace(" ", "") for value in p.value_raw.strip().split(";")
]
# print(node.luid, values)
for value in values:
# TODO this is place to extract oher mapping types
# TODO this is place to extract other mapping types
if not value.lower().startswith("mesh:"):
continue
mesh_id = value.split(":", 1)[1].strip()
mesh_name = pyobo.get_name("mesh", mesh_id)
if not mesh_name:
tqdm.write(f"No mesh name for vo:{node.luid} mapped to mesh:{mesh_id}")
tqdm.write(f"No mesh name for vo:{node.name} mapped to mesh:{mesh_id}")
continue
rows.append(
PredictionTuple(
"vo",
node.luid,
node.lbl,
node.prefix,
node.identifier,
node.name,
"skos:exactMatch",
"mesh",
mesh_id,
Expand All @@ -61,41 +62,39 @@ def main():
if found_mesh:
continue

_ground(node, rows, provenance)
_ground(mesh_grounder, node, rows, provenance)

append_prediction_tuples(rows)
print(f"extracted {extracted_mesh} mesh mappings. should be abount 65")
print(f"extracted {extracted_mesh} mesh mappings. should be about 65")


def _ground(node, rows, provenance):
texts = [node.lbl]
def _ground(grounder: gilda.Grounder, node: Node, rows, provenance):
texts = [node.name]
# VO doesn't store its synonyms using standard predicates,
# so look in IAO_0000118 (alternate label) or IAO_0000116 (editor note)
# with "synonym: " as the string prefix
if node.meta:
for p in node.meta.basicPropertyValues or []:
if p.pred_prefix == "iao" and p.pred_identifier == "0000118":
texts.append(p.val)
if (
p.pred_prefix == "iao"
and p.pred_identifier == "0000116"
and p.val.startswith("synonym:")
):
texts.append(p.val.removeprefix("synonym:").strip())
for p in node.meta.properties or []:
if not p.predicate:
continue
if p.predicate.curie == "iao:0000118":
texts.append(p.value_raw)
elif p.predicate.curie == "iao:0000116" and p.value_raw.startswith("synonym:"):
texts.append(p.value_raw.removeprefix("synonym:").strip())

for text in [node.lbl, *(s.val for s in node.synonyms)]:
for scored_match in gilda.ground(text, namespaces=["MESH"]):
for text in [node.name, *(s.value for s in node.synonyms)]:
for scored_match in grounder.ground(text):
rows.append(
PredictionTuple(
"vo",
node.luid,
node.lbl,
node.prefix,
node.identifier,
node.name,
"skos:exactMatch",
scored_match.term.db.lower(),
scored_match.term.id,
scored_match.term.entry_name,
"semapv:LexicalMatching",
scored_match.score,
round(scored_match.score, 2),
provenance,
)
)
Expand Down
3 changes: 3 additions & 0 deletions src/biomappings/resources/incorrect.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,7 @@ uberon UBERON:0012245 silk skos:exactMatch bto BTO:0002854 corn silk semapv:Manu
uberon UBERON:0022469 primary olfactory cortex skos:exactMatch mesh D066194 Olfactory Cortex semapv:ManualMappingCuration orcid:0000-0001-9439-5346
uberon UBERON:2001977 pad skos:exactMatch mesh D058729 Peripheral Arterial Disease semapv:ManualMappingCuration orcid:0000-0001-9439-5346
umls C0006142 Malignant neoplasm of breast skos:exactMatch mesh D001943 Breast Neoplasms semapv:ManualMappingCuration orcid:0000-0002-6601-2165
vo 0000189 colony forming unit skos:exactMatch mesh D013234 Stem Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/28ac41/scripts/generate_vo_mesh_mappings.py 0.54
vo 0004075 PBT skos:exactMatch mesh D001803 Blood Transfusion semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/4b2628/scripts/generate_vo_mesh_mappings.py 0.5555555555555556
vo 0004075 PBT skos:exactMatch mesh D013601 T-Lymphocytes semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/4b2628/scripts/generate_vo_mesh_mappings.py 0.5555555555555556
vo 0004075 PBT skos:exactMatch mesh D061766 Proton Therapy semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/4b2628/scripts/generate_vo_mesh_mappings.py 0.5555555555555556
Expand Down Expand Up @@ -1433,7 +1434,9 @@ vo 0010927 ORF skos:exactMatch mesh D004474 Ecthyma, Contagious semapv:ManualMap
vo 0010944 Eae skos:exactMatch mesh D004681 Encephalomyelitis, Autoimmune, Experimental semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/4b2628/scripts/generate_vo_mesh_mappings.py 0.5400948258091115
vo 0010971 SurA skos:exactMatch uberon UBERON:0003823 hindlimb zeugopod semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching mira 0.5209235209235209
vo 0010988 IroN skos:exactMatch chebi CHEBI:18248 iron atom semapv:ManualMappingCuration orcid:0000-0003-4423-4370
vo 0010988 IroN skos:exactMatch mesh D007501 Iron semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/28ac41/scripts/generate_vo_mesh_mappings.py 0.74
vo 0010997 IroN skos:exactMatch chebi CHEBI:18248 iron atom semapv:ManualMappingCuration orcid:0000-0003-4423-4370
vo 0010997 IroN skos:exactMatch mesh D007501 Iron semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/28ac41/scripts/generate_vo_mesh_mappings.py 0.74
vo 0011021 CP skos:exactMatch chebi CHEBI:3380 captopril semapv:ManualMappingCuration orcid:0000-0003-4423-4370
vo 0011021 CP skos:exactMatch hp HP:0100021 Cerebral palsy semapv:ManualMappingCuration orcid:0000-0003-4423-4370
vo 0011021 CP skos:exactMatch mesh D002547 Cerebral Palsy semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/4b2628/scripts/generate_vo_mesh_mappings.py 0.5555555555555556
Expand Down
Loading

0 comments on commit e738472

Please sign in to comment.