Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable custom curation files #140

Merged
merged 5 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/biomappings/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""The biomappings CLI."""

import sys
from pathlib import Path

import click
from more_click import run_app
Expand All @@ -23,12 +24,25 @@ def main():
if get_git_hash() is not None:

@main.command()
@click.option("--path", type=click.Path(), help="A predictions TSV file path")
def web(path):
@click.option("--predictions-path", type=click.Path(), help="A predictions TSV file path")
@click.option("--positives-path", type=click.Path(), help="A positives curation TSV file path")
@click.option("--negatives-path", type=click.Path(), help="A negatives curation TSV file path")
@click.option("--unsure-path", type=click.Path(), help="An unsure curation TSV file path")
def web(
predictions_path: Path,
positives_path: Path,
negatives_path: Path,
unsure_path: Path,
):
"""Run the biomappings web app."""
from .wsgi import get_app

app = get_app(predictions_path=path)
app = get_app(
predictions_path=predictions_path,
positives_path=positives_path,
negatives_path=negatives_path,
unsure_path=unsure_path,
)
run_app(app, with_gunicorn=False)

@main.command()
Expand Down
88 changes: 54 additions & 34 deletions src/biomappings/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import csv
import itertools as itt
import os
from collections import defaultdict
from pathlib import Path
from typing import (
Expand Down Expand Up @@ -167,9 +166,9 @@ def target_curie(self) -> str:
return f"{self.target_prefix}:{self.target_identifier}"


def get_resource_file_path(fname) -> str:
def get_resource_file_path(fname) -> Path:
"""Get a resource by its file name."""
return os.path.join(RESOURCE_PATH, fname)
return RESOURCE_PATH.joinpath(fname)


def _load_table(fname) -> List[Dict[str, str]]:
Expand Down Expand Up @@ -211,87 +210,108 @@ def mapping_sort_key(prediction: Mapping[str, str]) -> Tuple[str, ...]:
TRUE_MAPPINGS_PATH = get_resource_file_path("mappings.tsv")


def load_mappings() -> List[Dict[str, str]]:
def load_mappings(*, path: Optional[Path] = None) -> List[Dict[str, str]]:
"""Load the mappings table."""
return _load_table(TRUE_MAPPINGS_PATH)
return _load_table(path or TRUE_MAPPINGS_PATH)


def append_true_mappings(m: Iterable[Mapping[str, str]], sort: bool = True) -> None:
def append_true_mappings(
m: Iterable[Mapping[str, str]],
*,
sort: bool = True,
path: Optional[Path] = None,
) -> None:
"""Append new lines to the mappings table."""
_write_helper(MAPPINGS_HEADER, m, TRUE_MAPPINGS_PATH, mode="a")
if path is None:
path = TRUE_MAPPINGS_PATH
_write_helper(MAPPINGS_HEADER, m, path, mode="a")
if sort:
lint_true_mappings()
lint_true_mappings(path=path)


def append_true_mapping_tuples(mappings: Iterable[MappingTuple]) -> None:
"""Append new lines to the mappings table."""
append_true_mappings(mapping.as_dict() for mapping in set(mappings))


def write_true_mappings(m: Iterable[Mapping[str, str]]) -> None:
def write_true_mappings(m: Iterable[Mapping[str, str]], *, path: Optional[Path] = None) -> None:
"""Write mappings to the true mappings file."""
_write_helper(MAPPINGS_HEADER, m, TRUE_MAPPINGS_PATH, mode="w")
_write_helper(MAPPINGS_HEADER, m, path or TRUE_MAPPINGS_PATH, mode="w")


def lint_true_mappings(*, standardize: bool = False) -> None:
def lint_true_mappings(*, standardize: bool = False, path: Optional[Path] = None) -> None:
"""Lint the true mappings file."""
mappings = load_mappings()
mappings = load_mappings(path=path)
mappings = _remove_redundant(mappings, MappingTuple, standardize=standardize)
write_true_mappings(sorted(mappings, key=mapping_sort_key))
write_true_mappings(sorted(mappings, key=mapping_sort_key), path=path)


FALSE_MAPPINGS_PATH = get_resource_file_path("incorrect.tsv")


def load_false_mappings() -> List[Dict[str, str]]:
def load_false_mappings(*, path: Optional[Path] = None) -> List[Dict[str, str]]:
"""Load the false mappings table."""
return _load_table(FALSE_MAPPINGS_PATH)
return _load_table(path or FALSE_MAPPINGS_PATH)


def append_false_mappings(m: Iterable[Mapping[str, str]], sort: bool = True) -> None:
def append_false_mappings(
m: Iterable[Mapping[str, str]],
*,
sort: bool = True,
path: Optional[Path] = None,
) -> None:
"""Append new lines to the false mappings table."""
_write_helper(MAPPINGS_HEADER, m, FALSE_MAPPINGS_PATH, mode="a")
if path is None:
path = FALSE_MAPPINGS_PATH
_write_helper(MAPPINGS_HEADER, m, path, mode="a")
if sort:
lint_false_mappings()
lint_false_mappings(path=path)


def write_false_mappings(m: Iterable[Mapping[str, str]]) -> None:
def write_false_mappings(m: Iterable[Mapping[str, str]], *, path: Optional[Path] = None) -> None:
"""Write mappings to the false mappings file."""
_write_helper(MAPPINGS_HEADER, m, FALSE_MAPPINGS_PATH, mode="w")
_write_helper(MAPPINGS_HEADER, m, path or FALSE_MAPPINGS_PATH, mode="w")


def lint_false_mappings(*, standardize: bool = False) -> None:
def lint_false_mappings(*, standardize: bool = False, path: Optional[Path] = None) -> None:
"""Lint the false mappings file."""
mappings = load_false_mappings()
mappings = load_false_mappings(path=path)
mappings = _remove_redundant(mappings, MappingTuple, standardize=standardize)
write_false_mappings(sorted(mappings, key=mapping_sort_key))
write_false_mappings(sorted(mappings, key=mapping_sort_key), path=path)


UNSURE_PATH = get_resource_file_path("unsure.tsv")


def load_unsure() -> List[Dict[str, str]]:
def load_unsure(*, path: Optional[Path] = None) -> List[Dict[str, str]]:
"""Load the unsure table."""
return _load_table(UNSURE_PATH)
return _load_table(path or UNSURE_PATH)


def append_unsure_mappings(m: Iterable[Mapping[str, str]], sort: bool = True) -> None:
def append_unsure_mappings(
m: Iterable[Mapping[str, str]],
*,
sort: bool = True,
path: Optional[Path] = None,
) -> None:
"""Append new lines to the "unsure" mappings table."""
_write_helper(MAPPINGS_HEADER, m, UNSURE_PATH, mode="a")
if path is None:
path = UNSURE_PATH
_write_helper(MAPPINGS_HEADER, m, path, mode="a")
if sort:
lint_unsure_mappings()
lint_unsure_mappings(path=path)


def write_unsure_mappings(m: Iterable[Mapping[str, str]]) -> None:
def write_unsure_mappings(m: Iterable[Mapping[str, str]], *, path: Optional[Path] = None) -> None:
"""Write mappings to the unsure mappings file."""
_write_helper(MAPPINGS_HEADER, m, UNSURE_PATH, mode="w")
_write_helper(MAPPINGS_HEADER, m, path or UNSURE_PATH, mode="w")


def lint_unsure_mappings(*, standardize: bool = False) -> None:
def lint_unsure_mappings(*, standardize: bool = False, path: Optional[Path] = None) -> None:
"""Lint the unsure mappings file."""
mappings = load_unsure()
mappings = load_unsure(path=path)
mappings = _remove_redundant(mappings, MappingTuple, standardize=standardize)
write_unsure_mappings(sorted(mappings, key=mapping_sort_key))
write_unsure_mappings(sorted(mappings, key=mapping_sort_key), path=path)


PREDICTIONS_PATH = get_resource_file_path("predictions.tsv")
Expand Down Expand Up @@ -425,7 +445,7 @@ def load_curators():


def filter_predictions(custom_filter: Mapping[str, Mapping[str, Mapping[str, str]]]) -> None:
"""Filter all of the predictions by removing what's in the custom filter then re-write.
"""Filter all the predictions by removing what's in the custom filter then re-write.

:param custom_filter: A filter 3-dictionary of source prefix to target prefix
to source identifier to target identifier
Expand Down
4 changes: 4 additions & 0 deletions src/biomappings/resources/mappings.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -4313,6 +4313,7 @@ mesh C000637995 Ruminococcus gnavus skos:exactMatch ncit C124373 Ruminococcus gn
mesh C000639838 Sphingomonas paucimobilis skos:exactMatch ncit C86749 Sphingomonas paucimobilis semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh C000650035 Citrobacter farmeri skos:exactMatch ncit C86264 Citrobacter farmeri semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh C000654629 H3B-6545 skos:exactMatch ncit C142980 Selective Estrogen Receptor Covalent Antagonist H3B-6545 semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh C000655084 red meat allergy skos:exactMatch hp HP:0410319 Alpha-gal allergy semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py 0.95
mesh C000655246 ACT-774312 skos:exactMatch ncit C161834 CRTH2 Antagonist ACT-774312 semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh C000655292 unicuspid aortic valve skos:exactMatch hp HP:0012561 Unicuspid aortic valve semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh C000656407 MSANTD3 protein, human skos:exactMatch uniprot Q96H12 MSANTD3 semapv:ManualMappingCuration orcid:0000-0003-4423-4370
Expand All @@ -4335,11 +4336,14 @@ mesh C000706695 ABBV-744 skos:exactMatch ncit C148415 BET Inhibitor ABBV-744 sem
mesh C000706947 FGF21 protein, human skos:exactMatch uniprot Q9NSA1 FGF21 semapv:ManualMappingCuration orcid:0000-0001-9439-5346
mesh C000715747 Telangiectasia macularis eruptiva perstans skos:exactMatch hp HP:0007583 Telangiectasia macularis eruptiva perstans semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000718787 AA amyloidosis skos:exactMatch hp HP:4000041 AA amyloidosis semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000718810 anti-amphiphysin autoantibody skos:exactMatch hp HP:5000002 Anti-Amphiphysin antibody semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000718811 Anti-ARHGAP26 antibody skos:exactMatch hp HP:5000003 Anti-ARHGAP26 antibody semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000718813 Anti-CARP VIII antibody skos:exactMatch hp HP:5000004 Anti-CARP VIII antibody semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000719029 anti-GFAP autoantibodies skos:exactMatch hp HP:5000013 Anti-GFAP antibody semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000719042 anti-ryanodine receptor autoantibody skos:exactMatch hp HP:5000047 Anti-ryanodine receptor antibody semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000719407 Midline brainstem cleft skos:exactMatch hp HP:0033645 Midline brainstem cleft semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000719531 misophonia skos:exactMatch hp HP:0025113 Misophonia semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000721288 Cleft eyelid skos:exactMatch hp HP:0000625 Eyelid coloboma semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000721322 Coarse facial features skos:exactMatch hp HP:0000280 Coarse facial features semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000721349 Air crescent sign skos:exactMatch hp HP:0032172 Air crescent sign semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
mesh C000721350 Silhouette sign skos:exactMatch hp HP:0033647 Silhouette sign semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9
Expand Down
4 changes: 0 additions & 4 deletions src/biomappings/resources/predictions.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -14130,7 +14130,6 @@ mesh C000655028 CR2 protein, human skos:exactMatch uniprot P20023 CR2 semapv:Lex
mesh C000655029 ABHD15 protein, human skos:exactMatch uniprot Q6UXT9 ABHD15 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
mesh C000655039 ME3 protein, human skos:exactMatch uniprot Q16798 ME3 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
mesh C000655044 BCHE protein, human skos:exactMatch uniprot P06276 BCHE semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
mesh C000655084 red meat allergy skos:exactMatch hp HP:0410319 Alpha-gal allergy semapv:LexicalMatching 0.95 https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py
mesh C000655129 MINDY4 protein, human skos:exactMatch uniprot Q4G0A6 MINDY4 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
mesh C000655132 SHISA3 protein, human skos:exactMatch uniprot A0PJX4 SHISA3 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
mesh C000655134 TXNDC9 protein, human skos:exactMatch uniprot O14530 TXNDC9 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
Expand Down Expand Up @@ -14291,10 +14290,7 @@ mesh C000707228 MYMX protein, human skos:exactMatch uniprot A0A1B0GTQ4 MYMX sema
mesh C000707256 AAMDC protein, human skos:exactMatch uniprot Q9H7C9 AAMDC semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
mesh C000707475 HTR2A protein, human skos:exactMatch uniprot P28223 HTR2A semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py
mesh C000707567 CG428 skos:exactMatch ncit C125063 Botanical Lotion CG428 semapv:LexicalMatching 0.95 https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py
mesh C000718810 anti-amphiphysin autoantibody skos:exactMatch hp HP:5000002 Anti-Amphiphysin antibody semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py
mesh C000719029 anti-GFAP autoantibodies skos:exactMatch hp HP:5000013 Anti-GFAP antibody semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py
mesh C000721268 Isolated left subclavian artery skos:exactMatch hp HP:0031633 Isolation of the left subclavian artery semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py
mesh C000721288 Cleft eyelid skos:exactMatch hp HP:0000625 Eyelid coloboma semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py
mesh C000721289 Malar hypoplasia skos:exactMatch hp HP:0000272 Malar flattening semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py
mesh C000721389 tram track sign optic nerve skos:exactMatch hp HP:0032270 Optic nerve tram-track sign semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py
mesh C000847 daunorubicinol skos:exactMatch ncit C1062 Duborimycin semapv:LexicalMatching 0.95 https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py
Expand Down
2 changes: 1 addition & 1 deletion src/biomappings/templates/summary.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
{{ util.render_messages(dismissible=True, container=False) }}
<div class="row">
<div class="card">
<h5 class="card-header text-center">Biomappings Su</h5>
<h5 class="card-header text-center">Biomappings Summary</h5>
<div class="card-body">
<p>Summary text.</p>
</div>
Expand Down
12 changes: 7 additions & 5 deletions src/biomappings/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@

import os
import re
from pathlib import Path
from subprocess import CalledProcessError, check_output # noqa: S404
from typing import Any, Mapping, Optional, Tuple

import bioregistry

HERE = os.path.dirname(os.path.abspath(__file__))
RESOURCE_PATH = os.path.abspath(os.path.join(HERE, "resources"))
DOCS = os.path.abspath(os.path.join(HERE, os.pardir, os.pardir, "docs"))
IMG = os.path.join(DOCS, "img")
DATA = os.path.join(DOCS, "_data")
HERE = Path(__file__).parent.resolve()
ROOT = HERE.parent.parent.resolve()
RESOURCE_PATH = HERE.joinpath("resources")
DOCS = ROOT.joinpath("docs")
IMG = DOCS.joinpath("img")
DATA = DOCS.joinpath("_data")

OVERRIDE_MIRIAM = {
# ITO is very messy (combines mostly numbers with a few
Expand Down
30 changes: 25 additions & 5 deletions src/biomappings/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,23 @@ def url_for_state(endpoint, state: State, **kwargs) -> str:
def get_app(
target_curies: Optional[Iterable[Tuple[str, str]]] = None,
predictions_path: Optional[Path] = None,
positives_path: Optional[Path] = None,
negatives_path: Optional[Path] = None,
unsure_path: Optional[Path] = None,
) -> flask.Flask:
"""Get a curation flask app."""
app_ = flask.Flask(__name__)
app_.config["WTF_CSRF_ENABLED"] = False
app_.config["SECRET_KEY"] = os.urandom(8)
app_.config["SHOW_RELATIONS"] = True
app_.config["SHOW_LINES"] = False
controller = Controller(target_curies=target_curies, predictions_path=predictions_path)
controller = Controller(
target_curies=target_curies,
predictions_path=predictions_path,
positives_path=positives_path,
negatives_path=negatives_path,
unsure_path=unsure_path,
)
app_.config["controller"] = controller
flask_bootstrap.Bootstrap4(app_)
app_.register_blueprint(blueprint)
Expand Down Expand Up @@ -137,16 +146,27 @@ def __init__(
*,
target_curies: Optional[Iterable[Tuple[str, str]]] = None,
predictions_path: Optional[Path] = None,
positives_path: Optional[Path] = None,
negatives_path: Optional[Path] = None,
unsure_path: Optional[Path] = None,
):
"""Instantiate the web controller.

:param target_curies: Pairs of prefix, local unique identifiers that are the target
of curation. If this is given, the predictions are pre-filtered
to only show ones where either the source or target appears in this set
:param predictions_path: A custom predictions file to curate from
:param positives_path: A custom positives file to curate to
:param negatives_path: A custom negatives file to curate to
:param unsure_path: A custom unsure file to curate to
"""
self.predictions_path = predictions_path
self._predictions = load_predictions(path=self.predictions_path)

self.positives_path = positives_path
self.negatives_path = negatives_path
self.unsure_path = unsure_path

self._marked: Dict[int, str] = {}
self.total_curated = 0
self._added_mappings: List[Dict[str, Union[None, str, float]]] = []
Expand Down Expand Up @@ -442,14 +462,14 @@ def persist(self):
prediction["type"] = "semapv:ManualMappingCuration"
entries[value].append(prediction)

append_true_mappings(entries["correct"])
append_false_mappings(entries["incorrect"])
append_unsure_mappings(entries["unsure"])
append_true_mappings(entries["correct"], path=self.positives_path)
append_false_mappings(entries["incorrect"], path=self.negatives_path)
append_unsure_mappings(entries["unsure"], path=self.unsure_path)
write_predictions(self._predictions, path=self.predictions_path)
self._marked.clear()

# Now add manually curated mappings
append_true_mappings(self._added_mappings)
append_true_mappings(self._added_mappings, path=self.positives_path)
self._added_mappings = []


Expand Down
Loading