diff --git a/src/biomappings/cli.py b/src/biomappings/cli.py index a9746fe5..a478e9fe 100644 --- a/src/biomappings/cli.py +++ b/src/biomappings/cli.py @@ -3,6 +3,7 @@ """The biomappings CLI.""" import sys +from pathlib import Path import click from more_click import run_app @@ -23,12 +24,25 @@ def main(): if get_git_hash() is not None: @main.command() - @click.option("--path", type=click.Path(), help="A predictions TSV file path") - def web(path): + @click.option("--predictions-path", type=click.Path(), help="A predictions TSV file path") + @click.option("--positives-path", type=click.Path(), help="A positives curation TSV file path") + @click.option("--negatives-path", type=click.Path(), help="A negatives curation TSV file path") + @click.option("--unsure-path", type=click.Path(), help="An unsure curation TSV file path") + def web( + predictions_path: Path, + positives_path: Path, + negatives_path: Path, + unsure_path: Path, + ): """Run the biomappings web app.""" from .wsgi import get_app - app = get_app(predictions_path=path) + app = get_app( + predictions_path=predictions_path, + positives_path=positives_path, + negatives_path=negatives_path, + unsure_path=unsure_path, + ) run_app(app, with_gunicorn=False) @main.command() diff --git a/src/biomappings/resources/__init__.py b/src/biomappings/resources/__init__.py index 95433b3e..8eb85791 100644 --- a/src/biomappings/resources/__init__.py +++ b/src/biomappings/resources/__init__.py @@ -4,7 +4,6 @@ import csv import itertools as itt -import os from collections import defaultdict from pathlib import Path from typing import ( @@ -167,9 +166,9 @@ def target_curie(self) -> str: return f"{self.target_prefix}:{self.target_identifier}" -def get_resource_file_path(fname) -> str: +def get_resource_file_path(fname) -> Path: """Get a resource by its file name.""" - return os.path.join(RESOURCE_PATH, fname) + return RESOURCE_PATH.joinpath(fname) def _load_table(fname) -> List[Dict[str, str]]: @@ -211,16 +210,23 @@ def mapping_sort_key(prediction: Mapping[str, str]) -> Tuple[str, ...]: TRUE_MAPPINGS_PATH = get_resource_file_path("mappings.tsv") -def load_mappings() -> List[Dict[str, str]]: +def load_mappings(*, path: Optional[Path] = None) -> List[Dict[str, str]]: """Load the mappings table.""" - return _load_table(TRUE_MAPPINGS_PATH) + return _load_table(path or TRUE_MAPPINGS_PATH) -def append_true_mappings(m: Iterable[Mapping[str, str]], sort: bool = True) -> None: +def append_true_mappings( + m: Iterable[Mapping[str, str]], + *, + sort: bool = True, + path: Optional[Path] = None, +) -> None: """Append new lines to the mappings table.""" - _write_helper(MAPPINGS_HEADER, m, TRUE_MAPPINGS_PATH, mode="a") + if path is None: + path = TRUE_MAPPINGS_PATH + _write_helper(MAPPINGS_HEADER, m, path, mode="a") if sort: - lint_true_mappings() + lint_true_mappings(path=path) def append_true_mapping_tuples(mappings: Iterable[MappingTuple]) -> None: @@ -228,70 +234,84 @@ def append_true_mapping_tuples(mappings: Iterable[MappingTuple]) -> None: append_true_mappings(mapping.as_dict() for mapping in set(mappings)) -def write_true_mappings(m: Iterable[Mapping[str, str]]) -> None: +def write_true_mappings(m: Iterable[Mapping[str, str]], *, path: Optional[Path] = None) -> None: """Write mappigns to the true mappings file.""" - _write_helper(MAPPINGS_HEADER, m, TRUE_MAPPINGS_PATH, mode="w") + _write_helper(MAPPINGS_HEADER, m, path or TRUE_MAPPINGS_PATH, mode="w") -def lint_true_mappings(*, standardize: bool = False) -> None: +def lint_true_mappings(*, standardize: bool = False, path: Optional[Path] = None) -> None: """Lint the true mappings file.""" - mappings = load_mappings() + mappings = load_mappings(path=path) mappings = _remove_redundant(mappings, MappingTuple, standardize=standardize) - write_true_mappings(sorted(mappings, key=mapping_sort_key)) + write_true_mappings(sorted(mappings, key=mapping_sort_key), path=path) FALSE_MAPPINGS_PATH = get_resource_file_path("incorrect.tsv") -def load_false_mappings() -> List[Dict[str, str]]: +def load_false_mappings(*, path: Optional[Path] = None) -> List[Dict[str, str]]: """Load the false mappings table.""" - return _load_table(FALSE_MAPPINGS_PATH) + return _load_table(path or FALSE_MAPPINGS_PATH) -def append_false_mappings(m: Iterable[Mapping[str, str]], sort: bool = True) -> None: +def append_false_mappings( + m: Iterable[Mapping[str, str]], + *, + sort: bool = True, + path: Optional[Path] = None, +) -> None: """Append new lines to the false mappings table.""" - _write_helper(MAPPINGS_HEADER, m, FALSE_MAPPINGS_PATH, mode="a") + if path is None: + path = FALSE_MAPPINGS_PATH + _write_helper(MAPPINGS_HEADER, m, path, mode="a") if sort: - lint_false_mappings() + lint_false_mappings(path=path) -def write_false_mappings(m: Iterable[Mapping[str, str]]) -> None: +def write_false_mappings(m: Iterable[Mapping[str, str]], *, path: Optional[Path] = None) -> None: """Write mappings to the false mappings file.""" - _write_helper(MAPPINGS_HEADER, m, FALSE_MAPPINGS_PATH, mode="w") + _write_helper(MAPPINGS_HEADER, m, path or FALSE_MAPPINGS_PATH, mode="w") -def lint_false_mappings(*, standardize: bool = False) -> None: +def lint_false_mappings(*, standardize: bool = False, path: Optional[Path] = None) -> None: """Lint the false mappings file.""" - mappings = load_false_mappings() + mappings = load_false_mappings(path=path) mappings = _remove_redundant(mappings, MappingTuple, standardize=standardize) - write_false_mappings(sorted(mappings, key=mapping_sort_key)) + write_false_mappings(sorted(mappings, key=mapping_sort_key), path=path) UNSURE_PATH = get_resource_file_path("unsure.tsv") -def load_unsure() -> List[Dict[str, str]]: +def load_unsure(*, path: Optional[Path] = None) -> List[Dict[str, str]]: """Load the unsure table.""" - return _load_table(UNSURE_PATH) + return _load_table(path or UNSURE_PATH) -def append_unsure_mappings(m: Iterable[Mapping[str, str]], sort: bool = True) -> None: +def append_unsure_mappings( + m: Iterable[Mapping[str, str]], + *, + sort: bool = True, + path: Optional[Path] = None, +) -> None: """Append new lines to the "unsure" mappings table.""" - _write_helper(MAPPINGS_HEADER, m, UNSURE_PATH, mode="a") + if path is None: + path = UNSURE_PATH + _write_helper(MAPPINGS_HEADER, m, path, mode="a") if sort: - lint_unsure_mappings() + lint_unsure_mappings(path=path) -def write_unsure_mappings(m: Iterable[Mapping[str, str]]) -> None: +def write_unsure_mappings(m: Iterable[Mapping[str, str]], *, path: Optional[Path] = None) -> None: """Write mappings to the unsure mappings file.""" - _write_helper(MAPPINGS_HEADER, m, UNSURE_PATH, mode="w") + _write_helper(MAPPINGS_HEADER, m, path or UNSURE_PATH, mode="w") -def lint_unsure_mappings(*, standardize: bool = False) -> None: +def lint_unsure_mappings(*, standardize: bool = False, path: Optional[Path] = None) -> None: """Lint the unsure mappings file.""" - mappings = load_unsure() + mappings = load_unsure(path=path) mappings = _remove_redundant(mappings, MappingTuple, standardize=standardize) - write_unsure_mappings(sorted(mappings, key=mapping_sort_key)) + write_unsure_mappings(sorted(mappings, key=mapping_sort_key), path=path) PREDICTIONS_PATH = get_resource_file_path("predictions.tsv") @@ -425,7 +445,7 @@ def load_curators(): def filter_predictions(custom_filter: Mapping[str, Mapping[str, Mapping[str, str]]]) -> None: - """Filter all of the predictions by removing what's in the custom filter then re-write. + """Filter all the predictions by removing what's in the custom filter then re-write. :param custom_filter: A filter 3-dictionary of source prefix to target prefix to source identifier to target identifier diff --git a/src/biomappings/resources/mappings.tsv b/src/biomappings/resources/mappings.tsv index d7195c95..8fef413f 100644 --- a/src/biomappings/resources/mappings.tsv +++ b/src/biomappings/resources/mappings.tsv @@ -4313,6 +4313,7 @@ mesh C000637995 Ruminococcus gnavus skos:exactMatch ncit C124373 Ruminococcus gn mesh C000639838 Sphingomonas paucimobilis skos:exactMatch ncit C86749 Sphingomonas paucimobilis semapv:ManualMappingCuration orcid:0000-0003-4423-4370 mesh C000650035 Citrobacter farmeri skos:exactMatch ncit C86264 Citrobacter farmeri semapv:ManualMappingCuration orcid:0000-0003-4423-4370 mesh C000654629 H3B-6545 skos:exactMatch ncit C142980 Selective Estrogen Receptor Covalent Antagonist H3B-6545 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 +mesh C000655084 red meat allergy skos:exactMatch hp HP:0410319 Alpha-gal allergy semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py 0.95 mesh C000655246 ACT-774312 skos:exactMatch ncit C161834 CRTH2 Antagonist ACT-774312 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 mesh C000655292 unicuspid aortic valve skos:exactMatch hp HP:0012561 Unicuspid aortic valve semapv:ManualMappingCuration orcid:0000-0003-4423-4370 mesh C000656407 MSANTD3 protein, human skos:exactMatch uniprot Q96H12 MSANTD3 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 @@ -4335,11 +4336,14 @@ mesh C000706695 ABBV-744 skos:exactMatch ncit C148415 BET Inhibitor ABBV-744 sem mesh C000706947 FGF21 protein, human skos:exactMatch uniprot Q9NSA1 FGF21 semapv:ManualMappingCuration orcid:0000-0001-9439-5346 mesh C000715747 Telangiectasia macularis eruptiva perstans skos:exactMatch hp HP:0007583 Telangiectasia macularis eruptiva perstans semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000718787 AA amyloidosis skos:exactMatch hp HP:4000041 AA amyloidosis semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 +mesh C000718810 anti-amphiphysin autoantibody skos:exactMatch hp HP:5000002 Anti-Amphiphysin antibody semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000718811 Anti-ARHGAP26 antibody skos:exactMatch hp HP:5000003 Anti-ARHGAP26 antibody semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000718813 Anti-CARP VIII antibody skos:exactMatch hp HP:5000004 Anti-CARP VIII antibody semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 +mesh C000719029 anti-GFAP autoantibodies skos:exactMatch hp HP:5000013 Anti-GFAP antibody semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000719042 anti-ryanodine receptor autoantibody skos:exactMatch hp HP:5000047 Anti-ryanodine receptor antibody semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000719407 Midline brainstem cleft skos:exactMatch hp HP:0033645 Midline brainstem cleft semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000719531 misophonia skos:exactMatch hp HP:0025113 Misophonia semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 +mesh C000721288 Cleft eyelid skos:exactMatch hp HP:0000625 Eyelid coloboma semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000721322 Coarse facial features skos:exactMatch hp HP:0000280 Coarse facial features semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000721349 Air crescent sign skos:exactMatch hp HP:0032172 Air crescent sign semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 mesh C000721350 Silhouette sign skos:exactMatch hp HP:0033647 Silhouette sign semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching generate_hp_mesh_mappings.py 0.9 diff --git a/src/biomappings/resources/predictions.tsv b/src/biomappings/resources/predictions.tsv index d2a94382..5878ea00 100644 --- a/src/biomappings/resources/predictions.tsv +++ b/src/biomappings/resources/predictions.tsv @@ -14130,7 +14130,6 @@ mesh C000655028 CR2 protein, human skos:exactMatch uniprot P20023 CR2 semapv:Lex mesh C000655029 ABHD15 protein, human skos:exactMatch uniprot Q6UXT9 ABHD15 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py mesh C000655039 ME3 protein, human skos:exactMatch uniprot Q16798 ME3 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py mesh C000655044 BCHE protein, human skos:exactMatch uniprot P06276 BCHE semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py -mesh C000655084 red meat allergy skos:exactMatch hp HP:0410319 Alpha-gal allergy semapv:LexicalMatching 0.95 https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py mesh C000655129 MINDY4 protein, human skos:exactMatch uniprot Q4G0A6 MINDY4 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py mesh C000655132 SHISA3 protein, human skos:exactMatch uniprot A0PJX4 SHISA3 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py mesh C000655134 TXNDC9 protein, human skos:exactMatch uniprot O14530 TXNDC9 semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py @@ -14291,10 +14290,7 @@ mesh C000707228 MYMX protein, human skos:exactMatch uniprot A0A1B0GTQ4 MYMX sema mesh C000707256 AAMDC protein, human skos:exactMatch uniprot Q9H7C9 AAMDC semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py mesh C000707475 HTR2A protein, human skos:exactMatch uniprot P28223 HTR2A semapv:LexicalMatching 0.999 https://github.com/biomappings/biomappings/blob/a840cf/scripts/generate_mesh_uniprot_mappings.py mesh C000707567 CG428 skos:exactMatch ncit C125063 Botanical Lotion CG428 semapv:LexicalMatching 0.95 https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py -mesh C000718810 anti-amphiphysin autoantibody skos:exactMatch hp HP:5000002 Anti-Amphiphysin antibody semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py -mesh C000719029 anti-GFAP autoantibodies skos:exactMatch hp HP:5000013 Anti-GFAP antibody semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py mesh C000721268 Isolated left subclavian artery skos:exactMatch hp HP:0031633 Isolation of the left subclavian artery semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py -mesh C000721288 Cleft eyelid skos:exactMatch hp HP:0000625 Eyelid coloboma semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py mesh C000721289 Malar hypoplasia skos:exactMatch hp HP:0000272 Malar flattening semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py mesh C000721389 tram track sign optic nerve skos:exactMatch hp HP:0032270 Optic nerve tram-track sign semapv:LexicalMatching 0.9 generate_hp_mesh_mappings.py mesh C000847 daunorubicinol skos:exactMatch ncit C1062 Duborimycin semapv:LexicalMatching 0.95 https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py diff --git a/src/biomappings/templates/summary.html b/src/biomappings/templates/summary.html index bdf99cfc..6966f626 100644 --- a/src/biomappings/templates/summary.html +++ b/src/biomappings/templates/summary.html @@ -10,7 +10,7 @@ {{ util.render_messages(dismissible=True, container=False) }}
-
Biomappings Su
+
Biomappings Summary

Summary text.

diff --git a/src/biomappings/utils.py b/src/biomappings/utils.py index b529e79b..c5b34243 100644 --- a/src/biomappings/utils.py +++ b/src/biomappings/utils.py @@ -4,16 +4,18 @@ import os import re +from pathlib import Path from subprocess import CalledProcessError, check_output # noqa: S404 from typing import Any, Mapping, Optional, Tuple import bioregistry -HERE = os.path.dirname(os.path.abspath(__file__)) -RESOURCE_PATH = os.path.abspath(os.path.join(HERE, "resources")) -DOCS = os.path.abspath(os.path.join(HERE, os.pardir, os.pardir, "docs")) -IMG = os.path.join(DOCS, "img") -DATA = os.path.join(DOCS, "_data") +HERE = Path(__file__).parent.resolve() +ROOT = HERE.parent.parent.resolve() +RESOURCE_PATH = HERE.joinpath("resources") +DOCS = ROOT.joinpath("docs") +IMG = DOCS.joinpath("img") +DATA = DOCS.joinpath("_data") OVERRIDE_MIRIAM = { # ITO is very messy (combines mostly numbers with a few diff --git a/src/biomappings/wsgi.py b/src/biomappings/wsgi.py index 969a64a8..3d93eb4f 100644 --- a/src/biomappings/wsgi.py +++ b/src/biomappings/wsgi.py @@ -100,6 +100,9 @@ def url_for_state(endpoint, state: State, **kwargs) -> str: def get_app( target_curies: Optional[Iterable[Tuple[str, str]]] = None, predictions_path: Optional[Path] = None, + positives_path: Optional[Path] = None, + negatives_path: Optional[Path] = None, + unsure_path: Optional[Path] = None, ) -> flask.Flask: """Get a curation flask app.""" app_ = flask.Flask(__name__) @@ -107,7 +110,13 @@ def get_app( app_.config["SECRET_KEY"] = os.urandom(8) app_.config["SHOW_RELATIONS"] = True app_.config["SHOW_LINES"] = False - controller = Controller(target_curies=target_curies, predictions_path=predictions_path) + controller = Controller( + target_curies=target_curies, + predictions_path=predictions_path, + positives_path=positives_path, + negatives_path=negatives_path, + unsure_path=unsure_path, + ) app_.config["controller"] = controller flask_bootstrap.Bootstrap4(app_) app_.register_blueprint(blueprint) @@ -137,6 +146,9 @@ def __init__( *, target_curies: Optional[Iterable[Tuple[str, str]]] = None, predictions_path: Optional[Path] = None, + positives_path: Optional[Path] = None, + negatives_path: Optional[Path] = None, + unsure_path: Optional[Path] = None, ): """Instantiate the web controller. @@ -144,9 +156,17 @@ def __init__( of curation. If this is given, pre-filters will be made before on predictions to only show ones where either the source or target appears in this set :param predictions_path: A custom predictions file to curate from + :param positives_path: A custom positives file to curate to + :param negatives_path: A custom negatives file to curate to + :param unsure_path: A custom unsure file to curate to """ self.predictions_path = predictions_path self._predictions = load_predictions(path=self.predictions_path) + + self.positives_path = positives_path + self.negatives_path = negatives_path + self.unsure_path = unsure_path + self._marked: Dict[int, str] = {} self.total_curated = 0 self._added_mappings: List[Dict[str, Union[None, str, float]]] = [] @@ -442,14 +462,14 @@ def persist(self): prediction["type"] = "semapv:ManualMappingCuration" entries[value].append(prediction) - append_true_mappings(entries["correct"]) - append_false_mappings(entries["incorrect"]) - append_unsure_mappings(entries["unsure"]) + append_true_mappings(entries["correct"], path=self.positives_path) + append_false_mappings(entries["incorrect"], path=self.negatives_path) + append_unsure_mappings(entries["unsure"], path=self.unsure_path) write_predictions(self._predictions, path=self.predictions_path) self._marked.clear() # Now add manually curated mappings - append_true_mappings(self._added_mappings) + append_true_mappings(self._added_mappings, path=self.positives_path) self._added_mappings = []