diff --git a/pyproject.toml b/pyproject.toml index 83d0fdc..fb4658f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ web = [ "flask", "bootstrap_flask", "neo4j", + "biomappings", ] diff --git a/src/semra/api.py b/src/semra/api.py index 2b41fdf..d0921ba 100644 --- a/src/semra/api.py +++ b/src/semra/api.py @@ -469,13 +469,19 @@ def validate_mappings(mappings: list[Mapping]) -> None: raise ValueError(f"invalid object prefix: {mapping}.") if not bioregistry.is_valid_identifier(mapping.s.prefix, mapping.s.identifier): raise ValueError( - f"Invalid mapping subject.\n\nMapping:{mapping}.\n\nSubject: {mapping.s}\n\nUse regex {bioregistry.get_pattern(mapping.s.prefix)}" + f"Invalid mapping subject." + f"\n\nMapping:{mapping}." + f"\n\nSubject: {mapping.s}" + f"\n\nUse regex {bioregistry.get_pattern(mapping.s.prefix)}" ) if ":" in mapping.s.identifier: raise ValueError(f"banana in mapping subject: {mapping}") if not bioregistry.is_valid_identifier(mapping.o.prefix, mapping.o.identifier): raise ValueError( - f"Invalid mapping object.\n\nMapping:{mapping}.\n\nObject: {mapping.o}\n\nUse regex {bioregistry.get_pattern(mapping.o.prefix)}" + f"Invalid mapping object." + f"\n\nMapping:{mapping}." + f"\n\nObject: {mapping.o}" + f"\n\nUse regex {bioregistry.get_pattern(mapping.o.prefix)}" ) if ":" in mapping.o.identifier: raise ValueError(f"banana in mapping object: {mapping}") diff --git a/src/semra/client.py b/src/semra/client.py index d0b76bc..dfffe0b 100644 --- a/src/semra/client.py +++ b/src/semra/client.py @@ -6,6 +6,8 @@ from typing import Any, TypeAlias import neo4j +import neo4j.graph +import networkx as nx import pydantic from neo4j import Transaction, unit_of_work @@ -217,13 +219,43 @@ def summarize_authors(self) -> Counter: return Counter(dict(self.read_query(query))) def get_highest_exact_matches(self, limit: int = 10) -> Counter: - query = "MATCH (a)-[:`skos:exactMatch`]-(b) WHERE a.priority RETURN a.curie, count(distinct b) as c ORDER BY c DESCENDING LIMIT $limit" + query = """\ + MATCH (a)-[:`skos:exactMatch`]-(b) + WHERE a.priority RETURN a.curie, count(distinct b) as c + ORDER BY c DESCENDING + LIMIT $limit + """ return Counter(dict(self.read_query(query, limit=limit))) def get_exact_matches(self, curie: str) -> dict[Reference, str]: query = "MATCH (a {curie: $curie})-[:`skos:exactMatch`]-(b) RETURN b" return {Reference.from_curie(node["curie"]): node["name"] for node, in self.read_query(query, curie=curie)} + def get_connected_component(self, curie: str) -> tuple[list[neo4j.graph.Node], list[neo4j.graph.Relationship]]: + query = """\ + MATCH (:concept {curie: $curie})-[r *..3 {hasPrimary: true}]-(n:concept) + RETURN collect(DISTINCT n) AS nodes, collect(DISTINCT r) AS relations + """ + res = self.read_query(query, curie=curie) + nodes = res[0][0] + relations = list({r for relations in res[0][1] for r in relations}) + return nodes, relations + + def get_connected_component_graph(self, curie: str) -> nx.MultiDiGraph: + nodes, relations = self.get_connected_component(curie) + g = nx.MultiDiGraph() + for node in nodes: + g.add_node(node["curie"], **node) + for relation in relations: + g.add_edge( + relation.nodes[0]["curie"], + relation.nodes[1]["curie"], + key=relation.element_id, + type=relation.type, + **relation, + ) + return g + def get_concept_name(self, curie: str) -> str: return _get_name_by_curie(curie) diff --git a/src/semra/pipeline.py b/src/semra/pipeline.py index a8c1c61..54fe00b 100644 --- a/src/semra/pipeline.py +++ b/src/semra/pipeline.py @@ -94,7 +94,8 @@ class Configuration(BaseModel): remove_imprecise: bool = True validate_raw: bool = Field( default=False, - description="Should the raw mappings be validated against Bioregistry prefixes and local unique identifier regular expressions (when available)?", + description="Should the raw mappings be validated against Bioregistry " + "prefixes and local unique identifier regular expressions (when available)?", ) raw_pickle_path: Path | None = None diff --git a/src/semra/templates/base.html b/src/semra/templates/base.html index 8aef008..a4ca6c7 100644 --- a/src/semra/templates/base.html +++ b/src/semra/templates/base.html @@ -29,8 +29,10 @@ {% endblock %} + - {% block title %}{% endblock %} {% endblock %} @@ -46,5 +48,6 @@

+{% block scripts %}{% endblock %} diff --git a/src/semra/templates/concept.html b/src/semra/templates/concept.html index 274f55f..c9416fc 100644 --- a/src/semra/templates/concept.html +++ b/src/semra/templates/concept.html @@ -4,6 +4,75 @@ {% block title %}SeMRA{% endblock %} +{% block head %} +{{ super() }} + + + +{% endblock %} + +{% block scripts %} + +{% endblock %} {% block content %}
@@ -49,6 +118,7 @@
Exact Matches
{% endfor %} +
diff --git a/src/semra/wsgi.py b/src/semra/wsgi.py index 7d61c03..f34a8ef 100644 --- a/src/semra/wsgi.py +++ b/src/semra/wsgi.py @@ -4,8 +4,10 @@ import fastapi import flask +import networkx as nx from curies import Reference from fastapi import Path +from fastapi.responses import JSONResponse from flask import Flask, render_template from flask_bootstrap import Bootstrap5 from starlette.middleware.wsgi import WSGIMiddleware @@ -14,25 +16,24 @@ from semra.client import Neo4jClient try: - import biomappings.utils as biomappings_utils + import biomappings.utils except ImportError: - biomappings_utils = None + BIOMAPPINGS_GIT_HASH = None +else: + BIOMAPPINGS_GIT_HASH = biomappings.utils.get_git_hash() client = Neo4jClient() -api_router = fastapi.APIRouter() +api_router = fastapi.APIRouter(prefix="/api") flask_app = Flask(__name__) flask_app.secret_key = os.urandom(8) Bootstrap5(flask_app) -# Could group this in a function later -app = fastapi.FastAPI() -app.include_router(api_router) -api_router.mount("/", WSGIMiddleware(flask_app)) +EXAMPLE_CONCEPTS = ["efo:0002142"] EXAMPLE_MAPPINGS = ["25b67912bc720127a43a06ce4688b672", "5a56bf7ac409d8de84c3382a99e17715"] -BIOMAPPINGS_GIT_HASH = biomappings_utils is not None and biomappings_utils.get_git_hash() + PREDICATE_COUNTER = client.summarize_predicates() MAPPING_SET_COUNTER = client.summarize_mapping_sets() @@ -48,15 +49,15 @@ def _figure_number(n: int): - if n > 1_000_000: + if n > 1_000_000: # noqa:PLR2004 lead = n / 1_000_000 - if lead < 10: + if lead < 10: # noqa:PLR2004 return round(lead, 1), "M" else: return round(lead), "M" - if n > 1_000: + if n > 1_000: # noqa:PLR2004 lead = n / 1_000 - if lead < 10: + if lead < 10: # noqa:PLR2004 return round(lead, 1), "K" else: return round(lead), "K" @@ -99,7 +100,6 @@ def view_concept(curie: str): name = client.get_concept_name(curie) exact_matches = client.get_exact_matches(curie) # TODO when showing equivalence between two entities from same namespace, suggest curating a replaced by relation - return render_template( "concept.html", reference=reference, @@ -152,12 +152,22 @@ def view_mapping_set(curie: str): return render_template("mapping_set.html", mapping_set=m) -@api_router.get("/api/evidence/{curie}", response_model=Evidence) +@api_router.get("/evidence/{curie}", response_model=Evidence) def get_evidence(curie: str = Path(description="An evidence's MD5 hex digest.")): # noqa:B008 return client.get_evidence(curie) -@api_router.get("/api/mapping/{mapping}", response_model=Mapping) +@api_router.get("/cytoscape/{curie}") +def get_concept_cytoscape( + curie: str = Path(description="the compact URI (CURIE) for a concept", examples=EXAMPLE_CONCEPTS) # noqa:B008 +): + """Get the mapping graph surrounding the concept as a Cytoscape.js JSON object.""" + graph = client.get_connected_component_graph(curie) + cytoscape_json = nx.cytoscape_data(graph)["elements"] + return JSONResponse(cytoscape_json) + + +@api_router.get("/mapping/{mapping}", response_model=Mapping) def get_mapping( mapping: str = Path( # noqa:B008 description="A mapping's MD5 hex digest.", @@ -167,7 +177,7 @@ def get_mapping( return client.get_mapping(mapping) -@api_router.get("/api/mapping_set/{mapping_set}", response_model=MappingSet) +@api_router.get("/mapping_set/{mapping_set}", response_model=MappingSet) def get_mapping_set( mapping_set: str = Path( # noqa:B008 description="A mapping set's MD5 hex digest.", examples=["7831d5bc95698099fb6471667e5282cd"] @@ -176,12 +186,22 @@ def get_mapping_set( return client.get_mapping_set(mapping_set) -@api_router.get("/api/mapping_set/", response_model=list[MappingSet]) +@api_router.get("/mapping_set/", response_model=list[MappingSet]) def get_mapping_sets(): return client.get_mapping_sets() +def get_app(): + app = fastapi.FastAPI( + title="Semantic Reasoning Assembler", + description="A web app to access a SeMRA Neo4j database", + ) + app.include_router(api_router) + app.mount("/", WSGIMiddleware(flask_app)) + return app + + if __name__ == "__main__": import uvicorn - uvicorn.run(api_router, port=5000, host="0.0.0.0") # noqa:S104 + uvicorn.run(get_app(), port=5000, host="0.0.0.0") # noqa:S104