Skip to content

Commit

Permalink
Add network visualization to concept page
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Oct 17, 2023
1 parent 3344440 commit d6dbdd6
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 23 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ web = [
"flask",
"bootstrap_flask",
"neo4j",
"biomappings",
]


Expand Down
10 changes: 8 additions & 2 deletions src/semra/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,13 +469,19 @@ def validate_mappings(mappings: list[Mapping]) -> None:
raise ValueError(f"invalid object prefix: {mapping}.")
if not bioregistry.is_valid_identifier(mapping.s.prefix, mapping.s.identifier):
raise ValueError(
f"Invalid mapping subject.\n\nMapping:{mapping}.\n\nSubject: {mapping.s}\n\nUse regex {bioregistry.get_pattern(mapping.s.prefix)}"
f"Invalid mapping subject."
f"\n\nMapping:{mapping}."
f"\n\nSubject: {mapping.s}"
f"\n\nUse regex {bioregistry.get_pattern(mapping.s.prefix)}"
)
if ":" in mapping.s.identifier:
raise ValueError(f"banana in mapping subject: {mapping}")
if not bioregistry.is_valid_identifier(mapping.o.prefix, mapping.o.identifier):
raise ValueError(
f"Invalid mapping object.\n\nMapping:{mapping}.\n\nObject: {mapping.o}\n\nUse regex {bioregistry.get_pattern(mapping.o.prefix)}"
f"Invalid mapping object."
f"\n\nMapping:{mapping}."
f"\n\nObject: {mapping.o}"
f"\n\nUse regex {bioregistry.get_pattern(mapping.o.prefix)}"
)
if ":" in mapping.o.identifier:
raise ValueError(f"banana in mapping object: {mapping}")
Expand Down
34 changes: 33 additions & 1 deletion src/semra/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from typing import Any, TypeAlias

import neo4j
import neo4j.graph
import networkx as nx
import pydantic
from neo4j import Transaction, unit_of_work

Expand Down Expand Up @@ -217,13 +219,43 @@ def summarize_authors(self) -> Counter:
return Counter(dict(self.read_query(query)))

def get_highest_exact_matches(self, limit: int = 10) -> Counter:
query = "MATCH (a)-[:`skos:exactMatch`]-(b) WHERE a.priority RETURN a.curie, count(distinct b) as c ORDER BY c DESCENDING LIMIT $limit"
query = """\
MATCH (a)-[:`skos:exactMatch`]-(b)
WHERE a.priority RETURN a.curie, count(distinct b) as c
ORDER BY c DESCENDING
LIMIT $limit
"""
return Counter(dict(self.read_query(query, limit=limit)))

def get_exact_matches(self, curie: str) -> dict[Reference, str]:
    """Look up the nodes joined to ``curie`` by a ``skos:exactMatch`` relationship.

    The relationship is matched without a direction, so matches are found
    regardless of which side of the relationship ``curie`` is on.

    :param curie: Value of the ``curie`` property identifying the query node.
    :returns: A dictionary from the reference parsed out of each matched
        node's ``curie`` property to that node's ``name`` property.
    """
    query = "MATCH (a {curie: $curie})-[:`skos:exactMatch`]-(b) RETURN b"
    matches = {}
    # Each row carries exactly one value: the matched node ``b``.
    for (node,) in self.read_query(query, curie=curie):
        reference = Reference.from_curie(node["curie"])
        matches[reference] = node["name"]
    return matches

def get_connected_component(self, curie: str) -> tuple[list[neo4j.graph.Node], list[neo4j.graph.Relationship]]:
    """Collect the concepts and relationships reachable from ``curie``.

    The query follows up to three relationships, in either direction, whose
    ``hasPrimary`` property is true, and aggregates everything into one row.

    :param curie: Value of the ``curie`` property of the starting concept.
    :returns: A pair of the distinct concept nodes found at the far end of
        the matched paths and the de-duplicated relationships on those paths.
    """
    query = """\
MATCH (:concept {curie: $curie})-[r *..3 {hasPrimary: true}]-(n:concept)
RETURN collect(DISTINCT n) AS nodes, collect(DISTINCT r) AS relations
"""
    first_row = self.read_query(query, curie=curie)[0]
    nodes = first_row[0]
    # ``r`` is bound to a variable-length pattern, so every collected entry is
    # itself a list of relationships; flatten them while dropping duplicates.
    unique_relations = set()
    for path_relations in first_row[1]:
        for relation in path_relations:
            unique_relations.add(relation)
    return nodes, list(unique_relations)

def get_connected_component_graph(self, curie: str) -> nx.MultiDiGraph:
    """Build a networkx multi-digraph of the component around ``curie``.

    Nodes are keyed by their ``curie`` property and carry all of their
    properties as node attributes. Edges are keyed by the relationship's
    element identifier and carry the relationship type under ``type`` along
    with all of the relationship's properties.

    :param curie: Passed through to :meth:`get_connected_component`.
    :returns: The populated graph.
    """
    nodes, relations = self.get_connected_component(curie)
    graph = nx.MultiDiGraph()
    for node in nodes:
        graph.add_node(node["curie"], **node)
    for relation in relations:
        # ``relation.nodes`` holds the two endpoints; the first is used as the
        # edge source and the second as the edge target.
        source, target = relation.nodes
        graph.add_edge(
            source["curie"],
            target["curie"],
            key=relation.element_id,
            type=relation.type,
            **relation,
        )
    return graph

def get_concept_name(self, curie: str) -> str:
    """Return the name of a concept by delegating to ``_get_name_by_curie``.

    :param curie: The CURIE of the concept, passed through unchanged.
    :returns: Whatever ``_get_name_by_curie`` produces for that CURIE.
    """
    # NOTE(review): ``self`` is not used, so this lookup does not appear to go
    # through this client's own queries - confirm against the helper.
    return _get_name_by_curie(curie)

Expand Down
3 changes: 2 additions & 1 deletion src/semra/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ class Configuration(BaseModel):
remove_imprecise: bool = True
validate_raw: bool = Field(
default=False,
description="Should the raw mappings be validated against Bioregistry prefixes and local unique identifier regular expressions (when available)?",
description="Should the raw mappings be validated against Bioregistry "
"prefixes and local unique identifier regular expressions (when available)?",
)

raw_pickle_path: Path | None = None
Expand Down
5 changes: 4 additions & 1 deletion src/semra/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
</style>
{% endblock %}

<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js"
integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo"
crossorigin="anonymous"></script>
<script src="https://kit.fontawesome.com/4c86883252.js" crossorigin="anonymous"></script>

<title>{% block title %}{% endblock %}</title>
{% endblock %}
</head>
Expand All @@ -46,5 +48,6 @@
</p>
</footer>

{% block scripts %}{% endblock %}
</body>
</html>
70 changes: 70 additions & 0 deletions src/semra/templates/concept.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,75 @@

{% block title %}SeMRA{% endblock %}

{% block head %}
{{ super() }}
<style>
#cy {
width: 100%;
height: 500px;
display: block;
}
</style>
<script src="https://code.jquery.com/jquery-3.1.1.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/cytoscape/3.26.0/cytoscape.min.js"></script>
{% endblock %}

{% block scripts %}
<script>
let edgeLabel = {
'oboinowl:hasDbXref': 'xref',
'skos:exactMatch': 'exact'
};
$.getJSON("/api/cytoscape/{{ curie }}", function (data) {
// console.log(data);
var cy = cytoscape({
container: document.getElementById('cy'),
elements: data,
style: [
{
selector: 'node',
style: {
'label': function (node) {
return `${node.data("name")}\n${node.data("curie")})`
},
'width': '30px',
'height': '30px',
'color': 'blue',
'background-fit': 'contain',
'background-clip': 'none',
'text-wrap': 'wrap'
}
},
{
selector: 'edge',
style: {
'label': function (edge) {
return `${edgeLabel[edge.data("type")]}`
},
"curve-style": "bezier",
'text-background-color': 'yellow',
'text-background-opacity': 0.4,
'width': '4px',
'target-arrow-shape': 'triangle',
'control-point-step-size': '140px',
autorotate: true
}
}
],
layout: {
name: 'cose',
animate: true,
padding: 100,
}
});

var layout = cy.layout({ name: 'cose' });
layout.run();

layout.run();
});
</script>
{% endblock %}

{% block content %}
<div class="container" style="margin-top: 50px; margin-bottom: 50px">
Expand Down Expand Up @@ -49,6 +118,7 @@ <h6>Exact Matches</h6>
{% endfor %}
</tbody>
</table>
<div id="cy"></div>
</div>
</div>
</div>
Expand Down
56 changes: 38 additions & 18 deletions src/semra/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

import fastapi
import flask
import networkx as nx
from curies import Reference
from fastapi import Path
from fastapi.responses import JSONResponse
from flask import Flask, render_template
from flask_bootstrap import Bootstrap5
from starlette.middleware.wsgi import WSGIMiddleware
Expand All @@ -14,25 +16,24 @@
from semra.client import Neo4jClient

try:
import biomappings.utils as biomappings_utils
import biomappings.utils
except ImportError:
biomappings_utils = None
BIOMAPPINGS_GIT_HASH = None
else:
BIOMAPPINGS_GIT_HASH = biomappings.utils.get_git_hash()

client = Neo4jClient()

api_router = fastapi.APIRouter()
api_router = fastapi.APIRouter(prefix="/api")

flask_app = Flask(__name__)
flask_app.secret_key = os.urandom(8)
Bootstrap5(flask_app)

# Could group this in a function later
app = fastapi.FastAPI()
app.include_router(api_router)
api_router.mount("/", WSGIMiddleware(flask_app))

EXAMPLE_CONCEPTS = ["efo:0002142"]
EXAMPLE_MAPPINGS = ["25b67912bc720127a43a06ce4688b672", "5a56bf7ac409d8de84c3382a99e17715"]
BIOMAPPINGS_GIT_HASH = biomappings_utils is not None and biomappings_utils.get_git_hash()


PREDICATE_COUNTER = client.summarize_predicates()
MAPPING_SET_COUNTER = client.summarize_mapping_sets()
Expand All @@ -48,15 +49,15 @@


def _figure_number(n: int):
if n > 1_000_000:
if n > 1_000_000: # noqa:PLR2004
lead = n / 1_000_000
if lead < 10:
if lead < 10: # noqa:PLR2004
return round(lead, 1), "M"
else:
return round(lead), "M"
if n > 1_000:
if n > 1_000: # noqa:PLR2004
lead = n / 1_000
if lead < 10:
if lead < 10: # noqa:PLR2004
return round(lead, 1), "K"
else:
return round(lead), "K"
Expand Down Expand Up @@ -99,7 +100,6 @@ def view_concept(curie: str):
name = client.get_concept_name(curie)
exact_matches = client.get_exact_matches(curie)
# TODO when showing equivalence between two entities from same namespace, suggest curating a replaced by relation

return render_template(
"concept.html",
reference=reference,
Expand Down Expand Up @@ -152,12 +152,22 @@ def view_mapping_set(curie: str):
return render_template("mapping_set.html", mapping_set=m)


@api_router.get("/api/evidence/{curie}", response_model=Evidence)
@api_router.get("/evidence/{curie}", response_model=Evidence)
def get_evidence(curie: str = Path(description="An evidence's MD5 hex digest.")): # noqa:B008
return client.get_evidence(curie)


@api_router.get("/api/mapping/{mapping}", response_model=Mapping)
@api_router.get("/cytoscape/{curie}")
def get_concept_cytoscape(
    curie: str = Path(description="the compact URI (CURIE) for a concept", examples=EXAMPLE_CONCEPTS)  # noqa:B008
):
    """Get the mapping graph surrounding the concept as a Cytoscape.js JSON object."""
    # Only the "elements" entry of the networkx Cytoscape export is returned;
    # the other top-level entries of the export are dropped.
    elements = nx.cytoscape_data(client.get_connected_component_graph(curie))["elements"]
    return JSONResponse(elements)


@api_router.get("/mapping/{mapping}", response_model=Mapping)
def get_mapping(
mapping: str = Path( # noqa:B008
description="A mapping's MD5 hex digest.",
Expand All @@ -167,7 +177,7 @@ def get_mapping(
return client.get_mapping(mapping)


@api_router.get("/api/mapping_set/{mapping_set}", response_model=MappingSet)
@api_router.get("/mapping_set/{mapping_set}", response_model=MappingSet)
def get_mapping_set(
mapping_set: str = Path( # noqa:B008
description="A mapping set's MD5 hex digest.", examples=["7831d5bc95698099fb6471667e5282cd"]
Expand All @@ -176,12 +186,22 @@ def get_mapping_set(
return client.get_mapping_set(mapping_set)


@api_router.get("/api/mapping_set/", response_model=list[MappingSet])
@api_router.get("/mapping_set/", response_model=list[MappingSet])
def get_mapping_sets():
return client.get_mapping_sets()


def get_app():
    """Assemble the FastAPI application served by this module.

    The application includes the routes registered on ``api_router`` and
    mounts the Flask application, wrapped as WSGI middleware, at ``/``.

    :returns: The configured FastAPI application.
    """
    metadata = {
        "title": "Semantic Reasoning Assembler",
        "description": "A web app to access a SeMRA Neo4j database",
    }
    application = fastapi.FastAPI(**metadata)
    # The router is registered before the catch-all mount at "/".
    application.include_router(api_router)
    application.mount("/", WSGIMiddleware(flask_app))
    return application


if __name__ == "__main__":
import uvicorn

uvicorn.run(api_router, port=5000, host="0.0.0.0") # noqa:S104
uvicorn.run(get_app(), port=5000, host="0.0.0.0") # noqa:S104

0 comments on commit d6dbdd6

Please sign in to comment.