Skip to content

Commit

Permalink
Fix query over connected components (#20)
Browse files Browse the repository at this point in the history
Closes #19

Previously, the method for identifying a connected component in a single
query was not working properly. This PR splits it into two steps: 1)
identify all nodes in the connected component, then 2) get all pairwise
relations between the nodes in that connected component.

http://0.0.0.0:5000/concept/cellosaurus:0347 now gives:

<img width="424" alt="Screenshot 2024-04-17 at 16 03 24"
src="https://github.com/biopragmatics/semra/assets/5069736/def3ca11-cc0c-4702-b2a1-d0dbd5b604cc">

This PR also adds minor UI improvements for displaying names and for
making links to OLS.
  • Loading branch information
cthoyt authored Apr 17, 2024
1 parent 57296ea commit b004e10
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 20 deletions.
42 changes: 27 additions & 15 deletions src/semra/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def get_exact_matches(self, curie: ReferenceHint, *, max_distance: t.Optional[in

def get_connected_component(
self, curie: ReferenceHint, max_distance: t.Optional[int] = None
) -> tuple[list[neo4j.graph.Node], list[neo4j.graph.Relationship]]:
) -> tuple[list[neo4j.graph.Node], list[neo4j.graph.Path]]:
"""Get the nodes and relations in the connected component of mappings around the given CURIE.
:param curie: A CURIE string or reference
Expand All @@ -294,14 +294,26 @@ def get_connected_component(
curie = curie.curie
if max_distance is None:
max_distance = DEFAULT_MAX_LENGTH
query = f"""\

connected_query = f"""\
MATCH (:concept {{curie: $curie}})-[r:{self._rel_q} *..{max_distance}]-(n:concept)
WHERE ALL(p IN r WHERE p.primary or p.secondary)
RETURN collect(DISTINCT n) AS nodes, collect(DISTINCT r) AS relations
RETURN DISTINCT n
UNION ALL
MATCH (n:concept {{curie: $curie}})
RETURN n
"""
res = self.read_query(query, curie=curie)
nodes = res[0][0]
relations = sorted({r for relations in res[0][1] for r in relations}, key=lambda r: r.type)
nodes = [n[0] for n in self.read_query(connected_query, curie=curie)]

component_curies = {node["curie"] for node in nodes}
# component_curies.add(curie)

edge_query = """\
MATCH p=(a:concept)-[r]->(b:concept)
WHERE a.curie in $curies and b.curie in $curies and (r.primary or r.secondary)
RETURN p
"""
relations = [r[0] for r in self.read_query(edge_query, curies=sorted(component_curies))]
return nodes, relations

def get_connected_component_graph(self, curie: ReferenceHint) -> nx.MultiDiGraph:
Expand All @@ -310,18 +322,18 @@ def get_connected_component_graph(self, curie: ReferenceHint) -> nx.MultiDiGraph
:param curie: A CURIE string or reference
:returns: A networkx MultiDiGraph where mappings subject CURIE strings are th
"""
nodes, relations = self.get_connected_component(curie)
nodes, paths = self.get_connected_component(curie)
g = nx.MultiDiGraph()
for node in nodes:
g.add_node(node["curie"], **node)
for relation in relations:
g.add_edge(
relation.nodes[0]["curie"], # type: ignore
relation.nodes[1]["curie"], # type: ignore
key=relation.element_id,
type=relation.type,
**relation,
)
for path in paths:
for relationship in path.relationships:
g.add_edge(
path.start_node["curie"], # type: ignore
path.end_node["curie"], # type: ignore
key=relationship.id,
type=relationship.type,
)
return g

def get_concept_name(self, curie: ReferenceHint) -> str | None:
Expand Down
26 changes: 21 additions & 5 deletions src/semra/templates/concept.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
<script src="https://cytoscape.org/cytoscape.js-cola/cytoscape-cola.js"></script>
{% endblock %}

{% macro bioregistry_href(ref) %}
{% if ref.prefix == "clo" %}
https://bioregistry.io/{{ ref.curie }}?provider=ols
{% else %}
https://bioregistry.io/{{ ref.curie }}
{% endif %}
{% endmacro %}

{% block scripts %}
<script>
let edgeLabel = {
Expand All @@ -35,7 +43,11 @@
selector: 'node',
style: {
'label': function (node) {
return `${node.data("name")}\n${node.data("curie")}`
if (node.data("name") === node.data("curie")) {
return node.data("curie")
} else {
return `${node.data("name")}\n${node.data("curie")}`
}
},
'width': '30px',
'height': '30px',
Expand Down Expand Up @@ -63,7 +75,9 @@
],
layout: {
name: 'cola',
nodeSpacing: function( node ){ return 75; },
nodeSpacing: function (node) {
return 75;
},
// nodeDimensionsIncludeLabels: true
}
});
Expand All @@ -78,9 +92,11 @@
<div class="card">
<div class="card-body">
<h5 class="card-title">
{{ name }} <a class="badge badge-info" href="https://bioregistry.io/{{ curie }}"><code>{{ curie }}</code></a>
{{ name }}
<a class="badge bg-info" href="https://bioregistry.io/{{ curie }}"><code>{{ curie }}</code></a>
</h5>
<h6>Exact Matches</h6>
<p>These exact matches are inferred, potentially using promotion of database cross-references.</p>
</div>
<table class="table table-striped table-borderless">
<tbody>
Expand All @@ -90,13 +106,13 @@ <h6>Exact Matches</h6>
<code>{{ exact_match.curie }}</code>
</td>
<td>
{{ name }}
{% if name %}{{ name }}{% endif %}
</td>
<td>
<a href="{{ url_for('view_concept', curie=exact_match.curie) }}">SeMRA</a>
</td>
<td>
<a href="https://bioregistry.io/{{ exact_match.curie }}">Bioregistry</a>
<a href="{{ bioregistry_href(exact_match) }}">Bioregistry</a>
</td>
{% if has_biomappings %}
<td>
Expand Down

0 comments on commit b004e10

Please sign in to comment.