Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix query over connected components #20

Merged
merged 6 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 27 additions & 15 deletions src/semra/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def get_exact_matches(self, curie: ReferenceHint, *, max_distance: t.Optional[in

def get_connected_component(
self, curie: ReferenceHint, max_distance: t.Optional[int] = None
) -> tuple[list[neo4j.graph.Node], list[neo4j.graph.Relationship]]:
) -> tuple[list[neo4j.graph.Node], list[neo4j.graph.Path]]:
"""Get the nodes and relations in the connected component of mappings around the given CURIE.

:param curie: A CURIE string or reference
Expand All @@ -294,14 +294,26 @@ def get_connected_component(
curie = curie.curie
if max_distance is None:
max_distance = DEFAULT_MAX_LENGTH
query = f"""\

connected_query = f"""\
MATCH (:concept {{curie: $curie}})-[r:{self._rel_q} *..{max_distance}]-(n:concept)
WHERE ALL(p IN r WHERE p.primary or p.secondary)
RETURN collect(DISTINCT n) AS nodes, collect(DISTINCT r) AS relations
RETURN DISTINCT n
UNION ALL
MATCH (n:concept {{curie: $curie}})
RETURN n
"""
res = self.read_query(query, curie=curie)
nodes = res[0][0]
relations = sorted({r for relations in res[0][1] for r in relations}, key=lambda r: r.type)
nodes = [n[0] for n in self.read_query(connected_query, curie=curie)]

component_curies = {node["curie"] for node in nodes}
# component_curies.add(curie)

edge_query = """\
MATCH p=(a:concept)-[r]->(b:concept)
WHERE a.curie in $curies and b.curie in $curies and (r.primary or r.secondary)
RETURN p
"""
relations = [r[0] for r in self.read_query(edge_query, curies=sorted(component_curies))]
return nodes, relations

def get_connected_component_graph(self, curie: ReferenceHint) -> nx.MultiDiGraph:
Expand All @@ -310,18 +322,18 @@ def get_connected_component_graph(self, curie: ReferenceHint) -> nx.MultiDiGraph
:param curie: A CURIE string or reference
:returns: A networkx MultiDiGraph whose nodes are keyed by CURIE strings and whose edges are the mapping relationships between them
"""
nodes, relations = self.get_connected_component(curie)
nodes, paths = self.get_connected_component(curie)
g = nx.MultiDiGraph()
for node in nodes:
g.add_node(node["curie"], **node)
for relation in relations:
g.add_edge(
relation.nodes[0]["curie"], # type: ignore
relation.nodes[1]["curie"], # type: ignore
key=relation.element_id,
type=relation.type,
**relation,
)
for path in paths:
for relationship in path.relationships:
g.add_edge(
path.start_node["curie"], # type: ignore
path.end_node["curie"], # type: ignore
key=relationship.id,
type=relationship.type,
)
return g

def get_concept_name(self, curie: ReferenceHint) -> str | None:
Expand Down
26 changes: 21 additions & 5 deletions src/semra/templates/concept.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
<script src="https://cytoscape.org/cytoscape.js-cola/cytoscape-cola.js"></script>
{% endblock %}

{% macro bioregistry_href(ref) %}
{% if ref.prefix == "clo" %}
https://bioregistry.io/{{ ref.curie }}?provider=ols
{% else %}
https://bioregistry.io/{{ ref.curie }}
{% endif %}
{% endmacro %}

{% block scripts %}
<script>
let edgeLabel = {
Expand All @@ -35,7 +43,11 @@
selector: 'node',
style: {
'label': function (node) {
return `${node.data("name")}\n${node.data("curie")}`
if (node.data("name") === node.data("curie")) {
return node.data("curie")
} else {
return `${node.data("name")}\n${node.data("curie")}`
}
},
'width': '30px',
'height': '30px',
Expand Down Expand Up @@ -63,7 +75,9 @@
],
layout: {
name: 'cola',
nodeSpacing: function( node ){ return 75; },
nodeSpacing: function (node) {
return 75;
},
// nodeDimensionsIncludeLabels: true
}
});
Expand All @@ -78,9 +92,11 @@
<div class="card">
<div class="card-body">
<h5 class="card-title">
{{ name }} <a class="badge badge-info" href="https://bioregistry.io/{{ curie }}"><code>{{ curie }}</code></a>
{{ name }}
<a class="badge bg-info" href="https://bioregistry.io/{{ curie }}"><code>{{ curie }}</code></a>
</h5>
<h6>Exact Matches</h6>
<p>These exact matches are inferred, potentially using promotion of database cross-references.</p>
</div>
<table class="table table-striped table-borderless">
<tbody>
Expand All @@ -90,13 +106,13 @@ <h6>Exact Matches</h6>
<code>{{ exact_match.curie }}</code>
</td>
<td>
{{ name }}
{% if name %}{{ name }}{% endif %}
</td>
<td>
<a href="{{ url_for('view_concept', curie=exact_match.curie) }}">SeMRA</a>
</td>
<td>
<a href="https://bioregistry.io/{{ exact_match.curie }}">Bioregistry</a>
<a href="{{ bioregistry_href(exact_match) }}">Bioregistry</a>
</td>
{% if has_biomappings %}
<td>
Expand Down
Loading