diff --git a/pyproject.toml b/pyproject.toml index 83d0fdc..fb4658f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ web = [ "flask", "bootstrap_flask", "neo4j", + "biomappings", ] diff --git a/src/semra/api.py b/src/semra/api.py index 2b41fdf..d0921ba 100644 --- a/src/semra/api.py +++ b/src/semra/api.py @@ -469,13 +469,19 @@ def validate_mappings(mappings: list[Mapping]) -> None: raise ValueError(f"invalid object prefix: {mapping}.") if not bioregistry.is_valid_identifier(mapping.s.prefix, mapping.s.identifier): raise ValueError( - f"Invalid mapping subject.\n\nMapping:{mapping}.\n\nSubject: {mapping.s}\n\nUse regex {bioregistry.get_pattern(mapping.s.prefix)}" + f"Invalid mapping subject." + f"\n\nMapping:{mapping}." + f"\n\nSubject: {mapping.s}" + f"\n\nUse regex {bioregistry.get_pattern(mapping.s.prefix)}" ) if ":" in mapping.s.identifier: raise ValueError(f"banana in mapping subject: {mapping}") if not bioregistry.is_valid_identifier(mapping.o.prefix, mapping.o.identifier): raise ValueError( - f"Invalid mapping object.\n\nMapping:{mapping}.\n\nObject: {mapping.o}\n\nUse regex {bioregistry.get_pattern(mapping.o.prefix)}" + f"Invalid mapping object." + f"\n\nMapping:{mapping}." + f"\n\nObject: {mapping.o}" + f"\n\nUse regex {bioregistry.get_pattern(mapping.o.prefix)}" ) if ":" in mapping.o.identifier: raise ValueError(f"banana in mapping object: {mapping}") diff --git a/src/semra/client.py b/src/semra/client.py index d0b76bc..dfffe0b 100644 --- a/src/semra/client.py +++ b/src/semra/client.py @@ -6,6 +6,8 @@ from typing import Any, TypeAlias import neo4j +import neo4j.graph +import networkx as nx import pydantic from neo4j import Transaction, unit_of_work @@ -217,13 +219,43 @@ def summarize_authors(self) -> Counter: return Counter(dict(self.read_query(query))) def get_highest_exact_matches(self, limit: int = 10) -> Counter: - query = "MATCH (a)-[:`skos:exactMatch`]-(b) WHERE a.priority RETURN a.curie, count(distinct b) as c ORDER BY c DESCENDING LIMIT $limit" + query = """\ + MATCH (a)-[:`skos:exactMatch`]-(b) + WHERE a.priority RETURN a.curie, count(distinct b) as c + ORDER BY c DESCENDING + LIMIT $limit + """ return Counter(dict(self.read_query(query, limit=limit))) def get_exact_matches(self, curie: str) -> dict[Reference, str]: query = "MATCH (a {curie: $curie})-[:`skos:exactMatch`]-(b) RETURN b" return {Reference.from_curie(node["curie"]): node["name"] for node, in self.read_query(query, curie=curie)} + def get_connected_component(self, curie: str) -> tuple[list[neo4j.graph.Node], list[neo4j.graph.Relationship]]: + query = """\ + MATCH (:concept {curie: $curie})-[r *..3 {hasPrimary: true}]-(n:concept) + RETURN collect(DISTINCT n) AS nodes, collect(DISTINCT r) AS relations + """ + res = self.read_query(query, curie=curie) + nodes = res[0][0] + relations = list({r for relations in res[0][1] for r in relations}) + return nodes, relations + + def get_connected_component_graph(self, curie: str) -> nx.MultiDiGraph: + nodes, relations = self.get_connected_component(curie) + g = nx.MultiDiGraph() + for node in nodes: + g.add_node(node["curie"], **node) + for relation in relations: + g.add_edge( + relation.nodes[0]["curie"], + relation.nodes[1]["curie"], + key=relation.element_id, + type=relation.type, + **relation, + ) + return g + def get_concept_name(self, curie: str) -> str: return _get_name_by_curie(curie) diff --git a/src/semra/pipeline.py b/src/semra/pipeline.py index a8c1c61..54fe00b 100644 --- a/src/semra/pipeline.py +++ b/src/semra/pipeline.py @@ -94,7 +94,8 @@ class Configuration(BaseModel): remove_imprecise: bool = True validate_raw: bool = Field( default=False, - description="Should the raw mappings be validated against Bioregistry prefixes and local unique identifier regular expressions (when available)?", + description="Should the raw mappings be validated against Bioregistry " + "prefixes and local unique identifier regular expressions (when available)?", ) raw_pickle_path: Path | None = None diff --git a/src/semra/templates/base.html b/src/semra/templates/base.html index 8aef008..a4ca6c7 100644 --- a/src/semra/templates/base.html +++ b/src/semra/templates/base.html @@ -29,8 +29,10 @@ {% endblock %} + -