From f8808c8a1a037bfa8dae104746f5cd548afc336d Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Fri, 12 Jul 2024 09:07:51 -0400 Subject: [PATCH 01/21] Initial implementation of resource adding endpoint --- mira/dkg/api.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/mira/dkg/api.py b/mira/dkg/api.py index c109beb1..36709a43 100644 --- a/mira/dkg/api.py +++ b/mira/dkg/api.py @@ -13,6 +13,7 @@ from mira.dkg.client import AskemEntity, Entity, Relation from mira.dkg.utils import DKG_REFINER_RELS +from mira.dkg.construct import process_resource __all__ = [ "api_blueprint", @@ -360,6 +361,31 @@ def add_relations( request.app.state.client.add_relation(relation) + @api_blueprint.post( + "/add_resources", + response_model=None, + tags=["relations"], + ) + def add_resources( + request: Request, + resource_list: List[str] + ): + for resource in resource_list: + # nodes and edges will be a list of dicts + nodes, edges = process_resource(resource) + + # node_info and edge_info are dictionaries that will be + # unpacked when creating instances of entities and relations + entities = [Entity(**node_info) for node_info in nodes] + relations = [Relation(**edge_info) for edge_info in edges] + + for entity in entities: + request.app.state.client.add_node(entity) + for relation in relations: + request.app.state.client.add_relation(relation) + + + class IsOntChildResult(BaseModel): """Result of a query to /is_ontological_child""" From 88bc327d04a23a77538a670cbf74e02d1d9e2fce Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Fri, 12 Jul 2024 17:54:48 -0400 Subject: [PATCH 02/21] Fix validation error for starting uvicorn instance and add retrieving probonto resource to add_resource endpoint, fix errors for adding synonyms, xrefs, properties info for nodes --- mira/dkg/api.py | 10 ++++-- mira/dkg/client.py | 41 +++++++++++++++------- mira/dkg/construct.py | 82 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 117 insertions(+), 16 deletions(-) diff --git a/mira/dkg/api.py b/mira/dkg/api.py index 36709a43..84fa0f74 100644 --- a/mira/dkg/api.py +++ b/mira/dkg/api.py @@ -2,7 +2,7 @@ import itertools as itt import os -from typing import Any, List, Mapping, Optional, Union, Dict +from typing import Any, List, Mapping, Optional, Union import pydantic from fastapi import APIRouter, Body, HTTPException, Path, Query, Request @@ -368,12 +368,16 @@ def add_relations( ) def add_resources( request: Request, - resource_list: List[str] + resource_list: List[str] = Body( + ..., + description="A of resources to add to the DKG", + title="Resource Prefixes", + example=["probonto"], + ) ): for resource in resource_list: # nodes and edges will be a list of dicts nodes, edges = process_resource(resource) - # node_info and edge_info are dictionaries that will be # unpacked when creating instances of entities and relations entities = [Entity(**node_info) for node_info in nodes] diff --git a/mira/dkg/client.py b/mira/dkg/client.py index 7b9e2d16..5288fe76 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -3,6 +3,7 @@ import itertools as itt import logging import os +import json from collections import Counter, defaultdict from difflib import SequenceMatcher from functools import lru_cache @@ -352,20 +353,34 @@ def add_node(self, entity): alts = entity.alts xrefs = entity.xrefs labels = entity.labels + properties = entity.properties create_source_node_query = ( - f"MERGE (n {{curie: '{curie}', " - f"name: '{name}', " - f"type: '{type}', " - f"obsolete: {obsolete}, " - f"description: 
'{description}', " - f"synonyms: {synonyms}, " - f"alts: {alts}, " - f"xrefs: {xrefs}, " - f"labels: {labels} }} )" + "MERGE (n {curie: $curie, " + "name: $name, " + "type: $type, " + "obsolete: $obsolete, " + "description: $description, " + "synonyms: $synonyms, " + "alts: $alts, " + "xrefs: $xrefs, " + "labels: $labels, " + "properties: $properties})" ) - - self.create_tx(create_source_node_query) + query_parameters = { + "curie": curie, + "name": name, + "type": type, + "obsolete": obsolete, + "description": description, + "synonyms": json.dumps([synonym.dict() for synonym in synonyms]), + "alts": alts, + "xrefs": json.dumps([xref.dict() for xref in xrefs]), + "labels": labels, + "properties": json.dumps(properties) + } + + self.create_tx(create_source_node_query, **query_parameters) def add_relation(self, relation): """Add a relation to the DKG @@ -521,7 +536,9 @@ def get_grounder_terms(self, prefix: str) -> List["gilda.term.Term"]: def get_lexical(self) -> List[Entity]: """Get Lexical information for all entities.""" - query = f"MATCH (n) WHERE NOT n.obsolete and EXISTS(n.name) RETURN n" + query = (f"MATCH (n) WHERE NOT n.obsolete and EXISTS(n.name)" + f" and EXISTS(n.type) " + f"RETURN n") return [Entity.from_data(n) for n, in self.query_tx(query) or []] def get_grounder(self, prefix: Union[str, List[str]]) -> "gilda.grounder.Grounder": diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index c1b0b046..71c995ae 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -33,6 +33,7 @@ import biomappings import bioontologies import click +import pydantic.error_wrappers import pyobo import pystow from bioontologies import obograph @@ -54,6 +55,7 @@ from mira.dkg.physical_constants import get_physical_constant_terms from mira.dkg.constants import EDGE_HEADER, NODE_HEADER from mira.dkg.utils import PREFIXES +from mira.dkg.client import Synonym, Xref MODULE = pystow.module("mira") DEMO_MODULE = MODULE.module("demo", "import") @@ -199,6 +201,75 @@ class NodeInfo(NamedTuple): synonym_types: str +def get_probonto_data(): + probonto_edges = [] + nodes = [] + for term in tqdm( + get_probonto_terms(), unit="term", desc="Loading probonto" + ): + curie, name, parameters = ( + term["curie"], + term["name"], + term["parameters"], + ) + nodes.append( + { + "id": curie, + "name": name, + "type": "class", + "description": "", + "obsolete": False, + "xrefs": [Xref(id=eq.get("curie", ""), type=eq.get("name", "")) + for eq in term.get("equivalent", [])] + } + ) + + for parameter in term.get("parameters", []): + parameter_curie, parameter_name = ( + parameter["curie"], + parameter["name"], + ) + synonyms = [] + synonym_types = [] + parameter_symbol = parameter.get("symbol") + if parameter_symbol: + synonyms.append(parameter_symbol) + synonym_types.append("referenced_by_latex") + parameter_short = parameter.get("short_name") + if parameter_short: + synonyms.append(parameter_short) + synonym_types.append("oboInOwl:hasExactSynonym") + synonyms_list = [Synonym(value=value, type=type) for value, type in + zip(synonyms, synonym_types)] + nodes.append( + { + "id": parameter_curie, + "name": parameter_name, + "type": "class", + "description": "", + "obsolete": False, + "synonyms": synonyms_list + } + ) + probonto_edges.append( + { + "source_curie": curie, + "target_curie": parameter_curie, + "type": "has_parameter", + "pred": "probonto:c0000062", + "source": "probonto", + "graph": "https://raw.githubusercontent.com/probonto/ontologymaster/probonto4ols.owl", + "version": "2.5", + } + ) + return 
nodes, probonto_edges + + +def process_resource(resource_prefix: str): + if resource_prefix == "probonto": + return get_probonto_data() + + @click.command() @click.option( "--add-xref-edges", @@ -254,7 +325,14 @@ def construct( edge_names = {} for edge_prefix in DEFAULT_VOCABS: click.secho(f"Caching {manager.get_name(edge_prefix)}", fg="green", bold=True) - parse_results = bioontologies.get_obograph_by_prefix(edge_prefix) + try: + parse_results = bioontologies.get_obograph_by_prefix(edge_prefix) + except pydantic.error_wrappers.ValidationError: + print(f"VALIDATE NODE GRAPH ERROR {edge_prefix}") + continue + if not parse_results.graph_document: + print(f"EMPTY GRAPH {edge_prefix}") + continue for edge_graph in parse_results.graph_document.graphs: edge_graph = edge_graph.standardize() for edge_node in edge_graph.nodes: @@ -931,6 +1009,8 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: writer = csv.writer(file, delimiter="\t", quoting=csv.QUOTE_MINIMAL) writer.writerow(EDGE_HEADER) for prefix, edge_path in tqdm(sorted(use_case_paths.EDGES_PATHS.items()), desc="cat edges"): + if not edge_path.is_file(): + continue with edge_path.open() as edge_file: reader = csv.reader(edge_file, delimiter="\t", quoting=csv.QUOTE_MINIMAL) _header = next(reader) From 42e04785d9f87527111319ae0337cad21b08bf1a Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Fri, 12 Jul 2024 18:04:22 -0400 Subject: [PATCH 03/21] Add properties to probonto nodes --- mira/dkg/construct.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 71c995ae..f13ad687 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -212,6 +212,11 @@ def get_probonto_data(): term["name"], term["parameters"], ) + properties = { + "has_parameter": [parameter["name"].replace("\n", " ") for parameter + in + parameters] + } nodes.append( { "id": curie, @@ -220,10 +225,11 @@ def get_probonto_data(): "description": "", "obsolete": False, "xrefs": [Xref(id=eq.get("curie", ""), type=eq.get("name", "")) - for eq in term.get("equivalent", [])] + for eq in term.get("equivalent", [])], + "properties": properties + } ) - for parameter in term.get("parameters", []): parameter_curie, parameter_name = ( parameter["curie"], From ff74798690e12fbec6dd9e4387f7633509698dfc Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Mon, 15 Jul 2024 15:03:15 -0400 Subject: [PATCH 04/21] Process epi use case nodes and edges --- mira/dkg/api.py | 2 +- mira/dkg/construct.py | 118 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 111 insertions(+), 9 deletions(-) diff --git a/mira/dkg/api.py b/mira/dkg/api.py index 84fa0f74..d811931c 100644 --- a/mira/dkg/api.py +++ b/mira/dkg/api.py @@ -377,7 +377,7 @@ def add_resources( ): for resource in resource_list: # nodes and edges will be a list of dicts - nodes, edges = process_resource(resource) + nodes, edges = process_resource(resource.lower()) # node_info and edge_info are dictionaries that will be # unpacked when creating instances of entities and relations entities = [Entity(**node_info) for node_info in nodes] diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index f13ad687..e200ad68 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -56,6 +56,7 @@ from mira.dkg.constants import EDGE_HEADER, NODE_HEADER from mira.dkg.utils import PREFIXES from mira.dkg.client import Synonym, Xref +from .resources.geonames import get_geonames_terms MODULE = pystow.module("mira") DEMO_MODULE = MODULE.module("demo", "import") 
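A minimal sketch of how the pieces introduced in the patches above fit together when used directly rather than through the HTTP endpoint. It assumes a running Neo4j instance that Neo4jClient can reach with its default configuration; process_resource, Entity, Relation, add_node, and add_relation are the helpers added in this series.

    from mira.dkg.client import Entity, Relation, Neo4jClient
    from mira.dkg.construct import process_resource

    client = Neo4jClient()
    # nodes and edges are plain lists of dicts, exactly as consumed by the endpoint
    nodes, edges = process_resource("probonto")
    for node_info in nodes:
        client.add_node(Entity(**node_info))
    for edge_info in edges:
        client.add_relation(Relation(**edge_info))
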
@@ -201,9 +202,8 @@ class NodeInfo(NamedTuple): synonym_types: str -def get_probonto_data(): - probonto_edges = [] - nodes = [] +def extract_probonto_nodes_edges(): + probonto_nodes, probonto_edges = [], [] for term in tqdm( get_probonto_terms(), unit="term", desc="Loading probonto" ): @@ -217,7 +217,7 @@ def get_probonto_data(): in parameters] } - nodes.append( + probonto_nodes.append( { "id": curie, "name": name, @@ -247,7 +247,7 @@ def get_probonto_data(): synonym_types.append("oboInOwl:hasExactSynonym") synonyms_list = [Synonym(value=value, type=type) for value, type in zip(synonyms, synonym_types)] - nodes.append( + probonto_nodes.append( { "id": parameter_curie, "name": parameter_name, @@ -268,12 +268,115 @@ def get_probonto_data(): "version": "2.5", } ) - return nodes, probonto_edges + return probonto_nodes, probonto_edges + + +def extract_geonames_nodes_edges(): + geonames_nodes, geonames_edges = [], [] + for term in tqdm(get_geonames_terms(), unit="term", desc="Geonames"): + geonames_nodes.append( + { + "id": term.curie, + "name": term.name, + "type": "individual", + "description": term.definition, + "obsolete": False if not term.is_obsolete else True, + "synonyms": term.synonyms, + "alts": term.alt_ids, + "xrefs": term.xrefs, + "properties": term.properties, + } + ) + for parent in term.get_relationships(part_of): + geonames_edges.append( + ( + term.curie, + parent.curie, + "part_of", + part_of.curie.lower(), + "geonames", + "geonames", + "", + ) + ) + return geonames_nodes, geonames_edges + + +def extract_ncit_nodes_edges(): + ncit_nodes, ncit_edges = [], [] + for term in tqdm(get_ncit_subset(), unit="term", desc="NCIT"): + ncit_nodes.append( + { + "id": term.curie, + "name": term.name, + "type": "class", + "description": term.definition, + "obsolete": False if not term.is_obsolete else True, + "synonyms": term.synonyms, + "alts": term.alt_ids, + "xrefs": term.xrefs, + "properties": term.properties, + } + ) + for parent in term.get_relationships(part_of): + ncit_edges.append( + ( + term.curie, + parent.curie, + "part_of", + part_of.curie.lower(), + "ncit", + "ncit", + "", + ) + ) + + +def extract_ncbitaxon_nodes_edges(): + ncbitaxon_nodes, ncbitaxon_edges = [], [] + for term in tqdm(get_ncbitaxon(), unit="term", desc="NCBITaxon"): + ncbitaxon_nodes.append( + { + "id": term.curie, + "name": term.name, + "type": "class", + "description": term.definition, + "obsolete": False if not term.is_obsolete else True, + "synonyms": term.synonyms, + "alts": term.alt_ids, + "xrefs": term.xrefs, + "properties": term.properties, + } + ) + for parent in term.get_relationships(part_of): + ncbitaxon_edges.append( + ( + term.curie, + parent.curie, + "part_of", + part_of.curie.lower(), + "ncbitaxon", + "ncbitaxon", + "", + ) + ) def process_resource(resource_prefix: str): if resource_prefix == "probonto": - return get_probonto_data() + return extract_probonto_nodes_edges() + elif resource_prefix == "geonames": + return extract_geonames_nodes_edges() + elif resource_prefix == "ncit": + return extract_ncit_nodes_edges() + elif resource_prefix == "ncbitaxon": + return extract_ncbitaxon_nodes_edges() + elif resource_prefix == "eiffel": + pass + elif resource_prefix == "cso": + pass + elif resource_prefix == "wikidata": + pass @click.command() @@ -534,7 +637,6 @@ def construct( writer.writerow(EDGE_HEADER) writer.writerows(eiffel_edges) if use_case == "epi": - from .resources.geonames import get_geonames_terms geonames_edges = [] for term in tqdm(get_geonames_terms(), unit="term", desc="Geonames"): 
node_sources[term.curie].add("geonames") From 831c7d1d79321c2ff00dd7b88f70a8a912a34a0b Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Mon, 15 Jul 2024 16:13:48 -0400 Subject: [PATCH 05/21] Add climate nodes and edges, add kwarg to from_obo_path to extract cso data --- mira/dkg/construct.py | 91 +++++++++++++++++++++++++++++++++------ mira/dkg/resources/cso.py | 3 +- 2 files changed, 79 insertions(+), 15 deletions(-) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index e200ad68..619eaa18 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -56,7 +56,10 @@ from mira.dkg.constants import EDGE_HEADER, NODE_HEADER from mira.dkg.utils import PREFIXES from mira.dkg.client import Synonym, Xref -from .resources.geonames import get_geonames_terms +from mira.dkg.resources.cso import get_cso_obo +from mira.dkg.resources.geonames import get_geonames_terms +from mira.dkg.resources.extract_eiffel_ontology import get_eiffel_ontology_terms +from mira.dkg.resources.uat import get_uat MODULE = pystow.module("mira") DEMO_MODULE = MODULE.module("demo", "import") @@ -204,9 +207,7 @@ class NodeInfo(NamedTuple): def extract_probonto_nodes_edges(): probonto_nodes, probonto_edges = [], [] - for term in tqdm( - get_probonto_terms(), unit="term", desc="Loading probonto" - ): + for term in tqdm(get_probonto_terms(), unit="term"): curie, name, parameters = ( term["curie"], term["name"], @@ -273,7 +274,7 @@ def extract_probonto_nodes_edges(): def extract_geonames_nodes_edges(): geonames_nodes, geonames_edges = [], [] - for term in tqdm(get_geonames_terms(), unit="term", desc="Geonames"): + for term in tqdm(get_geonames_terms(), unit="term"): geonames_nodes.append( { "id": term.curie, @@ -304,7 +305,7 @@ def extract_geonames_nodes_edges(): def extract_ncit_nodes_edges(): ncit_nodes, ncit_edges = [], [] - for term in tqdm(get_ncit_subset(), unit="term", desc="NCIT"): + for term in tqdm(get_ncit_subset(), unit="term"): ncit_nodes.append( { "id": term.curie, @@ -334,7 +335,7 @@ def extract_ncit_nodes_edges(): def extract_ncbitaxon_nodes_edges(): ncbitaxon_nodes, ncbitaxon_edges = [], [] - for term in tqdm(get_ncbitaxon(), unit="term", desc="NCBITaxon"): + for term in tqdm(get_ncbitaxon(), unit="term"): ncbitaxon_nodes.append( { "id": term.curie, @@ -362,6 +363,72 @@ def extract_ncbitaxon_nodes_edges(): ) +def extract_eiffel_nodes_edges(): + eiffel_nodes, eiffel_edges = [], [] + for term in tqdm(get_eiffel_ontology_terms(), unit="term"): + eiffel_nodes.append( + { + "id": term.curie, + "name": term.name, + "type": "class", + "description": term.definition, + "obsolete": False if not term.is_obsolete else True, + "synonyms": term.synonyms, + "alts": term.alt_ids, + "xrefs": term.xrefs, + "properties": term.properties, + } + ) + for typedef, object_references in term.relationships.items(): + for object_reference in object_references: + eiffel_edges.append( + ( + term.curie, + object_reference.curie, + typedef.name.replace(" ", "").lower(), + typedef.curie, + "eiffel", + "eiffel", + "", + ) + ) + return eiffel_nodes, eiffel_edges + + +def extract_cso_nodes_edges(): + cso_nodes, cso_edges = [], [] + for term in get_cso_obo().iter_terms(): + cso_nodes.append( + { + "id": term.curie, + "name": term.name, + "type": "class", + "description": term.definition, + "obsolete": False if not term.is_obsolete else True, + "synonyms": term.synonyms, + "alts": term.alt_ids, + "xrefs": term.xrefs, + "properties": term.properties, + } + ) + for parent in term.get_relationship(part_of): + cso_edges.append( + ( + 
term.curie, + parent.curie, + "part_of", + part_of.curie.lower(), + "cso", + "cso", + "", + ) + ) + + +def extract_wikidata_nodes_edges(): + pass + + def process_resource(resource_prefix: str): if resource_prefix == "probonto": return extract_probonto_nodes_edges() @@ -372,11 +439,11 @@ def process_resource(resource_prefix: str): elif resource_prefix == "ncbitaxon": return extract_ncbitaxon_nodes_edges() elif resource_prefix == "eiffel": - pass + return extract_eiffel_nodes_edges() elif resource_prefix == "cso": - pass + return extract_cso_nodes_edges() elif resource_prefix == "wikidata": - pass + return extract_wikidata_nodes_edges() @click.command() @@ -406,7 +473,6 @@ def main( use_case = config.use_case else: config = None - construct( use_case=use_case, config=config, @@ -606,13 +672,11 @@ def construct( writer.writerows(probonto_edges) if use_case == "climate": - from .resources.cso import get_cso_obo for term in get_cso_obo().iter_terms(): node_sources[term.curie].add("cso") nodes[term.curie] = get_node_info(term) - from .resources.extract_eiffel_ontology import get_eiffel_ontology_terms eiffel_edges = [] for term in tqdm(get_eiffel_ontology_terms(), unit="term", desc="Eiffel"): @@ -671,7 +735,6 @@ def construct( # TODO add edges to source file later, if important if use_case == "space": - from .resources.uat import get_uat uat_ontology = get_uat() uat_edges = [] diff --git a/mira/dkg/resources/cso.py b/mira/dkg/resources/cso.py index 88e8d1da..2cde3264 100644 --- a/mira/dkg/resources/cso.py +++ b/mira/dkg/resources/cso.py @@ -23,7 +23,8 @@ def get_cso_obo() -> Obo: ) download(url=URL, path=PATH) # use https://github.com/pyobo/pyobo/pull/159 - return from_obo_path(PATH, prefix="cso", default_prefix="cso", strict=False) + kwargs = {"default_prefix": "cso"} + return from_obo_path(PATH, prefix="cso", strict=False, **kwargs) if __name__ == "__main__": From 20abbb963ca189ba4eb3006c2b1afb7f05337d86 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Mon, 15 Jul 2024 17:05:27 -0400 Subject: [PATCH 06/21] Add wikidata nodes and edges, update add_resources endpoint example, use better variable names, revert kwarg addition to 'from_obo_path' call --- mira/dkg/api.py | 11 +++++---- mira/dkg/client.py | 1 - mira/dkg/construct.py | 50 +++++++++++++++++++++++++++++++++++++-- mira/dkg/resources/cso.py | 5 ++-- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/mira/dkg/api.py b/mira/dkg/api.py index d811931c..6fa5638d 100644 --- a/mira/dkg/api.py +++ b/mira/dkg/api.py @@ -13,7 +13,7 @@ from mira.dkg.client import AskemEntity, Entity, Relation from mira.dkg.utils import DKG_REFINER_RELS -from mira.dkg.construct import process_resource +from mira.dkg.construct import add_resource_to_dkg __all__ = [ "api_blueprint", @@ -368,16 +368,17 @@ def add_relations( ) def add_resources( request: Request, - resource_list: List[str] = Body( + resource_prefix_list: List[str] = Body( ..., description="A of resources to add to the DKG", title="Resource Prefixes", - example=["probonto"], + example=["probonto", "wikidata", "eiffel", "geonames", "ncit", + "nbcbitaxon"], ) ): - for resource in resource_list: + for resource_prefix in resource_prefix_list: # nodes and edges will be a list of dicts - nodes, edges = process_resource(resource.lower()) + nodes, edges = add_resource_to_dkg(resource_prefix.lower()) # node_info and edge_info are dictionaries that will be # unpacked when creating instances of entities and relations entities = [Entity(**node_info) for node_info in nodes] diff --git a/mira/dkg/client.py 
b/mira/dkg/client.py index 5288fe76..f0929569 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -410,7 +410,6 @@ def add_relation(self, relation): self.create_tx(create_relation_query) - def create_single_property_node_index( self, index_name: str, diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 619eaa18..10212712 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -426,10 +426,55 @@ def extract_cso_nodes_edges(): def extract_wikidata_nodes_edges(): - pass + wikidata_nodes, wikidata_edges = [], [] + for wikidata_id, label, description, synonyms, xrefs in tqdm( + get_unit_terms(), unit="unit"): + synonyms_list = [Synonym(value=value, type="") for value in synonyms] + xrefs_list = [Xref(id=_id, type="oboinowl:hasDbXref") for _id in xrefs] + wikidata_nodes.append( + { + "id": f"wikidata:{wikidata_id}", + "name": label, + "type": "class", + "description": description, + "synonyms": synonyms_list, + "xrefs": xrefs_list, + "obsolete": False + } + ) + + for (wikidata_id, label, description, synonyms, xrefs, value, formula, + symbols) in tqdm(get_physical_constant_terms()): + synonym_types, synonym_values = [], [] + for syn in synonyms: + synonym_values.append(syn) + synonym_types.append("oboInOwl:hasExactSynonym") + for symbol in symbols: + synonym_values.append(symbol) + synonym_types.append("debio:0000031") + + synonyms_list = [Synonym(value=value, type=type) for value, type + in zip(synonym_values, synonym_types)] + xrefs_list = [Xref(id=_id, type="oboinowl:hasDbXref") for _id in xrefs] + if value: + properties = {"debio:0000042": [str(value)]} + else: + properties = {} + wikidata_nodes.append( + { + "id": f"wikidata:{wikidata_id}", + "name": label, + "obsolete": False, + "type": "class", + "description": description, + "synonyms": synonyms_list, + "xrefs": xrefs_list, + "properties": properties + } + ) -def process_resource(resource_prefix: str): +def add_resource_to_dkg(resource_prefix: str): if resource_prefix == "probonto": return extract_probonto_nodes_edges() elif resource_prefix == "geonames": @@ -443,6 +488,7 @@ def process_resource(resource_prefix: str): elif resource_prefix == "cso": return extract_cso_nodes_edges() elif resource_prefix == "wikidata": + # combine retrieval of wikidata constants and units return extract_wikidata_nodes_edges() diff --git a/mira/dkg/resources/cso.py b/mira/dkg/resources/cso.py index 2cde3264..aff995b6 100644 --- a/mira/dkg/resources/cso.py +++ b/mira/dkg/resources/cso.py @@ -23,10 +23,9 @@ def get_cso_obo() -> Obo: ) download(url=URL, path=PATH) # use https://github.com/pyobo/pyobo/pull/159 - kwargs = {"default_prefix": "cso"} - return from_obo_path(PATH, prefix="cso", strict=False, **kwargs) + return from_obo_path(PATH, prefix="cso", default_prefix="cso", strict=False) if __name__ == "__main__": for term in get_cso_obo(): - print(term) + print(term) \ No newline at end of file From 18ebfbfe8015a2ed45921a4286fcddbf323f9958 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 16 Jul 2024 13:40:43 -0400 Subject: [PATCH 07/21] Add return values for some extraction methods, refactor how synonyms and xrefs are processed for terms --- mira/dkg/client.py | 4 +- mira/dkg/construct.py | 148 ++++++++++++++++++++++---------------- mira/dkg/resources/cso.py | 2 +- 3 files changed, 89 insertions(+), 65 deletions(-) diff --git a/mira/dkg/client.py b/mira/dkg/client.py index f0929569..30e5dbf3 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -535,9 +535,7 @@ def get_grounder_terms(self, prefix: str) -> 
List["gilda.term.Term"]: def get_lexical(self) -> List[Entity]: """Get Lexical information for all entities.""" - query = (f"MATCH (n) WHERE NOT n.obsolete and EXISTS(n.name)" - f" and EXISTS(n.type) " - f"RETURN n") + query = f"MATCH (n) WHERE NOT n.obsolete and EXISTS(n.name) RETURN n" return [Entity.from_data(n) for n, in self.query_tx(query) or []] def get_grounder(self, prefix: Union[str, List[str]]) -> "gilda.grounder.Grounder": diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 10212712..e660d786 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -280,25 +280,29 @@ def extract_geonames_nodes_edges(): "id": term.curie, "name": term.name, "type": "individual", - "description": term.definition, + "description": term.definition if term.definition else "", "obsolete": False if not term.is_obsolete else True, - "synonyms": term.synonyms, + "synonyms": [Synonym(value=syn._fp(), + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.prefix}") + for syn in term.synonyms], "alts": term.alt_ids, - "xrefs": term.xrefs, + "xrefs": [Xref(value=value, type=type) for value, type in + zip(term.xrefs, term.xref_types)], "properties": term.properties, } ) for parent in term.get_relationships(part_of): geonames_edges.append( - ( - term.curie, - parent.curie, - "part_of", - part_of.curie.lower(), - "geonames", - "geonames", - "", - ) + { + "source_curie": term.curie, + "target_curie": parent.curie, + "type": "part_of", + "pred": part_of.curie.lower(), + "source": "geonames", + "graph": "geonames", + "version": "", + } ) return geonames_nodes, geonames_edges @@ -311,26 +315,31 @@ def extract_ncit_nodes_edges(): "id": term.curie, "name": term.name, "type": "class", - "description": term.definition, + "description": term.definition if term.definition else "", "obsolete": False if not term.is_obsolete else True, - "synonyms": term.synonyms, + "synonyms": [Synonym(value=syn._fp(), + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.prefix}") + for syn in term.synonyms], "alts": term.alt_ids, - "xrefs": term.xrefs, + "xrefs": [Xref(value=value, type=type) for value, type in + zip(term.xrefs, term.xref_types)], "properties": term.properties, } ) for parent in term.get_relationships(part_of): ncit_edges.append( - ( - term.curie, - parent.curie, - "part_of", - part_of.curie.lower(), - "ncit", - "ncit", - "", - ) + { + "source_curie": term.curie, + "target_curie": parent.curie, + "type": "part_of", + "pred": part_of.curie.lower(), + "source": "ncit", + "graph": "ncit", + "version": "", + } ) + return ncit_nodes, ncit_edges def extract_ncbitaxon_nodes_edges(): @@ -341,26 +350,31 @@ def extract_ncbitaxon_nodes_edges(): "id": term.curie, "name": term.name, "type": "class", - "description": term.definition, + "description": term.definition if term.definition else "", "obsolete": False if not term.is_obsolete else True, - "synonyms": term.synonyms, + "synonyms": [Synonym(value=syn._fp(), + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.prefix}") + for syn in term.synonyms], "alts": term.alt_ids, - "xrefs": term.xrefs, + "xrefs": [Xref(value=value, type=type) for value, type in + zip(term.xrefs, term.xref_types)], "properties": term.properties, } ) for parent in term.get_relationships(part_of): ncbitaxon_edges.append( - ( - term.curie, - parent.curie, - "part_of", - part_of.curie.lower(), - "ncbitaxon", - "ncbitaxon", - "", - ) + { + "source_curie": term.curie, + "target_curie": parent.curie, + "type": "part_of", + "pred": part_of.curie.lower(), + "source": 
"ncbitaxon", + "graph": "ncbitaxon", + "version": "", + } ) + return ncbitaxon_nodes, ncbitaxon_edges def extract_eiffel_nodes_edges(): @@ -371,26 +385,30 @@ def extract_eiffel_nodes_edges(): "id": term.curie, "name": term.name, "type": "class", - "description": term.definition, + "description": term.definition if term.definition else "", "obsolete": False if not term.is_obsolete else True, - "synonyms": term.synonyms, + "synonyms": [Synonym(value=syn._fp(), + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.prefix}") + for syn in term.synonyms], "alts": term.alt_ids, - "xrefs": term.xrefs, + "xrefs": [Xref(value=value, type=type) for value, type in + zip(term.xrefs, term.xref_types)], "properties": term.properties, } ) for typedef, object_references in term.relationships.items(): for object_reference in object_references: eiffel_edges.append( - ( - term.curie, - object_reference.curie, - typedef.name.replace(" ", "").lower(), - typedef.curie, - "eiffel", - "eiffel", - "", - ) + { + "source_curie": term.curie, + "target_curie": object_reference.curie, + "type": typedef.name.replace(" ", "").lower(), + "pred": typedef.curie, + "source": "eiffel", + "graph": "eiffel", + "version": "", + } ) return eiffel_nodes, eiffel_edges @@ -403,26 +421,31 @@ def extract_cso_nodes_edges(): "id": term.curie, "name": term.name, "type": "class", - "description": term.definition, + "description": term.definition if term.definition else "", "obsolete": False if not term.is_obsolete else True, - "synonyms": term.synonyms, + "synonyms": [Synonym(value=syn._fp(), + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.prefix}") + for syn in term.synonyms], "alts": term.alt_ids, - "xrefs": term.xrefs, + "xrefs": [Xref(value=value, type=type) for value, type in + zip(term.xrefs, term.xref_types)], "properties": term.properties, } ) for parent in term.get_relationship(part_of): cso_edges.append( - ( - term.curie, - parent.curie, - "part_of", - part_of.curie.lower(), - "cso", - "cso", - "", - ) + { + "source_curie": term.curie, + "target_curie": parent.curie, + "type": "part_of", + "pred": part_of.curie.lower(), + "source": "cso", + "graph": "cso", + "version": "", + } ) + return cso_nodes, cso_edges def extract_wikidata_nodes_edges(): @@ -472,6 +495,7 @@ def extract_wikidata_nodes_edges(): "properties": properties } ) + return wikidata_nodes, wikidata_edges def add_resource_to_dkg(resource_prefix: str): @@ -490,6 +514,9 @@ def add_resource_to_dkg(resource_prefix: str): elif resource_prefix == "wikidata": # combine retrieval of wikidata constants and units return extract_wikidata_nodes_edges() + else: + # handle resource names that we don't process + return [], [] @click.command() @@ -723,7 +750,6 @@ def construct( node_sources[term.curie].add("cso") nodes[term.curie] = get_node_info(term) - eiffel_edges = [] for term in tqdm(get_eiffel_ontology_terms(), unit="term", desc="Eiffel"): node_sources[term.curie].add("eiffel") diff --git a/mira/dkg/resources/cso.py b/mira/dkg/resources/cso.py index aff995b6..88e8d1da 100644 --- a/mira/dkg/resources/cso.py +++ b/mira/dkg/resources/cso.py @@ -28,4 +28,4 @@ def get_cso_obo() -> Obo: if __name__ == "__main__": for term in get_cso_obo(): - print(term) \ No newline at end of file + print(term) From a7aef3c328f320d7bdbf92374c8eefb02f14c3fe Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 16 Jul 2024 13:43:20 -0400 Subject: [PATCH 08/21] Revert error handling in construct.py --- mira/dkg/construct.py | 11 +---------- 1 file changed, 1 insertion(+), 10 
deletions(-) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index e660d786..77a72408 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -573,14 +573,7 @@ def construct( edge_names = {} for edge_prefix in DEFAULT_VOCABS: click.secho(f"Caching {manager.get_name(edge_prefix)}", fg="green", bold=True) - try: - parse_results = bioontologies.get_obograph_by_prefix(edge_prefix) - except pydantic.error_wrappers.ValidationError: - print(f"VALIDATE NODE GRAPH ERROR {edge_prefix}") - continue - if not parse_results.graph_document: - print(f"EMPTY GRAPH {edge_prefix}") - continue + parse_results = bioontologies.get_obograph_by_prefix(edge_prefix) for edge_graph in parse_results.graph_document.graphs: edge_graph = edge_graph.standardize() for edge_node in edge_graph.nodes: @@ -1252,8 +1245,6 @@ def _get_edge_name(curie_: str, strict: bool = False) -> str: writer = csv.writer(file, delimiter="\t", quoting=csv.QUOTE_MINIMAL) writer.writerow(EDGE_HEADER) for prefix, edge_path in tqdm(sorted(use_case_paths.EDGES_PATHS.items()), desc="cat edges"): - if not edge_path.is_file(): - continue with edge_path.open() as edge_file: reader = csv.reader(edge_file, delimiter="\t", quoting=csv.QUOTE_MINIMAL) _header = next(reader) From a709ae707754df5d391b7538e9591b7404be2f03 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 16 Jul 2024 15:49:19 -0400 Subject: [PATCH 09/21] Add correct node properties for a node to be added to the dkg --- mira/dkg/client.py | 64 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/mira/dkg/client.py b/mira/dkg/client.py index 30e5dbf3..cdb09001 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -171,6 +171,7 @@ def from_data(cls, data): ------- A MIRA entity """ + if isinstance(data, neo4j.graph.Node): data = dict(data.items()) properties = defaultdict(list) @@ -186,6 +187,8 @@ def from_data(cls, data): ): synonyms.append(Synonym(value=value, type=type)) xrefs = [] + + for curie, type in zip( data.pop("xrefs", []), data.pop("xref_types", []), @@ -344,40 +347,67 @@ def add_node(self, entity): entity: The node object that will be added to the DKG """ - curie = entity.id + xrefs, xref_types = [], [] + synonyms, synonym_types = [], [] + property_predicates, property_values = [], [] + for xref in entity.xrefs: + xrefs.append(xref.id) + xref_types.append(xref.type) + for synonym in entity.synonyms: + synonyms.append(synonym.value) + synonym_types.append(synonym.value) + for property_predicate, property_value_list in entity.properties.items(): + property_predicates.append(property_predicate) + property_values.extend(property_value_list) + + _id = entity.id name = entity.name type = entity.type obsolete = entity.obsolete description = entity.description - synonyms = entity.synonyms alts = entity.alts - xrefs = entity.xrefs labels = entity.labels - properties = entity.properties create_source_node_query = ( - "MERGE (n {curie: $curie, " - "name: $name, " + "MERGE (n {id: $id, " "type: $type, " - "obsolete: $obsolete, " - "description: $description, " - "synonyms: $synonyms, " - "alts: $alts, " - "xrefs: $xrefs, " - "labels: $labels, " - "properties: $properties})" + "obsolete: $obsolete" ) + + if name: + create_source_node_query += ", name: $name" + if description: + create_source_node_query += ", description: $description" + if alts: + create_source_node_query += ", alts: $alts" + if labels: + create_source_node_query += ", labels: $labels" + if xrefs: + create_source_node_query += ", xrefs: $xrefs" + 
create_source_node_query += ", xref_types: $xref_types" + if synonyms: + create_source_node_query += ", synonyms: $synonyms" + create_source_node_query += ", synonym_types: $synonym_types" + if property_predicates: + create_source_node_query += ", property_predicates: $property_predicates" + create_source_node_query += ", property_values: $property_values" + + create_source_node_query += "})" + query_parameters = { - "curie": curie, + "id": _id, "name": name, "type": type, "obsolete": obsolete, "description": description, - "synonyms": json.dumps([synonym.dict() for synonym in synonyms]), + "synonyms": synonyms, + "synonym_types": synonym_types, "alts": alts, - "xrefs": json.dumps([xref.dict() for xref in xrefs]), + "xrefs": xrefs, + "xref_types": xref_types, "labels": labels, - "properties": json.dumps(properties) + "property_predicates": property_predicates, + "property_values": property_values } self.create_tx(create_source_node_query, **query_parameters) From 87782d8db40152f7dccfe4ac99c0f5180e219acb Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 16 Jul 2024 16:20:55 -0400 Subject: [PATCH 10/21] Aggregate retrieval of pyobo resources into one method --- mira/dkg/construct.py | 247 +++++++++++------------------------------- 1 file changed, 65 insertions(+), 182 deletions(-) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 77a72408..aee21845 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -205,6 +205,60 @@ class NodeInfo(NamedTuple): synonym_types: str +def extract_nodes_edges_from_pyobo_terms(term_getter, resource_prefix): + nodes, edges = [], [] + if resource_prefix in {"geonames"}: + entity_type = "individual" + elif resource_prefix in {"ncit", "ncbitaxon", "eiffel", "cso"}: + entity_type = "class" + for term in tqdm(term_getter(), unit="term"): + nodes.append( + { + "id": term.curie, + "name": term.name, + "type": entity_type, + "description": term.definition if term.definition else "", + "obsolete": False if not term.is_obsolete else True, + "synonyms": [Synonym(value=syn._fp(), + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.prefix}") + for syn in term.synonyms], + "alts": term.alt_ids, + "xrefs": [Xref(value=value, type=type) for value, type in + zip(term.xrefs, term.xref_types)], + "properties": dict(term.properties), + } + ) + if resource_prefix != "eiffel": + for parent in term.get_relationships(part_of): + edges.append( + { + "source_curie": term.curie, + "target_curie": parent.curie, + "type": "part_of", + "pred": part_of.curie.lower(), + "source": resource_prefix, + "graph": resource_prefix, + "version": "", + } + ) + else: + for typedef, object_references in term.relationships.items(): + for object_reference in object_references: + edges.append( + { + "source_curie": term.curie, + "target_curie": object_reference.curie, + "type": typedef.name.replace(" ", "").lower(), + "pred": typedef.curie, + "source": "eiffel", + "graph": "eiffel", + "version": "", + } + ) + return nodes, edges + + def extract_probonto_nodes_edges(): probonto_nodes, probonto_edges = [], [] for term in tqdm(get_probonto_terms(), unit="term"): @@ -272,186 +326,10 @@ def extract_probonto_nodes_edges(): return probonto_nodes, probonto_edges -def extract_geonames_nodes_edges(): - geonames_nodes, geonames_edges = [], [] - for term in tqdm(get_geonames_terms(), unit="term"): - geonames_nodes.append( - { - "id": term.curie, - "name": term.name, - "type": "individual", - "description": term.definition if term.definition else "", - "obsolete": False if not 
term.is_obsolete else True, - "synonyms": [Synonym(value=syn._fp(), - type=f"{syn.type.reference.prefix}:" - f"{syn.type.reference.prefix}") - for syn in term.synonyms], - "alts": term.alt_ids, - "xrefs": [Xref(value=value, type=type) for value, type in - zip(term.xrefs, term.xref_types)], - "properties": term.properties, - } - ) - for parent in term.get_relationships(part_of): - geonames_edges.append( - { - "source_curie": term.curie, - "target_curie": parent.curie, - "type": "part_of", - "pred": part_of.curie.lower(), - "source": "geonames", - "graph": "geonames", - "version": "", - } - ) - return geonames_nodes, geonames_edges - - -def extract_ncit_nodes_edges(): - ncit_nodes, ncit_edges = [], [] - for term in tqdm(get_ncit_subset(), unit="term"): - ncit_nodes.append( - { - "id": term.curie, - "name": term.name, - "type": "class", - "description": term.definition if term.definition else "", - "obsolete": False if not term.is_obsolete else True, - "synonyms": [Synonym(value=syn._fp(), - type=f"{syn.type.reference.prefix}:" - f"{syn.type.reference.prefix}") - for syn in term.synonyms], - "alts": term.alt_ids, - "xrefs": [Xref(value=value, type=type) for value, type in - zip(term.xrefs, term.xref_types)], - "properties": term.properties, - } - ) - for parent in term.get_relationships(part_of): - ncit_edges.append( - { - "source_curie": term.curie, - "target_curie": parent.curie, - "type": "part_of", - "pred": part_of.curie.lower(), - "source": "ncit", - "graph": "ncit", - "version": "", - } - ) - return ncit_nodes, ncit_edges - - -def extract_ncbitaxon_nodes_edges(): - ncbitaxon_nodes, ncbitaxon_edges = [], [] - for term in tqdm(get_ncbitaxon(), unit="term"): - ncbitaxon_nodes.append( - { - "id": term.curie, - "name": term.name, - "type": "class", - "description": term.definition if term.definition else "", - "obsolete": False if not term.is_obsolete else True, - "synonyms": [Synonym(value=syn._fp(), - type=f"{syn.type.reference.prefix}:" - f"{syn.type.reference.prefix}") - for syn in term.synonyms], - "alts": term.alt_ids, - "xrefs": [Xref(value=value, type=type) for value, type in - zip(term.xrefs, term.xref_types)], - "properties": term.properties, - } - ) - for parent in term.get_relationships(part_of): - ncbitaxon_edges.append( - { - "source_curie": term.curie, - "target_curie": parent.curie, - "type": "part_of", - "pred": part_of.curie.lower(), - "source": "ncbitaxon", - "graph": "ncbitaxon", - "version": "", - } - ) - return ncbitaxon_nodes, ncbitaxon_edges - - -def extract_eiffel_nodes_edges(): - eiffel_nodes, eiffel_edges = [], [] - for term in tqdm(get_eiffel_ontology_terms(), unit="term"): - eiffel_nodes.append( - { - "id": term.curie, - "name": term.name, - "type": "class", - "description": term.definition if term.definition else "", - "obsolete": False if not term.is_obsolete else True, - "synonyms": [Synonym(value=syn._fp(), - type=f"{syn.type.reference.prefix}:" - f"{syn.type.reference.prefix}") - for syn in term.synonyms], - "alts": term.alt_ids, - "xrefs": [Xref(value=value, type=type) for value, type in - zip(term.xrefs, term.xref_types)], - "properties": term.properties, - } - ) - for typedef, object_references in term.relationships.items(): - for object_reference in object_references: - eiffel_edges.append( - { - "source_curie": term.curie, - "target_curie": object_reference.curie, - "type": typedef.name.replace(" ", "").lower(), - "pred": typedef.curie, - "source": "eiffel", - "graph": "eiffel", - "version": "", - } - ) - return eiffel_nodes, eiffel_edges - - -def 
extract_cso_nodes_edges(): - cso_nodes, cso_edges = [], [] - for term in get_cso_obo().iter_terms(): - cso_nodes.append( - { - "id": term.curie, - "name": term.name, - "type": "class", - "description": term.definition if term.definition else "", - "obsolete": False if not term.is_obsolete else True, - "synonyms": [Synonym(value=syn._fp(), - type=f"{syn.type.reference.prefix}:" - f"{syn.type.reference.prefix}") - for syn in term.synonyms], - "alts": term.alt_ids, - "xrefs": [Xref(value=value, type=type) for value, type in - zip(term.xrefs, term.xref_types)], - "properties": term.properties, - } - ) - for parent in term.get_relationship(part_of): - cso_edges.append( - { - "source_curie": term.curie, - "target_curie": parent.curie, - "type": "part_of", - "pred": part_of.curie.lower(), - "source": "cso", - "graph": "cso", - "version": "", - } - ) - return cso_nodes, cso_edges - - def extract_wikidata_nodes_edges(): wikidata_nodes, wikidata_edges = [], [] for wikidata_id, label, description, synonyms, xrefs in tqdm( - get_unit_terms(), unit="unit"): + get_unit_terms(), unit="unit"): synonyms_list = [Synonym(value=value, type="") for value in synonyms] xrefs_list = [Xref(id=_id, type="oboinowl:hasDbXref") for _id in xrefs] wikidata_nodes.append( @@ -502,15 +380,20 @@ def add_resource_to_dkg(resource_prefix: str): if resource_prefix == "probonto": return extract_probonto_nodes_edges() elif resource_prefix == "geonames": - return extract_geonames_nodes_edges() + return extract_nodes_edges_from_pyobo_terms(get_geonames_terms, + "geonames") elif resource_prefix == "ncit": - return extract_ncit_nodes_edges() + return extract_nodes_edges_from_pyobo_terms(get_ncit_subset, + "ncit") elif resource_prefix == "ncbitaxon": - return extract_ncbitaxon_nodes_edges() + return extract_nodes_edges_from_pyobo_terms(get_ncbitaxon, + "ncbitaxon") elif resource_prefix == "eiffel": - return extract_eiffel_nodes_edges() + return extract_nodes_edges_from_pyobo_terms( + get_eiffel_ontology_terms, "eiffel") elif resource_prefix == "cso": - return extract_cso_nodes_edges() + return extract_nodes_edges_from_pyobo_terms(get_cso_obo(), + "cso") elif resource_prefix == "wikidata": # combine retrieval of wikidata constants and units return extract_wikidata_nodes_edges() From 7525bd224e775358c8fa00e7454fa0811b3348f2 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Tue, 16 Jul 2024 16:33:22 -0400 Subject: [PATCH 11/21] Remove unused imports --- mira/dkg/client.py | 1 - mira/dkg/construct.py | 1 - 2 files changed, 2 deletions(-) diff --git a/mira/dkg/client.py b/mira/dkg/client.py index cdb09001..c5e699a7 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -3,7 +3,6 @@ import itertools as itt import logging import os -import json from collections import Counter, defaultdict from difflib import SequenceMatcher from functools import lru_cache diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index aee21845..65120296 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -33,7 +33,6 @@ import biomappings import bioontologies import click -import pydantic.error_wrappers import pyobo import pystow from bioontologies import obograph From 75b5d041087d707b74c80fc15212a2f64f6bccab Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 09:26:39 -0400 Subject: [PATCH 12/21] Add biomappings to dkg-construct extra and install dkg-construct for github tests --- .github/workflows/tests.yml | 1 + mira/dkg/api.py | 1 - setup.cfg | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4eaa3470..97a753e8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,6 +20,7 @@ jobs: sudo apt-get install graphviz libgraphviz-dev pip install --upgrade pip setuptools wheel pip install "tox<4.0.0" + pip install .[dkg-construct] - name: Test with pytest run: | export MIRA_REST_URL=http://34.230.33.149:8771 diff --git a/mira/dkg/api.py b/mira/dkg/api.py index 6fa5638d..ce06cb8d 100644 --- a/mira/dkg/api.py +++ b/mira/dkg/api.py @@ -390,7 +390,6 @@ def add_resources( request.app.state.client.add_relation(relation) - class IsOntChildResult(BaseModel): """Result of a query to /is_ontological_child""" diff --git a/setup.cfg b/setup.cfg index 7053d3a9..1ffc52d9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -60,6 +60,7 @@ dkg-construct = pystow tabulate tqdm + biomappings dkg-embed = grape metaregistry = From 8afb529b96fc0eedb2018c7b1e1dc528ac9f9970 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 09:35:17 -0400 Subject: [PATCH 13/21] Verify biomappings installed on github tests --- .github/workflows/tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 97a753e8..faba09e1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,6 +21,10 @@ jobs: pip install --upgrade pip setuptools wheel pip install "tox<4.0.0" pip install .[dkg-construct] + - name: Verify installed packages + run: | + pip list + pip show biomappings - name: Test with pytest run: | export MIRA_REST_URL=http://34.230.33.149:8771 From 876c37e10c9f03ec7c9d1c4fd0c137f0301105b2 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 09:49:02 -0400 Subject: [PATCH 14/21] Add dkg-construct to tox.ini --- .github/workflows/tests.yml | 5 ----- tox.ini | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index faba09e1..4eaa3470 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,11 +20,6 @@ jobs: sudo apt-get install graphviz libgraphviz-dev pip install --upgrade pip setuptools wheel pip install "tox<4.0.0" - pip install .[dkg-construct] - - name: Verify installed packages - run: | - pip list - pip show biomappings - name: Test with pytest run: | export MIRA_REST_URL=http://34.230.33.149:8771 diff --git a/tox.ini b/tox.ini index fdadd46b..cd9e4e85 100644 --- a/tox.ini +++ b/tox.ini @@ -14,6 +14,7 @@ passenv = PYTHONPATH, MIRA_REST_URL extras = tests web + dkg-construct deps = anyio<4 commands = From 6c4417f9ba469ec61bc29f965bd8d9cfcff98df9 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 09:57:04 -0400 Subject: [PATCH 15/21] Add rdflib to dkg-construct extra in setup.cfg --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 1ffc52d9..ef7f299a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -61,6 +61,7 @@ dkg-construct = tabulate tqdm biomappings + rdflib dkg-embed = grape metaregistry = From 667494019482a2cb0387c29ed2f0b42811686cf5 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 10:06:30 -0400 Subject: [PATCH 16/21] Use backwards compatiable List type annotation for method header --- mira/dkg/resources/extract_eiffel_ontology.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mira/dkg/resources/extract_eiffel_ontology.py b/mira/dkg/resources/extract_eiffel_ontology.py index 36342aa5..5ab79d05 100644 --- 
a/mira/dkg/resources/extract_eiffel_ontology.py +++ b/mira/dkg/resources/extract_eiffel_ontology.py @@ -1,4 +1,6 @@ """Get terms from the eiffel climate ontology""" +from typing import List + import curies import pystow from curies import Converter @@ -378,7 +380,7 @@ def process_sdg_series(converter: curies.Converter): return curie_to_term -def get_eiffel_ontology_terms() -> list[Term]: +def get_eiffel_ontology_terms() -> List[Term]: converter = Converter.from_prefix_map( { "ecv": "http://purl.org/eiffo/ecv#", From 081145e39587c7256ef0134e1e00263f68387a2e Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 10:21:17 -0400 Subject: [PATCH 17/21] Pass get_cso_obo reference rather than call it --- mira/dkg/construct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 65120296..1c40a062 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -391,7 +391,7 @@ def add_resource_to_dkg(resource_prefix: str): return extract_nodes_edges_from_pyobo_terms( get_eiffel_ontology_terms, "eiffel") elif resource_prefix == "cso": - return extract_nodes_edges_from_pyobo_terms(get_cso_obo(), + return extract_nodes_edges_from_pyobo_terms(get_cso_obo, "cso") elif resource_prefix == "wikidata": # combine retrieval of wikidata constants and units From ba9b237700aa153261db605ec435abc5207f799d Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 11:34:35 -0400 Subject: [PATCH 18/21] Adjust node property attributes --- mira/dkg/client.py | 2 +- mira/dkg/construct.py | 4 ++-- mira/dkg/resources/cso.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mira/dkg/client.py b/mira/dkg/client.py index c5e699a7..5a55c7d7 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -354,7 +354,7 @@ def add_node(self, entity): xref_types.append(xref.type) for synonym in entity.synonyms: synonyms.append(synonym.value) - synonym_types.append(synonym.value) + synonym_types.append(synonym.type) for property_predicate, property_value_list in entity.properties.items(): property_predicates.append(property_predicate) property_values.extend(property_value_list) diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 1c40a062..83003377 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -218,9 +218,9 @@ def extract_nodes_edges_from_pyobo_terms(term_getter, resource_prefix): "type": entity_type, "description": term.definition if term.definition else "", "obsolete": False if not term.is_obsolete else True, - "synonyms": [Synonym(value=syn._fp(), + "synonyms": [Synonym(value=syn.name, type=f"{syn.type.reference.prefix}:" - f"{syn.type.reference.prefix}") + f"{syn.type.reference.identifier}") for syn in term.synonyms], "alts": term.alt_ids, "xrefs": [Xref(value=value, type=type) for value, type in diff --git a/mira/dkg/resources/cso.py b/mira/dkg/resources/cso.py index 88e8d1da..490bced9 100644 --- a/mira/dkg/resources/cso.py +++ b/mira/dkg/resources/cso.py @@ -23,7 +23,7 @@ def get_cso_obo() -> Obo: ) download(url=URL, path=PATH) # use https://github.com/pyobo/pyobo/pull/159 - return from_obo_path(PATH, prefix="cso", default_prefix="cso", strict=False) + return from_obo_path(PATH, prefix="cso", strict=False) if __name__ == "__main__": From 8ba98f39b62a46ae7ae0c6e6853f0fb9821161b7 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 12:02:54 -0400 Subject: [PATCH 19/21] Change probonto xref_type --- mira/dkg/construct.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/mira/dkg/construct.py b/mira/dkg/construct.py index 83003377..2c25d2d8 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -250,8 +250,8 @@ def extract_nodes_edges_from_pyobo_terms(term_getter, resource_prefix): "target_curie": object_reference.curie, "type": typedef.name.replace(" ", "").lower(), "pred": typedef.curie, - "source": "eiffel", - "graph": "eiffel", + "source": resource_prefix, + "graph": resource_prefix, "version": "", } ) @@ -278,7 +278,7 @@ def extract_probonto_nodes_edges(): "type": "class", "description": "", "obsolete": False, - "xrefs": [Xref(id=eq.get("curie", ""), type=eq.get("name", "")) + "xrefs": [Xref(id=eq.get("curie", ""), type="askemo:0000016") for eq in term.get("equivalent", [])], "properties": properties From 255b94b6ac28ce9673cf26af5731eef14daaf9db Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 16:35:56 -0400 Subject: [PATCH 20/21] Add dkg-construct dependency to Docker version of mira, use id field of nodes rather than curie for adding relations --- docker/Dockerfile | 2 +- mira/dkg/api.py | 2 ++ mira/dkg/client.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7254d906..6bdfa9d6 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -31,7 +31,7 @@ RUN wget -O /sw/nodes.tsv.gz https://askem-mira.s3.amazonaws.com/dkg/$domain/bui # Python packages RUN python -m pip install --upgrade pip && \ - python -m pip install git+https://github.com/gyorilab/mira.git@main#egg=mira[web,uvicorn,dkg-client] && \ + python -m pip install git+https://github.com/gyorilab/mira.git@main#egg=mira[web,uvicorn,dkg-client,dkg-construct] && \ python -m pip uninstall -y flask_bootstrap && \ python -m pip uninstall -y bootstrap_flask && \ python -m pip install bootstrap_flask && \ diff --git a/mira/dkg/api.py b/mira/dkg/api.py index ce06cb8d..21f92d94 100644 --- a/mira/dkg/api.py +++ b/mira/dkg/api.py @@ -376,6 +376,8 @@ def add_resources( "nbcbitaxon"], ) ): + """From a list of resource prefixes, add a list of nodes and edges + extract from each resource to the DKG""" for resource_prefix in resource_prefix_list: # nodes and edges will be a list of dicts nodes, edges = add_resource_to_dkg(resource_prefix.lower()) diff --git a/mira/dkg/client.py b/mira/dkg/client.py index 5a55c7d7..92999c7a 100644 --- a/mira/dkg/client.py +++ b/mira/dkg/client.py @@ -428,8 +428,8 @@ def add_relation(self, relation): graph = relation.graph create_relation_query = ( - f"MATCH (source_node {{curie: '{source_curie}'}}), " - f"(target_node {{curie: '{target_curie}'}}) " + f"MATCH (source_node {{id: '{source_curie}'}}), " + f"(target_node {{id: '{target_curie}'}}) " f"MERGE (source_node)-[rel:{type}]->(target_node)" f"SET rel.pred = '{pred}'" f"SET rel.source = '{source}'" From 13952484db73ffc7b0f5b441b8d2fcd0357a3d48 Mon Sep 17 00:00:00 2001 From: nanglo123 Date: Wed, 17 Jul 2024 17:50:58 -0400 Subject: [PATCH 21/21] Use mira defined Xref object model for nodes and process ncbitaxon nodes differently, add demo notebook --- mira/dkg/construct.py | 55 +++- notebooks/Extend_DKG_demo.ipynb | 558 ++++++++++++++++++++++++++++++++ 2 files changed, 596 insertions(+), 17 deletions(-) create mode 100644 notebooks/Extend_DKG_demo.ipynb diff --git a/mira/dkg/construct.py b/mira/dkg/construct.py index 2c25d2d8..2b9a54a1 100644 --- a/mira/dkg/construct.py +++ b/mira/dkg/construct.py @@ -211,23 +211,44 @@ def extract_nodes_edges_from_pyobo_terms(term_getter, resource_prefix): elif resource_prefix in {"ncit", "ncbitaxon", 
"eiffel", "cso"}: entity_type = "class" for term in tqdm(term_getter(), unit="term"): - nodes.append( - { - "id": term.curie, - "name": term.name, - "type": entity_type, - "description": term.definition if term.definition else "", - "obsolete": False if not term.is_obsolete else True, - "synonyms": [Synonym(value=syn.name, - type=f"{syn.type.reference.prefix}:" - f"{syn.type.reference.identifier}") - for syn in term.synonyms], - "alts": term.alt_ids, - "xrefs": [Xref(value=value, type=type) for value, type in - zip(term.xrefs, term.xref_types)], - "properties": dict(term.properties), - } - ) + if resource_prefix != "ncbitaxon": + nodes.append( + { + "id": term.curie, + "name": term.name, + "type": entity_type, + "description": term.definition if term.definition else "", + "obsolete": False if not term.is_obsolete else True, + "synonyms": [Synonym(value=syn.name, + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.identifier}") + for syn in term.synonyms], + "alts": term.alt_ids, + "xrefs": [Xref(id=_id, type=type) for _id, type in + zip(term.xrefs, term.xref_types)], + "properties": dict(term.properties), + } + ) + else: + nodes.append( + { + "id": term.curie, + "name": term.name, + "type": entity_type, + "description": term.definition if term.definition else "", + "obsolete": False if not term.is_obsolete else True, + "synonyms": [Synonym(value=syn.name, + type=f"{syn.type.reference.prefix}:" + f"{syn.type.reference.identifier}") + for syn in term.synonyms], + "alts": [f"{reference.prefix}:{reference.identifier}" for + reference in term.alt_ids], + "xrefs": [Xref(id=f"{reference.prefix}:" + f"{reference.identifier}", type="") + for reference in term.xrefs], + "properties": dict(term.properties), + } + ) if resource_prefix != "eiffel": for parent in term.get_relationships(part_of): edges.append( diff --git a/notebooks/Extend_DKG_demo.ipynb b/notebooks/Extend_DKG_demo.ipynb new file mode 100644 index 00000000..71e4ecfb --- /dev/null +++ b/notebooks/Extend_DKG_demo.ipynb @@ -0,0 +1,558 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "a3f4694c-6dfd-438c-ba7f-25571f8b6db0", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from mira.dkg.client import Neo4jClient" + ] + }, + { + "cell_type": "markdown", + "id": "9750dc65-e30d-43a9-acbf-5a661961e5ed", + "metadata": {}, + "source": [ + "## We define an instance of the Neo4jClient to query for the nodes and edges added to the DKG" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a69c02f1-284b-4248-bddc-b10cc5ed87cd", + "metadata": {}, + "outputs": [], + "source": [ + "client = Neo4jClient()" + ] + }, + { + "cell_type": "markdown", + "id": "780f5a69-c4cd-4130-9547-d9974a0f53ec", + "metadata": {}, + "source": [ + "# We define sample nodes to be added to the DKG" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6cdf0570-f177-4adf-a446-2216bd26089e", + "metadata": {}, + "outputs": [], + "source": [ + "node_list = []\n", + "\n", + "node1 = {\n", + " \"id\": \"ido:0000511\",\n", + " \"name\": \"infected population\",\n", + " \"type\": \"class\",\n", + " \"obsolete\": False,\n", + " \"description\": \"An organism population whose members have an infection.\",\n", + " \"synonyms\": [],\n", + " \"alts\": [],\n", + " \"xrefs\": [],\n", + " \"labels\": [\"ido\"],\n", + " \"properties\": {},\n", + " \"link\": \"string\",\n", + " \"physical_min\": 0,\n", + " \"physical_max\": 0,\n", + " \"suggested_data_type\": \"string\",\n", + " \"suggested_unit\": 
\"string\",\n", + " \"typical_min\": 0,\n", + " \"typical_max\": 0,\n", + "}\n", + "\n", + "node2 = {\n", + " \"id\": \"ido:0000514\",\n", + " \"name\": \"susceptible population\",\n", + " \"type\": \"class\",\n", + " \"obsolete\": False,\n", + " \"description\": \"An organism population whose members are not infected with an infectious agent and who lack immunity to the infectious agent.\",\n", + " \"synonyms\": [],\n", + " \"alts\": [],\n", + " \"xrefs\": [],\n", + " \"labels\": [\"ido\"],\n", + " \"properties\": {},\n", + " \"link\": \"string\",\n", + "}\n", + "\n", + "\n", + "# Define a fully instantiated node\n", + "node3 = {\n", + " \"id\": \"ido:0000511\",\n", + " \"name\": \"infected population\",\n", + " \"type\": \"class\",\n", + " \"obsolete\": False,\n", + " \"description\": \"An organism population whose members have an infection.\",\n", + " \"synonyms\": [{\"value\": \"infected pop\", \"type\": \"skos:exactMatch\"}],\n", + " \"alts\": [\"ido:0000511-alt1\", \"ido:0000511-alt2\"],\n", + " \"xrefs\": [\n", + " {\"id\": \"xref:0001\", \"type\": \"skos:exactMatch\"},\n", + " {\"id\": \"xref:0002\", \"type\": \"skos:exactMatch\"},\n", + " ],\n", + " \"labels\": [\"ido\", \"population\"],\n", + " \"properties\": {\"property1\": [\"value1\"], \"property2\": [\"value3\"]},\n", + "}\n", + "\n", + "\n", + "node_list.append(node1)\n", + "node_list.append(node2)" + ] + }, + { + "cell_type": "markdown", + "id": "60fcebd4-af01-46ae-a6ce-25b70f0f6bf5", + "metadata": {}, + "source": [ + "## Test the add_nodes endpoint \n", + "\n", + "We first test the ```add_nodes``` endpoint that takes in a list of ```Entity``` and ```AskemEntity``` objects and adds them as nodes to the DKG. Only the ```id```, ```obsolete```, and ```type``` properties are mandatory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2c3452bf-5bc3-43ca-9623-e8860fc7e0a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "response = requests.post(\"http://localhost:8771/api/add_nodes\", json=node_list)\n", + "response.status_code" + ] + }, + { + "cell_type": "markdown", + "id": "88a24f10-f276-45e9-a12f-7b24a605f1ba", + "metadata": {}, + "source": [ + "## We query for the added nodes" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0a0351be-2553-460e-974b-b32777a4c8a5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'infected population', 'obsolete': False, 'description': 'An organism population whose members have an infection.', 'id': 'ido:0000511', 'type': 'class', 'labels': ['ido']}\n" + ] + } + ], + "source": [ + "query = f\"MATCH (N) WHERE N.id = 'ido:0000511' RETURN N\"\n", + "print(client.query_tx(query)[0][0]._properties)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9be0bc56-c4b5-447e-a2ce-2a00b1697e2a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'susceptible population', 'obsolete': False, 'description': 'An organism population whose members are not infected with an infectious agent and who lack immunity to the infectious agent.', 'id': 'ido:0000514', 'type': 'class', 'labels': ['ido']}\n" + ] + } + ], + "source": [ + "query = f\"MATCH (N) WHERE N.id = 'ido:0000514' RETURN N\"\n", + "print(client.query_tx(query)[0][0]._properties)" + ] + }, + { + "cell_type": "markdown", + "id": "4e4197b8-4e33-483d-b009-cad060a615d9", + "metadata": {}, + "source": [ + "## Add a fully instantiated node to the DKG\n", + "We then add a node with all of its properties supplied. Duplicate nodes (all properties must be matching for a node to be considered duplicate) are not added to the DKG." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b9d7017d-339d-4f7f-930b-1348ddd5f593", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "node_list.append(node3)\n", + "node_list.append(node1)\n", + "response = requests.post(\"http://localhost:8771/api/add_nodes\", json=node_list)\n", + "response.status_code" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "10b4c035-69f9-4df7-b3bd-bf3e0d1fa9ec", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'infected population', 'obsolete': False, 'description': 'An organism population whose members have an infection.', 'id': 'ido:0000511', 'type': 'class', 'labels': ['ido']}\n", + "\n", + "{'xref_types': ['skos:exactMatch', 'skos:exactMatch'], 'synonyms': ['infected pop'], 'alts': ['ido:0000511-alt1', 'ido:0000511-alt2'], 'xrefs': ['xref:0001', 'xref:0002'], 'obsolete': False, 'description': 'An organism population whose members have an infection.', 'type': 'class', 'labels': ['ido', 'population'], 'synonym_types': ['skos:exactMatch'], 'property_values': ['value1', 'value3'], 'property_predicates': ['property1', 'property2'], 'name': 'infected population', 'id': 'ido:0000511'}\n", + "\n" + ] + } + ], + "source": [ + "# We have two node objects returned from the query both with id ido:0000511 even though \n", + "# we used the add_nodes endpoint to add a node with ```id=ido:0000511``` three times\n", + "\n", + "query = f\"MATCH (N) WHERE N.id = 'ido:0000511' RETURN N\"\n", + "for n in client.query_tx(query):\n", + " print(n[0]._properties)\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "c94627a4-cee9-4d7a-ac9b-ebf36cccca30", + "metadata": {}, + "source": [ + "# Test the add_relations endpoint\n", + "The ```add_relations``` endpoint takes in a list of ```Relation``` objects and adds the relation to the DKG. All properties of the relation are required. Duplicate relations are not added." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cfd152e2-b3af-428c-b264-f40c6d15825f", + "metadata": {}, + "outputs": [], + "source": [ + "relation_list = [\n", + " {\n", + " \"source_curie\": \"probonto:k0000000\",\n", + " \"target_curie\": \"probonto:k0000007\",\n", + " \"type\": \"has_parameter\",\n", + " \"pred\": \"probonto:c0000062\",\n", + " \"source\": \"probonto\",\n", + " \"graph\": \"https://raw.githubusercontent.com/probonto/ontology/master/probonto4ols.owl\",\n", + " \"version\": \"2.5\",\n", + " },\n", + " {\n", + " \"source_curie\": \"geonames:12042053\",\n", + " \"target_curie\": \"geonames:292969\",\n", + " \"type\": \"part_of\",\n", + " \"pred\": \"bfo:0000050\",\n", + " \"source\": \"geonames\",\n", + " \"graph\": \"geonames\",\n", + " \"version\": \"\",\n", + " },\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bb5c11cb-75b7-429e-a5b9-64af0e4a866c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "response = requests.post(\n", + " \"http://localhost:8771/api/add_relations\", json=relation_list\n", + ")\n", + "response.status_code" + ] + }, + { + "cell_type": "markdown", + "id": "53cc6281-ff93-40f4-8b55-515c9a4c6de1", + "metadata": {}, + "source": [ + "# We query for the added relations" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "973f3bed-665c-4aba-829c-9ea5e6258487", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Source Node : {'property_values': ['probability'], 'xref_types': ['askemo:0000016'], 'property_predicates': ['has_parameter'], 'name': 'Bernoulli1', 'obsolete': False, 'xrefs': ['probonto:k0000028'], 'id': 'probonto:k0000000', 'source': 'probonto', 'type': 'class', 'version': '2.5'} \n", + "\n", + "Relation : {'pred': 'probonto:c0000062', 'source': 'probonto', 'version': '2.5', 'graph': 'https://raw.githubusercontent.com/probonto/ontology/master/probonto4ols.owl'} \n", + "\n", + "Target Node : {'synonym_types': ['referenced_by_latex', 'oboInOwl:hasExactSynonym'], 'synonyms': ['p', 'probability of success'], 'name': 'probability', 'obsolete': False, 'id': 'probonto:k0000007', 'type': 'class', 'version': '2.5'} \n", + "\n" + ] + } + ], + "source": [ + "source_curie = \"probonto:k0000000\"\n", + "target_curie = \"probonto:k0000007\"\n", + "rel_type = \"has_parameter\"\n", + "\n", + "relation_query = f\"MATCH (source_node {{id: '{source_curie}'}}), (target_node {{id: '{target_curie}'}}) MATCH (source_node)-[rel:{rel_type}]->(target_node) RETURN source_node, rel, target_node\"\n", + "\n", + "result = client.query_tx(relation_query)\n", + "\n", + "print(f\"Source Node : {result[0][0]._properties} \\n\")\n", + "print(f\"Relation : {result[0][1]._properties} \\n\")\n", + "print(f\"Target Node : {result[0][2]._properties} \\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b8333206-4b12-44ad-b180-d1e63670111f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Source Node : ._properties \n", + "\n", + "Relation : {'pred': 'bfo:0000050', 'source': 'geonames', 'version': '', 'graph': 'geonames'} \n", + "\n", + "Target Node : {'property_values': ['AE.01'], 'property_predicates': ['code'], 'name': 'Abu Dhabi', 'obsolete': False, 'id': 'geonames:292969', 'type': 'individual'} \n", + "\n" + ] + } + ], + "source": [ + "source_curie = 
\"geonames:12042053\"\n", + "target_curie = \"geonames:292969\"\n", + "rel_type = \"part_of\"\n", + "\n", + "relation_query = f\"MATCH (source_node {{id: '{source_curie}'}}), (target_node {{id: '{target_curie}'}}) MATCH (source_node)-[rel:{rel_type}]->(target_node) RETURN source_node, rel, target_node\"\n", + "\n", + "result = client.query_tx(relation_query)\n", + "\n", + "print(f\"Source Node : {result[0][0]}._properties \\n\")\n", + "print(f\"Relation : {result[0][1]._properties} \\n\")\n", + "print(f\"Target Node : {result[0][2]._properties} \\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "091a04f3-a2e0-4113-9fc8-272dc99e688f", + "metadata": {}, + "source": [ + "# Test the add_resources endpoint\n", + "The ```add_resources``` endpoint accepts a list of strings that represent resource prefixes. Nodes and edges are extracted from each resource and then added to the DKG. The resources that can be added are ```eiffel, cso, wikidata, probonto, ncit, ncbitaxon, geonames```. The names are not case-sensitive and invalid resource prefixes are ignored. " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c00ac8c4-59d5-4533-a95a-17c6def957b0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "resource_list = [\n", + " \"probonto\",\n", + " \"wikidata\",\n", + " \"eiffel\",\n", + " \"geonames\",\n", + " \"ncit\",\n", + " \"nbcbitaxon\",\n", + " \"cso\",\n", + "]\n", + "\n", + "response = requests.post(\n", + " \"http://localhost:8771/api/add_resources\", json=resource_list\n", + ")\n", + "response.status_code" + ] + }, + { + "cell_type": "markdown", + "id": "4b646cdc-5f60-4ed2-8dfe-e510bd8469d5", + "metadata": {}, + "source": [ + "# We then query for some of the added nodes from the resources processed" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a66a111c-5aac-485c-b77d-49d856b693cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'synonym_types': ['debio:0000031'],\n", + " 'property_values': ['0.01438776877'],\n", + " 'xref_types': ['oboinowl:hasDbXref'],\n", + " 'synonyms': ['c_{2}'],\n", + " 'property_predicates': ['debio:0000042'],\n", + " 'xrefs': ['nist.codata:c22ndrc'],\n", + " 'name': 'second radiation constant',\n", + " 'obsolete': False,\n", + " 'description': \"constant in Wien's radiation law\",\n", + " 'id': 'wikidata:Q112300321',\n", + " 'type': 'class'}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = f\"MATCH (N) WHERE N.id = 'wikidata:Q112300321' RETURN N\"\n", + "client.query_tx(query)[0][0]._properties" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "1194936e-5c93-4d8b-887f-a2c4395dc599", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'synonym_types': ['oboinowl:SynonymType',\n", + " 'oboinowl:SynonymType',\n", + " 'oboinowl:SynonymType',\n", + " 'oboinowl:SynonymType',\n", + " 'oboinowl:SynonymType'],\n", + " 'synonyms': ['Musaffa', 'Musaffah City', 'msfh', 'Мусаффа', 'مصفح'],\n", + " 'name': 'Musaffah',\n", + " 'obsolete': False,\n", + " 'id': 'geonames:12042053',\n", + " 'type': 'individual'}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = f\"MATCH (N) WHERE N.id = 'geonames:12042053' RETURN N\"\n", + "client.query_tx(query)[0][0]._properties" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 16, + "id": "ae1442ef-7558-4b47-8bd5-0c87b309102a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'synonym_types': ['oboinowl:SynonymType', 'oboinowl:SynonymType'],\n", + " 'property_values': ['C123547',\n", + " 'Eukaryote',\n", + " 'Plasmodium falciparum',\n", + " 'C0032150',\n", + " 'CDISC',\n", + " 'Any unicellular, eukaryotic organism that can be assigned to the species Plasmodium falciparum.',\n", + " '5833'],\n", + " 'synonyms': ['PLASMODIUM FALCIPARUM', 'Plasmodium falciparum'],\n", + " 'property_predicates': ['NCIT:NHC0',\n", + " 'NCIT:P106',\n", + " 'NCIT:P108',\n", + " 'NCIT:P207',\n", + " 'NCIT:P322',\n", + " 'NCIT:P325',\n", + " 'NCIT:P331'],\n", + " 'name': 'Plasmodium falciparum',\n", + " 'obsolete': False,\n", + " 'description': 'A protozoan parasite in the family Plasmodiidae. P. falciparum is transmitted by the female Anopheles mosquito and is a causative agent of malaria in humans. The malaria caused by this species is the most dangerous form of malaria.',\n", + " 'id': 'ncit:C123547',\n", + " 'type': 'class'}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = f\"MATCH (N) WHERE N.id = 'ncit:C123547' RETURN N\"\n", + "client.query_tx(query)[0][0]._properties" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}