From 0df22de681bbf5f9a3bf11f033e684ab4d8a2b15 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:06:21 +0100
Subject: [PATCH 01/23] Begin cleanup

---
 pyproject.toml           |  4 ++++
 src/semra/client.py      | 16 +++++++++++-----
 src/semra/sources/clo.py |  6 +++++-
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index b221004..25ac2b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,7 @@ dependencies = [
   "black[jupyter]>=23.1.0",
   "mypy>=1.0.0",
   "ruff>=0.0.243",
+  "pydantic",
 ]
 [tool.hatch.envs.lint.scripts]
 typing = "mypy --install-types --non-interactive --ignore-missing-imports {args:src/semra tests}"
@@ -105,6 +106,9 @@ all = [
 target-version = ["py39"]
 line-length = 120
 
+[tool.mypy]
+plugins = ["pydantic.mypy"]
+
 [tool.ruff]
 target-version = "py39"
 line-length = 120
diff --git a/src/semra/client.py b/src/semra/client.py
index badfde0..b029561 100644
--- a/src/semra/client.py
+++ b/src/semra/client.py
@@ -12,7 +12,7 @@
 import neo4j.graph
 import networkx as nx
 import pydantic
-from neo4j import Transaction, unit_of_work
+from neo4j import unit_of_work
 from typing_extensions import TypeAlias
 
 import semra
@@ -125,6 +125,8 @@ def _get_node_by_curie(self, curie: ReferenceHint) -> Node:
 
     def get_mapping(self, curie: ReferenceHint) -> semra.Mapping:
         """Get a mapping."""
+        if isinstance(curie, Reference):
+            curie = curie.curie
         if not curie.startswith("semra.mapping:"):
             curie = f"semra.mapping:{curie}"
         query = """\
@@ -175,6 +177,8 @@ def get_mapping_set(self, curie: ReferenceHint) -> MappingSet:
             For example, use ``semra.mappingset:7831d5bc95698099fb6471667e5282cd`` for biomappings
         :return: A mapping set
         """
+        if isinstance(curie, Reference):
+            curie = curie.curie
         if not curie.startswith("semra.mappingset:"):
             curie = f"semra.mappingset:{curie}"
         node = self._get_node_by_curie(curie)
@@ -217,7 +221,9 @@ def summarize_nodes(self) -> t.Counter[str]:
 
     def summarize_concepts(self) -> t.Counter[tuple[str, str]]:
         query = "MATCH (e:concept) WHERE e.prefix <> 'orcid' RETURN e.prefix, count(e.prefix)"
-        return Counter({(prefix, bioregistry.get_name(prefix)): count for prefix, count in self.read_query(query)})
+        return Counter(
+            {(prefix, t.cast(str, bioregistry.get_name(prefix))): count for prefix, count in self.read_query(query)}
+        )
 
     def summarize_authors(self) -> t.Counter[tuple[str, str]]:
         query = "MATCH (e:evidence)-[:hasAuthor]->(a:concept) RETURN a.curie, a.name, count(e)"
@@ -257,8 +263,8 @@ def get_connected_component_graph(self, curie: str) -> nx.MultiDiGraph:
             g.add_node(node["curie"], **node)
         for relation in relations:
             g.add_edge(
-                relation.nodes[0]["curie"],
-                relation.nodes[1]["curie"],
+                relation.nodes[0]["curie"],  # type: ignore
+                relation.nodes[1]["curie"],  # type: ignore
                 key=relation.element_id,
                 type=relation.type,
                 **relation,
@@ -273,6 +279,6 @@ def get_concept_name(self, curie: str) -> str | None:
 # https://neo4j.com/docs/python-manual/current/session-api/#python-driver-simple-transaction-fn
 # and from the docstring of neo4j.Session.read_transaction
 @unit_of_work()
-def do_cypher_tx(tx: Transaction, query: str, **query_params) -> list[list]:
+def do_cypher_tx(tx, query, **query_params) -> list[list]:
     result = tx.run(query, parameters=query_params)
     return [record.values() for record in result]
diff --git a/src/semra/sources/clo.py b/src/semra/sources/clo.py
index 8fafa28..db66ced 100644
--- a/src/semra/sources/clo.py
+++ b/src/semra/sources/clo.py
@@ -1,5 +1,7 @@
 """Process mappings from CLO."""
 
+from typing import Optional
+
 import bioontologies
 import bioregistry
 import click
@@ -36,6 +38,8 @@ def get_clo_mappings(confidence: float = 0.8) -> list[Mapping]:
                 continue
             for raw_curie in _split(p.value_raw):
                 curie = raw_curie.removeprefix("rrid:").removeprefix("RRID:")
+                prefix: Optional[str]
+                identifier: Optional[str]
                 if curie.startswith("Sanger:COSMICID:"):
                     prefix, identifier = "cosmic.cell", curie.removeprefix("Sanger:COSMICID:")
                 elif curie.startswith("atcc:COSMICID:"):
@@ -83,7 +87,7 @@ def get_clo_mappings(confidence: float = 0.8) -> list[Mapping]:
                 else:
                     prefix, identifier = bioregistry.parse_curie(curie)
 
-                if prefix is None:
+                if prefix is None or identifier is None:
                     tqdm.write(f"CLO:{clo_id} unparsed: {click.style(curie, fg='red')} from line:\n  {p.value_raw}")
                     continue
                 if prefix in SKIP_PREFIXES:

From 179118c78af7a20498540e70bdabee7dc29d757d Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:13:43 +0100
Subject: [PATCH 02/23] Update typing

---
 src/semra/api.py | 80 ++++++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/src/semra/api.py b/src/semra/api.py
index f2091b5..747c6a6 100644
--- a/src/semra/api.py
+++ b/src/semra/api.py
@@ -4,6 +4,7 @@
 
 import itertools as itt
 import logging
+import typing as t
 from collections import Counter, defaultdict
 from collections.abc import Iterable
 from typing import cast
@@ -22,7 +23,14 @@
     KNOWLEDGE_MAPPING,
     NARROW_MATCH,
 )
-from semra.struct import Evidence, Mapping, ReasonedEvidence, Reference, Triple, triple_key
+from semra.struct import (
+    Evidence,
+    Mapping,
+    ReasonedEvidence,
+    Reference,
+    Triple,
+    triple_key,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -30,7 +38,7 @@
 EVIDENCE_KEY = "evidence"
 
 #: An index allows for the aggregation of evidences for each core triple
-Index = dict[Triple, list[Evidence]]
+Index = t.Dict[Triple, t.List[Evidence]]
 
 
 def _tqdm(mappings: Iterable[Mapping], desc: str | None = None, *, progress: bool = True):
@@ -43,7 +51,7 @@ def _tqdm(mappings: Iterable[Mapping], desc: str | None = None, *, progress: boo
     )
 
 
-def count_source_target(mappings: Iterable[Mapping]) -> Counter[tuple[str, str]]:
+def count_source_target(mappings: Iterable[Mapping]) -> Counter[t.Tuple[str, str]]:
     """Count source prefix-target prefix pairs."""
     return Counter((s.prefix, o.prefix) for s, _, o in get_index(mappings))
 
@@ -65,18 +73,18 @@ def print_source_target_counts(mappings: Iterable[Mapping], minimum: int = 0) ->
 
 def get_index(mappings: Iterable[Mapping], *, progress: bool = True) -> Index:
     """Aggregate and deduplicate evidences for each core triple."""
-    dd: defaultdict[Triple, list[Evidence]] = defaultdict(list)
+    dd: t.DefaultDict[Triple, t.List[Evidence]] = defaultdict(list)
     for mapping in _tqdm(mappings, desc="Indexing mappings", progress=progress):
         dd[mapping.triple].extend(mapping.evidence)
     return {triple: deduplicate_evidence(evidence) for triple, evidence in dd.items()}
 
 
-def assemble_evidences(mappings: list[Mapping], *, progress: bool = True) -> list[Mapping]:
+def assemble_evidences(mappings: t.List[Mapping], *, progress: bool = True) -> t.List[Mapping]:
     index = get_index(mappings, progress=progress)
     return unindex(index, progress=progress)
 
 
-def infer_reversible(mappings: list[Mapping], *, progress: bool = True) -> list[Mapping]:
+def infer_reversible(mappings: t.List[Mapping], *, progress: bool = True) -> t.List[Mapping]:
     rv = []
     for mapping in _tqdm(mappings, desc="Infer reverse", progress=progress):
         rv.append(mapping)
@@ -111,7 +119,7 @@ def flip(mapping: Mapping) -> Mapping | None:
     )
 
 
-def to_graph(mappings: list[Mapping]) -> nx.DiGraph:
+def to_graph(mappings: t.List[Mapping]) -> nx.DiGraph:
     """Convert mappings into a directed graph data model."""
     graph = nx.DiGraph()
     for mapping in mappings:
@@ -123,7 +131,7 @@ def to_graph(mappings: list[Mapping]) -> nx.DiGraph:
     return graph
 
 
-def from_graph(graph: nx.DiGraph) -> list[Mapping]:
+def from_graph(graph: nx.DiGraph) -> t.List[Mapping]:
     """Extract mappings from a directed graph data model."""
     return [_from_edge(graph, s, o) for s, o in graph.edges()]
 
@@ -133,7 +141,7 @@ def _from_edge(graph: nx.DiGraph, s: Reference, o: Reference) -> Mapping:
     return Mapping(s=s, p=data[PREDICATE_KEY], o=o, evidence=data[EVIDENCE_KEY])
 
 
-def _condense_predicates(predicates: list[Reference]) -> Reference | None:
+def _condense_predicates(predicates: t.List[Reference]) -> Reference | None:
     predicate_set = set(predicates)
     if predicate_set == {EXACT_MATCH}:
         return EXACT_MATCH
@@ -145,8 +153,8 @@ def _condense_predicates(predicates: list[Reference]) -> Reference | None:
 
 
 def infer_chains(
-    mappings: list[Mapping], *, backwards: bool = True, progress: bool = True, cutoff: int = 5
-) -> list[Mapping]:
+    mappings: t.List[Mapping], *, backwards: bool = True, progress: bool = True, cutoff: int = 5
+) -> t.List[Mapping]:
     """Apply graph-based reasoning over mapping chains to infer new mappings.
 
     :param mappings: A list of input mappings
@@ -198,7 +206,7 @@ def tabulate_index(index: Index) -> str:
     """Tabulate"""
     from tabulate import tabulate
 
-    rows: list[tuple[str, str, str, str]] = []
+    rows: t.List[t.Tuple[str, str, str, str]] = []
 
     def key(pair):
         return triple_key(pair[0])
@@ -218,16 +226,16 @@ def infer_mutual_dbxref_mutations(
     mappings: Iterable[Mapping],
     prefixes: set[str],
     confidence: float | None = None,
-) -> list[Mapping]:
+) -> t.List[Mapping]:
     pairs = {(s, t) for s, t in itt.product(prefixes, repeat=2) if s != t}
     return infer_dbxref_mutations(mappings, pairs=pairs, confidence=confidence)
 
 
 def infer_dbxref_mutations(
     mappings: Iterable[Mapping],
-    pairs: dict[tuple[str, str], float] | Iterable[tuple[str, str]],
+    pairs: t.Dict[t.Tuple[str, str], float] | Iterable[t.Tuple[str, str]],
     confidence: float | None = None,
-) -> list[Mapping]:
+) -> t.List[Mapping]:
     """Upgrade database cross-references into exact matches for the given pairs.
 
     :param mappings: A list of mappings
@@ -249,12 +257,12 @@ def infer_dbxref_mutations(
 
 def infer_mutations(
     mappings: Iterable[Mapping],
-    pairs: dict[tuple[str, str], float],
+    pairs: t.Dict[t.Tuple[str, str], float],
     old: Reference,
     new: Reference,
     *,
     progress: bool = False,
-) -> list[Mapping]:
+) -> t.List[Mapping]:
     """Infer mappings with alternate predicates for the given prefix pairs.
 
     :param mappings: Mappings to infer from
@@ -286,7 +294,7 @@ def infer_mutations(
     return rv
 
 
-def keep_prefixes(mappings: Iterable[Mapping], prefixes: Iterable[str], *, progress: bool = True) -> list[Mapping]:
+def keep_prefixes(mappings: Iterable[Mapping], prefixes: Iterable[str], *, progress: bool = True) -> t.List[Mapping]:
     """Filter out mappings whose subject or object are not in the given list of prefixes."""
     prefixes = set(prefixes)
     return [
@@ -314,7 +322,7 @@ def keep_object_prefixes(mappings: Iterable[Mapping], prefixes: str | Iterable[s
     ]
 
 
-def filter_prefixes(mappings: Iterable[Mapping], prefixes: Iterable[str], *, progress: bool = True) -> list[Mapping]:
+def filter_prefixes(mappings: Iterable[Mapping], prefixes: Iterable[str], *, progress: bool = True) -> t.List[Mapping]:
     """Filter out mappings whose subject or object are in the given list of prefixes."""
     prefixes = set(prefixes)
     return [
@@ -324,7 +332,7 @@ def filter_prefixes(mappings: Iterable[Mapping], prefixes: Iterable[str], *, pro
     ]
 
 
-def filter_self_matches(mappings: Iterable[Mapping], *, progress: bool = True) -> list[Mapping]:
+def filter_self_matches(mappings: Iterable[Mapping], *, progress: bool = True) -> t.List[Mapping]:
     """Filter out mappings within the same resource."""
     return [
         mapping
@@ -333,7 +341,7 @@ def filter_self_matches(mappings: Iterable[Mapping], *, progress: bool = True) -
     ]
 
 
-def filter_mappings(mappings: list[Mapping], skip_mappings: list[Mapping], *, progress: bool = True) -> list[Mapping]:
+def filter_mappings(mappings: t.List[Mapping], skip_mappings: t.List[Mapping], *, progress: bool = True) -> t.List[Mapping]:
     """Filter out mappings in the second set from the first set."""
     skip_triples = {skip_mapping.triple for skip_mapping in skip_mappings}
     return [
@@ -343,10 +351,10 @@ def filter_mappings(mappings: list[Mapping], skip_mappings: list[Mapping], *, pr
     ]
 
 
-M2MIndex = defaultdict[tuple[str, str], defaultdict[str, defaultdict[str, list[Mapping]]]]
+M2MIndex = t.DefaultDict[t.Tuple[str, str], t.DefaultDict[str, t.DefaultDict[str, t.List[Mapping]]]]
 
 
-def get_many_to_many(mappings: list[Mapping]) -> list[Mapping]:
+def get_many_to_many(mappings: t.List[Mapping]) -> t.List[Mapping]:
     """Get many-to-many mappings, disregarding predicate type."""
     forward: M2MIndex = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
     backward: M2MIndex = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
@@ -354,7 +362,7 @@ def get_many_to_many(mappings: list[Mapping]) -> list[Mapping]:
         forward[mapping.s.prefix, mapping.o.prefix][mapping.s.identifier][mapping.o.identifier].append(mapping)
         backward[mapping.s.prefix, mapping.o.prefix][mapping.o.identifier][mapping.s.identifier].append(mapping)
 
-    index: defaultdict[Triple, list[Evidence]] = defaultdict(list)
+    index: t.DefaultDict[Triple, t.List[Evidence]] = defaultdict(list)
     for preindex in [forward, backward]:
         for d1 in preindex.values():
             for d2 in d1.values():
@@ -366,15 +374,15 @@ def get_many_to_many(mappings: list[Mapping]) -> list[Mapping]:
     return rv
 
 
-def filter_many_to_many(mappings: list[Mapping], *, progress: bool = True) -> list[Mapping]:
+def filter_many_to_many(mappings: t.List[Mapping], *, progress: bool = True) -> t.List[Mapping]:
     """Filter out many to many mappings."""
     skip_mappings = get_many_to_many(mappings)
     return filter_mappings(mappings, skip_mappings, progress=progress)
 
 
 def project(
-    mappings: list[Mapping], source_prefix: str, target_prefix: str, *, return_sus: bool = False, progress: bool = False
-) -> list[Mapping] | tuple[list[Mapping], list[Mapping]]:
+    mappings: t.List[Mapping], source_prefix: str, target_prefix: str, *, return_sus: bool = False, progress: bool = False
+) -> t.List[Mapping] | t.Tuple[t.List[Mapping], t.List[Mapping]]:
     """Ensure that each identifier only appears as the subject of one mapping."""
     mappings = keep_subject_prefixes(mappings, source_prefix, progress=progress)
     mappings = keep_object_prefixes(mappings, target_prefix, progress=progress)
@@ -386,13 +394,13 @@ def project(
     return mappings
 
 
-def project_dict(mappings: list[Mapping], source_prefix: str, target_prefix: str) -> dict[str, str]:
+def project_dict(mappings: t.List[Mapping], source_prefix: str, target_prefix: str) -> t.Dict[str, str]:
     """Get a dictionary from source identifiers to target identifiers."""
-    mappings = cast(list[Mapping], project(mappings, source_prefix, target_prefix))
+    mappings = cast(t.List[Mapping], project(mappings, source_prefix, target_prefix))
     return {mapping.s.identifier: mapping.o.identifier for mapping in mappings}
 
 
-def prioritize(mappings: list[Mapping], priority: list[str]) -> list[Mapping]:
+def prioritize(mappings: t.List[Mapping], priority: t.List[str]) -> t.List[Mapping]:
     """Get a priority star graph.
 
     :param mappings:
@@ -403,7 +411,7 @@ def prioritize(mappings: list[Mapping], priority: list[str]) -> list[Mapping]:
     exact_mappings = len(mappings)
 
     graph = to_graph(mappings).to_undirected()
-    rv: list[Mapping] = []
+    rv: t.List[Mapping] = []
     for component in tqdm(nx.connected_components(graph), unit="component", unit_scale=True):
         o = _get_priority(component, priority)
         if o is None:
@@ -427,7 +435,7 @@ def prioritize(mappings: list[Mapping], priority: list[str]) -> list[Mapping]:
     return rv
 
 
-def _get_priority(component: list[Reference], priority: list[str]) -> Reference | None:
+def _get_priority(component: t.List[Reference], priority: t.List[str]) -> t.Optional[Reference]:
     prefix_to_references = defaultdict(list)
     for c in component:
         prefix_to_references[c.prefix].append(c)
@@ -444,7 +452,7 @@ def _get_priority(component: list[Reference], priority: list[str]) -> Reference
     return None
 
 
-def unindex(index: Index, *, progress: bool = True) -> list[Mapping]:
+def unindex(index: Index, *, progress: bool = True) -> t.List[Mapping]:
     """Convert a mapping index into a list of mapping objects."""
     return [
         Mapping.from_triple(triple, evidence=evidence)
@@ -454,13 +462,13 @@ def unindex(index: Index, *, progress: bool = True) -> list[Mapping]:
     ]
 
 
-def deduplicate_evidence(evidence: list[Evidence]) -> list[Evidence]:
+def deduplicate_evidence(evidence: t.List[Evidence]) -> t.List[Evidence]:
     """Deduplicate a list of evidences based on their "key" function."""
     d = {e.key(): e for e in evidence}
     return list(d.values())
 
 
-def validate_mappings(mappings: list[Mapping], *, progress: bool = True) -> None:
+def validate_mappings(mappings: t.List[Mapping], *, progress: bool = True) -> None:
     """Validate mappings against the Bioregistry and raise an error on the first invalid."""
     import bioregistry
 
@@ -489,7 +497,7 @@ def validate_mappings(mappings: list[Mapping], *, progress: bool = True) -> None
             raise ValueError(f"banana in mapping object: {mapping}")
 
 
-def summarize_prefixes(mappings: list[Mapping]) -> pd.DataFrame:
+def summarize_prefixes(mappings: t.List[Mapping]) -> pd.DataFrame:
     """Get a dataframe summarizing the prefixes appearing in the mappings."""
     import bioregistry
 

From d8c8dbe39ec265ac637562888b67567caacc0cdb Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:13:50 +0100
Subject: [PATCH 03/23] Wrap long function signatures in api.py

---
 src/semra/api.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/semra/api.py b/src/semra/api.py
index 747c6a6..eb2cf28 100644
--- a/src/semra/api.py
+++ b/src/semra/api.py
@@ -341,7 +341,9 @@ def filter_self_matches(mappings: Iterable[Mapping], *, progress: bool = True) -
     ]
 
 
-def filter_mappings(mappings: t.List[Mapping], skip_mappings: t.List[Mapping], *, progress: bool = True) -> t.List[Mapping]:
+def filter_mappings(
+    mappings: t.List[Mapping], skip_mappings: t.List[Mapping], *, progress: bool = True
+) -> t.List[Mapping]:
     """Filter out mappings in the second set from the first set."""
     skip_triples = {skip_mapping.triple for skip_mapping in skip_mappings}
     return [
@@ -381,7 +383,12 @@ def filter_many_to_many(mappings: t.List[Mapping], *, progress: bool = True) ->
 
 
 def project(
-    mappings: t.List[Mapping], source_prefix: str, target_prefix: str, *, return_sus: bool = False, progress: bool = False
+    mappings: t.List[Mapping],
+    source_prefix: str,
+    target_prefix: str,
+    *,
+    return_sus: bool = False,
+    progress: bool = False,
 ) -> t.List[Mapping] | t.Tuple[t.List[Mapping], t.List[Mapping]]:
     """Ensure that each identifier only appears as the subject of one mapping."""
     mappings = keep_subject_prefixes(mappings, source_prefix, progress=progress)

From 9a544121f3030fe9a09ceb411ab1a607638f3b20 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:18:43 +0100
Subject: [PATCH 04/23] More typing

---
 src/semra/struct.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/semra/struct.py b/src/semra/struct.py
index a4e60c5..07f7be4 100644
--- a/src/semra/struct.py
+++ b/src/semra/struct.py
@@ -4,17 +4,19 @@
 
 import math
 import pickle
+import typing as t
 import uuid
 from collections.abc import Iterable
 from hashlib import md5
 from itertools import islice
-from typing import Annotated, ClassVar, Literal, Optional, Union
+from typing import ClassVar, Literal, Optional, Union
 
 import pydantic
 from curies import Reference
 from more_itertools import triplewise
 from pydantic import Field
 from pydantic.types import UUID4
+from typing_extensions import Annotated
 
 __all__ = [
     "Reference",
@@ -29,10 +31,10 @@
 ]
 
 #: A type annotation for a subject-predicate-object triple
-Triple = tuple[Reference, Reference, Reference]
+Triple = t.Tuple[Reference, Reference, Reference]
 
 
-def triple_key(triple: Triple) -> tuple[str, str, str]:
+def triple_key(triple: Triple) -> t.Tuple[str, str, str]:
     """Get a sortable key for a triple."""
     return triple[0].curie, triple[2].curie, triple[1].curie
 
@@ -148,7 +150,7 @@ def key(self):
         return self.evidence_type, self.justification, self.author, self.mapping_set.key(), self.uuid
 
     @property
-    def mapping_set_names(self) -> set[str]:
+    def mapping_set_names(self) -> t.Set[str]:
         return {self.mapping_set.name}
 
     def get_confidence(self) -> float:
@@ -165,7 +167,7 @@ class Config:
 
     evidence_type: Literal["reasoned"] = Field(default="reasoned")
     justification: Reference = Field(..., description="A SSSOM-compliant justification")
-    mappings: list[Mapping] = Field(
+    mappings: t.List[Mapping] = Field(
         ..., description="A list of mappings and their evidences consumed to create this evidence"
     )
     author: Optional[Reference] = None
@@ -187,9 +189,12 @@ def mapping_set(self) -> None:
         return None
 
     @property
-    def mapping_set_names(self) -> set[str]:
+    def mapping_set_names(self) -> t.Set[str]:
         return {
-            name for mapping in self.mappings for evidence in mapping.evidence for name in evidence.mapping_set_names
+            name
+            for mapping in self.mappings
+            for evidence in mapping.evidence
+            for name in evidence.mapping_set_names  # type:ignore
         }
 
     @property
@@ -214,7 +219,7 @@ class Config:
     s: Reference = Field(..., title="subject")
     p: Reference = Field(..., title="predicate")
     o: Reference = Field(..., title="object")
-    evidence: list[Evidence] = Field(default_factory=list)
+    evidence: t.List[Evidence] = Field(default_factory=list)
 
     @property
     def triple(self) -> Triple:
@@ -222,7 +227,7 @@ def triple(self) -> Triple:
         return self.s, self.p, self.o
 
     @classmethod
-    def from_triple(cls, triple: Triple, evidence: Optional[list[Evidence]] = None) -> Mapping:
+    def from_triple(cls, triple: Triple, evidence: Optional[t.List[Evidence]] = None) -> Mapping:
         """Instantiate a mapping from a triple."""
         s, p, o = triple
         return cls(s=s, p=p, o=o, evidence=evidence or [])
@@ -255,7 +260,7 @@ def has_tertiary(self) -> bool:
         return any(not isinstance(evidence, SimpleEvidence) for evidence in self.evidence)
 
 
-def line(*references: Reference) -> list[Mapping]:
+def line(*references: Reference) -> t.List[Mapping]:
     """Create a list of mappings from a simple mappings path."""
     if not (3 <= len(references) and len(references) % 2):  # noqa:PLR2004
         raise ValueError

From 405d509d6c2ac604fde634a0d68c4e04cec26f29 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:22:51 +0100
Subject: [PATCH 05/23] Add `from __future__ import annotations` to modules

---
 src/semra/rules.py           | 2 ++
 src/semra/sources/chembl.py  | 3 +++
 src/semra/sources/clo.py     | 2 ++
 src/semra/sources/famplex.py | 2 ++
 src/semra/sources/intact.py  | 2 ++
 src/semra/sources/ncit.py    | 1 +
 src/semra/sources/pubchem.py | 2 ++
 7 files changed, 14 insertions(+)

diff --git a/src/semra/rules.py b/src/semra/rules.py
index 4f9785e..51bc7c3 100644
--- a/src/semra/rules.py
+++ b/src/semra/rules.py
@@ -1,5 +1,7 @@
 """Constants and rules for inference."""
 
+from __future__ import annotations
+
 from semra.struct import Reference
 
 EXACT_MATCH = Reference(prefix="skos", identifier="exactMatch")
diff --git a/src/semra/sources/chembl.py b/src/semra/sources/chembl.py
index f4fec39..c6228ba 100644
--- a/src/semra/sources/chembl.py
+++ b/src/semra/sources/chembl.py
@@ -1,4 +1,7 @@
 """Get mappings from ChEMBL."""
+
+from __future__ import annotations
+
 from typing import Optional
 
 import bioregistry
diff --git a/src/semra/sources/clo.py b/src/semra/sources/clo.py
index db66ced..3b4ae8f 100644
--- a/src/semra/sources/clo.py
+++ b/src/semra/sources/clo.py
@@ -1,5 +1,7 @@
 """Process mappings from CLO."""
 
+from __future__ import annotations
+
 from typing import Optional
 
 import bioontologies
diff --git a/src/semra/sources/famplex.py b/src/semra/sources/famplex.py
index 0f8091a..e0f0155 100644
--- a/src/semra/sources/famplex.py
+++ b/src/semra/sources/famplex.py
@@ -1,5 +1,7 @@
 """Get mappings from FamPlex."""
 
+from __future__ import annotations
+
 import logging
 
 import bioregistry
diff --git a/src/semra/sources/intact.py b/src/semra/sources/intact.py
index d200689..4e01bf2 100644
--- a/src/semra/sources/intact.py
+++ b/src/semra/sources/intact.py
@@ -1,5 +1,7 @@
 """Get mappings from IntAct."""
 
+from __future__ import annotations
+
 import bioregistry
 import bioversions
 import pandas as pd
diff --git a/src/semra/sources/ncit.py b/src/semra/sources/ncit.py
index ec624a5..df364a6 100644
--- a/src/semra/sources/ncit.py
+++ b/src/semra/sources/ncit.py
@@ -1,4 +1,5 @@
 """Get mappings from NCIT."""
+
 from __future__ import annotations
 
 from functools import lru_cache
diff --git a/src/semra/sources/pubchem.py b/src/semra/sources/pubchem.py
index 135cf8e..a5d06ca 100644
--- a/src/semra/sources/pubchem.py
+++ b/src/semra/sources/pubchem.py
@@ -1,5 +1,7 @@
 """Get mappings from PubChem."""
 
+from __future__ import annotations
+
 import logging
 from typing import Optional
 

From dad08701939223ecc517c0447fa8a6beec4c323d Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:23:24 +0100
Subject: [PATCH 06/23] Replace str.removeprefix with slicing in io.py

---
 src/semra/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/semra/io.py b/src/semra/io.py
index 4e1bada..1c2f141 100644
--- a/src/semra/io.py
+++ b/src/semra/io.py
@@ -367,7 +367,7 @@ def _get_name_by_curie(curie: str) -> str | None:
     if curie.startswith("orcid:"):
         import requests
 
-        orcid = curie.removeprefix("orcid:")
+        orcid = curie[len("orcid:") :]
         res = requests.get(f"https://orcid.org/{orcid}", headers={"Accept": "application/json"}, timeout=5).json()
         return res["person"]["name"]["given-names"]["value"] + " " + res["person"]["name"]["family-name"]["value"]
     return pyobo.get_name_by_curie(curie)

From f557ac93af6d0df98cbce622d7da2d4ae2ef3110 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:24:36 +0100
Subject: [PATCH 07/23] Clean

---
 src/semra/client.py      | 2 +-
 src/semra/sources/clo.py | 2 +-
 src/semra/wsgi.py        | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/semra/client.py b/src/semra/client.py
index b029561..f5a0fe7 100644
--- a/src/semra/client.py
+++ b/src/semra/client.py
@@ -26,7 +26,7 @@
 
 Node: TypeAlias = t.Mapping[str, Any]
 
-TxResult: TypeAlias = t.Optional[list[list[Any]]]
+TxResult: TypeAlias = t.Optional[t.List[t.List[Any]]]
 
 ReferenceHint: TypeAlias = t.Union[str, Reference]
 
diff --git a/src/semra/sources/clo.py b/src/semra/sources/clo.py
index 3b4ae8f..5f8cd93 100644
--- a/src/semra/sources/clo.py
+++ b/src/semra/sources/clo.py
@@ -34,7 +34,7 @@ def get_clo_mappings(confidence: float = 0.8) -> list[Mapping]:
     for node in tqdm(graph.nodes, unit_scale=True, unit="node"):
         if not node.id.startswith(CLO_URI_PREFIX):
             continue
-        clo_id = node.id.removeprefix(CLO_URI_PREFIX)
+        clo_id = node.id[len(CLO_URI_PREFIX) :]
         for p in node.properties or []:
             if p.predicate_raw != "http://www.w3.org/2000/01/rdf-schema#seeAlso":
                 continue
diff --git a/src/semra/wsgi.py b/src/semra/wsgi.py
index f34a8ef..cf7200b 100644
--- a/src/semra/wsgi.py
+++ b/src/semra/wsgi.py
@@ -1,5 +1,7 @@
 """Run the app."""
 
+from __future__ import annotations
+
 import os
 
 import fastapi

From 8552c1e65ee4d41268ad72ce0cc9b53e3c4c37a9 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:26:15 +0100
Subject: [PATCH 08/23] Ignore UP006, UP007, and UP035 lint rules

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 25ac2b0..358fe4c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -150,6 +150,8 @@ ignore = [
   "EM102", "EM101",
   # Ignore pickle security warnings
   "S301",
+  # Ignore upgrading type annotations
+  "UP006", "UP007", "UP035",
 ]
 unfixable = [
   # Don't touch unused imports

From df9ca40569a2f6bdce961104a96b8022cfc3f579 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:26:50 +0100
Subject: [PATCH 09/23] Run hatch lint:style in the tests workflow

---
 .github/workflows/tests.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 1c70cb3..3ae09ff 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -23,7 +23,10 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          pip install tox
+          pip install tox hatch
+      - name: Test linting
+        run:
+          hatch run lint:style
       - name: Test with mypy
         run:
           tox -e mypy

From d63f49da89261229bf63cd922095721e585987b2 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:28:27 +0100
Subject: [PATCH 10/23] Ignore A001, A002, and A003 lint rules

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 358fe4c..dfc64e1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -152,6 +152,8 @@ ignore = [
   "S301",
   # Ignore upgrading type annotations
   "UP006", "UP007", "UP035",
+  # Ignore shadowing python builtins (because we use 'license')
+  "A001", "A002", "A003",
 ]
 unfixable = [
   # Don't touch unused imports

From abf5f876ef577243691656b862316aaf2c059d37 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:29:29 +0100
Subject: [PATCH 11/23] Remove B008 noqa comments from wsgi.py

---
 src/semra/wsgi.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/semra/wsgi.py b/src/semra/wsgi.py
index cf7200b..c6e855e 100644
--- a/src/semra/wsgi.py
+++ b/src/semra/wsgi.py
@@ -155,13 +155,13 @@ def view_mapping_set(curie: str):
 
 
 @api_router.get("/evidence/{curie}", response_model=Evidence)
-def get_evidence(curie: str = Path(description="An evidence's MD5 hex digest.")):  # noqa:B008
+def get_evidence(curie: str = Path(description="An evidence's MD5 hex digest.")):
     return client.get_evidence(curie)
 
 
 @api_router.get("/cytoscape/{curie}")
 def get_concept_cytoscape(
-    curie: str = Path(description="the compact URI (CURIE) for a concept", examples=EXAMPLE_CONCEPTS)  # noqa:B008
+    curie: str = Path(description="the compact URI (CURIE) for a concept", examples=EXAMPLE_CONCEPTS)
 ):
     """Get the mapping graph surrounding the concept as a Cytoscape.js JSON object."""
     graph = client.get_connected_component_graph(curie)
@@ -170,18 +170,13 @@ def get_concept_cytoscape(
 
 
 @api_router.get("/mapping/{mapping}", response_model=Mapping)
-def get_mapping(
-    mapping: str = Path(  # noqa:B008
-        description="A mapping's MD5 hex digest.",
-        examples=EXAMPLE_MAPPINGS,
-    )
-):
+def get_mapping(mapping: str = Path(description="A mapping's MD5 hex digest.", examples=EXAMPLE_MAPPINGS)):
     return client.get_mapping(mapping)
 
 
 @api_router.get("/mapping_set/{mapping_set}", response_model=MappingSet)
 def get_mapping_set(
-    mapping_set: str = Path(  # noqa:B008
+    mapping_set: str = Path(
         description="A mapping set's MD5 hex digest.", examples=["7831d5bc95698099fb6471667e5282cd"]
     )
 ):

From 94e67a6c9ac5a921fce0cb466b10cb2f063e5e77 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:32:43 +0100
Subject: [PATCH 12/23] Replace str.removeprefix in clo.py and list[...] in wsgi.py

---
 src/semra/sources/clo.py | 5 ++++-
 src/semra/wsgi.py        | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/semra/sources/clo.py b/src/semra/sources/clo.py
index 5f8cd93..7613f15 100644
--- a/src/semra/sources/clo.py
+++ b/src/semra/sources/clo.py
@@ -39,7 +39,10 @@ def get_clo_mappings(confidence: float = 0.8) -> list[Mapping]:
             if p.predicate_raw != "http://www.w3.org/2000/01/rdf-schema#seeAlso":
                 continue
             for raw_curie in _split(p.value_raw):
-                curie = raw_curie.removeprefix("rrid:").removeprefix("RRID:")
+                if raw_curie.lower().startswith("rrid:"):
+                    curie = raw_curie[len("rrid:"): ]
+                else:
+                    curie = raw_curie
                 prefix: Optional[str]
                 identifier: Optional[str]
                 if curie.startswith("Sanger:COSMICID:"):
diff --git a/src/semra/wsgi.py b/src/semra/wsgi.py
index c6e855e..3ccbb67 100644
--- a/src/semra/wsgi.py
+++ b/src/semra/wsgi.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import os
+import typing as t
 
 import fastapi
 import flask
@@ -183,7 +184,7 @@ def get_mapping_set(
     return client.get_mapping_set(mapping_set)
 
 
-@api_router.get("/mapping_set/", response_model=list[MappingSet])
+@api_router.get("/mapping_set/", response_model=t.List[MappingSet])
 def get_mapping_sets():
     return client.get_mapping_sets()
 

From b690a736afa858cf8111578f5ffc288896fbc9cf Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:34:02 +0100
Subject: [PATCH 13/23] Fix slice formatting in clo.py

---
 src/semra/sources/clo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/semra/sources/clo.py b/src/semra/sources/clo.py
index 7613f15..c4a775f 100644
--- a/src/semra/sources/clo.py
+++ b/src/semra/sources/clo.py
@@ -40,7 +40,7 @@ def get_clo_mappings(confidence: float = 0.8) -> list[Mapping]:
                 continue
             for raw_curie in _split(p.value_raw):
                 if raw_curie.lower().startswith("rrid:"):
-                    curie = raw_curie[len("rrid:"): ]
+                    curie = raw_curie[len("rrid:") :]
                 else:
                     curie = raw_curie
                 prefix: Optional[str]

From d5b06f62966f08b552a8c4e25b0c1290f7f64116 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:35:34 +0100
Subject: [PATCH 14/23] Add _removeprefix helper to clo.py

---
 src/semra/sources/clo.py | 49 +++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/src/semra/sources/clo.py b/src/semra/sources/clo.py
index c4a775f..3768bb5 100644
--- a/src/semra/sources/clo.py
+++ b/src/semra/sources/clo.py
@@ -21,6 +21,12 @@ def _split(s: str) -> list[str]:
     return [p2.replace(" ", "").rstrip(")") for p1 in s.strip().split(";") for p2 in p1.strip().split(",")]
 
 
+def _removeprefix(s, prefix):
+    if s.startswith(prefix):
+        return s[len(prefix) :]
+    return s
+
+
 def get_clo_mappings(confidence: float = 0.8) -> list[Mapping]:
     graph = bioontologies.get_obograph_by_prefix("clo", check=False).guess("clo")
     mapping_set = MappingSet(
@@ -39,56 +45,53 @@ def get_clo_mappings(confidence: float = 0.8) -> list[Mapping]:
             if p.predicate_raw != "http://www.w3.org/2000/01/rdf-schema#seeAlso":
                 continue
             for raw_curie in _split(p.value_raw):
-                if raw_curie.lower().startswith("rrid:"):
-                    curie = raw_curie[len("rrid:") :]
-                else:
-                    curie = raw_curie
+                curie = _removeprefix(_removeprefix(raw_curie, "rrid:"), "RRID:")
                 prefix: Optional[str]
                 identifier: Optional[str]
                 if curie.startswith("Sanger:COSMICID:"):
-                    prefix, identifier = "cosmic.cell", curie.removeprefix("Sanger:COSMICID:")
+                    prefix, identifier = "cosmic.cell", _removeprefix(curie, "Sanger:COSMICID:")
                 elif curie.startswith("atcc:COSMICID:"):
-                    prefix, identifier = "cosmic.cell", curie.removeprefix("atcc:COSMICID:")
+                    prefix, identifier = "cosmic.cell", _removeprefix(curie, "atcc:COSMICID:")
                 elif curie.startswith("DSMZ:COSMICID:"):
-                    prefix, identifier = "cosmic.cell", curie.removeprefix("DSMZ:COSMICID:")
+                    prefix, identifier = "cosmic.cell", _removeprefix(curie, "DSMZ:COSMICID:")
                 elif curie.startswith("COSMIC: COSMIC ID:"):
-                    prefix, identifier = "cosmic.cell", curie.removeprefix("COSMIC: COSMIC ID:")
+                    prefix, identifier = "cosmic.cell", _removeprefix(curie, "COSMIC: COSMIC ID:")
                 elif curie.startswith("RIKEN:COSMICID:"):
-                    prefix, identifier = "cosmic.cell", curie.removeprefix("RIKEN:COSMICID:")
+                    prefix, identifier = "cosmic.cell", _removeprefix(curie, "RIKEN:COSMICID:")
                 elif curie.startswith("COSMICID:"):
-                    prefix, identifier = "cosmic.cell", curie.removeprefix("COSMICID:")
+                    prefix, identifier = "cosmic.cell", _removeprefix(curie, "COSMICID:")
                 elif curie.startswith("LINCS_HMS:"):
-                    prefix, identifier = "hms.lincs.cell", curie.removeprefix("LINCS_HMS:")
+                    prefix, identifier = "hms.lincs.cell", _removeprefix(curie, "LINCS_HMS:")
                 elif curie.startswith("CHEMBL:"):
-                    prefix, identifier = "chembl.cell", curie.removeprefix("CHEMBL:")
+                    prefix, identifier = "chembl.cell", _removeprefix(curie, "CHEMBL:")
                 elif curie.startswith("ChEMBL:"):
-                    prefix, identifier = "chembl.cell", curie.removeprefix("ChEMBL:")
+                    prefix, identifier = "chembl.cell", _removeprefix(curie, "ChEMBL:")
                 elif curie.startswith("BTO_"):
-                    prefix, identifier = "bto", curie.removeprefix("BTO_")
+                    prefix, identifier = "bto", _removeprefix(curie, "BTO_")
                 elif curie.startswith("CVCL_"):
-                    prefix, identifier = "cellosaurus", curie.removeprefix("CVCL_")
+                    prefix, identifier = "cellosaurus", _removeprefix(curie, "CVCL_")
                 elif curie.startswith("JHSF:"):
-                    prefix, identifier = "jcrb", curie.removeprefix("JHSF:")
+                    prefix, identifier = "jcrb", _removeprefix(curie, "JHSF:")
                 elif curie.startswith("CRL-"):
                     prefix, identifier = "atcc", curie
                 elif curie.startswith("jcrb:JHSF:"):
-                    prefix, identifier = "jcrb", curie.removeprefix("jcrb:JHSF:")
+                    prefix, identifier = "jcrb", _removeprefix(curie, "jcrb:JHSF:")
                 elif curie.startswith("JCRB"):
                     prefix, identifier = "jcrb", curie
                 elif curie.startswith("JHSF:JCRB"):
-                    prefix, identifier = "jcrb", curie.removeprefix("JHSF:")
+                    prefix, identifier = "jcrb", _removeprefix(curie, "JHSF:")
                 elif curie.startswith("ATCCCRL"):
-                    prefix, identifier = "atcc", curie.removeprefix("ATCC")
+                    prefix, identifier = "atcc", _removeprefix(curie, "ATCC")
                 elif curie.startswith("bto:BAO_"):
-                    prefix, identifier = "bao", curie.removeprefix("bto:BAO_")
+                    prefix, identifier = "bao", _removeprefix(curie, "bto:BAO_")
                 elif curie.startswith("ACC"):
                     prefix, identifier = "dsmz", curie
                 elif curie.startswith("DSMZACC"):
-                    prefix, identifier = "dsmz", curie.removeprefix("DSMZ")
+                    prefix, identifier = "dsmz", _removeprefix(curie, "DSMZ")
                 elif curie.startswith("dsmz:ACC"):
-                    prefix, identifier = "dsmz", "ACC-" + curie.removeprefix("dsmz:ACC")
+                    prefix, identifier = "dsmz", "ACC-" + _removeprefix(curie, "dsmz:ACC")
                 elif curie.startswith("DSMZ:ACC"):
-                    prefix, identifier = "dsmz", "ACC-" + curie.removeprefix("DSMZ:ACC")
+                    prefix, identifier = "dsmz", "ACC-" + _removeprefix(curie, "DSMZ:ACC")
                 else:
                     prefix, identifier = bioregistry.parse_curie(curie)
 

From 922c80c2052437d4793b57245c058985929efb99 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:37:36 +0100
Subject: [PATCH 15/23] Declare Python 3.8 support and annotate _removeprefix

---
 pyproject.toml           | 3 ++-
 src/semra/sources/clo.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index dfc64e1..b243526 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name = "semra"
 dynamic = ["version"]
 description = 'Semantic Mapping Reasoning Assembler'
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.8"
 license = "MIT"
 keywords = []
 authors = [
@@ -16,6 +16,7 @@ authors = [
 classifiers = [
   "Development Status :: 4 - Beta",
   "Programming Language :: Python",
+  "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
diff --git a/src/semra/sources/clo.py b/src/semra/sources/clo.py
index 3768bb5..7cd41b9 100644
--- a/src/semra/sources/clo.py
+++ b/src/semra/sources/clo.py
@@ -21,7 +21,7 @@ def _split(s: str) -> list[str]:
     return [p2.replace(" ", "").rstrip(")") for p1 in s.strip().split(";") for p2 in p1.strip().split(",")]
 
 
-def _removeprefix(s, prefix):
+def _removeprefix(s: str, prefix: str) -> str:
     if s.startswith(prefix):
         return s[len(prefix) :]
     return s

From e1253150aac2545b7eb601ac0ef9b0164d83d063 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 15:43:43 +0100
Subject: [PATCH 16/23] Add temporary rdflib dependency to pyproject.toml

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index b243526..d00d28b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
   "bioontologies",
   "pyobo",
   "typing_extensions",
+  "rdflib", # remove after https://github.com/biopragmatics/bioregistry/pull/1030 is released
 ]
 
 [project.optional-dependencies]

From 6778c5977b7573f136b2400bd00f18f5e66d1152 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 16:12:34 +0100
Subject: [PATCH 17/23] Use typing.List in test_pipeline.py

---
 tests/test_pipeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index be606fe..faf7bd8 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -1,6 +1,7 @@
 """Tests for the automated assembly pipeline."""
 
 import tempfile
+import typing as t
 import unittest
 from pathlib import Path
 
@@ -30,7 +31,7 @@
 ]
 
 
-def get_test_mappings() -> list[Mapping]:
+def get_test_mappings() -> t.List[Mapping]:
     """A test function to get mappings."""
     return TEST_MAPPINGS
 

From b87da1054c77e8f01f593174d074bd5a4af03159 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 16:32:55 +0100
Subject: [PATCH 18/23] Use typing generics in test_api.py

---
 tests/test_api.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/test_api.py b/tests/test_api.py
index edd1fa7..1f14c6d 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import typing as t
 import unittest
 
 from semra import api
@@ -24,11 +25,11 @@
 from semra.struct import Mapping, MappingSet, ReasonedEvidence, Reference, SimpleEvidence, line, triple_key
 
 
-def _get_references(n: int, prefix: str = "test") -> list[Reference]:
+def _get_references(n: int, prefix: str = "test") -> t.List[Reference]:
     return [Reference(prefix=prefix, identifier=str(i)) for i in range(1, n + 1)]
 
 
-def _exact(s, o, evidence: list[SimpleEvidence] | None = None) -> Mapping:
+def _exact(s, o, evidence: t.Optional[t.List[SimpleEvidence]] = None) -> Mapping:
     return Mapping(s=s, p=EXACT_MATCH, o=o, evidence=evidence or [])
 
 
@@ -101,8 +102,8 @@ def test_index(self):
 
     def assert_same_triples(
         self,
-        expected_mappings: Index | list[Mapping],
-        actual_mappings: Index | list[Mapping],
+        expected_mappings: t.Union[Index, t.List[Mapping]],
+        actual_mappings: t.Union[Index, t.List[Mapping]],
         msg: str | None = None,
     ) -> None:
         """Assert that two sets of mappings are the same."""
@@ -118,7 +119,7 @@ def assert_same_triples(
         )
 
     @staticmethod
-    def _clean_index(index: Index) -> list[str]:
+    def _clean_index(index: Index) -> t.List[str]:
         triples = sorted(set(index), key=triple_key)
         return ["<" + ", ".join(element.curie for element in triple) + ">" for triple in triples]
 

From c2490da9bc2490c47dd3e34f01655d05f0949868 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 16:53:01 +0100
Subject: [PATCH 19/23] Use typing.List for SOURCE_RESOLVER in sources/__init__.py

---
 src/semra/sources/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/semra/sources/__init__.py b/src/semra/sources/__init__.py
index 11ee38d..13b23ad 100644
--- a/src/semra/sources/__init__.py
+++ b/src/semra/sources/__init__.py
@@ -1,6 +1,7 @@
 """Sources of xrefs not from OBO."""
 
 import itertools as itt
+import typing as t
 from collections.abc import Callable, Iterable
 
 from class_resolver import FunctionResolver
@@ -43,7 +44,7 @@
     "get_clo_mappings",
 ]
 
-SOURCE_RESOLVER: FunctionResolver[Callable[[], list[Mapping]]] = FunctionResolver(
+SOURCE_RESOLVER: FunctionResolver[Callable[[], t.List[Mapping]]] = FunctionResolver(
     [
         get_chembl_compound_mappings,
         get_chembl_protein_mappings,

From 840f7045571dba1456aff11e68bf4923fe6dfa4e Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 17:00:15 +0100
Subject: [PATCH 20/23] Use typing.Callable and typing.Iterable in sources/__init__.py

---
 src/semra/sources/__init__.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/semra/sources/__init__.py b/src/semra/sources/__init__.py
index 13b23ad..395fd49 100644
--- a/src/semra/sources/__init__.py
+++ b/src/semra/sources/__init__.py
@@ -2,7 +2,6 @@
 
 import itertools as itt
 import typing as t
-from collections.abc import Callable, Iterable
 
 from class_resolver import FunctionResolver
 
@@ -44,7 +43,7 @@
     "get_clo_mappings",
 ]
 
-SOURCE_RESOLVER: FunctionResolver[Callable[[], t.List[Mapping]]] = FunctionResolver(
+SOURCE_RESOLVER: FunctionResolver[t.Callable[[], t.List[Mapping]]] = FunctionResolver(
     [
         get_chembl_compound_mappings,
         get_chembl_protein_mappings,
@@ -75,6 +74,6 @@
     SOURCE_RESOLVER.synonyms[norm_key] = func
 
 
-def get_custom() -> Iterable[Mapping]:
+def get_custom() -> t.Iterable[Mapping]:
     """Get all custom mappings."""
     return itt.chain.from_iterable(func() for func in SOURCE_RESOLVER)

From d07096e741018ada1bdd7e529f38ea064326c576 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 17:16:10 +0100
Subject: [PATCH 21/23] Use typing.List in pipeline.py

---
 src/semra/pipeline.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/semra/pipeline.py b/src/semra/pipeline.py
index 9b35e80..bd27b52 100644
--- a/src/semra/pipeline.py
+++ b/src/semra/pipeline.py
@@ -4,6 +4,7 @@
 
 import logging
 import time
+import typing as t
 from pathlib import Path
 from typing import Any, Literal, Optional
 
@@ -79,18 +80,18 @@ class Configuration(BaseModel):
 
     name: str = Field(description="The name of the mapping set configuration")
     description: str = Field(description="An explanation of the purpose of the mapping set configuration")
-    inputs: list[Input]
-    negative_inputs: list[Input] = Field(default=[Input(source="biomappings", prefix="negative")])
-    priority: list[str] = Field(..., description="If no priority is given, is inferred from the order of inputs")
-    mutations: list[Mutation] = Field(default_factory=list)
+    inputs: t.List[Input]
+    negative_inputs: t.List[Input] = Field(default=[Input(source="biomappings", prefix="negative")])
+    priority: t.List[str] = Field(..., description="If no priority is given, is inferred from the order of inputs")
+    mutations: t.List[Mutation] = Field(default_factory=list)
 
-    exclude_pairs: list[tuple[str, str]] = Field(
+    exclude_pairs: t.List[tuple[str, str]] = Field(
         default_factory=list,
         description="A list of pairs of prefixes. Remove all mappings whose source "
         "prefix is the first in a pair and target prefix is second in a pair. Order matters.",
     )
-    remove_prefixes: Optional[list[str]] = None
-    keep_prefixes: Optional[list[str]] = None
+    remove_prefixes: Optional[t.List[str]] = None
+    keep_prefixes: Optional[t.List[str]] = None
     remove_imprecise: bool = True
     validate_raw: bool = Field(
         default=False,
@@ -128,7 +129,7 @@ def get_mappings_from_config(
     *,
     refresh_raw: bool = False,
     refresh_processed: bool = False,
-) -> list[Mapping]:
+) -> t.List[Mapping]:
     """Run assembly based on a configuration."""
     if (
         configuration.processed_pickle_path
@@ -204,7 +205,7 @@ def _get_equivalence_classes(mappings, prioritized_mappings) -> dict[Reference,
     return rv
 
 
-def get_raw_mappings(configuration: Configuration) -> list[Mapping]:
+def get_raw_mappings(configuration: Configuration) -> t.List[Mapping]:
     """Get raw mappings based on the inputs in a configuration."""
     mappings = []
     for inp in tqdm(configuration.inputs, desc="Loading configured mappings", unit="source"):
@@ -243,13 +244,13 @@ def get_raw_mappings(configuration: Configuration) -> list[Mapping]:
 
 
 def process(
-    mappings: list[Mapping],
+    mappings: t.List[Mapping],
     upgrade_prefixes=None,
     remove_prefix_set=None,
     keep_prefix_set=None,
     *,
     remove_imprecise: bool = True,
-) -> list[Mapping]:
+) -> t.List[Mapping]:
     """Run a full deduplication, reasoning, and inference pipeline over a set of mappings."""
     from semra.sources.biopragmatics import from_biomappings_negative
 
@@ -329,7 +330,7 @@ def process(
     return mappings
 
 
-def _log_diff(before: int, mappings: list[Mapping], *, verb: str, elapsed) -> None:
+def _log_diff(before: int, mappings: t.List[Mapping], *, verb: str, elapsed) -> None:
     logger.info(
         f"{verb} from {before:,} to {len(mappings):,} mappings (Δ={len(mappings) - before:,}) in %.2f seconds.",
         elapsed,

From b2000b32e40268ac1ed10930bd72679e49ed0203 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 17:22:36 +0100
Subject: [PATCH 22/23] Use typing.Dict for Input.extras in pipeline.py

---
 src/semra/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/semra/pipeline.py b/src/semra/pipeline.py
index bd27b52..891fe66 100644
--- a/src/semra/pipeline.py
+++ b/src/semra/pipeline.py
@@ -63,7 +63,7 @@ class Input(BaseModel):
     source: Literal["pyobo", "bioontologies", "biomappings", "custom", "sssom", "gilda"]
     prefix: Optional[str] = None
     confidence: float = 1.0
-    extras: dict[str, Any] = Field(default_factory=dict)
+    extras: t.Dict[str, Any] = Field(default_factory=dict)
 
 
 class Mutation(BaseModel):

From 2d0fa9f6369bced2ef99d9e774144173e4cff30c Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 17:24:31 +0100
Subject: [PATCH 23/23] Use typing.Tuple for exclude_pairs in pipeline.py

---
 src/semra/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/semra/pipeline.py b/src/semra/pipeline.py
index 891fe66..bb3911d 100644
--- a/src/semra/pipeline.py
+++ b/src/semra/pipeline.py
@@ -85,7 +85,7 @@ class Configuration(BaseModel):
     priority: t.List[str] = Field(..., description="If no priority is given, is inferred from the order of inputs")
     mutations: t.List[Mutation] = Field(default_factory=list)
 
-    exclude_pairs: t.List[tuple[str, str]] = Field(
+    exclude_pairs: t.List[t.Tuple[str, str]] = Field(
         default_factory=list,
         description="A list of pairs of prefixes. Remove all mappings whose source "
         "prefix is the first in a pair and target prefix is second in a pair. Order matters.",