From d5cbe4d4d34a271eea84926897599f39c41c7be5 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Tue, 12 Dec 2023 18:14:57 +0100
Subject: [PATCH 1/5] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 169612c..02ac69b 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <p align="center">
-  <img src="https://github.com/biopragmatics/semra/raw/master/docs/img/logo.png" alt="Logo for SeMRA" height="150">
+  <img src="https://github.com/biopragmatics/semra/blob/main/docs/img/logo.png?raw=true" alt="Logo for SeMRA" height="150">
 </p>
 
 # Semantic Mapping Reasoning Assembler (SeMRA)

From 2151ec6c0fcd3cbd4579d1464b28f997a074f87e Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 10:23:04 +0100
Subject: [PATCH 2/5] Minor API refactoring

---
 src/semra/api.py  | 41 +++++++++++++++++++++--------------------
 tests/test_api.py | 28 ++++++++++++++--------------
 2 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/src/semra/api.py b/src/semra/api.py
index 000c579..37ca9ad 100644
--- a/src/semra/api.py
+++ b/src/semra/api.py
@@ -268,20 +268,21 @@ def infer_mutations(
     for mapping in _tqdm(mappings, desc="Adding mutated predicates", progress=progress):
         rv.append(mapping)
         confidence = pairs.get((mapping.s.prefix, mapping.o.prefix))
-        if confidence is not None and mapping.p == old:
-            inferred_mapping = Mapping(
-                s=mapping.s,
-                p=new,
-                o=mapping.o,
-                evidence=[
-                    ReasonedEvidence(
-                        justification=KNOWLEDGE_MAPPING,
-                        mappings=[mapping],
-                        confidence_factor=confidence,
-                    )
-                ],
-            )
-            rv.append(inferred_mapping)
+        if confidence is None or mapping.p != old:
+            continue
+        inferred_mapping = Mapping(
+            s=mapping.s,
+            p=new,
+            o=mapping.o,
+            evidence=[
+                ReasonedEvidence(
+                    justification=KNOWLEDGE_MAPPING,
+                    mappings=[mapping],
+                    confidence_factor=confidence,
+                )
+            ],
+        )
+        rv.append(inferred_mapping)
     return rv
 
 
@@ -313,12 +314,12 @@ def keep_object_prefixes(mappings: Iterable[Mapping], prefixes: str | Iterable[s
     ]
 
 
-def filter_prefixes(mappings: Iterable[Mapping], prefixes: Iterable[str]) -> list[Mapping]:
+def filter_prefixes(mappings: Iterable[Mapping], prefixes: Iterable[str], *, progress: bool = True) -> list[Mapping]:
     """Filter out mappings whose subject or object are in the given list of prefixes."""
     prefixes = set(prefixes)
     return [
         mapping
-        for mapping in _tqdm(mappings, desc=f"Filtering out {len(prefixes)} prefixes")
+        for mapping in _tqdm(mappings, desc=f"Filtering out {len(prefixes)} prefixes", progress=progress)
         if mapping.s.prefix not in prefixes and mapping.o.prefix not in prefixes
     ]
 
@@ -375,8 +376,8 @@ def project(
     mappings: list[Mapping], source_prefix: str, target_prefix: str, *, return_sus: bool = False, progress: bool = False
 ) -> list[Mapping] | tuple[list[Mapping], list[Mapping]]:
     """Ensure that each identifier only appears as the subject of one mapping."""
-    mappings = keep_subject_prefixes(mappings, source_prefix)
-    mappings = keep_object_prefixes(mappings, target_prefix)
+    mappings = keep_subject_prefixes(mappings, source_prefix, progress=progress)
+    mappings = keep_object_prefixes(mappings, target_prefix, progress=progress)
     m2m_mappings = get_many_to_many(mappings)
     mappings = filter_mappings(mappings, m2m_mappings, progress=progress)
     mappings = assemble_evidences(mappings, progress=progress)
@@ -459,11 +460,11 @@ def deduplicate_evidence(evidence: list[Evidence]) -> list[Evidence]:
     return list(d.values())
 
 
-def validate_mappings(mappings: list[Mapping]) -> None:
+def validate_mappings(mappings: list[Mapping], *, progress: bool = True) -> None:
     """Validate mappings against the Bioregistry and raise an error on the first invalid."""
     import bioregistry
 
-    for mapping in tqdm(mappings, desc="Validating mappings", unit_scale=True, unit="mapping"):
+    for mapping in tqdm(mappings, desc="Validating mappings", unit_scale=True, unit="mapping", disable=not progress):
         if bioregistry.normalize_prefix(mapping.s.prefix) != mapping.s.prefix:
             raise ValueError(f"invalid subject prefix.\n\nMapping: {mapping}\n\nSubject:{mapping.s}.")
         if bioregistry.normalize_prefix(mapping.o.prefix) != mapping.o.prefix:
diff --git a/tests/test_api.py b/tests/test_api.py
index 512c026..b19881d 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -31,9 +31,9 @@ def _exact(s, o, evidence: list[SimpleEvidence] | None = None) -> Mapping:
 
 EV = SimpleEvidence(
     justification=MANUAL_MAPPING,
-    mapping_set=MappingSet(name="test_mapping_set"),
+    mapping_set=MappingSet(name="test_mapping_set", confidence=0.95),
 )
-MS = MappingSet(name="test")
+MS = MappingSet(name="test", confidence=0.95)
 
 
 class TestOperations(unittest.TestCase):
@@ -87,7 +87,7 @@ def test_index(self):
         )
         m1 = Mapping(s=r1, p=EXACT_MATCH, o=r2, evidence=[e1])
         m2 = Mapping(s=r1, p=EXACT_MATCH, o=r2, evidence=[e2])
-        index = get_index([m1, m2])
+        index = get_index([m1, m2], progress=False)
         self.assertIn(m1.triple, index)
         self.assertEqual(1, len(index))
         self.assertEqual(2, len(index[m1.triple]))
@@ -130,10 +130,10 @@ def test_infer_exact_match(self):
 
         backwards_msg = "backwards inference is not supposed to be done here"
 
-        index = get_index(infer_chains([m1, m2], backwards=False))
+        index = get_index(infer_chains([m1, m2], backwards=False, progress=False), progress=False)
         self.assertNotIn(m4_inv.triple, index, msg=backwards_msg)
 
-        index = get_index(infer_chains([m1, m2, m3], backwards=False))
+        index = get_index(infer_chains([m1, m2, m3], backwards=False, progress=False), progress=False)
         self.assert_same_triples([m1, m2, m3, m4, m4, m5, m6], index)
         self.assertNotIn(m4_inv.triple, index, msg=backwards_msg)
         self.assertNotIn(m5_inv.triple, index, msg=backwards_msg)
@@ -164,12 +164,12 @@ def test_no_infer(self):
     def test_infer_broad_match_1(self):
         r1, r2, r3, r4 = _get_references(4)
         m1, m2, m3 = line(r1, EXACT_MATCH, r2, BROAD_MATCH, r3, EXACT_MATCH, r4)
-        m4 = Mapping(s=r1, p=BROAD_MATCH, o=r3)
-        m5 = Mapping(s=r1, p=BROAD_MATCH, o=r4)
-        m6 = Mapping(s=r2, p=BROAD_MATCH, o=r4)
-        m4_i = Mapping(o=r1, p=NARROW_MATCH, s=r3)
-        m5_i = Mapping(o=r1, p=NARROW_MATCH, s=r4)
-        m6_i = Mapping(o=r2, p=NARROW_MATCH, s=r4)
+        m4 = Mapping(s=r1, p=BROAD_MATCH, o=r3, evidence=[EV])
+        m5 = Mapping(s=r1, p=BROAD_MATCH, o=r4, evidence=[EV])
+        m6 = Mapping(s=r2, p=BROAD_MATCH, o=r4, evidence=[EV])
+        m4_i = Mapping(o=r1, p=NARROW_MATCH, s=r3, evidence=[EV])
+        m5_i = Mapping(o=r1, p=NARROW_MATCH, s=r4, evidence=[EV])
+        m6_i = Mapping(o=r2, p=NARROW_MATCH, s=r4, evidence=[EV])
 
         # Check inference over two steps
         self.assert_same_triples(
@@ -258,7 +258,7 @@ def test_filter_self(self):
         m2 = Mapping(s=r12, p=EXACT_MATCH, o=r22)
         m3 = Mapping(s=r11, p=EXACT_MATCH, o=r13)
         mappings = [m1, m2, m3]
-        self.assert_same_triples([m1, m2], filter_self_matches(mappings))
+        self.assert_same_triples([m1, m2], filter_self_matches(mappings, progress=False))
 
     def test_filter_negative(self):
         """Test filtering out mappings within a given prefix."""
@@ -268,7 +268,7 @@ def test_filter_negative(self):
         m2 = Mapping(s=r12, p=EXACT_MATCH, o=r22)
         mappings = [m1, m2]
         negative = [m2]
-        self.assert_same_triples([m1], filter_mappings(mappings, negative))
+        self.assert_same_triples([m1], filter_mappings(mappings, negative, progress=False))
 
     def test_project(self):
         """Test projecting into a given source/target pair."""
@@ -280,7 +280,7 @@ def test_project(self):
         m2_i = Mapping(o=r12, p=EXACT_MATCH, s=r22)
         m3 = Mapping(s=r11, p=EXACT_MATCH, o=r31)
         mappings = [m1, m2, m2_i, m3]
-        self.assert_same_triples([m1, m2], project(mappings, "p1", "p2"))
+        self.assert_same_triples([m1, m2], project(mappings, "p1", "p2", progress=False))
 
     def test_get_many_to_many(self):
         """Test getting many-to-many mappings."""

From a8091bb36f05c0116f3c50557c853fabd1d090f9 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 10:25:42 +0100
Subject: [PATCH 3/5] Update probability model and docs

---
 src/semra/io.py     |  17 +++++--
 src/semra/struct.py | 112 +++++++++++++++++++++++---------------------
 tests/test_api.py   |  32 ++++++++++++-
 3 files changed, 102 insertions(+), 59 deletions(-)

diff --git a/src/semra/io.py b/src/semra/io.py
index f5aad28..4e1bada 100644
--- a/src/semra/io.py
+++ b/src/semra/io.py
@@ -391,7 +391,7 @@ def _get_sssom_row(mapping: Mapping, e: Evidence):
         ",".join(sorted(e.mapping_set_names)),
         mapping_set_version,
         mapping_set_license,
-        round(confidence, CONFIDENCE_PRECISION) if (confidence := e.confidence) is not None else "",
+        _safe_confidence(e),
         e.author.curie if e.author else "",
         e.explanation,
     )
@@ -436,6 +436,13 @@ def _neo4j_bool(b: bool, /) -> Literal["true", "false"]:  # noqa:FBT001
     return "true" if b else "false"  # type:ignore
 
 
+def _safe_confidence(x) -> str:
+    try:  # Mapping.get_confidence() raises ValueError when the mapping has no evidence
+        return "" if (c := x.get_confidence()) is None else str(round(c, CONFIDENCE_PRECISION))
+    except ValueError:  # no confidence available: export an empty cell, same as for None
+        return ""
+
+
 def write_neo4j(
     mappings: list[Mapping],
     directory: str | Path,
@@ -515,7 +522,7 @@ def write_neo4j(
                 mapping.s.curie,
                 mapping.p.curie,
                 mapping.o.curie,
-                round(c, CONFIDENCE_PRECISION) if (c := mapping.confidence) is not None else "",
+                _safe_confidence(mapping),
                 _neo4j_bool(mapping.has_primary),
                 _neo4j_bool(mapping.has_secondary),
                 _neo4j_bool(mapping.has_tertiary),
@@ -566,7 +573,7 @@ def write_neo4j(
                 "mapping",
                 "semra.mapping",
                 mapping.p.curie,
-                mapping.confidence and round(mapping.confidence, CONFIDENCE_PRECISION),
+                _safe_confidence(mapping),
                 _neo4j_bool(mapping.has_primary),
                 _neo4j_bool(mapping.has_secondary),
                 _neo4j_bool(mapping.has_tertiary),
@@ -585,7 +592,7 @@ def write_neo4j(
                 mapping_set.name,
                 mapping_set.license or "",
                 mapping_set.version or "",
-                c if (c := mapping_set.confidence) is not None else "",
+                _safe_confidence(mapping_set),
             )
             for mapping_set in sorted(mapping_sets.values(), key=lambda n: n.curie)
         ),
@@ -600,7 +607,7 @@ def write_neo4j(
                 "semra.evidence",
                 evidence.evidence_type,
                 evidence.justification.curie,
-                c if (c := evidence.confidence) is not None else "",
+                _safe_confidence(evidence),
             )
             for evidence in sorted(evidences.values(), key=lambda row: row.curie)
         ),
diff --git a/src/semra/struct.py b/src/semra/struct.py
index 9289c63..a4e60c5 100644
--- a/src/semra/struct.py
+++ b/src/semra/struct.py
@@ -8,7 +8,7 @@
 from collections.abc import Iterable
 from hashlib import md5
 from itertools import islice
-from typing import Annotated, Literal, Optional, Union
+from typing import Annotated, ClassVar, Literal, Optional, Union
 
 import pydantic
 from curies import Reference
@@ -37,6 +37,7 @@ def triple_key(triple: Triple) -> tuple[str, str, str]:
     return triple[0].curie, triple[2].curie, triple[1].curie
 
 
+EPSILON = 1e-6
 EvidenceType = Literal["simple", "mutated", "reasoned"]
 JUSTIFICATION_FIELD = Field(description="A SSSOM-compliant justification")
 
@@ -47,43 +48,70 @@ def _md5_hexdigest(picklable) -> str:
     return hasher.hexdigest()
 
 
-class EvidenceMixin:
+class KeyedMixin:
+    """A mixin for a class that can be hashed and CURIE-encoded."""
+
+    #: The prefix for CURIEs for instances of this class
+    _prefix: ClassVar[str]
+
+    def __init_subclass__(cls, *, prefix: str, **kwargs):
+        cls._prefix = prefix
+
     def key(self):
+        """Return a picklable key."""
         raise NotImplementedError
 
     def hexdigest(self) -> str:
+        """Generate a hexadecimal representation of the MD5 hash of the pickled key() for this class."""
         key = self.key()
         return _md5_hexdigest(key)
 
     def get_reference(self) -> Reference:
-        return Reference(prefix="semra.evidence", identifier=self.hexdigest())
+        """Get a CURIE reference using this class's prefix and its hexadecimal representation."""
+        return Reference(prefix=self._prefix, identifier=self.hexdigest())
 
     @property
     def curie(self) -> str:
+        """Get a string representing the CURIE."""
         return self.get_reference().curie
 
 
-class MappingSet(pydantic.BaseModel):
+class ConfidenceMixin:
+    def get_confidence(self) -> float:
+        raise NotImplementedError
+
+
+class EvidenceMixin:
+    @property
+    def explanation(self) -> str:
+        return ""
+
+
+class MappingSet(pydantic.BaseModel, ConfidenceMixin, KeyedMixin, prefix="semra.mappingset"):
+    """Represents a set of semantic mappings.
+
+    For example, this might correspond to:
+
+    1. All the mappings extracted from an ontology
+    2. All the mappings published with a database
+    3. All the mappings inferred by SeMRA based on a given configuration
+    """
+
     name: str = Field(..., description="Name of the mapping set")
     version: Optional[str] = Field(default=None, description="The version of the dataset from which the mapping comes")
     license: Optional[str] = Field(default=None, description="License name or URL for mapping set")
-    confidence: Optional[float] = Field(default=None, description="Mapping set level confidence")
+    confidence: float = Field(..., description="Mapping set level confidence")
 
     def key(self):
-        return self.name, self.version or "", self.license or "", 1.0 if self.confidence is None else self.confidence
+        """Get a picklable key representing the mapping set."""
+        return self.name, self.version or "", self.license or "", self.confidence
 
-    def hexdigest(self) -> str:
-        return _md5_hexdigest(self.key())
+    def get_confidence(self) -> float:
+        """Get the confidence for the mapping set."""
+        return self.confidence
 
-    def get_reference(self) -> Reference:
-        return Reference(prefix="semra.mappingset", identifier=self.hexdigest())
 
-    @property
-    def curie(self) -> str:
-        return self.get_reference().curie
-
-
-class SimpleEvidence(pydantic.BaseModel, EvidenceMixin):
+class SimpleEvidence(pydantic.BaseModel, KeyedMixin, EvidenceMixin, ConfidenceMixin, prefix="semra.evidence"):
     """Evidence for a mapping.
 
     Ideally, this matches the SSSOM data model.
@@ -108,6 +136,7 @@ class Config:
         ],
     )
     uuid: UUID4 = Field(default_factory=uuid.uuid4)
+    confidence: Optional[float] = Field(None, description="The confidence")
 
     def key(self):
         """Get a key suitable for hashing the evidence.
@@ -116,22 +145,17 @@ def key(self):
 
         Note: this should be extended to include basically _all_ fields
         """
-        return (self.evidence_type, self.justification, self.author, self.mapping_set.key(), self.uuid)
+        return self.evidence_type, self.justification, self.author, self.mapping_set.key(), self.uuid
 
     @property
     def mapping_set_names(self) -> set[str]:
         return {self.mapping_set.name}
 
-    @property
-    def confidence(self) -> Optional[float]:
-        return self.mapping_set.confidence
-
-    @property
-    def explanation(self) -> str:
-        return ""
+    def get_confidence(self) -> float:
+        return self.confidence if self.confidence is not None else self.mapping_set.confidence
 
 
-class ReasonedEvidence(pydantic.BaseModel, EvidenceMixin):
+class ReasonedEvidence(pydantic.BaseModel, KeyedMixin, EvidenceMixin, ConfidenceMixin, prefix="semra.evidence"):
     """A complex evidence based on multiple mappings."""
 
     class Config:
@@ -145,7 +169,7 @@ class Config:
         ..., description="A list of mappings and their evidences consumed to create this evidence"
     )
     author: Optional[Reference] = None
-    confidence_factor: float = 1.0
+    confidence_factor: float = Field(1.0, description="The probability that the reasoning method is correct")
 
     def key(self):
         return (
@@ -154,13 +178,9 @@ def key(self):
             *((*m.triple, *(e.key() for e in m.evidence)) for m in self.mappings),
         )
 
-    @property
-    def confidence(self) -> Optional[float]:
-        confidences = [mapping.confidence for mapping in self.mappings]
-        nn_confidences = [c for c in confidences if c is not None]
-        if not nn_confidences:
-            return None
-        return self.confidence_factor * _joint_probability(nn_confidences)
+    def get_confidence(self) -> float:
+        confidences = [mapping.get_confidence() for mapping in self.mappings]
+        return _joint_probability([self.confidence_factor, *confidences])
 
     @property
     def mapping_set(self) -> None:
@@ -183,7 +203,7 @@ def explanation(self) -> str:
 ]
 
 
-class Mapping(pydantic.BaseModel):
+class Mapping(pydantic.BaseModel, ConfidenceMixin, KeyedMixin, prefix="semra.mapping"):
     """A semantic mapping."""
 
     class Config:
@@ -202,30 +222,16 @@ def triple(self) -> Triple:
         return self.s, self.p, self.o
 
     @classmethod
-    def from_triple(cls, triple: Triple, evidence: Union[list[Evidence], None] = None) -> Mapping:
+    def from_triple(cls, triple: Triple, evidence: Optional[list[Evidence]] = None) -> Mapping:
         """Instantiate a mapping from a triple."""
         s, p, o = triple
         return cls(s=s, p=p, o=o, evidence=evidence or [])
 
-    @property
-    def confidence(self) -> Optional[float]:
+    def get_confidence(self) -> float:
+        """Get the mapping's confidence by aggregating its evidences' confidences in a binomial model."""
         if not self.evidence:
-            return None
-        confidences = [e.confidence for e in self.evidence]
-        nn_confidences = [c for c in confidences if c is not None]
-        if not nn_confidences:
-            return None
-        return _joint_probability(nn_confidences)
-
-    def hexdigest(self) -> str:
-        return _md5_hexdigest(self.triple)
-
-    def get_reference(self) -> Reference:
-        return Reference(prefix="semra.mapping", identifier=self.hexdigest())
-
-    @property
-    def curie(self) -> str:
-        return self.get_reference().curie
+            raise ValueError("can not calculate confidence since no evidence")
+        return _joint_probability(e.get_confidence() for e in self.evidence)
 
     @property
     def has_primary(self) -> bool:
diff --git a/tests/test_api.py b/tests/test_api.py
index b19881d..c6fe7d2 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -4,6 +4,7 @@
 
 from semra.api import (
     BROAD_MATCH,
+    DB_XREF,
     EXACT_MATCH,
     NARROW_MATCH,
     Index,
@@ -13,11 +14,12 @@
     get_index,
     get_many_to_many,
     infer_chains,
+    infer_mutations,
     infer_reversible,
     keep_prefixes,
     project,
 )
-from semra.rules import MANUAL_MAPPING
+from semra.rules import KNOWLEDGE_MAPPING, MANUAL_MAPPING
 from semra.struct import Mapping, MappingSet, ReasonedEvidence, Reference, SimpleEvidence, line, triple_key
 
 
@@ -295,3 +297,31 @@ def test_get_many_to_many(self):
 
         m4 = Mapping(s=a3, p=EXACT_MATCH, o=b2)
         self.assert_same_triples([m3, m4], get_many_to_many([m2, m3, m4]))
+
+
+class TestUpgrades(unittest.TestCase):
+    """Test inferring mutations."""
+
+    def test_infer_mutations(self):
+        """Test that a mutation keeps the original mapping and adds a reasoned one with aggregated confidence."""
+        (a1,) = _get_references(1, prefix="a")
+        (b1,) = _get_references(1, prefix="b")
+        original_confidence = 0.95
+        mutation_confidence = 0.80
+        m1 = Mapping(s=a1, p=DB_XREF, o=b1, evidence=[SimpleEvidence(confidence=original_confidence, mapping_set=MS)])
+        new_mappings = infer_mutations(
+            [m1], {("a", "b"): mutation_confidence}, old=DB_XREF, new=EXACT_MATCH, progress=False
+        )
+        self.assertEqual(2, len(new_mappings))
+        new_m1, new_m2 = new_mappings
+        self.assertEqual(m1, new_m1)
+        self.assertEqual(a1, new_m2.s)
+        self.assertEqual(EXACT_MATCH, new_m2.p)
+        self.assertEqual(b1, new_m2.o)
+        self.assertEqual(1, len(new_m2.evidence))
+        new_evidence = new_m2.evidence[0]
+        self.assertIsInstance(new_evidence, ReasonedEvidence)
+        new_confidence = new_evidence.get_confidence()
+        self.assertIsNotNone(new_confidence)
+        self.assertAlmostEqual(1 - (1 - original_confidence) * (1 - mutation_confidence), new_confidence)
+        self.assertEqual(KNOWLEDGE_MAPPING, new_evidence.justification)

From ce4f34a448a46b3bdbebf358ab899d001368b693 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 11:06:27 +0100
Subject: [PATCH 4/5] Add py38 tests

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 0eab69d..1c70cb3 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,7 +13,7 @@ jobs:
     strategy:
       matrix:
         os: [ ubuntu-latest ]
-        python-version: [ "3.11", "3.9" ]
+        python-version: [ "3.11", "3.8" ]
         pydantic: [ "pydantic1", "pydantic2" ]
     steps:
       - uses: actions/checkout@v2

From 8d1d4b472ddb50d34d5ab1cf2e326f54d54aa928 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Mon, 22 Jan 2024 11:07:16 +0100
Subject: [PATCH 5/5] Add confidence filter function

---
 src/semra/api.py  | 11 +++++++++++
 tests/test_api.py | 10 ++++++++++
 2 files changed, 21 insertions(+)

diff --git a/src/semra/api.py b/src/semra/api.py
index 37ca9ad..f2091b5 100644
--- a/src/semra/api.py
+++ b/src/semra/api.py
@@ -498,3 +498,14 @@ def summarize_prefixes(mappings: list[Mapping]) -> pd.DataFrame:
         [(prefix, bioregistry.get_name(prefix), bioregistry.get_description(prefix)) for prefix in sorted(prefixes)],
         columns=["prefix", "name", "description"],
     ).set_index("prefix")
+
+
+def filter_minimum_confidence(mappings: Iterable[Mapping], cutoff: float = 0.7) -> Iterable[Mapping]:
+    """Yield mappings with confidence >= cutoff, skipping any whose get_confidence() raises ValueError."""
+    for mapping in mappings:
+        try:
+            confidence = mapping.get_confidence()
+        except ValueError:  # e.g. a mapping with no evidence has no computable confidence
+            continue
+        if confidence >= cutoff:
+            yield mapping
diff --git a/tests/test_api.py b/tests/test_api.py
index c6fe7d2..edd1fa7 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -2,6 +2,7 @@
 
 import unittest
 
+from semra import api
 from semra.api import (
     BROAD_MATCH,
     DB_XREF,
@@ -298,6 +299,15 @@ def test_get_many_to_many(self):
         m4 = Mapping(s=a3, p=EXACT_MATCH, o=b2)
         self.assert_same_triples([m3, m4], get_many_to_many([m2, m3, m4]))
 
+    def test_filter_confidence(self):
+        """Test filtering by confidence."""
+        (a1, a2) = _get_references(2, prefix="a")
+        (b1, b2) = _get_references(2, prefix="b")
+        m1 = Mapping(s=a1, p=DB_XREF, o=b1, evidence=[SimpleEvidence(confidence=0.95, mapping_set=MS)])
+        m2 = Mapping(s=a1, p=DB_XREF, o=b1, evidence=[SimpleEvidence(confidence=0.65, mapping_set=MS)])
+        mmm = list(api.filter_minimum_confidence([m1, m2], cutoff=0.7))
+        self.assertEqual([m1], mmm)
+
 
 class TestUpgrades(unittest.TestCase):
     """Test inferring mutations."""