diff --git a/src/curies/reconciliation.py b/src/curies/reconciliation.py index 8484db6..ae55b25 100644 --- a/src/curies/reconciliation.py +++ b/src/curies/reconciliation.py @@ -4,6 +4,8 @@ from collections import Counter, defaultdict from typing import Collection, List, Mapping, Optional, Tuple +from typing_extensions import Literal + from .api import Converter, Record __all__ = [ @@ -34,7 +36,12 @@ def __str__(self) -> str: ) -def remap_curie_prefixes(converter: Converter, remapping: Mapping[str, str]) -> Converter: +def remap_curie_prefixes( + converter: Converter, + remapping: Mapping[str, str], + *, + intersection_resolution: Literal["overwrite", "drop"] = "drop", +) -> Converter: """Apply CURIE prefix remappings. :param converter: A converter @@ -68,9 +75,16 @@ def remap_curie_prefixes(converter: Converter, remapping: Mapping[str, str]) -> new_record, ) elif old in intersection: - record.prefix_synonyms = sorted( - set(record.prefix_synonyms).difference({old, new_prefix}) - ) + if intersection_resolution == "drop": + # throw away all synonyms from intersections, + # since there can be non-trivial overlaps + record.prefix_synonyms = [] + elif intersection_resolution == "overwrite": + record.prefix_synonyms = sorted( + set(record.prefix_synonyms).difference({old, new_prefix}) + ) + else: + raise TypeError(f"invalid intersection resolution mode: {intersection_resolution}") record.prefix = new_prefix else: record.prefix_synonyms = sorted( diff --git a/tests/test_reconciliation.py b/tests/test_reconciliation.py index d5c686a..013c4f1 100644 --- a/tests/test_reconciliation.py +++ b/tests/test_reconciliation.py @@ -77,6 +77,59 @@ def test_duplicate_correspondence(self): with self.assertRaises(InconsistentMapping): _order_curie_remapping(converter, curie_remapping) + def test_remapping_with_synonym(self): + """Test that remapping with synonym prefixes works as expected.""" + r1 = Record( + prefix="geo", # also should not survive + prefix_synonyms=["GEO", 
"should_not_survive"], + uri_prefix="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=", + pattern="^G(PL|SM|SE|DS)\\d+$", + ) + r2 = Record( + prefix="geogeo", + prefix_synonyms=["GEOGEO"], + uri_prefix="http://purl.obolibrary.org/obo/GEO_", + pattern="^\\d{9}$", + ) + c1 = Converter([r1, r2]) + remapping = { + "GEO": "ncbi.geo", + "geogeo": "GEO", + } + c2 = remap_curie_prefixes(c1, remapping) + r3 = Record( + prefix="ncbi.geo", + uri_prefix="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=", + pattern="^G(PL|SM|SE|DS)\\d+$", + ) + r4 = Record( + prefix="GEO", + prefix_synonyms=["GEOGEO", "geogeo"], + uri_prefix="http://purl.obolibrary.org/obo/GEO_", + pattern="^\\d{9}$", + ) + self.assertEqual([r4, r3], c2.records) + + def test_remapping_invalid_mode(self): + """Test that an invalid intersection resolution mode raises a TypeError.""" + r1 = Record( + prefix="geo", # also should not survive + prefix_synonyms=["GEO", "should_not_survive"], + uri_prefix="https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=", + ) + r2 = Record( + prefix="geogeo", + prefix_synonyms=["GEOGEO"], + uri_prefix="http://purl.obolibrary.org/obo/GEO_", + ) + c1 = Converter([r1, r2]) + remapping = { + "GEO": "ncbi.geo", + "geogeo": "GEO", + } + with self.assertRaises(TypeError): + remap_curie_prefixes(c1, remapping, intersection_resolution="nope") + def test_cycles(self): """Test detecting bad mapping with cycles.""" converter = Converter(