Future-House · nadolskit · Sep 16, 2024 · Sep 14, 2024 · Sep 14, 2024 · Sep 14, 2024
diff --git a/tests/test_clients.py b/tests/test_clients.py
@@ -19,6 +19,7 @@
 from paperqa.clients.journal_quality import JournalQualityPostProcessor
 from paperqa.clients.retractions import RetrationDataPostProcessor
 
+from utils.paper_helpers import compare_formatted_citations
 
 @pytest.mark.vcr
 @pytest.mark.parametrize(
@@ -112,11 +113,17 @@ async def test_title_search(paper_attributes: dict[str, str]) -> None:
             ),
         )
         details = await client.query(title=paper_attributes["title"])
+
+        # compares the citation without the specific number of citations
+        assert compare_formatted_citations(
+            paper_attributes['formatted_citation'], details.formatted_citation
+        ), "Formatted citation should match"
+
         assert set(details.other["client_source"]) == set(  # type: ignore[union-attr]
             paper_attributes["source"]
         ), "Should have the correct source"
         for key, value in paper_attributes.items():
-            if key not in {"is_oa", "source"}:
+            if key not in {"is_oa", "source", "formatted_citation"}:
                 assert getattr(details, key) == value, f"Should have the correct {key}"
             elif key == "is_oa":
                 assert (

diff --git a/tests/utils/paper_helpers.py b/tests/utils/paper_helpers.py
@@ -0,0 +1,18 @@
+import re
+
+def compare_formatted_citations(expected: str, actual: str) -> bool:
+    """
+    Check that two formatted citations are equal apart from their citation count.
+
+    :param expected: The expected formatted citation string.
+    :param actual: The actual formatted citation string.
+    :return: True when both strings match once the count is masked, else False.
+    """
+    # Regex reference: https://regex101.com/r/lCN8ET/1
+    count_re = re.compile(r"(This article has )\d+( citations?)")
+
+    # Mask the digits between groups 1 and 2 with the placeholder "n", then trim.
+    masked_expected, masked_actual = (
+        count_re.sub(r"\1n\2", text).strip() for text in (expected, actual)
+    )
+    return masked_expected == masked_actual