From 8fa6be3824205743ecbc1e79dda33f9e13f14358 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 1 Dec 2023 17:19:12 -0500 Subject: [PATCH] [REF] Refactor dataset size request into separate function (#235) * refactor matching dataset size query into separate func * update tests with refactored dataset size query util --- app/api/crud.py | 39 +++++++++++++++++++++++--------- tests/conftest.py | 12 ---------- tests/test_query.py | 55 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 23 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 61c5e1c..d103a47 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -73,6 +73,32 @@ def post_query_to_graph(query: str, timeout: float = 5.0) -> dict: return response.json() +def query_matching_dataset_sizes(dataset_uuids: list) -> dict: + """ + Queries the graph for the number of subjects in each dataset in a list of dataset UUIDs. + + Parameters + ---------- + dataset_uuids : list + A list of unique dataset UUIDs. + + Returns + ------- + dict + A dictionary with keys corresponding to the dataset UUIDs and values corresponding to the number of subjects in the dataset.
+ """ + # Get the total number of subjects in each dataset that matched the query + matching_dataset_size_results = post_query_to_graph( + util.create_multidataset_size_query(dataset_uuids) + ) + return { + ds["dataset_uuid"]: int(ds["total_subjects"]) + for ds in util.unpack_http_response_json_to_dicts( + matching_dataset_size_results + ) + } + + async def get( min_age: float, max_age: float, @@ -129,18 +155,9 @@ async def get( util.unpack_http_response_json_to_dicts(results) ).reindex(columns=ATTRIBUTES_ORDER) - # Get the total number of subjects in each dataset that matched the query - matching_dataset_size_results = post_query_to_graph( - util.create_multidataset_size_query( - results_df["dataset_uuid"].unique() - ) + matching_dataset_sizes = query_matching_dataset_sizes( + results_df["dataset_uuid"].unique() ) - matching_dataset_sizes = { - ds["dataset_uuid"]: int(ds["total_subjects"]) - for ds in util.unpack_http_response_json_to_dicts( - matching_dataset_size_results - ) - } response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] diff --git a/tests/conftest.py b/tests/conftest.py index 52c30fe..279037b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,7 +51,6 @@ def test_data(): ] -# TODO update the test once https://github.com/neurobagel/api/issues/234 is resolved @pytest.fixture def mock_post_query_to_graph(): """Mock post_query_to_graph function that returns toy data containing a dataset with no modalities for testing.""" @@ -65,7 +64,6 @@ def mockreturn(query, timeout=5.0): "dataset_portal_uri", "sub_id", "image_modal", - "total_subjects", ] }, "results": { @@ -81,11 +79,6 @@ def mockreturn(query, timeout=5.0): }, "sub_id": {"type": "literal", "value": "sub-ON95534"}, "dataset_name": {"type": "literal", "value": "QPN"}, - "total_subjects": { - "datatype": "http://www.w3.org/2001/XMLSchema#integer", - "type": "literal", - "value": "200", - }, }, { "dataset_uuid": { @@ -102,11 +95,6 @@ def mockreturn(query, timeout=5.0): "type": 
"uri", "value": "http://purl.org/nidash/nidm#T1Weighted", }, - "total_subjects": { - "datatype": "http://www.w3.org/2001/XMLSchema#integer", - "type": "literal", - "value": "200", - }, }, ] }, diff --git a/tests/test_query.py b/tests/test_query.py index d4dfe39..de9ba35 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -6,12 +6,67 @@ from app.api import crud +def test_query_matching_dataset_sizes(monkeypatch): + """Test that graph results for dataset size queries are correctly parsed into a dictionary.""" + + def mock_post_query_to_graph(query, timeout=5.0): + return { + "head": {"vars": ["dataset_uuid", "total_subjects"]}, + "results": { + "bindings": [ + { + "dataset_uuid": { + "type": "uri", + "value": "http://neurobagel.org/vocab/ds1234", + }, + "total_subjects": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": "70", + }, + }, + { + "dataset_uuid": { + "type": "uri", + "value": "http://neurobagel.org/vocab/ds2345", + }, + "total_subjects": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": "40", + }, + }, + ] + }, + } + + monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph) + assert crud.query_matching_dataset_sizes( + [ + "http://neurobagel.org/vocab/ds1234", + "http://neurobagel.org/vocab/ds2345", + ] + ) == { + "http://neurobagel.org/vocab/ds1234": 70, + "http://neurobagel.org/vocab/ds2345": 40, + } + + def test_null_modalities( test_app, test_data, mock_post_query_to_graph, monkeypatch ): """Given a response containing a dataset with no recorded modalities, returns an empty list for the imaging modalities.""" + def mock_query_matching_dataset_sizes(dataset_uuids): + return { + "http://neurobagel.org/vocab/12345": 200, + } + monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph) + monkeypatch.setattr( + crud, "query_matching_dataset_sizes", mock_query_matching_dataset_sizes + ) + response = test_app.get("/query/") assert
response.json()[0]["image_modals"] == [ "http://purl.org/nidash/nidm#T1Weighted"