From 8fa6be3824205743ecbc1e79dda33f9e13f14358 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 1 Dec 2023 17:19:12 -0500 Subject: [PATCH] [REF] Refactor dataset size request into separate function (#235) * refactor matching dataset size query into separate func * update tests with refactored dataset size query util --- app/api/crud.py | 39 +++++++++++++++++++++++--------- tests/conftest.py | 12 ---------- tests/test_query.py | 55 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 23 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 61c5e1c..d103a47 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -73,6 +73,32 @@ def post_query_to_graph(query: str, timeout: float = 5.0) -> dict: return response.json() +def query_matching_dataset_sizes(dataset_uuids: list) -> dict: + """ + Queries the graph for the number of subjects in each dataset in a list of dataset UUIDs. + + Parameters + ---------- + dataset_uuids : list + A list of unique dataset UUIDs. + + Returns + ------- + dict + A dictionary with keys corresponding to the dataset UUIDs and values corresponding to the number of subjects in the dataset.
+ """ + # Get the total number of subjects in each dataset that matched the query + matching_dataset_size_results = post_query_to_graph( + util.create_multidataset_size_query(dataset_uuids) + ) + return { + ds["dataset_uuid"]: int(ds["total_subjects"]) + for ds in util.unpack_http_response_json_to_dicts( + matching_dataset_size_results + ) + } + + async def get( min_age: float, max_age: float, @@ -129,18 +155,9 @@ async def get( util.unpack_http_response_json_to_dicts(results) ).reindex(columns=ATTRIBUTES_ORDER) - # Get the total number of subjects in each dataset that matched the query - matching_dataset_size_results = post_query_to_graph( - util.create_multidataset_size_query( - results_df["dataset_uuid"].unique() - ) + matching_dataset_sizes = query_matching_dataset_sizes( + results_df["dataset_uuid"].unique() ) - matching_dataset_sizes = { - ds["dataset_uuid"]: int(ds["total_subjects"]) - for ds in util.unpack_http_response_json_to_dicts( - matching_dataset_size_results - ) - } response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] diff --git a/tests/conftest.py b/tests/conftest.py index 52c30fe..279037b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -51,7 +51,6 @@ def test_data(): ] -# TODO update the test once https://github.com/neurobagel/api/issues/234 is resolved @pytest.fixture def mock_post_query_to_graph(): """Mock post_query_to_graph function that returns toy data containing a dataset with no modalities for testing.""" @@ -65,7 +64,6 @@ def mockreturn(query, timeout=5.0): "dataset_portal_uri", "sub_id", "image_modal", - "total_subjects", ] }, "results": { @@ -81,11 +79,6 @@ def mockreturn(query, timeout=5.0): }, "sub_id": {"type": "literal", "value": "sub-ON95534"}, "dataset_name": {"type": "literal", "value": "QPN"}, - "total_subjects": { - "datatype": "http://www.w3.org/2001/XMLSchema#integer", - "type": "literal", - "value": "200", - }, }, { "dataset_uuid": { @@ -102,11 +95,6 @@ def mockreturn(query, timeout=5.0): "type": 
"uri", "value": "http://purl.org/nidash/nidm#T1Weighted", }, - "total_subjects": { - "datatype": "http://www.w3.org/2001/XMLSchema#integer", - "type": "literal", - "value": "200", - }, }, ] }, diff --git a/tests/test_query.py b/tests/test_query.py index d4dfe39..de9ba35 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -6,12 +6,67 @@ from app.api import crud +def test_query_matching_dataset_sizes(monkeypatch): + """Test that graph results for dataset size queries are correctly parsed into a dictionary.""" + + def mock_post_query_to_graph(query, timeout=5.0): + return { + "head": {"vars": ["dataset_uuid", "total_subjects"]}, + "results": { + "bindings": [ + { + "dataset_uuid": { + "type": "uri", + "value": "http://neurobagel.org/vocab/ds1234", + }, + "total_subjects": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": "70", + }, + }, + { + "dataset_uuid": { + "type": "uri", + "value": "http://neurobagel.org/vocab/ds2345", + }, + "total_subjects": { + "datatype": "http://www.w3.org/2001/XMLSchema#integer", + "type": "literal", + "value": "40", + }, + }, + ] + }, + } + + monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph) + assert crud.query_matching_dataset_sizes( + [ + "http://neurobagel.org/vocab/ds1234", + "http://neurobagel.org/vocab/ds2345", + ] + ) == { + "http://neurobagel.org/vocab/ds1234": 70, + "http://neurobagel.org/vocab/ds2345": 40, + } + + def test_null_modalities( test_app, test_data, mock_post_query_to_graph, monkeypatch ): """Given a response containing a dataset with no recorded modalities, returns an empty list for the imaging modalities.""" + def mock_query_matching_dataset_sizes(dataset_uuids): + return { + "http://neurobagel.org/vocab/12345": 200, + } + monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph) + monkeypatch.setattr( + crud, "query_matching_dataset_sizes", mock_query_matching_dataset_sizes + ) + response = test_app.get("/query/") assert
response.json()[0]["image_modals"] == [ "http://purl.org/nidash/nidm#T1Weighted"