Commit 8fa6be3
[REF] Refactor dataset size request into separate function (#235)
* refactor matching dataset size query into separate func

* update tests with refactored dataset size query util
alyssadai committed Dec 1, 2023
1 parent c79b4c7 commit 8fa6be3
Showing 3 changed files with 83 additions and 23 deletions.
39 changes: 28 additions & 11 deletions app/api/crud.py
@@ -73,6 +73,32 @@ def post_query_to_graph(query: str, timeout: float = 5.0) -> dict:
return response.json()


def query_matching_dataset_sizes(dataset_uuids: list) -> dict:
    """
    Queries the graph for the number of subjects in each dataset in a list of dataset UUIDs.

    Parameters
    ----------
    dataset_uuids : list
        A list of unique dataset UUIDs.

    Returns
    -------
    dict
        A dictionary with keys corresponding to the dataset UUIDs and values corresponding to the number of subjects in the dataset.
    """
    # Get the total number of subjects in each dataset that matched the query
    matching_dataset_size_results = post_query_to_graph(
        util.create_multidataset_size_query(dataset_uuids)
    )
    return {
        ds["dataset_uuid"]: int(ds["total_subjects"])
        for ds in util.unpack_http_response_json_to_dicts(
            matching_dataset_size_results
        )
    }


async def get(
min_age: float,
max_age: float,
@@ -129,18 +155,9 @@ async def get(
util.unpack_http_response_json_to_dicts(results)
).reindex(columns=ATTRIBUTES_ORDER)

-    # Get the total number of subjects in each dataset that matched the query
-    matching_dataset_size_results = post_query_to_graph(
-        util.create_multidataset_size_query(
-            results_df["dataset_uuid"].unique()
-        )
-    )
-    matching_dataset_sizes = {
-        ds["dataset_uuid"]: int(ds["total_subjects"])
-        for ds in util.unpack_http_response_json_to_dicts(
-            matching_dataset_size_results
-        )
-    }
+    matching_dataset_sizes = query_matching_dataset_sizes(
+        results_df["dataset_uuid"].unique()
+    )

response_obj = []
dataset_cols = ["dataset_uuid", "dataset_name"]
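For orientation outside the diff: a minimal, self-contained sketch of the contract the new helper establishes. The unpack_sparql_bindings stand-in below is an assumption modeled on the test fixtures further down, not the real util.unpack_http_response_json_to_dicts; the SPARQL JSON shape is taken directly from the new test in tests/test_query.py.

# Sketch only: stand-ins for the util helpers, assuming standard SPARQL JSON results.

def unpack_sparql_bindings(response: dict) -> list[dict]:
    # Flatten each binding {"var": {"type": ..., "value": ...}} into {"var": value},
    # which is what util.unpack_http_response_json_to_dicts appears to do in crud.py.
    return [
        {var: details["value"] for var, details in binding.items()}
        for binding in response["results"]["bindings"]
    ]

def matching_dataset_sizes_from(response: dict) -> dict:
    # Mirrors the dict comprehension inside query_matching_dataset_sizes.
    return {
        ds["dataset_uuid"]: int(ds["total_subjects"])
        for ds in unpack_sparql_bindings(response)
    }

sparql_json = {
    "head": {"vars": ["dataset_uuid", "total_subjects"]},
    "results": {
        "bindings": [
            {
                "dataset_uuid": {
                    "type": "uri",
                    "value": "http://neurobagel.org/vocab/ds1234",
                },
                "total_subjects": {
                    "datatype": "http://www.w3.org/2001/XMLSchema#integer",
                    "type": "literal",
                    "value": "70",
                },
            }
        ]
    },
}

assert matching_dataset_sizes_from(sparql_json) == {
    "http://neurobagel.org/vocab/ds1234": 70
}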
12 changes: 0 additions & 12 deletions tests/conftest.py
@@ -51,7 +51,6 @@ def test_data():
]


-# TODO update the test once https://github.com/neurobagel/api/issues/234 is resolved
@pytest.fixture
def mock_post_query_to_graph():
"""Mock post_query_to_graph function that returns toy data containing a dataset with no modalities for testing."""
@@ -65,7 +64,6 @@ def mockreturn(query, timeout=5.0):
"dataset_portal_uri",
"sub_id",
"image_modal",
"total_subjects",
]
},
"results": {
@@ -81,11 +79,6 @@
},
"sub_id": {"type": "literal", "value": "sub-ON95534"},
"dataset_name": {"type": "literal", "value": "QPN"},
"total_subjects": {
"datatype": "http://www.w3.org/2001/XMLSchema#integer",
"type": "literal",
"value": "200",
},
},
{
"dataset_uuid": {
@@ -102,11 +95,6 @@
"type": "uri",
"value": "http://purl.org/nidash/nidm#T1Weighted",
},
"total_subjects": {
"datatype": "http://www.w3.org/2001/XMLSchema#integer",
"type": "literal",
"value": "200",
},
},
]
},
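The fixture above follows the standard pytest pattern of returning an inner stub that tests then patch in with monkeypatch. A stripped-down illustration of the wiring (the payload here is a placeholder, not the full fixture from the diff):

import pytest

from app.api import crud

@pytest.fixture
def mock_post_query_to_graph():
    """Return a stand-in for crud.post_query_to_graph with a canned payload."""

    def mockreturn(query, timeout=5.0):
        # Same signature as the real function; the payload is a placeholder.
        return {"head": {"vars": []}, "results": {"bindings": []}}

    return mockreturn

def test_example(mock_post_query_to_graph, monkeypatch):
    # Swap the real graph request out for the stub for the duration of the test.
    monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph)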
55 changes: 55 additions & 0 deletions tests/test_query.py
@@ -6,12 +6,67 @@
from app.api import crud


def test_query_matching_dataset_sizes(monkeypatch):
"""Test that graph results for dataset size queries are correctly parsed into a dictionary."""

def mock_post_query_to_graph(query, timeout=5.0):
return {
"head": {"vars": ["dataset_uuid", "total_subjects"]},
"results": {
"bindings": [
{
"dataset_uuid": {
"type": "uri",
"value": "http://neurobagel.org/vocab/ds1234",
},
"total_subjects": {
"datatype": "http://www.w3.org/2001/XMLSchema#integer",
"type": "literal",
"value": "70",
},
},
{
"dataset_uuid": {
"type": "uri",
"value": "http://neurobagel.org/vocab/ds2345",
},
"total_subjects": {
"datatype": "http://www.w3.org/2001/XMLSchema#integer",
"type": "literal",
"value": "40",
},
},
]
},
}

monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph)
assert crud.query_matching_dataset_sizes(
[
"http://neurobagel.org/vocab/ds1234",
"http://neurobagel.org/vocab/ds2345",
]
) == {
"http://neurobagel.org/vocab/ds1234": 70,
"http://neurobagel.org/vocab/ds2345": 40,
}


def test_null_modalities(
test_app, test_data, mock_post_query_to_graph, monkeypatch
):
"""Given a response containing a dataset with no recorded modalities, returns an empty list for the imaging modalities."""

def mock_query_matching_dataset_sizes(dataset_uuids):
return {
"http://neurobagel.org/vocab/12345": 200,
}

monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph)
monkeypatch.setattr(
crud, "query_matching_dataset_sizes", mock_query_matching_dataset_sizes
)

response = test_app.get("/query/")
assert response.json()[0]["image_modals"] == [
"http://purl.org/nidash/nidm#T1Weighted"
]
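One detail the helper and the fixtures both reflect: SPARQL JSON returns typed literals as strings (e.g. "value": "200" alongside an xsd:integer datatype), which is why query_matching_dataset_sizes casts total_subjects with int(). In miniature:

binding = {
    "datatype": "http://www.w3.org/2001/XMLSchema#integer",
    "type": "literal",
    "value": "200",
}
# The value arrives as a string; the cast makes it usable as a count.
assert int(binding["value"]) == 200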
