Skip to content

Commit

Permalink
[MAINT] Openneuro updates (#131)
Browse files Browse the repository at this point in the history
* remove dataset_file_path from query template + results model/processing

* add dataset uuid back to results model
- ephemeral, serves as unique dataset identifier in case of equivalent dataset names

* make dataset_portal_uri optional

* update test data: dataset_uuid, appropriate dataset_portal_uris

* Fixed the empty response bug

Since `dataset_portal_uri` is now optional and can be NaN, it was
removed from the `dataset_cols` used for the initial grouping of the
response and is now only passed to the `CohortQueryResponse` object
if it's not falsy

* Modified `IS_CONTROL_TERM` to match the CLI-generated JSON-LD

* ensure missing portal URIs are returned as null, not NaN

---------

Co-authored-by: Alyssa Dai <[email protected]>
  • Loading branch information
rmanaem and alyssadai committed Jun 21, 2023
1 parent aeee164 commit 71f0b4c
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 22 deletions.
18 changes: 9 additions & 9 deletions app/api/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"assessment",
"image_modal",
"dataset_name",
"dataset_uuid",
"dataset_portal_uri",
"dataset_file_path",
]


Expand Down Expand Up @@ -107,13 +107,11 @@ async def get(
results_df = pd.DataFrame(results_dicts).reindex(columns=ATTRIBUTES_ORDER)

response_obj = []
dataset_cols = ["dataset_name", "dataset_portal_uri", "dataset_file_path"]
dataset_cols = ["dataset_uuid", "dataset_name"]
if not results_df.empty:
for (
dataset_name,
dataset_portal_uri,
dataset_file_path,
), group in results_df.groupby(by=dataset_cols):
for (dataset_uuid, dataset_name), group in results_df.groupby(
by=dataset_cols
):
if util.RETURN_AGG.val:
subject_data = list(group["session_file_path"].dropna())
else:
Expand All @@ -139,9 +137,11 @@ async def get(

response_obj.append(
CohortQueryResponse(
dataset_uuid=dataset_uuid,
dataset_name=dataset_name,
dataset_portal_uri=dataset_portal_uri,
dataset_file_path=dataset_file_path,
dataset_portal_uri=group["dataset_portal_uri"].iloc[0]
if group["dataset_portal_uri"].notna().all()
else None,
num_matching_subjects=group["sub_id"].nunique(),
subject_data=subject_data,
image_modals=list(group["image_modal"].unique()),
Expand Down
7 changes: 5 additions & 2 deletions app/api/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Data models."""

from typing import Optional

from fastapi import Query
from fastapi.exceptions import HTTPException
from pydantic import BaseModel, constr, root_validator
Expand Down Expand Up @@ -51,9 +53,10 @@ def check_exclusive_diagnosis_or_ctrl(cls, values):
class CohortQueryResponse(BaseModel):
"""Data model for query results for one matching dataset (i.e., a cohort)."""

dataset_uuid: str
# dataset_file_path: str # TODO: Revisit this field once we have datasets without imaging info/sessions.
dataset_name: str
dataset_portal_uri: str
dataset_file_path: str
dataset_portal_uri: Optional[str]
num_matching_subjects: int
subject_data: list
image_modals: list
15 changes: 8 additions & 7 deletions app/api/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@

CATEGORICAL_DOMAINS = [SEX, DIAGNOSIS, IMAGE_MODAL, ASSESSMENT]

IS_CONTROL_TERM = "http://purl.obolibrary.org/obo/NCIT_C94342"
IS_CONTROL_TERM = "purl:NCIT_C94342" # TODO: Remove once https://github.com/neurobagel/bagel-cli/issues/139 is resolved.


def create_query(
Expand Down Expand Up @@ -136,19 +136,20 @@ def create_query(
)

query_string = f"""
SELECT DISTINCT ?dataset_name ?dataset_portal_uri ?dataset_file_path ?sub_id ?age ?sex
SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex
?diagnosis ?subject_group ?num_sessions ?session_id ?assessment ?image_modal ?session_file_path
WHERE {{
?dataset a nb:Dataset;
?dataset_uuid a nb:Dataset;
nb:hasLabel ?dataset_name;
nb:hasPortalURI ?dataset_portal_uri;
nb:hasFilePath ?dataset_file_path;
nb:hasSamples ?subject.
?subject a nb:Subject;
nb:hasLabel ?sub_id;
nb:hasSession ?session;
nb:hasSession/nb:hasAcquisition/nb:hasContrastType ?image_modal.
?session nb:hasLabel ?session_id.
OPTIONAL {{
?dataset_uuid nb:hasPortalURI ?dataset_portal_uri.
}}
OPTIONAL {{
?session nb:hasFilePath ?session_file_path.
}}
Expand Down Expand Up @@ -184,9 +185,9 @@ def create_query(
# wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata.
if return_agg:
query_string = f"""
SELECT ?dataset_name ?dataset_portal_uri ?dataset_file_path ?sub_id ?session_file_path ?image_modal WHERE {{\n
SELECT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal WHERE {{\n
{query_string}
\n}} GROUP BY ?dataset_name ?dataset_portal_uri ?dataset_file_path ?sub_id ?session_file_path ?image_modal
\n}} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal
"""

return "\n".join([DEFAULT_CONTEXT, query_string])
8 changes: 4 additions & 4 deletions tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ def test_data():
"""Create toy data for two datasets for testing."""
return [
{
"dataset_uuid": "http://neurobagel.org/vocab/12345",
"dataset_name": "QPN",
"dataset_portal_uri": "https://openneuro.org/datasets/ds002725",
"dataset_file_path": "https://github.com/OpenNeuroDatasets/ds002725.git",
"dataset_portal_uri": "https://rpq-qpn.ca/en/researchers-section/databases/",
"num_matching_subjects": 5,
"subject_data": [
"/my/happy/path/sub-0051/to/session-01",
Expand All @@ -30,9 +30,9 @@ def test_data():
],
},
{
"dataset_uuid": "http://neurobagel.org/vocab/67890",
"dataset_name": "PPMI",
"dataset_portal_uri": "https://openneuro.org/datasets/ds002727",
"dataset_file_path": "https://github.com/OpenNeuroDatasets/ds002727.git",
"dataset_portal_uri": "https://www.ppmi-info.org/access-data-specimens/download-data",
"num_matching_subjects": 3,
"subject_data": [
"/my/happy/path/sub-719238/to/session-01",
Expand Down

0 comments on commit 71f0b4c

Please sign in to comment.