diff --git a/app/api/crud.py b/app/api/crud.py index 1cdc6e1..5ea2f82 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -24,8 +24,8 @@ "assessment", "image_modal", "dataset_name", + "dataset_uuid", "dataset_portal_uri", - "dataset_file_path", ] @@ -107,13 +107,11 @@ async def get( results_df = pd.DataFrame(results_dicts).reindex(columns=ATTRIBUTES_ORDER) response_obj = [] - dataset_cols = ["dataset_name", "dataset_portal_uri", "dataset_file_path"] + dataset_cols = ["dataset_uuid", "dataset_name"] if not results_df.empty: - for ( - dataset_name, - dataset_portal_uri, - dataset_file_path, - ), group in results_df.groupby(by=dataset_cols): + for (dataset_uuid, dataset_name), group in results_df.groupby( + by=dataset_cols + ): if util.RETURN_AGG.val: subject_data = list(group["session_file_path"].dropna()) else: @@ -139,9 +137,11 @@ async def get( response_obj.append( CohortQueryResponse( + dataset_uuid=dataset_uuid, dataset_name=dataset_name, - dataset_portal_uri=dataset_portal_uri, - dataset_file_path=dataset_file_path, + dataset_portal_uri=group["dataset_portal_uri"].iloc[0] + if group["dataset_portal_uri"].notna().all() + else None, num_matching_subjects=group["sub_id"].nunique(), subject_data=subject_data, image_modals=list(group["image_modal"].unique()), diff --git a/app/api/models.py b/app/api/models.py index e45ecc4..f1a31ff 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -1,5 +1,7 @@ """Data models.""" +from typing import Optional + from fastapi import Query from fastapi.exceptions import HTTPException from pydantic import BaseModel, constr, root_validator @@ -51,9 +53,10 @@ def check_exclusive_diagnosis_or_ctrl(cls, values): class CohortQueryResponse(BaseModel): """Data model for query results for one matching dataset (i.e., a cohort).""" + dataset_uuid: str + # dataset_file_path: str # TODO: Revisit this field once we have datasets without imaging info/sessions. dataset_name: str - dataset_portal_uri: str - dataset_file_path: str + dataset_portal_uri: Optional[str] num_matching_subjects: int subject_data: list image_modals: list diff --git a/app/api/utility.py b/app/api/utility.py index 73146dc..3fa06ee 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -53,7 +53,7 @@ CATEGORICAL_DOMAINS = [SEX, DIAGNOSIS, IMAGE_MODAL, ASSESSMENT] -IS_CONTROL_TERM = "http://purl.obolibrary.org/obo/NCIT_C94342" +IS_CONTROL_TERM = "purl:NCIT_C94342" # TODO: Remove once https://github.com/neurobagel/bagel-cli/issues/139 is resolved. def create_query( @@ -136,19 +136,20 @@ def create_query( ) query_string = f""" - SELECT DISTINCT ?dataset_name ?dataset_portal_uri ?dataset_file_path ?sub_id ?age ?sex + SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex ?diagnosis ?subject_group ?num_sessions ?session_id ?assessment ?image_modal ?session_file_path WHERE {{ - ?dataset a nb:Dataset; + ?dataset_uuid a nb:Dataset; nb:hasLabel ?dataset_name; - nb:hasPortalURI ?dataset_portal_uri; - nb:hasFilePath ?dataset_file_path; nb:hasSamples ?subject. ?subject a nb:Subject; nb:hasLabel ?sub_id; nb:hasSession ?session; nb:hasSession/nb:hasAcquisition/nb:hasContrastType ?image_modal. ?session nb:hasLabel ?session_id. + OPTIONAL {{ + ?dataset_uuid nb:hasPortalURI ?dataset_portal_uri. + }} OPTIONAL {{ ?session nb:hasFilePath ?session_file_path. }} @@ -184,9 +185,9 @@ def create_query( # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata. if return_agg: query_string = f""" - SELECT ?dataset_name ?dataset_portal_uri ?dataset_file_path ?sub_id ?session_file_path ?image_modal WHERE {{\n + SELECT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal WHERE {{\n {query_string} - \n}} GROUP BY ?dataset_name ?dataset_portal_uri ?dataset_file_path ?sub_id ?session_file_path ?image_modal + \n}} GROUP BY ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?session_file_path ?image_modal """ return "\n".join([DEFAULT_CONTEXT, query_string]) diff --git a/tests/test_query.py b/tests/test_query.py index d3263c8..da444e4 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -13,9 +13,9 @@ def test_data(): """Create toy data for two datasets for testing.""" return [ { + "dataset_uuid": "http://neurobagel.org/vocab/12345", "dataset_name": "QPN", - "dataset_portal_uri": "https://openneuro.org/datasets/ds002725", - "dataset_file_path": "https://github.com/OpenNeuroDatasets/ds002725.git", + "dataset_portal_uri": "https://rpq-qpn.ca/en/researchers-section/databases/", "num_matching_subjects": 5, "subject_data": [ "/my/happy/path/sub-0051/to/session-01", @@ -30,9 +30,9 @@ def test_data(): ], }, { + "dataset_uuid": "http://neurobagel.org/vocab/67890", "dataset_name": "PPMI", - "dataset_portal_uri": "https://openneuro.org/datasets/ds002727", - "dataset_file_path": "https://github.com/OpenNeuroDatasets/ds002727.git", + "dataset_portal_uri": "https://www.ppmi-info.org/access-data-specimens/download-data", "num_matching_subjects": 3, "subject_data": [ "/my/happy/path/sub-719238/to/session-01",