diff --git a/.gitignore b/.gitignore index 956145d..91748e3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ src/ibc_api.egg-info src/ibc_api/__pycache__ +src/ibc_api/data +src/ibc-api ibc_data .ipynb_checkpoints \ No newline at end of file diff --git a/README.md b/README.md index bb8be8f..8dda350 100644 --- a/README.md +++ b/README.md @@ -20,16 +20,16 @@ import ibc_api.utils as ibc ibc.authenticate() # Fetch info on all available files -# Load as a pandas dataframe and save as ibc_data/available_statistic_map.csv -db = ibc.get_info(data_type="statistic_map") +# Load as a pandas dataframe and save as ibc_data/available_{data_type}.csv +db = ibc.get_info(data_type="volume_maps") # Keep statistic maps for sub-08, for task-Discount filtered_db = ibc.filter_data(db, subject_list=["sub-08"], task_list=["Discount"]) # Download all statistic maps for sub-08, task-Discount # Saved under ibc_data/resulting_smooth_maps/sub-08/task-Discount -# Also create ibc_data/downloaded_statistic_map.csv -# This contains downloaded file paths and time of download +# Also creates ibc_data/downloaded_volume_maps.csv +# which contains downloaded file paths and time of download downloaded_db = ibc.download_data(filtered_db, organise_by='task') ``` # Note diff --git a/examples/example.py b/examples/example.py index 511ac91..87080ad 100644 --- a/examples/example.py +++ b/examples/example.py @@ -4,14 +4,14 @@ ibc.authenticate() # Fetch info on all available files -# Load as a pandas dataframe and save as ibc_data/available_statistic_map.csv -db = ibc.get_info(data_type="statistic_map") +# Load as a pandas dataframe and save as ibc_data/available_{data_type}.csv +db = ibc.get_info(data_type="volume_maps") # Keep statistic maps for sub-08, for task-Discount filtered_db = ibc.filter_data(db, subject_list=["sub-08"], task_list=["Discount"]) # Download all statistic maps for sub-08, task-Discount # Saved under ibc_data/resulting_smooth_maps/sub-08/task-Discount -# Also creates 
ibc_data/downloaded_statistic_map.csv +# Also creates ibc_data/downloaded_volume_maps.csv # which contains downloaded file paths and time of download downloaded_db = ibc.download_data(filtered_db, organise_by='task') \ No newline at end of file diff --git a/examples/get_data.ipynb b/examples/get_data.ipynb index cfc8993..159e192 100644 --- a/examples/get_data.ipynb +++ b/examples/get_data.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -12,7 +11,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -28,10 +26,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "[siibra:INFO] Version: 0.4a47\n", + "[siibra:INFO] Version: 0.4a61\n", "[siibra:WARNING] This is a development release. Use at your own risk.\n", - "[siibra:INFO] Please file bugs and issues at https://github.com/FZJ-INM1-BDA/siibra-python.\n", - "[siibra:INFO] Clearing siibra cache at /home/himanshu/.cache/siibra.retrieval\n" + "[siibra:INFO] Please file bugs and issues at https://github.com/FZJ-INM1-BDA/siibra-python.\n" ] } ], @@ -40,7 +37,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -58,7 +54,7 @@ "output_type": "stream", "text": [ "***\n", - "To continue, please go to https://iam.ebrains.eu/auth/realms/hbp/device?user_code=SEBC-RHFZ\n", + "To continue, please go to https://iam.ebrains.eu/auth/realms/hbp/device?user_code=USBN-YMBC\n", "***\n", "ebrains token successfuly set.\n" ] @@ -69,15 +65,14 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "To see what is available for a given data type on IBC, we need fetch the file that contains that information.\n", "The following loads a CSV file with all that info as a pandas dataframe and\n", - "saves it as ``ibc_data/available_statistic_map.csv``.\n", + "saves it as ``ibc_data/available_{data_type}.csv``.\n", "\n", - "Let's do that for IBC statistic maps.\n", + "Let's do 
that for IBC volumetric contrast maps.\n", "\n" ] }, @@ -90,16 +85,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "[siibra:INFO] 33194 objects found for dataset 07ab1665-73b0-40c5-800e-557bc319109d returned.\n" + "[siibra:INFO] 139625 objects found for dataset ad04f919-7dcc-48d9-864a-d7b62af3d49d returned.\n" ] } ], "source": [ - "db = ibc.get_info(data_type=\"statistic_map\")" + "db = ibc.get_info(data_type=\"volume_maps\")" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -152,13 +146,13 @@ " \n", " 0\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " ArchiStandard\n", " S\n", " 1\n", - " visual_sentence_comprehension, response_select...\n", + " visual_sentence_comprehension,response_selecti...\n", " trm_5873cd1c9d4c4\n", " http://www.cognitiveatlas.org/task/id/trm_5873...\n", " left hand button presses upon video instructions\n", @@ -168,65 +162,65 @@ " \n", " 1\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " ArchiStandard\n", " S\n", " 1\n", - " visual_sentence_comprehension, response_select...\n", + " visual_sentence_comprehension,response_selecti...\n", " trm_5873cd1c9d4c4\n", " http://www.cognitiveatlas.org/task/id/trm_5873...\n", " left hand button presses upon video instructions\n", - " sub-01_ses-07_task-ArchiStandard_dir-ap_space-...\n", + " sub-01_ses-00_task-ArchiStandard_dir-ffx_space...\n", " sub-01\n", " \n", " \n", " 2\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " ArchiStandard\n", " S\n", " 1\n", - " visual_sentence_comprehension, response_select...\n", + " visual_sentence_comprehension,response_selecti...\n", " trm_5873cd1c9d4c4\n", " http://www.cognitiveatlas.org/task/id/trm_5873...\n", - " right hand button presses upon video instructions\n", - " sub-01_ses-00_task-ArchiStandard_dir-ap_space-...\n", + " left hand button presses upon video instructions\n", + " 
sub-01_ses-00_task-ArchiStandard_dir-pa_space-...\n", " sub-01\n", " \n", " \n", " 3\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " ArchiStandard\n", " S\n", " 1\n", - " visual_sentence_comprehension, response_select...\n", + " visual_sentence_comprehension,response_selecti...\n", " trm_5873cd1c9d4c4\n", " http://www.cognitiveatlas.org/task/id/trm_5873...\n", - " right hand button presses upon video instructions\n", + " left hand button presses upon video instructions\n", " sub-01_ses-07_task-ArchiStandard_dir-ap_space-...\n", " sub-01\n", " \n", " \n", " 4\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " ArchiStandard\n", " S\n", " 1\n", - " auditory_sentence_comprehension, response_sele...\n", + " visual_sentence_comprehension,response_selecti...\n", " trm_5873cd1c9d4c4\n", " http://www.cognitiveatlas.org/task/id/trm_5873...\n", - " left hand button presses upon audio instructions\n", - " sub-01_ses-00_task-ArchiStandard_dir-ap_space-...\n", + " left hand button presses upon video instructions\n", + " sub-01_ses-07_task-ArchiStandard_dir-ffx_space...\n", " sub-01\n", " \n", " \n", @@ -246,116 +240,116 @@ " ...\n", " \n", " \n", - " 18393\n", + " 26568\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", - " MathLanguage\n", + " RewProc\n", " S\n", " 1\n", - " theory-of-mind\n", - " trm_55217a9f473f0\n", - " http://www.cognitiveatlas.org/task/id/trm_5521...\n", - " NaN\n", - " sub-15_ses-30_task-MathLanguage_dir-ffx_space-...\n", + " reward_valuation,reward_processing\n", + " trm_550b5c1a7f4db\n", + " http://www.cognitiveatlas.org/task/id/trm_550b...\n", + " gained vs lost 20 or 10 units of reward\n", + " sub-15_ses-39_task-RewProc_dir-unknown_space-M...\n", " sub-15\n", " \n", " \n", - " 18394\n", + " 26569\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", - " MathLanguage\n", + " RewProc\n", " S\n", " 1\n", - " NaN\n", - " 
trm_55217a9f473f0\n", - " http://www.cognitiveatlas.org/task/id/trm_5521...\n", - " NaN\n", - " sub-15_ses-30_task-MathLanguage_dir-ffx_space-...\n", + " reward_valuation,reward_processing\n", + " trm_550b5c1a7f4db\n", + " http://www.cognitiveatlas.org/task/id/trm_550b...\n", + " gained vs lost 20 or 10 units of reward\n", + " sub-15_ses-39_task-RewProc_dir-unknown_space-M...\n", " sub-15\n", " \n", " \n", - " 18395\n", + " 26570\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", - " MathLanguage\n", + " RewProc\n", " S\n", " 1\n", - " theory-of-mind\n", - " trm_55217a9f473f0\n", - " http://www.cognitiveatlas.org/task/id/trm_5521...\n", - " NaN\n", - " sub-15_ses-30_task-MathLanguage_dir-ffx_space-...\n", + " risk_aversion,risk_processing,loss_aversion\n", + " trm_550b5c1a7f4db\n", + " http://www.cognitiveatlas.org/task/id/trm_550b...\n", + " lost vs gained 20 or 10 units of reward\n", + " sub-15_ses-39_task-RewProc_dir-ffx_space-MNI15...\n", " sub-15\n", " \n", " \n", - " 18396\n", + " 26571\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", - " MathLanguage\n", + " RewProc\n", " S\n", " 1\n", - " NaN\n", - " trm_55217a9f473f0\n", - " http://www.cognitiveatlas.org/task/id/trm_5521...\n", - " NaN\n", - " sub-15_ses-30_task-MathLanguage_dir-ffx_space-...\n", + " risk_aversion,risk_processing,loss_aversion\n", + " trm_550b5c1a7f4db\n", + " http://www.cognitiveatlas.org/task/id/trm_550b...\n", + " lost vs gained 20 or 10 units of reward\n", + " sub-15_ses-39_task-RewProc_dir-unknown_space-M...\n", " sub-15\n", " \n", " \n", - " 18397\n", + " 26572\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", - " MathLanguage\n", + " RewProc\n", " S\n", " 1\n", - " NaN\n", - " trm_55217a9f473f0\n", - " http://www.cognitiveatlas.org/task/id/trm_5521...\n", - " NaN\n", - " sub-15_ses-30_task-MathLanguage_dir-ffx_space-...\n", + " risk_aversion,risk_processing,loss_aversion\n", + " 
trm_550b5c1a7f4db\n", + " http://www.cognitiveatlas.org/task/id/trm_550b...\n", + " lost vs gained 20 or 10 units of reward\n", + " sub-15_ses-39_task-RewProc_dir-unknown_space-M...\n", " sub-15\n", " \n", " \n", "\n", - "

18398 rows × 13 columns

\n", + "

26573 rows × 13 columns

\n", "" ], "text/plain": [ - " modality image_type map_type study task analysis_level \\\n", - "0 fMRI-BOLD statistic_map Z IBC ArchiStandard S \n", - "1 fMRI-BOLD statistic_map Z IBC ArchiStandard S \n", - "2 fMRI-BOLD statistic_map Z IBC ArchiStandard S \n", - "3 fMRI-BOLD statistic_map Z IBC ArchiStandard S \n", - "4 fMRI-BOLD statistic_map Z IBC ArchiStandard S \n", - "... ... ... ... ... ... ... \n", - "18393 fMRI-BOLD statistic_map Z IBC MathLanguage S \n", - "18394 fMRI-BOLD statistic_map Z IBC MathLanguage S \n", - "18395 fMRI-BOLD statistic_map Z IBC MathLanguage S \n", - "18396 fMRI-BOLD statistic_map Z IBC MathLanguage S \n", - "18397 fMRI-BOLD statistic_map Z IBC MathLanguage S \n", + " modality image_type map_type study task analysis_level \\\n", + "0 fMRI-BOLD volume_maps Z IBC ArchiStandard S \n", + "1 fMRI-BOLD volume_maps Z IBC ArchiStandard S \n", + "2 fMRI-BOLD volume_maps Z IBC ArchiStandard S \n", + "3 fMRI-BOLD volume_maps Z IBC ArchiStandard S \n", + "4 fMRI-BOLD volume_maps Z IBC ArchiStandard S \n", + "... ... ... ... ... ... ... \n", + "26568 fMRI-BOLD volume_maps Z IBC RewProc S \n", + "26569 fMRI-BOLD volume_maps Z IBC RewProc S \n", + "26570 fMRI-BOLD volume_maps Z IBC RewProc S \n", + "26571 fMRI-BOLD volume_maps Z IBC RewProc S \n", + "26572 fMRI-BOLD volume_maps Z IBC RewProc S \n", "\n", " number_of_subjects tags \\\n", - "0 1 visual_sentence_comprehension, response_select... \n", - "1 1 visual_sentence_comprehension, response_select... \n", - "2 1 visual_sentence_comprehension, response_select... \n", - "3 1 visual_sentence_comprehension, response_select... \n", - "4 1 auditory_sentence_comprehension, response_sele... \n", + "0 1 visual_sentence_comprehension,response_selecti... \n", + "1 1 visual_sentence_comprehension,response_selecti... \n", + "2 1 visual_sentence_comprehension,response_selecti... \n", + "3 1 visual_sentence_comprehension,response_selecti... \n", + "4 1 visual_sentence_comprehension,response_selecti... \n", "... 
... ... \n", - "18393 1 theory-of-mind \n", - "18394 1 NaN \n", - "18395 1 theory-of-mind \n", - "18396 1 NaN \n", - "18397 1 NaN \n", + "26568 1 reward_valuation,reward_processing \n", + "26569 1 reward_valuation,reward_processing \n", + "26570 1 risk_aversion,risk_processing,loss_aversion \n", + "26571 1 risk_aversion,risk_processing,loss_aversion \n", + "26572 1 risk_aversion,risk_processing,loss_aversion \n", "\n", " cognitive_paradigm_cogatlas \\\n", "0 trm_5873cd1c9d4c4 \n", @@ -364,11 +358,11 @@ "3 trm_5873cd1c9d4c4 \n", "4 trm_5873cd1c9d4c4 \n", "... ... \n", - "18393 trm_55217a9f473f0 \n", - "18394 trm_55217a9f473f0 \n", - "18395 trm_55217a9f473f0 \n", - "18396 trm_55217a9f473f0 \n", - "18397 trm_55217a9f473f0 \n", + "26568 trm_550b5c1a7f4db \n", + "26569 trm_550b5c1a7f4db \n", + "26570 trm_550b5c1a7f4db \n", + "26571 trm_550b5c1a7f4db \n", + "26572 trm_550b5c1a7f4db \n", "\n", " cognitive_paradigm_description_url \\\n", "0 http://www.cognitiveatlas.org/task/id/trm_5873... \n", @@ -377,39 +371,39 @@ "3 http://www.cognitiveatlas.org/task/id/trm_5873... \n", "4 http://www.cognitiveatlas.org/task/id/trm_5873... \n", "... ... \n", - "18393 http://www.cognitiveatlas.org/task/id/trm_5521... \n", - "18394 http://www.cognitiveatlas.org/task/id/trm_5521... \n", - "18395 http://www.cognitiveatlas.org/task/id/trm_5521... \n", - "18396 http://www.cognitiveatlas.org/task/id/trm_5521... \n", - "18397 http://www.cognitiveatlas.org/task/id/trm_5521... \n", + "26568 http://www.cognitiveatlas.org/task/id/trm_550b... \n", + "26569 http://www.cognitiveatlas.org/task/id/trm_550b... \n", + "26570 http://www.cognitiveatlas.org/task/id/trm_550b... \n", + "26571 http://www.cognitiveatlas.org/task/id/trm_550b... \n", + "26572 http://www.cognitiveatlas.org/task/id/trm_550b... 
\n", "\n", - " contrast_definition \\\n", - "0 left hand button presses upon video instructions \n", - "1 left hand button presses upon video instructions \n", - "2 right hand button presses upon video instructions \n", - "3 right hand button presses upon video instructions \n", - "4 left hand button presses upon audio instructions \n", - "... ... \n", - "18393 NaN \n", - "18394 NaN \n", - "18395 NaN \n", - "18396 NaN \n", - "18397 NaN \n", + " contrast_definition \\\n", + "0 left hand button presses upon video instructions \n", + "1 left hand button presses upon video instructions \n", + "2 left hand button presses upon video instructions \n", + "3 left hand button presses upon video instructions \n", + "4 left hand button presses upon video instructions \n", + "... ... \n", + "26568 gained vs lost 20 or 10 units of reward \n", + "26569 gained vs lost 20 or 10 units of reward \n", + "26570 lost vs gained 20 or 10 units of reward \n", + "26571 lost vs gained 20 or 10 units of reward \n", + "26572 lost vs gained 20 or 10 units of reward \n", "\n", " path subject \n", "0 sub-01_ses-00_task-ArchiStandard_dir-ap_space-... sub-01 \n", - "1 sub-01_ses-07_task-ArchiStandard_dir-ap_space-... sub-01 \n", - "2 sub-01_ses-00_task-ArchiStandard_dir-ap_space-... sub-01 \n", + "1 sub-01_ses-00_task-ArchiStandard_dir-ffx_space... sub-01 \n", + "2 sub-01_ses-00_task-ArchiStandard_dir-pa_space-... sub-01 \n", "3 sub-01_ses-07_task-ArchiStandard_dir-ap_space-... sub-01 \n", - "4 sub-01_ses-00_task-ArchiStandard_dir-ap_space-... sub-01 \n", + "4 sub-01_ses-07_task-ArchiStandard_dir-ffx_space... sub-01 \n", "... ... ... \n", - "18393 sub-15_ses-30_task-MathLanguage_dir-ffx_space-... sub-15 \n", - "18394 sub-15_ses-30_task-MathLanguage_dir-ffx_space-... sub-15 \n", - "18395 sub-15_ses-30_task-MathLanguage_dir-ffx_space-... sub-15 \n", - "18396 sub-15_ses-30_task-MathLanguage_dir-ffx_space-... sub-15 \n", - "18397 sub-15_ses-30_task-MathLanguage_dir-ffx_space-... 
sub-15 \n", + "26568 sub-15_ses-39_task-RewProc_dir-unknown_space-M... sub-15 \n", + "26569 sub-15_ses-39_task-RewProc_dir-unknown_space-M... sub-15 \n", + "26570 sub-15_ses-39_task-RewProc_dir-ffx_space-MNI15... sub-15 \n", + "26571 sub-15_ses-39_task-RewProc_dir-unknown_space-M... sub-15 \n", + "26572 sub-15_ses-39_task-RewProc_dir-unknown_space-M... sub-15 \n", "\n", - "[18398 rows x 13 columns]" + "[26573 rows x 13 columns]" ] }, "execution_count": 4, @@ -422,11 +416,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "There are over 18000 statistic maps available for download.\n", + "There are over 26000 statistic maps available for download.\n", "But since it's a pandas dataframe, we can filter it to get just what we want.\n", "Let's see how many statistic maps are available for each task.\n", "\n" @@ -440,50 +433,63 @@ { "data": { "text/plain": [ - "Audio 2793\n", + "Audio 2926\n", + "MathLanguage 2880\n", "ArchiStandard 1794\n", "RSVPLanguage 1729\n", - "Audi 1488\n", - "MTTWE 988\n", "MTTNS 912\n", + "MTTWE 912\n", + "Audi 900\n", + "SpatialNavigation 864\n", "ArchiSocial 702\n", "Self 660\n", "Visu 576\n", - "ArchiSpatial 546\n", - "ArchiEmotional 546\n", + "BiologicalMotion1 550\n", + "BiologicalMotion2 550\n", + "VSTMC 550\n", "HcpWm 546\n", + "ArchiEmotional 546\n", + "ArchiSpatial 546\n", + "RewProc 459\n", + "FaceBody 450\n", "HcpMotor 429\n", "MVEB 396\n", - "MathLanguage 390\n", "DotPatterns 363\n", + "NARPS 360\n", "WardAndAllport 330\n", + "Scene 330\n", + "TwoByTwo 330\n", + "Attention 330\n", + "EmoReco 330\n", "MCSE 324\n", "Moto 324\n", - "MVIS 216\n", + "SelectiveStopSignal 264\n", + "StopNogo 231\n", "Lec1 216\n", + "MVIS 216\n", + "EmoMem 198\n", "VSTM 180\n", + "FingerTapping 165\n", + "HcpLanguage 156\n", "HcpGambling 156\n", "HcpEmotion 156\n", - "HcpLanguage 156\n", "HcpSocial 117\n", "HcpRelational 117\n", "PreferenceFaces 111\n", - "Enumeration 108\n", - "Lec2 108\n", - "PreferenceFood 108\n", - 
"PainMovie 108\n", - "EmotionalPain 108\n", "TheoryOfMind 108\n", + "EmotionalPain 108\n", + "PreferenceFood 108\n", + "Lec2 108\n", "PreferenceHouses 108\n", + "Enumeration 108\n", + "PainMovie 108\n", "PreferencePaintings 105\n", + "Stroop 99\n", + "Catell 99\n", + "StopSignal 99\n", "ColumbiaCards 96\n", "Bang 72\n", - "Attention 30\n", - "TwoByTwo 30\n", - "SelectiveStopSignal 24\n", - "StopSignal 9\n", - "Stroop 9\n", - "Discount 6\n", + "Discount 66\n", "Name: task, dtype: int64" ] }, @@ -497,7 +503,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -506,12 +511,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "For this example, let's just download the 6 maps from Discount task. You can filter the maps for a task like this.\n", - "\n" + "For this example, let's just download the maps from Discount task, only for sub-08. You can filter the maps for tasks and subjects like this.\n" ] }, { @@ -557,15 +560,15 @@ " \n", " \n", " \n", - " 3183\n", + " 12500\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " Discount\n", " S\n", " 1\n", - " response_conflict, selective_control\n", + " response_conflict,selective_control\n", " trm_566748c929afc\n", " http://www.cognitiveatlas.org/task/id/trm_5667...\n", " effect of delay on reward\n", @@ -573,31 +576,31 @@ " sub-08\n", " \n", " \n", - " 3184\n", + " 12501\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " Discount\n", " S\n", " 1\n", - " incentive salience, selective_control\n", + " response_conflict,selective_control\n", " trm_566748c929afc\n", " http://www.cognitiveatlas.org/task/id/trm_5667...\n", - " effect of reward gain\n", - " sub-08_ses-27_task-Discount_dir-ap_space-MNI15...\n", + " effect of delay on reward\n", + " sub-08_ses-27_task-Discount_dir-ffx_space-MNI1...\n", " sub-08\n", " \n", " \n", - " 9809\n", + " 12502\n", " fMRI-BOLD\n", - " statistic_map\n", + " 
volume_maps\n", " Z\n", " IBC\n", " Discount\n", " S\n", " 1\n", - " response_conflict, selective_control\n", + " response_conflict,selective_control\n", " trm_566748c929afc\n", " http://www.cognitiveatlas.org/task/id/trm_5667...\n", " effect of delay on reward\n", @@ -605,51 +608,51 @@ " sub-08\n", " \n", " \n", - " 9810\n", + " 12503\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " Discount\n", " S\n", " 1\n", - " incentive salience, selective_control\n", + " incentive_salience,selective_control\n", " trm_566748c929afc\n", " http://www.cognitiveatlas.org/task/id/trm_5667...\n", " effect of reward gain\n", - " sub-08_ses-27_task-Discount_dir-pa_space-MNI15...\n", + " sub-08_ses-27_task-Discount_dir-ap_space-MNI15...\n", " sub-08\n", " \n", " \n", - " 15823\n", + " 12504\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " Discount\n", " S\n", " 1\n", - " response_conflict, selective_control\n", + " incentive_salience,selective_control\n", " trm_566748c929afc\n", " http://www.cognitiveatlas.org/task/id/trm_5667...\n", - " effect of delay on reward\n", + " effect of reward gain\n", " sub-08_ses-27_task-Discount_dir-ffx_space-MNI1...\n", " sub-08\n", " \n", " \n", - " 15824\n", + " 12505\n", " fMRI-BOLD\n", - " statistic_map\n", + " volume_maps\n", " Z\n", " IBC\n", " Discount\n", " S\n", " 1\n", - " incentive salience, selective_control\n", + " incentive_salience,selective_control\n", " trm_566748c929afc\n", " http://www.cognitiveatlas.org/task/id/trm_5667...\n", " effect of reward gain\n", - " sub-08_ses-27_task-Discount_dir-ffx_space-MNI1...\n", + " sub-08_ses-27_task-Discount_dir-pa_space-MNI15...\n", " sub-08\n", " \n", " \n", @@ -657,53 +660,53 @@ "" ], "text/plain": [ - " modality image_type map_type study task analysis_level \\\n", - "3183 fMRI-BOLD statistic_map Z IBC Discount S \n", - "3184 fMRI-BOLD statistic_map Z IBC Discount S \n", - "9809 fMRI-BOLD statistic_map Z IBC Discount S \n", - "9810 
fMRI-BOLD statistic_map Z IBC Discount S \n", - "15823 fMRI-BOLD statistic_map Z IBC Discount S \n", - "15824 fMRI-BOLD statistic_map Z IBC Discount S \n", + " modality image_type map_type study task analysis_level \\\n", + "12500 fMRI-BOLD volume_maps Z IBC Discount S \n", + "12501 fMRI-BOLD volume_maps Z IBC Discount S \n", + "12502 fMRI-BOLD volume_maps Z IBC Discount S \n", + "12503 fMRI-BOLD volume_maps Z IBC Discount S \n", + "12504 fMRI-BOLD volume_maps Z IBC Discount S \n", + "12505 fMRI-BOLD volume_maps Z IBC Discount S \n", "\n", - " number_of_subjects tags \\\n", - "3183 1 response_conflict, selective_control \n", - "3184 1 incentive salience, selective_control \n", - "9809 1 response_conflict, selective_control \n", - "9810 1 incentive salience, selective_control \n", - "15823 1 response_conflict, selective_control \n", - "15824 1 incentive salience, selective_control \n", + " number_of_subjects tags \\\n", + "12500 1 response_conflict,selective_control \n", + "12501 1 response_conflict,selective_control \n", + "12502 1 response_conflict,selective_control \n", + "12503 1 incentive_salience,selective_control \n", + "12504 1 incentive_salience,selective_control \n", + "12505 1 incentive_salience,selective_control \n", "\n", " cognitive_paradigm_cogatlas \\\n", - "3183 trm_566748c929afc \n", - "3184 trm_566748c929afc \n", - "9809 trm_566748c929afc \n", - "9810 trm_566748c929afc \n", - "15823 trm_566748c929afc \n", - "15824 trm_566748c929afc \n", + "12500 trm_566748c929afc \n", + "12501 trm_566748c929afc \n", + "12502 trm_566748c929afc \n", + "12503 trm_566748c929afc \n", + "12504 trm_566748c929afc \n", + "12505 trm_566748c929afc \n", "\n", " cognitive_paradigm_description_url \\\n", - "3183 http://www.cognitiveatlas.org/task/id/trm_5667... \n", - "3184 http://www.cognitiveatlas.org/task/id/trm_5667... \n", - "9809 http://www.cognitiveatlas.org/task/id/trm_5667... \n", - "9810 http://www.cognitiveatlas.org/task/id/trm_5667... 
\n", - "15823 http://www.cognitiveatlas.org/task/id/trm_5667... \n", - "15824 http://www.cognitiveatlas.org/task/id/trm_5667... \n", + "12500 http://www.cognitiveatlas.org/task/id/trm_5667... \n", + "12501 http://www.cognitiveatlas.org/task/id/trm_5667... \n", + "12502 http://www.cognitiveatlas.org/task/id/trm_5667... \n", + "12503 http://www.cognitiveatlas.org/task/id/trm_5667... \n", + "12504 http://www.cognitiveatlas.org/task/id/trm_5667... \n", + "12505 http://www.cognitiveatlas.org/task/id/trm_5667... \n", "\n", " contrast_definition \\\n", - "3183 effect of delay on reward \n", - "3184 effect of reward gain \n", - "9809 effect of delay on reward \n", - "9810 effect of reward gain \n", - "15823 effect of delay on reward \n", - "15824 effect of reward gain \n", + "12500 effect of delay on reward \n", + "12501 effect of delay on reward \n", + "12502 effect of delay on reward \n", + "12503 effect of reward gain \n", + "12504 effect of reward gain \n", + "12505 effect of reward gain \n", "\n", " path subject \n", - "3183 sub-08_ses-27_task-Discount_dir-ap_space-MNI15... sub-08 \n", - "3184 sub-08_ses-27_task-Discount_dir-ap_space-MNI15... sub-08 \n", - "9809 sub-08_ses-27_task-Discount_dir-pa_space-MNI15... sub-08 \n", - "9810 sub-08_ses-27_task-Discount_dir-pa_space-MNI15... sub-08 \n", - "15823 sub-08_ses-27_task-Discount_dir-ffx_space-MNI1... sub-08 \n", - "15824 sub-08_ses-27_task-Discount_dir-ffx_space-MNI1... sub-08 " + "12500 sub-08_ses-27_task-Discount_dir-ap_space-MNI15... sub-08 \n", + "12501 sub-08_ses-27_task-Discount_dir-ffx_space-MNI1... sub-08 \n", + "12502 sub-08_ses-27_task-Discount_dir-pa_space-MNI15... sub-08 \n", + "12503 sub-08_ses-27_task-Discount_dir-ap_space-MNI15... sub-08 \n", + "12504 sub-08_ses-27_task-Discount_dir-ffx_space-MNI1... sub-08 \n", + "12505 sub-08_ses-27_task-Discount_dir-pa_space-MNI15... 
sub-08 " ] }, "execution_count": 6, @@ -712,12 +715,11 @@ } ], "source": [ - "filtered_db = ibc.filter_data(db, task_list=[\"Discount\"])\n", + "filtered_db = ibc.filter_data(db, task_list=[\"Discount\"], subject_list=[\"sub-08\"])\n", "filtered_db" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -725,7 +727,7 @@ "\n", "The following will save the requested maps under\n", "``ibc_data/resulting_smooth_maps/sub-08/task-Discount`` \n", - "(or whatever subject you chose). And will also create a local CSV file ``ibc_data/downloaded_statistic_map.csv`` to track the downloaded files. This will contain local file paths and the time they were downloaded at, and is updated everytime you download new files.\n" + "(or whatever subject you chose). And will also create a local CSV file ``ibc_data/downloaded_volume_maps.csv`` to track the downloaded files. This will contain local file paths and the time they were downloaded at, and is updated everytime you download new files.\n" ] }, { @@ -737,22 +739,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "[siibra:INFO] 33194 objects found for dataset 07ab1665-73b0-40c5-800e-557bc319109d returned.\n", - "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:05<00:00, 1.15it/s]" + "[siibra:INFO] 139625 objects found for dataset ad04f919-7dcc-48d9-864a-d7b62af3d49d returned.\n", + "100%|███████████████████████████████████████████████████████| 6/6 [00:01<00:00, 5.65it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Downloaded requested files from IBC statistic_map dataset. 
See ibc_data/downloaded_statistic_map.csv for details.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "Downloaded requested files from IBC volume_maps dataset. See ibc_data/downloaded_volume_maps.csv for details.\n" ] }, { @@ -784,32 +779,32 @@ " \n", " 0\n", " ibc_data/resulting_smooth_maps/sub-08/task-Dis...\n", - " 2023-05-12 11:36:49.203279\n", + " 2023-07-25 18:23:01.270473\n", " \n", " \n", " 1\n", " ibc_data/resulting_smooth_maps/sub-08/task-Dis...\n", - " 2023-05-12 11:36:50.066253\n", + " 2023-07-25 18:23:01.439383\n", " \n", " \n", " 2\n", " ibc_data/resulting_smooth_maps/sub-08/task-Dis...\n", - " 2023-05-12 11:36:50.994438\n", + " 2023-07-25 18:23:01.611974\n", " \n", " \n", " 3\n", " ibc_data/resulting_smooth_maps/sub-08/task-Dis...\n", - " 2023-05-12 11:36:51.869514\n", + " 2023-07-25 18:23:01.801682\n", " \n", " \n", " 4\n", " ibc_data/resulting_smooth_maps/sub-08/task-Dis...\n", - " 2023-05-12 11:36:52.702647\n", + " 2023-07-25 18:23:01.982841\n", " \n", " \n", " 5\n", " ibc_data/resulting_smooth_maps/sub-08/task-Dis...\n", - " 2023-05-12 11:36:53.589603\n", + " 2023-07-25 18:23:02.156350\n", " \n", " \n", "\n", @@ -825,12 +820,12 @@ "5 ibc_data/resulting_smooth_maps/sub-08/task-Dis... 
\n", "\n", " downloaded_on \n", - "0 2023-05-12 11:36:49.203279 \n", - "1 2023-05-12 11:36:50.066253 \n", - "2 2023-05-12 11:36:50.994438 \n", - "3 2023-05-12 11:36:51.869514 \n", - "4 2023-05-12 11:36:52.702647 \n", - "5 2023-05-12 11:36:53.589603 " + "0 2023-07-25 18:23:01.270473 \n", + "1 2023-07-25 18:23:01.439383 \n", + "2 2023-07-25 18:23:01.611974 \n", + "3 2023-07-25 18:23:01.801682 \n", + "4 2023-07-25 18:23:01.982841 \n", + "5 2023-07-25 18:23:02.156350 " ] }, "execution_count": 7, @@ -844,7 +839,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -856,10 +850,18 @@ "execution_count": 8, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/himanshu/Desktop/ibc_analysis/ibcpy/lib/python3.8/site-packages/nilearn/plotting/img_plotting.py:300: FutureWarning: Default resolution of the MNI template will change from 2mm to 1mm in version 0.10.0\n", + " anat_img = load_mni152_template()\n" + ] + }, { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -868,9 +870,9 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -883,6 +885,13 @@ "map_path = downloaded_db[\"local_path\"][0]\n", "plot_stat_map(map_path)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/src/ibc_api/metadata.py b/src/ibc_api/metadata.py new file mode 100644 index 0000000..71e140f --- /dev/null +++ b/src/ibc_api/metadata.py @@ -0,0 +1,163 @@ +"""Functions to fetch metadata about the available IBC datasets.""" + +import json +import os + +REMOTE_ROOT = ( + "https://api.github.com/repos/individual-brain-charting/docs/contents" +) + +LOCAL_ROOT = os.path.join(os.path.dirname(__file__), "data") +os.makedirs(LOCAL_ROOT, exist_ok=True) + +SUBJECTS = [ + f"sub-{sub:02d}" for sub in [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15] +] + + +def _load_json(data_file): + """Read a given json file + + Parameters + ---------- + data_file : str + path to json file to read + + Returns + ------- + dict + json file loaded as a dictionary + """ + with open(data_file, "r") as f: + data = json.load(f) + + return data + + +def select_dataset(data_type, metadata=None): + """Select metadata of the requested dataset + + Parameters + ---------- + data_type : str + what dataset to select, could be one of 'volume_maps', 'surface_maps', 'preprocessed', 'raw' + metadata : dict, optional + dictionary object containing version info, dataset ids etc, by default None + + Returns + ------- + dict + the metadata of latest version of the requested dataset + + Raises + ------ + KeyError + if the requested dataset is not found in the metadata + """ + if metadata is None: + metadata = fetch_metadata() + try: + dataset = metadata[data_type] + except KeyError: + raise KeyError( + f"Dataset type {data_type} not found in IBC collection." 
+ ) + latest_version = _find_latest_version(dataset) + dataset = dataset[latest_version] + return dataset + + +def _find_latest_version(dataset): + """Find the latest version of the dataset + + Parameters + ---------- + dataset : list of dicts + value of one of the datasets in the metadata, probably with multiple versions in a list + + Returns + ------- + int + index of the latest version of the dataset + """ + latest_version_index = 0 + latest_version = 0 + for i, data in enumerate(dataset): + if data["version"] > latest_version: + latest_version = data["version"] + latest_version_index = i + + return latest_version_index + + +def fetch_remote_file( + file, + remote_root=REMOTE_ROOT, + local_root=LOCAL_ROOT, +): + """Fetch a file from the IBC docs repo + + Parameters + ---------- + file : str + name of the file to fetch + remote_root : str, optional + root link to wherever the file is stored, by default REMOTE_ROOT + local_root : str, optional + location to write the fetched file, by default LOCAL_ROOT + + Returns + ------- + str + full path of the fetched file + """ + # Link to the json file on the IBC docs repo + remote_file = f"{remote_root}/{file}" + # save the file locally + save_as = os.path.join(local_root, file) + # use curl with github api to download the file + os.system( + f"curl -s -S -L -H 'Accept: application/vnd.github.v4.raw' -H 'X-GitHub-Api-Version: 2022-11-28' {remote_file} -o '{save_as}'" + ) + + # Return the local path of the downloaded file + return save_as + + +def fetch_metadata(file="datasets.json"): + """Fetch the metadata file from the IBC docs repo + + Parameters + ---------- + file : str, optional + name of the file, by default "datasets.json" + + Returns + ------- + dict + json file loaded as a dictionary + """ + # Fetch the datasets.json file + data_file = fetch_remote_file(file) + + # Load the data as a dictionary + return _load_json(data_file) + + +def fetch_dataset_db(data_type, metadata=None): + """Fetch csv containing file-by-file information about the 
requested dataset. + + Parameters + ---------- + data_type : str + what dataset to select, could be one of 'volume_maps', 'surface_maps', 'preprocessed', 'raw' + metadata : dict, optional + dictionary object containing version info, dataset ids etc, by default None + Returns + ------- + str + full path of the fetched file csv file + """ + dataset = select_dataset(data_type, metadata) + + return fetch_remote_file(dataset["db_file"]) diff --git a/src/ibc_api/utils.py b/src/ibc_api/utils.py index 7b9df0e..5f9fe77 100644 --- a/src/ibc_api/utils.py +++ b/src/ibc_api/utils.py @@ -1,5 +1,5 @@ """API to fetch IBC data from EBRAINS via Human Data Gateway using siibra. -#TODO add other data sources: neurovault, openneuro""" +""" import siibra from siibra.retrieval.repositories import EbrainsHdgConnector @@ -8,26 +8,17 @@ import nibabel from siibra.retrieval.cache import CACHE import pandas as pd -from io import BytesIO from datetime import datetime +from . import metadata as md # clear cache -CACHE.clear() +CACHE.run_maintenance() # dataset ids on ebrains -DATASET_ID = { - "statistic_map": "07ab1665-73b0-40c5-800e-557bc319109d", - "preprocessed": "3ca4f5a1-647b-4829-8107-588a699763c1", - "raw": "8ddf749f-fb1d-4d16-acc3-fbde91b90e24", -} - -# path to csv file with information about all statistic maps on EBRAINS -STAT_MAPS_DB = "resulting_smooth_maps/ibc_neurovault.csv" +METADATA = md.fetch_metadata() # all subjects in IBC dataset -SUBJECTS = [ - "sub-%02d" % i for i in [1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15] -] +SUBJECTS = md.SUBJECTS def authenticate(): @@ -37,24 +28,23 @@ def authenticate(): siibra.fetch_ebrains_token() -def _connect_ebrains(data_type="statistic_map"): +def _connect_ebrains(data_type="volume_maps", metadata=METADATA): """Connect to given IBC dataset on EBRAINS via Human Data Gateway. 
Parameters ---------- data_type : str, optional - dataset to fetch, by default "statistic_map", can be one of - ["statistic_map", "raw", "preprocessed"] + dataset to fetch, by default "volume_maps", can be one of + ["volume_maps", "surface_maps", "preprocessed", "raw"] Returns ------- EbrainsHdgConnector connector to the dataset """ - try: - dataset_id = DATASET_ID[data_type] - except KeyError: - return ValueError(f"Unknown data type: {data_type}") + # get the dataset id + dataset = md.select_dataset(data_type, metadata) + dataset_id = dataset["id"] return EbrainsHdgConnector(dataset_id) @@ -84,14 +74,14 @@ def _create_root_dir(dir_path=None): return dir_path -def get_info(data_type="statistic_map", save_to=None): +def get_info(data_type="volume_maps", save_to=None, metadata=METADATA): """Fetch a csv file describing each file in a given IBC dataset on EBRAINS. Parameters ---------- data_type : str, optional - dataset to fetch, by default "statistic_map", TODO one of - ["statistic_map", "raw", "preprocessed"] + dataset to fetch, by default "volume_maps", one of + ["volume_maps", "surface_maps", "raw", "preprocessed"] save_as : str or None, optional filename to save this csv as, by default None, if None saves as "ibc_data/available_{data_type}.csv" @@ -103,18 +93,12 @@ def get_info(data_type="statistic_map", save_to=None): """ # connect to ebrains dataset connector = _connect_ebrains(data_type) - if data_type == "statistic_map": - # file with all information about the dataset - db_file = STAT_MAPS_DB - # get the file - db = connector.get( - db_file, - decode_func=lambda b: pd.read_csv(BytesIO(b), delimiter=","), - ) - db.drop(columns=["Unnamed: 0"], inplace=True) - # TODO add other data types: raw, preprocessed, etc. 
- else: - return ValueError(f"Unknown data type: {data_type}") + # file with all information about the dataset + db_file = md.fetch_dataset_db(data_type, metadata) + # load the file as dataframe + db = pd.read_csv(db_file) + db.drop(columns=["Unnamed: 0"], inplace=True, errors="ignore") + db["image_type"] = [data_type for _ in range(len(db))] # save the database file save_to = _create_root_dir(save_to) save_as = os.path.join(save_to, f"available_{data_type}.csv") @@ -150,7 +134,7 @@ def filter_data(db, subject_list=SUBJECTS, task_list=False): return filtered_db -def get_file_paths(db): +def get_file_paths(db, metadata=METADATA): """Get the file paths for each file in a (filtered) dataframe. Parameters @@ -173,12 +157,8 @@ def get_file_paths(db): _file_names = db["path"].tolist() # update file names to be relative to the dataset file_names = [] + root_dir = md.select_dataset(data_type, metadata)["root"] for file in _file_names: - if data_type == "statistic_map": - root_dir = "resulting_smooth_maps" - # TODO add other data types: raw, preprocessed, etc. - else: - return ValueError(f"Unknown data type: {data_type}") # get the subject and session sub_ses = file.split("_")[:2] # put the file path together @@ -286,22 +266,23 @@ def _download_file(src_file, dst_file, connector): if not os.path.exists(dst_file): # load the file from ebrains src_data = connector.get(src_file) - if type(src_data) is nibabel.nifti1.Nifti1Image: - src_data.to_filename(dst_file) - # TODO add other data like json, etc. 
+ if type(src_data) == nibabel.nifti1.Nifti1Image: + nibabel.save(src_data, dst_file) + elif type(src_data) == nibabel.gifti.gifti.GiftiImage: + nibabel.save(src_data, dst_file, mode="compat") else: return ValueError( f"Don't know how to save file {src_file}" f" of type {type(src_data)}" ) - return dst_file, datetime.now() + return dst_file else: print(f"File {dst_file} already exists, skipping download.") - return [], [] + return [] -def download_data(db, save_to=None, organise_by="session"): +def download_data(db, save_to=None, organise_by="session", metadata=METADATA): """Download the files in a (filtered) dataframe. Parameters @@ -340,7 +321,8 @@ def download_data(db, save_to=None, organise_by="session"): ) # file path to save the data dst_file = os.path.join(dst_file_head, dst_file_base) - file_name, file_time = _download_file(src_file, dst_file, connector) + file_name = _download_file(src_file, dst_file, connector) + file_time = datetime.now() local_db = _update_local_db(local_db_file, file_name, file_time) # keep cache < 2GiB, delete oldest files first CACHE.run_maintenance()