-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathPrepareMetadata.py
111 lines (89 loc) · 3.31 KB
/
PrepareMetadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# coding: utf-8
"""
Consolidates the preparation of metadata for the analyses.
It requires that narps.py has already been run.
"""
import os
import argparse
import pandas
from narps import Narps
from utils import get_merged_metadata_decisions
def package_recoder(p):
    """Recode a raw analysis-software label into a coarse package category.

    Parameters
    ----------
    p : str or other
        Raw software label from the metadata. Non-string values
        (e.g. NaN from pandas) are collapsed into 'Other'.

    Returns
    -------
    str
        'SPM' for any label beginning with 'SPM', 'Other' for
        non-strings and for packages used by too few teams to model
        separately, otherwise the label unchanged.
    """
    # packages used by too few teams to model separately
    others = ('nistats', 'PALM', 'randomise')
    if not isinstance(p, str):
        return 'Other'
    if p.startswith('SPM'):
        return 'SPM'
    if p in others:
        return 'Other'
    return p
def prepare_metadata(narps, verbose=True):
    """Merge image/decision metadata with smoothness estimates and save.

    Requires that narps.py has already been run so that the metadata
    file and the smoothness estimates CSV exist under the NARPS
    directory tree.

    Parameters
    ----------
    narps : Narps
        Initialized Narps object providing ``metadata_file``, ``dirs``
        and ``complete_image_sets``.
    verbose : bool
        If True (default), print progress information.

    Side effects
    ------------
    Writes ``narps_metadata_all_teams.csv`` and ``all_metadata.csv``
    into the metadata directory.
    """
    # get original image and decision metadata
    alldata_df = get_merged_metadata_decisions(
        narps.metadata_file,
        os.path.join(narps.dirs.dirs['orig'], 'narps_results.xlsx'))
    if verbose:
        print('found merged metadata for %d teams' %
              alldata_df.teamID.unique().shape[0])

    # change type of varnum to int so it can be merged against 'hyp'
    alldata_df['varnum'] = alldata_df['varnum'].astype('int')

    # recode variables to make analysis cleaner
    alldata_df['software'] = [
        package_recoder(x) for x in alldata_df['analysis_SW']]

    # save a copy for decision analyses with all teams
    alldata_df.to_csv(os.path.join(
        narps.dirs.dirs['metadata'], 'narps_metadata_all_teams.csv'))

    # load smoothness data
    smoothness_df = pandas.read_csv(
        os.path.join(
            narps.dirs.dirs['metadata'],
            'smoothness_est.csv'))
    if verbose:
        print("found smoothness data for %d teams" %
              len(smoothness_df.teamID.unique()))
        print('missing smoothness data for:')
        print(set(narps.complete_image_sets['unthresh']).difference(
            set(smoothness_df.teamID.unique())))

    # left join keeps teams that lack smoothness estimates (NaN resels)
    alldata_df = pandas.merge(
        alldata_df, smoothness_df,
        how='left',
        left_on=['teamID', 'varnum'],
        right_on=['teamID', 'hyp'])

    # average FWHM estimated as:
    # AvgFWHM = RESELS^(1/3)
    # (multiplied by 2 since this value is in voxels
    # rather than mm) per:
    # https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=FSL;e792b5da.0803
    alldata_df['fwhm'] = [i**(1/3.)*2 for i in alldata_df.resels]

    # recode analysis SW for statistical analysis
    alldata_df['package'] = alldata_df['analysis_SW']
    for i in alldata_df.index:
        pkg = alldata_df.loc[i, 'package']
        # guard against non-string entries (e.g. NaN) — consistent with
        # package_recoder's handling of missing software labels; the
        # original code would raise AttributeError on .replace here
        if not isinstance(pkg, str):
            alldata_df.loc[i, 'package'] = 'Other'
            continue
        # multiple packages listed (comma/semicolon separated):
        # collapse into 'Other'
        if pkg.replace(';', ',').find(',') > -1:
            alldata_df.loc[i, 'package'] = 'Other'
        # not enough teams to adequately model these packages
        if alldata_df.loc[i, 'package'] in ['nistats']:
            alldata_df.loc[i, 'package'] = 'Other'

    # save data for loading into R
    alldata_df.to_csv(os.path.join(
        narps.dirs.dirs['metadata'], 'all_metadata.csv'))
if __name__ == "__main__":
    # Command-line entry point: resolve the base directory
    # (CLI flag > NARPS_BASEDIR env var > default) and run the
    # metadata preparation pipeline.
    parser = argparse.ArgumentParser(
        description='Generate NARPS metadata')
    parser.add_argument('-b', '--basedir',
                        help='base directory')
    cli_args = parser.parse_args()

    if cli_args.basedir is not None:
        basedir = cli_args.basedir
    else:
        basedir = os.environ.get('NARPS_BASEDIR')
        if basedir is not None:
            print("using basedir specified in NARPS_BASEDIR")
        else:
            basedir = '/data'
            print("using default basedir:", basedir)

    # set up the main analysis class and build the metadata files
    narps = Narps(basedir, overwrite=False)
    prepare_metadata(narps)