-
Notifications
You must be signed in to change notification settings - Fork 13
/
PhilipsDeIdentify.m
176 lines (144 loc) · 5.83 KB
/
PhilipsDeIdentify.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
function PhilipsDeIdentify(fnames)
% PhilipsDeIdentify
%   Reads Philips SPAR files and removes participant information. Newly
%   de-identified SPAR files are then output, with filenames appended with
%   '_noID'. The original files are not overwritten.
%
%   PhilipsDeIdentify will also create copies of the SDAT files associated
%   with the SPAR files, with filenames appended with '_noID'. This step is
%   required for Gannet functionality. (It is assumed that each SDAT/SPAR
%   file pair has the same name.)
%
%   If PhilipsDeIdentify has already been run, and de-identified files are
%   found within the directory, the user will be asked whether these files
%   should be overwritten.
%
%   The following SPAR header fields are blanked: examination_name,
%   patient_name, patient_birth_date and scan_date.
%
%   NOTE: The user must make sure that filenames themselves do not contain
%   information that can personally identify participants. This function
%   will only de-identify the content of the SPAR file.
%
%   Usage:
%       PhilipsDeIdentify, by itself, de-identifies all SPAR files found
%       within the current directory.
%
%       PhilipsDeIdentify(fnames) de-identifies the SPAR files listed in
%       the cell array fnames. A single filename may also be passed as a
%       character array.
%
%   Example:
%       c = {'S01_gaba_7_2_raw_act.SPAR', 'S01_gaba_7_2_raw_ref.SPAR'};
%       PhilipsDeIdentify(c);
%
%   Errors are raised when no SPAR file is found, when a listed file lacks
%   a .SPAR/.spar extension or does not exist, when a file cannot be
%   opened, or when the matching SDAT file is missing.

%   Author: Mark Mikkelsen (Johns Hopkins University, 2018)
%
%   Version history:
%   2016-08-03: + Function created
%   2016-08-04: + All files found in current directory de-identified
%                 if nargin < 1
%               + Copies of SDAT files created; filenames appended with
%                 '_noID' (req. for Gannet)
%               + CheckForOutput added
%   2017-01-19: + Fix for case-sensitivity of extensions
%   2018-09-13: + Remove scan date and time
%   2018-09-25: + Minor bug fix

if nargin < 1 % De-identify all SPAR files in current directory
    % Query both spellings of the extension: on a case-sensitive file
    % system each pattern matches a different set of files, while on a
    % case-insensitive one both return the same files (duplicates are
    % removed by unique below).
    flist = [dir('*.spar'); dir('*.SPAR')];
    if isempty(flist)
        error('No SPAR files were found in the current directory.');
    end
    names = unique({flist.name});
    % Discard entries whose name contains '._' (same filter as before)
    names = names(cellfun(@isempty, strfind(names, '._'))); %#ok<STRCLFH>
    if isempty(names)
        error('No SPAR files were found in the current directory.');
    end
    fnames = reshape(names, 1, []);
else % De-identify SPAR files user has listed in fnames
    % Accept a single filename given as a character array
    if ischar(fnames)
        fnames = {fnames};
    end
    assert(iscellstr(fnames), ...
        'fnames must be a cell array of filenames (or a single filename).');

    % Check if filenames include a .SPAR/.spar extension. The length test
    % comes first so that very short names fail with the intended message
    % instead of an indexing error.
    for ii = 1:length(fnames)
        hasExt = length(fnames{ii}) >= 5 && strcmpi(fnames{ii}(end-4:end), '.spar');
        assert(hasExt, ...
            'The filename %s does not include a .SPAR/.spar extension.', fnames{ii});
    end

    % Check if files can be found
    for ii = 1:length(fnames)
        assert(exist(fnames{ii}, 'file') == 2, ...
            ['The file %s cannot be found.' ...
            ' Check spelling of filenames (SDAT/SPAR files must include an extension in their filename).' ...
            ' Also check that you are in the right directory.'], fnames{ii});
    end
end

% Ask the user what to do if de-identified files already exist
[exitFunc, fnames] = CheckForOutput(nargin, fnames);
if exitFunc
    return
end

% Read SPAR files and remove participant information; save new SPAR files
idFields = {'examination_name', 'patient_name', 'patient_birth_date', 'scan_date'};
for ii = 1:length(fnames)
    outName = [fnames{ii}(1:end-5) '_noID' fnames{ii}(end-4:end)];

    spar_fid = fopen(fnames{ii}, 'r');
    if spar_fid == -1
        error('The file %s could not be opened for reading.', fnames{ii});
    end
    spar_fid_noID = fopen(outName, 'w');
    if spar_fid_noID == -1
        fclose(spar_fid); % do not leak the input handle
        error('The file %s could not be opened for writing.', outName);
    end

    tline = fgetl(spar_fid);
    while ischar(tline) % fgetl returns -1 (non-char) at end of file
        % Blank the value of the first identifying field found on the line
        for kk = 1:length(idFields)
            if any(strfind(tline, idFields{kk}))
                tline = [idFields{kk} ' : '];
                break
            end
        end
        fprintf(spar_fid_noID, '%s\n', tline);
        tline = fgetl(spar_fid);
    end

    fclose(spar_fid);
    fclose(spar_fid_noID);
end

% Create duplicate SDAT files
for ii = 1:length(fnames)
    % Follow the case of the SPAR extension; anything that is not entirely
    % upper case (lower or mixed case) maps to '.sdat'
    if all(isstrprop(fnames{ii}(end-3:end), 'upper'))
        fnames_sdat = [fnames{ii}(1:end-5) '.SDAT'];
    else
        fnames_sdat = [fnames{ii}(1:end-5) '.sdat'];
    end

    assert(exist(fnames_sdat, 'file') == 2, ...
        ['The file %s cannot be found.' ...
        ' SDAT/SPAR file pairs must have the same name (and include an extension).' ...
        ' Also check that all files are in the same directory.'], fnames_sdat);

    copyfile(fnames_sdat, [fnames_sdat(1:end-5) '_noID' fnames_sdat(end-4:end)]);
end

function [exitFunc, fnames] = CheckForOutput(nArgs, fnames)
% Check if any de-identified files have already been output and ask user if
% they want to overwrite them
%
%   Inputs:
%       nArgs  - number of arguments the caller was invoked with. If
%                nArgs < 1, fnames is treated as a directory listing that
%                may itself contain earlier '_noID' output; otherwise
%                fnames is a user-supplied list and the corresponding
%                '_noID' files are looked up on disk.
%       fnames - cell array of SPAR filenames (each ending in a
%                five-character extension such as '.SPAR').
%
%   Outputs:
%       exitFunc - 1 if the user declined to overwrite, 0 otherwise.
%       fnames   - the input list; when nArgs < 1 and the user agreed to
%                  overwrite, entries containing '_noID' are removed so
%                  that previous output is not de-identified again.

exitFunc = 0;

if nArgs < 1
    % Previous output shows up in the listing itself
    isOutput = ~cellfun('isempty', strfind(fnames, '_noID')); %#ok<STRCL1>
    outputFound = any(isOutput);
else
    % Look on disk for the file each input would be written to. The 'file'
    % qualifier keeps exist from matching variables or functions.
    outputFound = false;
    for ii = 1:length(fnames)
        target = [fnames{ii}(1:end-5) '_noID' fnames{ii}(end-4:end)];
        if exist(target, 'file') == 2
            outputFound = true;
            break
        end
    end
end

if ~outputFound
    return
end

% Keep asking until a clear answer is given; an empty or mistyped reply
% must not be taken as consent to overwrite.
resp = '';
while ~any(strcmpi(resp, {'y', 'n'}))
    resp = strtrim(input('\nDe-identified files found in the directory! Proceed and overwrite? [y/n]: ', 's'));
end

if strcmpi(resp, 'n')
    disp('Exiting...');
    exitFunc = 1;
    return
end
disp('Overwriting...');

if nArgs < 1
    % Drop earlier output from the work list
    fnames(isOutput) = [];
end
