-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathadvanced_classification.m
602 lines (504 loc) · 23 KB
/
advanced_classification.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
% ADVANCED CLASSIFICATION
%
% This tutorial covers more complex classification scenarios such as
% higher-dimensional data (time-frequency), searchlight analysis across
% multiple dimensions and precomputing of kernels.
%
% Contents:
% (1) Create a time-frequency dataset
% (2) Classification of 4D time-frequency data
% (3) Searchlight across time and frequency
% (4) Appending dimensions for faster searchlight analysis
% (5) Multiple feature dimensions
% (6) Time generalization, frequency generalization and electrode
% generalization
% (7) Precompute kernel matrix
% (8) Transfer classification (cross decoding) for ERP data
% (9) Transfer classification (cross decoding) for time-frequency data
%
% Note: If you are new to working with MVPA-Light, make sure that you
% complete the introductory tutorials first:
% - getting_started_with_classification
% - getting_started_with_regression
% They are found in the same folder as this tutorial.
close all
clear
% Load data
[dat,clabel] = load_example_data('epoched2');
%% (1) Create a time-frequency dataset
% Most tutorial examples use 3D data which is [samples x features x time
% points]. MVPA-Light can deal with data of any dimensionality. As an
% example for a more complex analysis, we will consider time frequency data
% here.
%
% To obtain time-frequency data, we will take the ERP example data and
% calculate calculate the spectrogram, yielding a [samples x channels x
% frequencies x time points] dataset of spectral power values.
% The data will be saved in the struct freq.
sz = size(dat.trial);
% Set parameters for spectrogram
win= chebwin(20);
Fs = 1/mean(diff(dat.time));
noverlap = 18;
nfft = 64;
% get number of frequencies and time points
[S,F,T] = spectrogram(squeeze(dat.trial(1,1,:)), win, noverlap, nfft, Fs);
% Correct time T
T = T + dat.time(1);
freq = dat;
freq.trial = zeros([sz(1:2), numel(F), numel(T)]);
freq.dimord = 'rpt_chan_freq_time';
freq.time = T;
freq.freq = F;
for nn=1:sz(1)
for cc=1:sz(2)
S = abs(spectrogram(squeeze(dat.trial(nn,cc,:)), win, noverlap, nfft, Fs));
freq.trial(nn,cc,:,:) = S;
end
end
% Baseline correction
pre = find(freq.time < 0);
BL = mean(mean(freq.trial(:,:,:,pre), 4),1);
% calculate relative baseline (the resultant signal can be interpreted in
% terms of percent signal change)
sz = size(freq.trial);
BLmat = repmat(BL, [sz(1) 1 1 sz(4)]);
freq.trial = (freq.trial - BLmat) ./ BLmat;
%% (2) Classification of 4D time-frequency data
% Based on the time-frequency data created in the previous section, we will
% use mv_classify to perform a classification for each time-frequency point
% separately.
cfg = [];
cfg.classifier = 'lda';
cfg.metric = 'auc';
% mv_classify needs to be told which dimensions encode the samples and
% features. Samples are in dimension 1 and features in
% dimension 2. The last two dimensions will be automatically devised as search
% dimensions.
% Actually, this is the default setting but we will set the dimensions
% explicitly to make clear how the mechanism works.
cfg.sample_dimension = 1;
cfg.feature_dimension = 2;
% optional: provide the names of the dimensions for nice output
cfg.dimension_names = {'samples','channels','frequencies','time points'};
% we can now perform the classification
[~, result] = mv_classify(cfg, freq.trial, clabel);
% call mv_plot_result: results is the first argument, followed by the
% arguments defining the x-axis (time) and y-axis (frequency)
mv_plot_result(result, freq.time, freq.freq)
%%%%%% EXERCISE 1 %%%%%%
% Let us imagine that the order of the data dimensions is different.
% Run the code
% X = permute(freq.trial, [4 2 1 3]);
% Now the dimensions are [time points x channels x samples x frequencies].
% Can you change the parameters sample_dimension, feature_dimension, and
% the dimension_names such that mv_classify produces the same output as
% before (i.e. classification for each time-frequency point)?
%%%%%%%%%%%%%%%%%%%%%%%%
%% (3) Searchlight across time and frequency
% In the previous section we calculated classification performance for each
% time-frequency point. However, each time point and each frequency were
% considered separately. Here, we define a 'larger' searchlight by also
% taking into account the immediately neighbouring time points and
% frequencies. To this end, we need to create neighbourhood matrices for
% each of the two search dimensions. Let's get the size of each dimension
% first
[nsamples, nchannels, nfrequencies, ntimes] = size(freq.trial);
% 1) create binary neighbourhood matrix for frequencies (includes a given
% frequency and the preceding and following frequency)
O = ones(nfrequencies);
O = O - triu(O,2) - tril(O,-2);
freq_neighbours = O;
% Let's look at the first 5 rows and first 10 columns. We can see that e.g.
% for the 2nd frequency (row 2), the 1st, 2nd and 3rd frequencies are
% considered as features. Taking into account neighbouring frequencies
% enlarges the feature space, giving the classifier potentially more useful
% information.
freq_neighbours(1:5,1:10)
% 2) create neighbourhood matrix for time points (include a given
% time point and the preceding and following time point)
O = ones(ntimes);
O = O - triu(O,2) - tril(O,-2);
time_neighbours = O;
cfg = [];
cfg.sample_dimension = 1;
cfg.feature_dimension = 2;
cfg.dimension_names = {'samples','channels','frequencies','time points'};
% Store both neighbourhood matrices together in a cell array
cfg.neighbours = {freq_neighbours, time_neighbours};
cfg.classifier = 'naive_bayes'; % we use Naive Bayes this time
% this might take a while...
rng(21)
[~, result] = mv_classify(cfg, freq.trial, clabel);
% Let's plot the result
mv_plot_result(result, freq.time, freq.freq)
% The resultant time-frequency map looks smoother. This is because of taking
% into account neighbouring times/frequencies.
%%%%%% EXERCISE 2 %%%%%%
% We want information not only for each time-frequency point, but also for
% each electrode separately. Can you modify the cfg struct such that we get
% classification performance for electrodes x frequencies x time points?
%%%%%%%%%%%%%%%%%%%%%%%%
%% (4) Appending dimensions for faster searchlight analysis
% Searchlight analysis can get quite sluggish. This is because mv_classify
% is looping over all searchlight dimensions and performing a separate
% analysis each time.
% An alternative way is to append all searchlight dimensions to the
% training data and pass all of them to the train function of the
% classifier at once.
% This requires bespoke code in the train/test function of the classifier.
% At the moment, this is only supported by the Naive Bayes classifier.
% To use this option, set cfg.append = 1.
rng(21)
cfg = [];
cfg.classifier = 'naive_bayes';
cfg.dimension_names = {'samples','channels','frequencies', 'time points'};
cfg.neighbours = {freq_neighbours, time_neighbours};
cfg.append = 1;
[perf, result] = mv_classify(cfg, freq.trial, clabel);
% This was much faster, but the result is the same as before
mv_plot_result(result, freq.time, freq.freq)
%% (5) Multiple feature dimensions
% With multi-dimensional data, we can still opt to search only across one
% of the dimensions. For instance, we can perform a classification across
% time. To achieve this, both the channels and the frequencies have to
% act as features at the same time. This is easily done by setting the
% feature_dimension to include both channels (dimension 2) and frequencies
% (dimension 3).
cfg = [];
cfg.dimension_names = {'samples','channels','frequencies', 'time points'};
cfg.feature_dimension = [2 3];
% Since cfg.flatten_features = 1 per default, both feature dimension will be
% flattened into a single feature vector of length channels x frequencies.
% The result is a time x time map of classification accuracies.
[~, result] = mv_classify(cfg, freq.trial, clabel);
% The resultant classification performance is not great though
mv_plot_result(result, freq.time)
%%%%%% EXERCISE 3 %%%%%%
% Compare the classification performance to classification across time of
% the original 3D ERP data (using the dat struct). What do you notice?
%%%%%%%%%%%%%%%%%%%%%%%%
%% (6) Time generalization, frequency generalization and electrode generalization
% Building on the example in the previous section, we can still perform
% generalization if we only have one search dimension. As before, we simply
% designate channels and frequencies as features. Additionally, we designate
% the time points (dimension 4) as generalization dimension.
cfg = [];
cfg.dimension_names = {'samples', 'channels', 'frequencies', 'time points'};
cfg.sample_dimension = 1;
cfg.feature_dimension = [2, 3]; % channels and frequencies serves as features
cfg.generalization_dimension = 4; % time serves for generalization
[~, result] = mv_classify(cfg, freq.trial, clabel);
% Time x time plot
mv_plot_result(result, freq.time, freq.time)
% In the literature, generalization has mostly be performed for time
% points. However, there is nothing stopping you from performing
% generalization across any of the other dimensions. Whether it makes sense
% will depend on your data and your research question.
%
% Let us start by performing frequency generalization. The result will be a
% frequency x frequency map of classification results. To achieve this, we
% only need to swap time and frequency dimensions.
cfg.feature_dimension = [2, 4]; % now time is also a feature dimension
cfg.generalization_dimension = 3; % now frequency is the generalization dimension
[~, result] = mv_classify(cfg, freq.trial, clabel);
% The result is interesting. Training at low frequencies <10 Hz gives us
% decent testing performance even at higher frequencies.
mv_plot_result(result, freq.freq, freq.freq)
%%%%%% EXERCISE 4 %%%%%%
% Perform an electrode x electrode generalization on the time-frequency
% data.
% For comparison, also perform an electrode x electrode generalization on
% the original ERP data (dat struct).
%%%%%%%%%%%%%%%%%%%%%%%%
%% (7) Precompute kernel matrix
% Kernel methods (e.g. SVM) operate on kernel matrices which are calculated
% from the samples in the train functions. The computation of the kernel
% matrix takes some time. It can be more efficient to precompute the kernel
% matrix i.e. compute it on the full dataset first and then pass it as
% data. mv_classify supports kernel matrices as input data.
% To precompute the kernel, we can use the compute_kernel_matrix function.
% We need to set the kernel hyperparameters first.
cfg_kernel = [];
cfg_kernel.kernel = 'rbf';
cfg_kernel.gamma = .1;
cfg_kernel.regularize_kernel = 0.001;
X_kernel = compute_kernel_matrix(cfg_kernel, dat.trial);
% To understand what happened, let us compare the sizes of the original
% data with the size of X_kernel
size(dat.trial)
size(X_kernel)
% We see that the data is [samples x channels x time points] whereas our
% kernel matrix is [samples x samples x time points]. We can now pass on
% the kernel matrix to mv_classify. Just make sure to set the hyperparameter
% kernel='precomputed'.
cfg = [];
cfg.classifier = 'svm';
cfg.hyperparameter = [];
cfg.hyperparameter.kernel = 'precomputed';
tic;
perf = mv_classify_across_time(cfg, X_kernel, clabel);
time1 = toc;
fprintf('Computation time with precomputed kernels: %2.2fs\n', time1)
% Let us compare how the performance changes when we use the same kernel
% without precomputation
cfg = [];
cfg.classifier = 'svm';
cfg.hyperparameter = [];
cfg.hyperparameter.kernel = 'rbf';
cfg.hyperparameter.gamma = .1;
cfg.hyperparameter.regularize_kernel = 0.001;
tic;
perf = mv_classify_across_time(cfg, dat.trial, clabel);
time2 = toc;
fprintf('Computation time without precomputed kernels: %2.2fs\n', time2)
% Comparing computation times we see that precomputing kernels indeed
% speeds up things (for a fair comparison the time it takes to precompute
% the kernel should be taken into account as well, but if you do it it's
% still faster)
%%%%%% EXERCISE 5 %%%%%%
% Try precomputing a RBF kernel for the time-frequency data (freq struct).
% Compare computation time with / without kernel.
%%%%%%%%%%%%%%%%%%%%%%%%
%% (8) Transfer classification (cross decoding) for ERP data
% In transfer classification, one dataset is used for training, another one
% for testing. Technically, this is the same as in cross-validation, where
% one fold is defined for training and a separate fold for testing.
% However, in transfer classification we assume that the two datasets come
% from potentially different distributions (e.g. two different
% participants, two different phases in an experiment etc).
% Transfer classification is supported by mv_classify,
% mv_classify_across_time and mv_classify_timextime.
% For mv_classify_timextime, cross decoding has already been covered in
% example 4 in getting_started_with_classification, so we will focus on
% mv_classify_across_time and mv_classify here.
% Here, we will train on the dataset dat,clabel and test on the following
% second dataset:
[dat2, clabel2] = load_example_data('epoched3');
% We can see that both datasets have the same number of features and time
% points. This is essential for mv_classify_across_time, the datasets may
% only differ in the number of trials.
size(dat.trial)
size(dat2.trial)
% To perform cross decoding, we simply pass the second dataset as extra
% parameters to the function. A classifier is trained at a given
% time point in dataset 1 and tested at exactly the same time point in
% dataset 2.
cfg = [];
cfg.metric = 'auc';
[~, result] = mv_classify_across_time(cfg, dat.trial, clabel, dat2.trial, clabel2);
mv_plot_result(result, dat.time)
title('train on dataset 1, test on dataset 2')
% By swapping the input arguments we can train on dataset 2 and test on
% dataset 1
[perf, result] = mv_classify_across_time(cfg, dat2.trial, clabel2, dat.trial, clabel);
mv_plot_result(result, dat.time)
title('train on dataset 2, test on dataset 1')
%%%%%% EXERCISE 6 %%%%%%
% Achieve the same result using mv_classify instead of
% mv_classify_across_time. Double check that the results are identical.
%%%%%%%%%%%%%%%%%%%%%%%%
%% (9) Transfer classification (cross decoding) for time-frequency data
% Here we apply transfer classification with two time-frequency datasets
% using mv_classify. Let's first perform time-frequency analysis for our
% second dataset using the same code as in example 1.
sz = size(dat2.trial);
win= chebwin(20);
Fs = 1/mean(diff(dat2.time));
noverlap = 18;
nfft = 64;
[S,F,T] = spectrogram(squeeze(dat2.trial(1,1,:)), win, noverlap, nfft, Fs);
T = T + dat2.time(1);
freq2 = dat2;
freq2.trial = zeros([sz(1:2), numel(F), numel(T)]);
freq2.dimord = 'rpt_chan_freq_time';
freq2.time = T;
freq2.freq = F;
for nn=1:sz(1)
for cc=1:sz(2)
S = abs(spectrogram(squeeze(dat2.trial(nn,cc,:)), win, noverlap, nfft, Fs));
freq2.trial(nn,cc,:,:) = S;
end
end
pre = find(freq2.time < 0);
BL = mean(mean(freq2.trial(:,:,:,pre), 4),1);
sz = size(freq2.trial);
BLmat = repmat(BL, [sz(1) 1 1 sz(4)]);
freq2.trial = (freq2.trial - BLmat) ./ BLmat;
% Let's do the cross decoding now, using freq for training and freq2 for
% testing. The classifier is trained for each time-frequency point separately.
% To do this, we simply pass freq2 and the corresponding class labels as
% additional arguments to mv_classify
cfg = [];
cfg.dimension_names = {'samples' 'channels' 'frequencies' 'time points'};
[~, result] = mv_classify(cfg, freq.trial, clabel, freq2.trial, clabel2);
mv_plot_result(result, freq.time, freq.freq)
title('train on freq, test on freq2')
% What happens if the dimensions between the two datasets do not
% match? Let's try out by removing some frequencies from freq2, storing the
% reduced data in freq3
freq3 = freq2;
freq3.trial = freq3.trial(:,:,1:29,:);
freq3.freq = freq3.freq(1:29);
% Now train on freq and test on freq3: this will lead to an ERROR so
% let's wrap it into a try-except statement
try
[~, result] = mv_classify(cfg, freq.trial, clabel, freq3.trial, clabel2);
catch err
fprintf('[ERROR] Call to mv_classify failed:\n%s\n', err.message)
end
% This does not work because MVPA-Light does not know how to match up the
% frequency points if they don't have the same number of elements.
% A possible way to avoid this problem is to define frequency as a generalization
% dimension: a classifier is trained/tested for every combination of
% train/test frequency, and it does not matter any more whether they match.
cfg.generalization_dimension = 3;
[perf, result] = mv_classify(cfg, freq.trial, clabel, freq3.trial, clabel2);
% The dimensions of the result are [time points x train frequencies x test
% frequencies]. The generalization dimension is always moved to the end,
% this is why time points come first. mv_plot_result does not plot 3D data,
% so we leave it at just looking at the dimensions here.
size(perf)
%%%%%% EXERCISE 7 %%%%%%
% Now perform a time x time generalization using freq as train data and
% freq2 as test data. Jointly use channels and frequencies as features.
% Hint: You need to set the feature dimension and generalization
% dimension.
%%%%%%%%%%%%%%%%%%%%%%%%
%% The End
% Congrats, you finished the tutorial! You are now a MVPA-Light wizard!
%% SOLUTIONS TO THE EXERCISES
%% SOLUTION TO EXERCISE 1
% Run the code from the question
X = permute(freq.trial, [4 2 1 3]);
% Since the data is now [time points x channels x samples x frequencies]
% the samples are dimension 3 whereas the features are still in dimension 2
cfg = [];
cfg.classifier = 'lda';
cfg.metric = 'auc';
cfg.sample_dimension = 3;
cfg.feature_dimension = 2;
cfg.dimension_names = {'time points', 'channels', 'samples', 'frequencies'};
[~, result] = mv_classify(cfg, X, clabel);
% dimensions are now flipped because time points comes before frequencies
% in the data
mv_plot_result(result)
%% SOLUTION TO EXERCISE 2
% Let's first recreate the cfg struct in example 3
cfg = [];
cfg.sample_dimension = 1;
cfg.feature_dimension = 2;
cfg.dimension_names = {'samples','channels','frequencies', 'time points'};
cfg.neighbours = {freq_neighbours, time_neighbours};
% So far elecrodes have been used as features. To move the searchlight
% across the electrodes, too, we need to set the feature dimension to []
cfg.feature_dimension = [];
% We also need to add a neighbourhood matrix for electrodes. For now, we
% only want to consider each electrode separately, therefore we add an
% identity matrix. If you want to group neighbouring electrodes, refer to
% the searchlight examples in getting_started_with_classification.
% The order of the neighbourhood matrices needs to be the same as the order
% of the dimensions. Since electrodes come before frequencies and time
% points, we need to add the identity matrix as first element.
cfg.neighbours = [eye(size(freq.trial,2)) cfg.neighbours];
% Let's reduce the number of folds/repetitions so that the analysis runs
% faster
cfg.k = 2;
cfg.repeat = 1;
[perf, result] = mv_classify(cfg, freq.trial, clabel);
% 3D images cannot be currently plotted using mv_plot_result, so let's just
% confirm that the size is correct
size(perf)
%% SOLUTION TO EXERCISE 3
% We can calculate the performance on the original ERP data by simply using
% the dat struct.
cfg = [];
[perf, result] = mv_classify(cfg, dat.trial, clabel);
% If we plot the result, we see that the performance is much better than
% for the flattened time-frequency data, although the latter has more
% features. One possible explanation is that the time-frequency data
% contains a lot of frequencies with little information about the classes.
% Additionally, the phase information (which is dicarded when calculating
% spectral power) could be relevant to classification, too.
mv_plot_result(result, dat.time)
%% SOLUTION TO EXERCISE 4
% For electrode x electrode generalization, we just need to change
% feature_dimension and generalization_dimension again
cfg = [];
cfg.dimension_names = {'samples', 'electrodes', 'frequencies', 'time points'};
cfg.feature_dimension = [3, 4];
cfg.generalization_dimension = 2;
[~, result] = mv_classify(cfg, freq.trial, clabel);
% Electrode x electrode plot
mv_plot_result(result)
set(gca,'XTick',1:numel(freq.label), 'XTickLabel', freq.label)
set(gca,'YTick',1:numel(freq.label), 'YTickLabel', freq.label)
% Using the original dat struct
cfg = [];
cfg.feature_dimension = 3;
cfg.generalization_dimension = 2;
[~, result] = mv_classify(cfg, dat.trial, clabel);
mv_plot_result(result)
set(gca,'XTick',1:numel(freq.label), 'XTickLabel', freq.label)
set(gca,'YTick',1:numel(freq.label), 'YTickLabel', freq.label)
% Overall accuracy is again higher for the original ERP data
%% SOLUTION TO EXERCISE 5
% The analysis is very similar. However, we cannot use
% mv_classify_across_time any more because the data is now 4-dimensional.
% We have to use mv_classify instead and set the sample and feature
% dimensions.
% precompute kernel
cfg_kernel = [];
cfg_kernel.kernel = 'rbf';
cfg_kernel.gamma = .1;
cfg_kernel.regularize_kernel = 0.001;
cfg = [];
cfg.classifier = 'svm';
cfg.hyperparameter = [];
cfg.hyperparameter.kernel = 'precomputed';
cfg.sample_dimension = [1 2]; % dimensions 1 and 2 are [samples x samples]
cfg.feature_dimension = []; % with precomputed kernels we have no feature dimension, only the kernel matrix
tic;
% we want to take into account both precomputation and classification for
% timing
X_kernel = compute_kernel_matrix(cfg_kernel, freq.trial);
perf = mv_classify(cfg, X_kernel, clabel);
time1 = toc;
fprintf('Computation time with precomputed kernels: %2.2fs\n', time1)
% without precomputation
cfg = [];
cfg.classifier = 'svm';
cfg.hyperparameter = [];
cfg.hyperparameter.kernel = 'rbf';
cfg.hyperparameter.gamma = .1;
cfg.hyperparameter.regularize_kernel = 0.001;
tic;
perf = mv_classify(cfg, X_kernel, clabel);
time2 = toc;
fprintf('Computation time without precomputed kernels: %2.2fs\n', time2)
% Again, precomputing kernels makes the computations faster
%% SOLUTION TO EXERCISE 6
% First reproduce the result using mv_classify_across_time
cfg = [];
cfg.metric = 'auc';
perf = mv_classify_across_time(cfg, dat2.trial, clabel2, dat.trial, clabel);
% We can achieve the same with mv_classify
[perf2, result] = mv_classify(cfg, dat2.trial, clabel2, dat.trial, clabel);
mv_plot_result(result, dat.time);
title('train on dataset 2, test on dataset 1 using mv_classify')
% difference between both results is zero
norm(perf-perf2)
%% SOLUTION TO EXERCISE 7
% We need to define both dimensions 2 (channels) and 3 (frequencies) as
% feature dimensions. Both dimensions will automatically be flattened into
% a single dimension since cfg.flatten_features = 1 by default.
% Dimension 4 (time points) needs to be defined as a generalization
% dimension.
cfg = [];
cfg.feature_dimension = [2 3];
cfg.generalization_dimension = 4;
cfg.dimension_names = {'samples' 'channels' 'frequencies' 'time points'};
[perf, result] = mv_classify(cfg, freq.trial, clabel, freq2.trial, clabel2);
% The result is a time x time plot, but the result is not great (maximum
% performance <70%).
mv_plot_result(result, freq.time, freq2.time)