
Feature/clustering #801

Open
wants to merge 252 commits into base: master
Changes from 1 commit (252 commits total)
9ed8989
umap codes moved to clustering folder
Jun 17, 2020
692cbc9
area/depth added to umap code
Jun 17, 2020
32f46d3
adding plane_index to all_sess; adding pca explained variance; reorga…
Jun 19, 2020
5df1973
umap/pca: removing plane_index; the implementation based on unique de…
Jun 19, 2020
fc82dd4
adding image-change df, running speed, image names to load file, to u…
Jun 20, 2020
b965482
empty init file
matchings Jun 25, 2020
5d51c58
Merge remote-tracking branch 'origin/feature/update_response_analysis…
matchings Jun 25, 2020
af19af0
manual neuropil subtraction codes added
Jun 27, 2020
4cdd6c9
umap edits
Jun 27, 2020
9b34bfe
Merge branch 'feature/clustering' of https://github.com/AllenInstitut…
Jun 27, 2020
3fe6739
refactoring: new script for running umap and pca
Jun 29, 2020
046bf25
ignore weird file causing problems
matchings Jun 29, 2020
4447cef
Merge remote-tracking branch 'origin/feature/clustering' into feature…
matchings Jun 29, 2020
9c98aab
umap: try different parameters and plots
Jul 1, 2020
8273e2c
marina's clustering attempt codes added
Jul 1, 2020
c0f29e0
set dff and stim info for all sessions - allensdk
Jul 7, 2020
ea20640
marina's codes for clustering added
Jul 7, 2020
bbdbbf8
all_sess_dff_stim modified
Jul 8, 2020
c0a9b60
add notebook
matchings Jul 9, 2020
bed191c
umap: updates to notebook for creating metrics feature vector
Jul 9, 2020
b4f2b48
Merge remote-tracking branch 'origin/feature/clustering' into feature…
matchings Jul 10, 2020
76fbe7a
add updated notebook
matchings Jul 10, 2020
8df18b2
add try except to multi_session_metrics creation
matchings Jul 13, 2020
349ef75
umap and gmm codes added for the metrics feature matrix
Jul 21, 2020
af16f4c
umap gmm codes refactoring
Jul 21, 2020
622f431
using de-cross-talked dff traces for omission analysis
Jul 22, 2020
38ef990
svm codes updated for ct traces; some edits to omission population av…
Jul 23, 2020
38e6bba
scripts on the analysis computer added, related to crosstalk files, c…
Jul 23, 2020
0836983
svm pbs codes, minor changes
Jul 23, 2020
6177881
omit function: sections made clear; svm : edits
Jul 23, 2020
59d3a5d
svm init: job directory fixed
Jul 23, 2020
3f47b3d
omission function: iomit (num omissions) fixed!
Jul 23, 2020
4887abb
pbs correlation codes edited for the cluster
Jul 31, 2020
9d197b4
bug with omit code; image surround omission
Aug 1, 2020
d345990
align traces on omissions for Akul
Aug 1, 2020
a6eab39
svm: std changed to sem for error bars; set_traces_active: bug fixed …
Aug 4, 2020
75b569f
summary of keras codes added
Aug 4, 2020
d848169
small edits
Aug 4, 2020
7a62671
population average: baseline computation: multiple methods tried; als…
Aug 6, 2020
445d5a1
notes file added about summary of correlation codes
Aug 7, 2020
3b417b5
doug and marina clustering notebooks
Aug 10, 2020
8956fb9
omission population average plotting edits
Aug 10, 2020
49a32db
umap, hdbscan clustering; svm weights saved
Aug 12, 2020
ca1df91
umap clustering notebooks
Aug 12, 2020
08e3220
Merge branch 'feature/update_response_analysis' into feature/clustering
matchings Aug 12, 2020
8d03fd0
adding notebooks
matchings Aug 12, 2020
a696d70
minor stuff
Aug 19, 2020
50259a3
bug fixes: image traces/peaks: some were nans, mean changed to nanmea…
Aug 20, 2020
9f4d2ac
histogram of depth and number of neurons for each planes; some other …
Aug 21, 2020
ff89bac
statistical tests added
Aug 25, 2020
cc17509
Merge branch 'master' into feature/clustering
Aug 25, 2020
0d21bf3
update QC session log
matchings Aug 25, 2020
3552b02
add function to get behavior model summary
matchings Aug 25, 2020
381e763
Merge remote-tracking branch 'origin/feature/clustering' into feature…
matchings Aug 25, 2020
451c911
old file committed
Sep 11, 2020
62516e3
Merge branch 'master' into feature/clustering
Sep 13, 2020
aa7cba0
minor
Sep 22, 2020
26d534a
Merge branch 'master' into feature/clustering
Sep 22, 2020
6045b46
minor
Sep 25, 2020
4eb8802
starting the image decoding analysis
Sep 28, 2020
5aa0f7a
edits
Sep 29, 2020
b06f07b
Merge remote-tracking branch 'origin/dev' into feature/clustering
Sep 29, 2020
e8c3f72
Merge remote-tracking branch 'origin/dev' into feature/clustering
Sep 30, 2020
609554f
Merge remote-tracking branch 'origin/dev' into feature/clustering
Sep 30, 2020
eb631ea
svm codes created for multi-class classification
Oct 8, 2020
97c80d0
svm images, taking care of cluster pbs
Oct 8, 2020
e5858f4
code added to get the entire list of 8 experiments for each session a…
Oct 9, 2020
ebeb1ea
Merge remote-tracking branch 'origin/master' into feature/clustering
Oct 9, 2020
2930991
taking care of pbs args
Oct 10, 2020
b60a712
minor
Oct 10, 2020
494d211
session number
Oct 10, 2020
9a2262d
svm decoding of images; post analysis; also codes set to take slc
Oct 15, 2020
d28f7c1
Merge remote-tracking branch 'origin/master' into feature/clustering
Oct 15, 2020
2551970
bug fix
Oct 15, 2020
1bfec3e
codes modified to analyze image expectation signal
Oct 15, 2020
20da886
edits
Oct 16, 2020
c1e1e6f
taking care of negative values of frames svm and several other changes
Oct 16, 2020
167c87a
svm images setting vars for plotting
Oct 20, 2020
bb5ef0c
merge
Oct 20, 2020
6839262
reformat.py copied from master
Oct 20, 2020
70e9fbd
plotting codes added for svm images
Oct 22, 2020
d4365b2
svm images codes edits
Oct 23, 2020
ea9cd39
important edits to svm main function; making it clear what's being de…
Oct 23, 2020
e89a971
bug fix
Oct 24, 2020
d661187
svm: important bug fix to image index of previous flash
Oct 24, 2020
481fe8a
minor
Oct 24, 2020
fe9b963
bug fix
Oct 24, 2020
0fcff47
bug fix; code improvement; script rename
Oct 27, 2020
03e988f
minor
Oct 27, 2020
8eed02d
edits to svm images
Oct 30, 2020
9736c9d
minor
Oct 30, 2020
9d26a83
bug fixes to svm main code; finding overlapping rows of trials and st…
Nov 3, 2020
c603029
svm code: linking stimulus and trials dfs; adding image labels to the…
Nov 6, 2020
4e1f812
svm edits
Nov 16, 2020
6533736
correlation: control data resampled as in single beam mesoscope data
Nov 20, 2020
56ee593
bug fix
Nov 20, 2020
c4275be
correlation analysis: resample data to resemble single beam mesoscope…
Nov 24, 2020
97e2052
svm: block by block analysis codes added
Nov 25, 2020
04c23b8
using initial spontaneous frames to classify omissions against
Dec 5, 2020
bf3055a
svm block by block analysis codes for visualization
Dec 22, 2020
98a91b0
Merge branch 'dev' into feature/clustering
Dec 22, 2020
adefd4c
use events for svm
Dec 23, 2020
4e56158
svm fun: bug fixed for checking for the existence of all classes in t…
Dec 24, 2020
774e864
edits
Dec 24, 2020
9853e41
Merge branch 'dev' into feature/clustering
Dec 24, 2020
c61657c
svm plots codes updated to work with both whole session and block by …
Jan 19, 2021
d9aab82
fixing legend
Jan 19, 2021
d375867
Merge branch 'master' into feature/clustering
Jan 19, 2021
f99bba8
plotting codes updated ; minor
Jan 20, 2021
3f5f694
minor
Jan 20, 2021
60657d3
svm engagement analysis
Feb 8, 2021
ef1aebf
Merge branch 'master' into feature/clustering
Feb 8, 2021
3046f31
Merge branch 'dev' into feature/clustering
Feb 8, 2021
2060cc2
svm engagement
Feb 9, 2021
dc15992
Merge branch 'dev' into feature/clustering
Feb 9, 2021
b7bdaaf
svm engagement ready for cluster
Feb 9, 2021
edd6c19
updating correlation codes to take the March 2021 data release sessio…
Feb 11, 2021
4490b8c
Merge branch 'dev' into feature/clustering
Feb 11, 2021
f878fc4
minor
Feb 11, 2021
957ed5d
correlation analysis; data release sessions
Feb 12, 2021
a8ea6d4
bug fix
Feb 12, 2021
d1bae65
bug fix
Feb 12, 2021
bb234f5
bug fix
Feb 12, 2021
b931372
svm engagement running the remaining experiments of that very long se…
Feb 12, 2021
58787bc
bug fixes
Feb 13, 2021
6355cbe
bug fixes + using allen sdk for stimulus table
Feb 16, 2021
269cda9
Merge branch 'dev' into feature/clustering
Feb 16, 2021
9da6e42
edits
Feb 16, 2021
78c2ecb
bug fix
Feb 16, 2021
f860547
bug fix
Feb 16, 2021
5b8cc47
setting mouse history code made much cleaner
Feb 17, 2021
0180ea4
multiscope paper; data release sessions included
Feb 22, 2021
2c0cbfc
Merge branch 'dev' into feature/clustering
Feb 22, 2021
2f8dd31
svm engagement image decoding
Mar 5, 2021
c8fa323
Merge branch 'dev' into feature/clustering
Mar 5, 2021
5f4ba13
svm engagement divided trials; pupil; running
Mar 6, 2021
301b08c
bug fix
Mar 6, 2021
e9fa736
svm pupil engagement image selectivity figure codes updated to take d…
Mar 9, 2021
05f0f22
minor
Mar 9, 2021
53013be
correlation; control single beam data; debug
Mar 9, 2021
cbc785c
svm decoding change vs no change
Mar 25, 2021
d5426d5
bug fix
Mar 25, 2021
6b603b3
svm plots image change vs no change
Mar 25, 2021
ffa7798
Merge branch 'dev' into feature/clustering
Mar 25, 2021
756f3cc
correlating svm image change vs no change with behavioral strategy
Mar 26, 2021
4776e83
Merge branch 'dev' into feature/clustering
Apr 2, 2021
60c0e36
svm decode hit vs miss
Apr 3, 2021
1f08e0f
svm: adding the option for balanced trials
Apr 9, 2021
1b1d182
Merge branch 'dev' into feature/clustering
Apr 9, 2021
93093b7
bug fix
Apr 9, 2021
29360e1
svm: use balanced trials for training and testing datasets
Apr 12, 2021
c9b359b
Merge branch 'dev' into feature/clustering
Apr 12, 2021
78ce588
minor
Apr 12, 2021
64676b7
Merge branch 'summary_update' into feature/clustering
Apr 12, 2021
06cadf1
minor
Apr 12, 2021
1683991
minor
Apr 12, 2021
ac33958
minor
Apr 13, 2021
54fac50
bug
Apr 13, 2021
8b0c9a8
bug fix
Apr 13, 2021
758ae03
svm plots: summary across ophys stages
Apr 14, 2021
61e8961
stats code added for anova and tukey; also making stat bars in the plots
Apr 15, 2021
29ae393
edits
Apr 16, 2021
225f3fd
weird thing about bool(sys arg); removing it
Apr 17, 2021
77abc00
bug fix with sys.argv bool str int
Apr 17, 2021
8f15c00
svm : engagement added to trials df; cell specimen id added to df
Apr 24, 2021
025a75f
svm: run the analysis on only engaged times
Apr 24, 2021
24c456d
behavior strategy correlation with neural activity; scientifica svm a…
May 4, 2021
70620e8
only engaged in svm bug fixed
May 6, 2021
269436c
scientifica: hit miss decoding
Jun 9, 2021
9d9c657
svm scientifica visualization codes updated
Jun 12, 2021
9dade46
svm plotting: reorganizing codes + combining scientifica and mesoscop…
Jun 15, 2021
76a90ea
svm plots: pooling scientifica and mesoscope
Jun 17, 2021
fc54011
mesoscope: anova and stats for correlations
Jun 24, 2021
a0ee033
svm: baseline vs non baseline decoding incorporated into the svm imag…
Jul 7, 2021
788cfc6
slurm codes added
Jul 7, 2021
7ca72a7
bugs
Jul 7, 2021
1f7bff3
slurm params
Jul 8, 2021
36481be
test
Jul 8, 2021
f5eed7b
slurm debut
Jul 8, 2021
f2beed9
slurm test
Jul 8, 2021
4c333e0
slurm test
Jul 8, 2021
79f2f90
slurm: bug fix
Jul 8, 2021
32b9ad0
slurm: bug fix
Jul 8, 2021
50e0dca
slurm bug fix
Jul 8, 2021
511a838
slurm bug fix
Jul 8, 2021
f657969
slurm fix
Jul 8, 2021
06aa1c5
bug fix
Jul 8, 2021
474fc67
bug
Jul 8, 2021
8d9a853
bug
Jul 8, 2021
feda334
bug
Jul 8, 2021
fbe9b6d
bug
Jul 8, 2021
805def3
final slurm!
Jul 8, 2021
2ba0c01
bug
Jul 8, 2021
85b7faf
svm gray omit changes to baseline nobaseline
Jul 8, 2021
980e507
svm runs for the paper
Jul 9, 2021
e364796
svm paper figures
Jul 9, 2021
6fd6ec0
svm plots
Jul 12, 2021
c8232e6
svm baseline decoding debugged engagement
Jul 22, 2021
1989707
Merge branch 'master' into feature/clustering
Jul 22, 2021
43fd32f
experiments table sorting debug
Jul 22, 2021
775d1b9
svm next image from omissions debugging ; num classes <8 when using e…
Jul 23, 2021
f60ecc4
svm plots area comparison
Aug 4, 2021
7261f7c
Merge branch 'master' into feature/clustering
Aug 4, 2021
a143021
svm: using matched cells
Aug 12, 2021
219964e
bug fixed
Aug 12, 2021
f74befa
svm plotting update
Aug 18, 2021
88717ab
Merge branch 'master' into feature/clustering
farznaj Aug 18, 2021
1459694
minor
farznaj Sep 23, 2021
ba81fcd
Merge branch 'master' into feature/clustering
farznaj Sep 23, 2021
0c8d46e
cell matching for decoding started
farznaj Sep 24, 2021
d5775b7
Merge branch 'master' into feature/clustering
farznaj Sep 30, 2021
26682d6
using the final final dataset; nwb files on aws
farznaj Oct 2, 2021
0403c5b
Merge branch 'master' into feature/clustering
farznaj Oct 2, 2021
b9a4cd2
marina s notebooks
farznaj Oct 5, 2021
fec999f
marina s notebooks
farznaj Oct 5, 2021
69b9d54
stuff
farznaj Oct 8, 2021
51c17ef
stuff
farznaj Oct 12, 2021
4356a61
Merge branch 'master' into feature/clustering
farznaj Oct 12, 2021
c29005e
svm matched cells edits started
farznaj Oct 14, 2021
6a73690
Merge branch 'master' into feature/clustering
farznaj Oct 14, 2021
605eb38
svm matched cells 3 decoding experience levels
farznaj Oct 15, 2021
3d7e8db
corrections for task 1B
farznaj Oct 15, 2021
b78c2e4
bug
farznaj Oct 18, 2021
071e9cc
important bug fixed
farznaj Oct 19, 2021
0d404a0
bugs fixed for mesoscope data related to matched cells
farznaj Oct 20, 2021
9fa5006
container id and experience level added to svm file; additional edits…
farznaj Oct 22, 2021
d4c7ddd
important edits; plotting based on experience level; proper svm resul…
farznaj Oct 29, 2021
2b1a3f3
final: svm plotting codes for comparing experience levels for the paper
farznaj Oct 30, 2021
f892143
experience level visualization codes
farznaj Nov 1, 2021
a63288c
svm plots; pooled project codes; time course of decoding; interpolation
farznaj Nov 14, 2021
c0fea0f
clean anova tukey created for alex
farznaj Nov 16, 2021
a128ef3
further cleaning of the code for alex
farznaj Nov 16, 2021
4602fe8
further cleaning of the code for alex; again
farznaj Nov 16, 2021
83c2f99
anova tukey code turned to a clean function
farznaj Nov 16, 2021
e187917
further cleaning
farznaj Nov 16, 2021
1702b4f
further commenting
farznaj Nov 16, 2021
0d48757
further edits
farznaj Nov 16, 2021
07f9385
add tukey lines function added
farznaj Nov 16, 2021
5591df0
multiscope paper: noise correlation distributions
farznaj Nov 20, 2021
2fcbb53
full population decoding
farznaj Jan 4, 2022
c2bc414
test code to debug hpc
farznaj Jan 7, 2022
d8e1bde
further hpc debugging
farznaj Jan 7, 2022
b61dfc6
still debugging
farznaj Jan 7, 2022
53a426b
svm decoding traces, plots with shaded ste added; also svm code debug…
farznaj Jan 14, 2022
7d0dec6
further testing debugging
farznaj Jan 14, 2022
f3bbb12
codes added to make svm comparison of area and depth plots
farznaj Feb 19, 2022
2204aca
Merge branch 'master' into feature/clustering
farznaj Feb 22, 2022
b1dce7b
multiscope paper; reviewer comments; correlation heatmaps on the same…
farznaj Feb 25, 2022
ba23b2c
edits
farznaj Feb 28, 2022
608fada
Merge remote-tracking branch 'origin/master' into feature/clustering
farznaj Dec 2, 2022
svm plots; pooled project codes; time course of decoding; interpolation
farznaj committed Nov 14, 2021
commit a63288c2100a1033574f148e4d5b98c751326812
6 changes: 3 additions & 3 deletions visual_behavior/decoding_population/svm_images_init_pbs.py
@@ -20,11 +20,11 @@

#%% Define vars for svm_images analysis

-project_codes = 'VisualBehaviorMultiscope' #'VisualBehavior' # has to only include 1 project # project_codes : ['VisualBehaviorMultiscope', 'VisualBehaviorTask1B', 'VisualBehavior', 'VisualBehaviorMultiscope4areasx2d']
+project_codes = 'VisualBehavior' #'VisualBehavior' # has to only include 1 project # project_codes : ['VisualBehaviorMultiscope', 'VisualBehaviorTask1B', 'VisualBehavior', 'VisualBehaviorMultiscope4areasx2d']

# Note: the variable names 'to_decode' and 'trial_type' are confusing. The names really only make sense when we are decoding images (ie when trial_type is images/changes/omissions), in which case they mean we are decoding to_decode image (eg current image) from trial_type (eg images); otherwise, to_decode is useless (we just default it to 'current') and trial_type indicates what was decoded from what (eg hits_vs_misses)
-to_decode = 'current' # 'current' (default): decode current image. 'previous': decode previous image. 'next': decode next image. # remember for omissions, you cant do "current", bc there is no current image, it has to be previous or next!
-trial_type = 'changes_vs_nochanges' #'omissions' #'baseline_vs_nobaseline' #'hits_vs_misses' #'changes_vs_nochanges' # 'images_omissions', 'images', 'changes', 'omissions' # what trials to use for SVM analysis # the population activity of these trials at time time_win will be used to decode the image identity of flashes that occurred at their time 0 (if to_decode='current') or 750ms before (if to_decode='previous'). # if 'changes_vs_nochanges', we will decode image changes from no changes; in this case set to_decode to 'current', but it doesnt really matter. # 'baseline_vs_nobaseline' # decode activity at each frame vs. baseline (ie the frame before omission unless use_spont_omitFrMinus1 = 1 (see below))
+to_decode = 'next' # 'current' (default): decode current image. 'previous': decode previous image. 'next': decode next image. # remember for omissions, you cant do "current", bc there is no current image, it has to be previous or next!
+trial_type = 'omissions' #'omissions' #'baseline_vs_nobaseline' #'hits_vs_misses' #'changes_vs_nochanges' # 'images_omissions', 'images', 'changes', 'omissions' # what trials to use for SVM analysis # the population activity of these trials at time time_win will be used to decode the image identity of flashes that occurred at their time 0 (if to_decode='current') or 750ms before (if to_decode='previous'). # if 'changes_vs_nochanges', we will decode image changes from no changes; in this case set to_decode to 'current', but it doesnt really matter. # 'baseline_vs_nobaseline' # decode activity at each frame vs. baseline (ie the frame before omission unless use_spont_omitFrMinus1 = 1 (see below))
#### NOTE: svm codes will result in decoding 9 classes (8 images + omissions) when to_decode='previous' and trial_type='images'. (it wont happen when to_decode='current' because above we only include images for trial_type='images'; it also wont happen when trial_type='omissions' or 'changes', because changes and omissions are not preceded by omissions (although rarely we do see double omissions))
# if you want to also decode omissions (in addition to the 8 images) when to_decode='current', you should set trial_type='images_omissions'; HOWEVER, I dont think it's a good idea to mix image and omission aligned traces because omission aligned traces may have prediction/error signal, so it wont be easy to interpret the results bc we wont know if the decoding reflects image-evoked or image-prediction/error related signals.
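The comment block above effectively specifies which (`to_decode`, `trial_type`) combinations are valid. As a hedged illustration (the helper name and rule set below are assumptions for this sketch, not code in the repo), those rules could be made explicit:

```python
# Hypothetical sketch: encode the to_decode / trial_type rules described
# in the comments above. Names and structure are illustrative only.

VALID_TRIAL_TYPES = {
    'images', 'changes', 'omissions', 'images_omissions',
    'changes_vs_nochanges', 'hits_vs_misses', 'baseline_vs_nobaseline',
}

def check_svm_config(to_decode, trial_type):
    """Raise ValueError for combinations the comments above rule out."""
    if trial_type not in VALID_TRIAL_TYPES:
        raise ValueError(f'unknown trial_type: {trial_type!r}')
    if to_decode not in ('current', 'previous', 'next'):
        raise ValueError(f'unknown to_decode: {to_decode!r}')
    # omission-aligned trials have no "current" image to decode
    if trial_type == 'omissions' and to_decode == 'current':
        raise ValueError("for omissions, to_decode must be 'previous' or 'next'")
    return True

check_svm_config('next', 'omissions')  # the combination set in this commit
```

A validator like this would fail fast on the cluster instead of producing an SVM run with meaningless labels.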

@@ -658,8 +658,10 @@ def svm_images_main_pre_pbs(isess, project_codes, use_events, to_decode, trial_t

#%% Run the SVM function

+c = data_list['cre_line'].iloc[0][:3]
+s = data_list['session_type'].iloc[0]
 # numSamples = 2
-print('\n\n======================== Analyzing session %d, %d/%d ========================\n' %(session_id, isess, len(list_all_sessions_valid)))
+print(f'\n\n======================== Analyzing {c}, {s}\nsession %d, %d/%d ========================\n' %(session_id, isess, len(list_all_sessions_valid)))

# Use below if you set session_data and session_trials above: for VIP and SST
svm_images_main_pbs(session_id, data_list, experiment_ids_valid, df_data, session_trials, trial_type, dir_svm, kfold, frames_svm, numSamples, saveResults, cols_basic, cols_svm, project_codes, to_decode, svm_blocks, engagement_pupil_running, use_events, same_num_neuron_all_planes, use_balanced_trials, use_spont_omitFrMinus1, use_matched_cells)
@@ -10,25 +10,73 @@

"""


##############################################################################################################################
import matplotlib.gridspec as gridspec
import seaborn
import visual_behavior.visualization.utils as utils
import scipy.stats as st
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import (pairwise_tukeyhsd, MultiComparison)

sigval = .05 # value for ttest significance
fmt_all = ['o', 'x']
fmt_now = fmt_all[0]
if baseline_subtract: # subtract the baseline (CA average during baseline, ie before time 0) from the evoked CA (classification accuracy)
ylabs = '% Class accuracy rel. baseline' #'Amplitude'
else:
ylabs = '% Classification accuracy' #'Amplitude'
cres = ['Slc17a7', 'Sst', 'Vip']



##############################################################################################################################
##############################################################################################################################
#%% Plot response amplitude for **experience levels**: errorbars comparing SVM decoding accuracy across experience levels; also do anova/tukey
#%% Plot response amplitude for **experience levels**:
# errorbars comparing SVM decoding accuracy (averaged across all experiments) across experience levels;
# also do anova/tukey
##############################################################################################################################
##############################################################################################################################



##############################################################################################################################
#%% Compute p values and ttest stats between actual and shuffled
##############################################################################################################################

#%% Make errorbars for response amplitude (averaged across all experiments for each experience level)

import visual_behavior.visualization.utils as utils
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import (pairwise_tukeyhsd, MultiComparison)
p_act_shfl = np.full((len(cres), len(exp_level_all)), np.nan)
icre = -1
for crenow in cres: # crenow = cres[0]
icre = icre+1
iexpl = -1
for expl in exp_level_all: # expl = exp_level_all[0]
iexpl = iexpl+1

cres = ['Slc17a7', 'Sst', 'Vip']
a = svm_df[np.logical_and(svm_df['cre_allPlanes']==crenow , svm_df['experience_levels']==expl)]

a_amp = np.vstack(a['peak_amp_allPlanes_allExp'].values)[:,1] # n_exp
b_amp = np.vstack(a['peak_amp_allPlanes_allExp'].values)[:,2] # n_exp
print(a_amp.shape, b_amp.shape)
print(sum(~np.isnan(a_amp)), sum(~np.isnan(b_amp)))

_, p = st.ttest_ind(a_amp, b_amp, nan_policy='omit') #, axis=1, equal_var=equal_var)
p_act_shfl[icre, iexpl] = p

p_act_shfl_sigval = p_act_shfl+0
p_act_shfl_sigval[p_act_shfl <= sigval] = 1
p_act_shfl_sigval[p_act_shfl > sigval] = np.nan

print(f'\n---------------------\n')
print(f'Actual vs. shuffled data significance, for each experience level')
print(p_act_shfl_sigval)
print(f'\n---------------------\n')




##############################################################################################################################
#%% Do stats; for each cre line, are the 3 experience levels significantly different? do anova; then tukey
##############################################################################################################################

#%% Do stats; for each cre line, are the 3 experience levels significantly different? anova; then tukey
if np.isnan(svm_blocks) or svm_blocks==-101: # svm was run on the whole session (no block by block analysis)

tukey_all = []
@@ -101,7 +149,9 @@



##############################################################################################################################
#%% Plot error bars for the SVM decoding accuracy across the 3 experience levels, for each cre line
##############################################################################################################################

colors = utils.get_experience_level_colors() # will always be in order of Familiar, Novel 1, Novel >1

@@ -197,6 +247,8 @@
ax.set_title(f'data\n{areasn[iax]}', y=1.1)
if ax==ax2:
ax.set_title(f'data-shuffle\n{areasn[iax]}', y=1.1)
if ax==ax3: # print number of experiments per experience level
ax.set_title(f"n experiments\n{df['n_experiments'].values.astype(int)}", y=1.1)


####### add legend
@@ -304,8 +356,7 @@
if len(project_codes_all)==1:
fgn = f'{fgn}_frames{frames_svm[0]}to{frames_svm[-1]}'
fgn = fgn + '_ClassAccur'
# if project_codes_all == ['VisualBehavior']:
# fgn = f'{fgn}_{project_codes_all[0]}'
fgn = f'{fgn}_allProjects'

if len(project_codes_all)==1:
pcn = project_codes_all[0] + '_'
@@ -317,7 +368,7 @@

fgn = f'{fgn}_{pcn}'

-nam = f'{crenow[:3]}{whatSess}_{bln}_aveSessPooled{fgn}_{now}'
+nam = f'{crenow[:3]}{whatSess}_{bln}_aveExpPooled{fgn}_{now}'

fign = os.path.join(dir0, 'svm', dir_now, nam+fmt)
print(fign)
@@ -327,4 +378,12 @@




#%% Print these p values

print(f'\n---------------------\n')
print(f'Actual vs. shuffled data significance, for each experience level')
print(p_act_shfl_sigval)
print(f'\n---------------------\n')
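The actual-vs-shuffled comparison in this file boils down to one two-sample t-test per (cre line, experience level) cell. A self-contained sketch on dummy accuracies (sample sizes, means, and the random data are illustrative assumptions, not the repo's data):

```python
import numpy as np
import scipy.stats as st

sigval = 0.05                                   # ttest significance threshold
rng = np.random.default_rng(0)
cres = ['Slc17a7', 'Sst', 'Vip']                # cre lines, as above
exp_level_all = ['Familiar', 'Novel 1', 'Novel >1']  # experience levels

p_act_shfl = np.full((len(cres), len(exp_level_all)), np.nan)
for icre, crenow in enumerate(cres):
    for iexpl, expl in enumerate(exp_level_all):
        # dummy per-experiment decoding accuracies (%): actual vs shuffled labels
        a_amp = rng.normal(60.0, 5.0, size=30)   # actual-label accuracy
        b_amp = rng.normal(12.5, 5.0, size=30)   # shuffled-label control
        _, p = st.ttest_ind(a_amp, b_amp, nan_policy='omit')
        p_act_shfl[icre, iexpl] = p

# 1 where actual differs significantly from shuffled, NaN otherwise
p_act_shfl_sigval = np.where(p_act_shfl <= sigval, 1.0, np.nan)
print(p_act_shfl_sigval)
```

With well-separated dummy means every cell comes out significant; on real data the NaN cells are what flag experience levels whose decoding does not beat chance.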


@@ -349,7 +349,7 @@ def run_anova_tukey():
a_amp_pooled = np.array([a_amp[inds_pooled[idepth]].flatten() for idepth in range(num_depth)])
b_amp_pooled = np.array([b_amp[inds_pooled[idepth]].flatten() for idepth in range(num_depth)])

-# print(a_amp.shape, b_amp_pooled.shape)
+# print(a_amp_pooled.shape, b_amp_pooled.shape)

_, p = st.ttest_ind(a_amp, b_amp, nan_policy='omit', axis=1, equal_var=equal_var)
p_act_shfl[icre, istage, :] = p
@@ -0,0 +1,191 @@
"""
Gets called in svm_images_plots_setVars.py

Here, we use svm_df from all projects to plot the decoding traces (timeseries), averaged across project codes, for each experience level

Vars needed here are set in svm_images_plots_setVars_sumMice3_svmdf.py

Created on Sat Nov 13 09:26:05 2021
@author: farzaneh

"""

# scientifica
'''
frame_dur
0.032

time_trace
array([-0.48 , -0.448, -0.416, -0.384, -0.352, -0.32 , -0.288, -0.256,
-0.224, -0.192, -0.16 , -0.128, -0.096, -0.064, -0.032, 0. ,
0.032, 0.064, 0.096, 0.128, 0.16 , 0.192, 0.224, 0.256,
0.288, 0.32 , 0.352, 0.384, 0.416, 0.448, 0.48 , 0.512,
0.544, 0.576, 0.608, 0.64 , 0.672, 0.704])
'''

# mesoscope
'''
frame_dur
0.093

time_trace
array([-0.465, -0.372, -0.279, -0.186, -0.093, 0. , 0.093, 0.186,
0.279, 0.372, 0.465, 0.558, 0.651])
'''
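Both timebases quoted above are integer multiples of the stated frame durations, so they can be regenerated rather than hard-coded; a small sketch (frame counts inferred from the arrays above):

```python
import numpy as np

# scientifica: 0.032 s frames, 15 before time 0 and 23 from 0 onward (38 points)
time_trace_vb = np.arange(-15, 23) * 0.032
# mesoscope: 0.093 s frames, 5 before time 0 and 8 from 0 onward (13 points)
time_trace_ms = np.arange(-5, 8) * 0.093
```

This reproduces -0.48…0.704 s and -0.465…0.651 s exactly (up to float rounding), which also documents where the literal arrays below come from.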

# svm_df_all.keys()
# svm_df_all['project_code'].unique()


#########################

time_trace_vb = np.array([-0.48 , -0.448, -0.416, -0.384, -0.352, -0.32 , -0.288, -0.256,
-0.224, -0.192, -0.16 , -0.128, -0.096, -0.064, -0.032, 0. ,
0.032, 0.064, 0.096, 0.128, 0.16 , 0.192, 0.224, 0.256,
0.288, 0.32 , 0.352, 0.384, 0.416, 0.448, 0.48 , 0.512,
0.544, 0.576, 0.608, 0.64 , 0.672, 0.704])

time_trace_ms = np.array([-0.465, -0.372, -0.279, -0.186, -0.093, 0. , 0.093, 0.186,
0.279, 0.372, 0.465, 0.558, 0.651])


colors = utils.get_experience_level_colors() # will always be in order of Familiar, Novel 1, Novel >1

plt.figure(figsize=(16,4))
icre = 0
for cren in cres: # cren = cres[0]
icre = icre+1
plt.subplot(1,3,icre)
iel = -1
h = []
for el in exp_level_all: # el = exp_level_all[0]
iel = iel+1
sdf = svm_df_all[svm_df_all['project_code']=='VisualBehaviorMultiscope']
sdf = sdf[sdf['cre_allPlanes']==cren]
sdf = sdf[sdf['experience_levels']==el]
traces_ms = np.vstack(sdf['av_test_data_allPlanes'].values)
# print(traces_ms.shape)

sdf = svm_df_all[svm_df_all['project_code']=='VisualBehavior']
sdf = sdf[sdf['cre_allPlanes']==cren]
sdf = sdf[sdf['experience_levels']==el]
traces_vb = np.vstack(sdf['av_test_data_allPlanes'].values)
# print(traces_vb.shape)

sdf = svm_df_all[svm_df_all['project_code']=='VisualBehaviorTask1B']
sdf = sdf[sdf['cre_allPlanes']==cren]
sdf = sdf[sdf['experience_levels']==el]
traces_1b = np.vstack(sdf['av_test_data_allPlanes'].values)
# print(traces_1b.shape)

### for omission decoding, set the frame right before omission to nan, so we don't have to deal with the dip we see there (explained in OneNote, Research, SVM notes); in brief it happens because frame -1 represents one of the classes, so when training the SVM on that frame the same data represents both classes; the classifier then gets trained on a certain class for a given observation but, in the testing set, sees a different class for that same observation, hence the below-chance performance.
if trial_type=='baseline_vs_nobaseline':
traces_ms[:, np.argwhere(time_trace_ms==0)-1] = np.nan
traces_vb[:, np.argwhere(time_trace_vb==0)-1] = np.nan
traces_1b[:, np.argwhere(time_trace_vb==0)-1] = np.nan
omit_aligned = 1
else:
omit_aligned = 0

### upsample mesoscope traces to match scientifica data
x = time_trace_vb
xp = time_trace_ms
traces_ms_interp = []
for i in range(traces_ms.shape[0]):
fp = traces_ms[i]
traces_ms_interp.append(np.interp(x, xp, fp))
traces_ms_interp = np.array(traces_ms_interp)
print(traces_ms_interp.shape)


############################### plots ###############################
# plot individual project codes
# plt.plot(xp, np.nanmean(traces_ms, axis=0), color='b')
# plt.plot(x, np.nanmean(traces_ms_interp, axis=0), color='r')
# plt.plot(x, np.nanmean(traces_vb, axis=0), color='k')
# plt.plot(x, np.nanmean(traces_1b, axis=0), color='g')


# plot the average traces across project codes
m = np.concatenate((traces_ms_interp, traces_vb, traces_1b))
print(m.shape)

hn = plt.plot(x, np.nanmean(m, axis=0), color=colors[iel], label=el)
h.append(hn)


#### done with all exp levels for a given cre line
handles, labels = plt.gca().get_legend_handles_labels();
# lims = [np.min(np.min(lims_v1lm, axis=1)), np.max(np.max(lims_v1lm, axis=1))]
plot_flashLines_ticks_legend([], handles, flashes_win_trace_index_unq_time, grays_win_trace_index_unq_time, time_trace_vb, xmjn=xmjn, bbox_to_anchor=bb, ylab=ylabel, xlab='Time rel. trial onset (sec)', omit_aligned=omit_aligned)
plt.xlim(xlim);
plt.title(cren, fontsize=13, y=1); # np.unique(area)
# mark time_win: the window over which the response quantification (peak or mean) was computed
lims = plt.gca().get_ylim();
plt.hlines(lims[1], time_win[0], time_win[1], color='gray')
plt.subplots_adjust(wspace=.8)


#%%
if dosavefig:

whatSess = f'_timeCourse_experienceLevels'

fgn = '' #f'{whatSess}'
if same_num_neuron_all_planes:
fgn = fgn + '_sameNumNeursAllPlanes'

if svm_blocks==-1:
word = 'engaged_disengaged_blocks_'
elif svm_blocks==-101:
word = 'only_engaged_'
elif ~np.isnan(svm_blocks):
word = 'blocks_'
else:
word = ''

if use_events:
word = word + 'events'

# frames_svmn = np.arange(-15,23)
frames_svmf = -np.argwhere(time_trace_vb==0).squeeze()
frames_svml = len(time_trace_vb)-np.argwhere(time_trace_vb==0).squeeze()-1

fgn = f'{fgn}_{word}_frames{frames_svmf}to{frames_svml}'
fgn = fgn + '_ClassAccur'
fgn = f'{fgn}_allProjects'

nam = f'AllCre{whatSess}_aveExpPooled{fgn}_{now}'
fign = os.path.join(dir0, 'svm', dir_now, nam+fmt)
print(fign)

plt.savefig(fign, bbox_inches='tight') # , bbox_extra_artists=(lgd,)






'''
# build a tidy dataframe

list_of_cell_dfs = []
for i in range(traces.shape[0]): # loop over sessions
cell_df = pd.DataFrame({
'timestamps': time_trace,
'decoded_events': traces[i]})

    # append the dataframe for this session to the list of dataframes
list_of_cell_dfs.append(cell_df)

# concatenate all dataframes in the list
tidy_df = pd.concat(list_of_cell_dfs)
tidy_df

tidy_df.shape
np.prod(np.shape(traces))  # np.product is deprecated; use np.prod

neural_data = tidy_df

'''
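The commented-out tidy-dataframe sketch above can be exercised end to end; a minimal runnable version with a synthetic `traces` array (the shapes and the `session_index` column are hypothetical, for illustration only):

```python
import numpy as np
import pandas as pd

# synthetic stand-ins for the variables used above (hypothetical shapes)
n_sessions, n_frames = 5, 40
time_trace = np.linspace(-0.5, 0.75, n_frames)   # timestamps, sec
traces = np.random.rand(n_sessions, n_frames)    # decoding traces: sessions x frames

# build one long-format ("tidy") dataframe: one row per (session, timestamp)
list_of_dfs = []
for i in range(traces.shape[0]):
    list_of_dfs.append(pd.DataFrame({
        'session_index': i,
        'timestamps': time_trace,
        'decoded_events': traces[i]}))

tidy_df = pd.concat(list_of_dfs, ignore_index=True)

# sanity check: number of rows equals the number of elements in traces
assert tidy_df.shape[0] == np.prod(traces.shape)
```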

22 changes: 19 additions & 3 deletions visual_behavior/decoding_population/svm_images_plots_init.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,8 @@
After this script, run svm_images_plots_setVars.py to make plots.
Note: run this script for individual project codes to save their all_sess files.
Created on Tue Oct 13 20:48:43 2020
@author: farzaneh
@@ -35,10 +37,10 @@

#%% Set vars

project_codes = ['VisualBehaviorMultiscope'] # ['VisualBehaviorMultiscope'] # ['VisualBehaviorMultiscope', 'VisualBehaviorTask1B', 'VisualBehavior', 'VisualBehaviorMultiscope4areasx2d']
project_codes = ['VisualBehaviorTask1B'] # ['VisualBehaviorMultiscope'] # ['VisualBehaviorMultiscope', 'VisualBehaviorTask1B', 'VisualBehavior', 'VisualBehaviorMultiscope4areasx2d']

to_decode = 'current' #'next' # 'current' (default): decode current image. 'previous': decode previous image. 'next': decode next image.
trial_type = 'changes' #'changes' #'baseline_vs_nobaseline' #'hits_vs_misses' #'changes_vs_nochanges' #'omissions' # 'omissions', 'images', 'changes' # what trials to use for SVM analysis # the population activity of these trials at time time_win will be used to decode the image identity of flashes that occurred at their time 0 (if to_decode='current') or 750ms before (if to_decode='previous'). # 'baseline_vs_nobaseline' # decode activity at each frame vs. baseline (ie the frame before omission unless use_spont_omitFrMinus1 = 1 (see below))
to_decode = 'next' #'next' # 'current' (default): decode current image. 'previous': decode previous image. 'next': decode next image.
trial_type = 'omissions' #'changes' #'baseline_vs_nobaseline' #'hits_vs_misses' #'changes_vs_nochanges' #'omissions' # 'omissions', 'images', 'changes' # what trials to use for SVM analysis # the population activity of these trials at time time_win will be used to decode the image identity of flashes that occurred at their time 0 (if to_decode='current') or 750ms before (if to_decode='previous'). # 'baseline_vs_nobaseline' # decode activity at each frame vs. baseline (ie the frame before omission unless use_spont_omitFrMinus1 = 1 (see below))

use_events = True #False # whether to run the analysis on detected events (inferred spikes) or dff traces.
svm_blocks = np.nan #-101 #np.nan # -1: divide trials based on engagement #2 # number of trial blocks to divide the session to, and run svm on. # set to np.nan to run svm analysis on the whole session
@@ -190,6 +192,20 @@
list_all_sessions_valid_matched = df[df['project_code']==project_codes[0]]['ophys_session_id'].unique() # note that if you get ophys experiments it has to be a multiplication of 3. (not ophys sessions.)
list_all_sessions_valid_matched = np.sort(list_all_sessions_valid_matched)





# TEMPORARY WORKAROUND: remove this session.
# PROPER FIX: switch to SDK branch rc/2.13.2; dataset.running_speed has issues for this session.
print(len(list_all_sessions_valid_matched))
list_all_sessions_valid_matched = list_all_sessions_valid_matched[list_all_sessions_valid_matched!=795625712] # it's a novel 1 session, VB project
print(len(list_all_sessions_valid_matched))





b = len(list_all_sessions_valid_matched) / len(list_all_sessions_valid)
print(f'{len(list_all_sessions_valid_matched)}/{len(list_all_sessions_valid)}, {b*100:.0f}% of {project_codes} sessions have matched cells in the 3 experience levels.')

134 changes: 76 additions & 58 deletions visual_behavior/decoding_population/svm_images_plots_setVars.py
@@ -92,10 +92,12 @@

#%% Set the following vars

project_codes = ['VisualBehavior'], ['VisualBehaviorTask1B'], ['VisualBehaviorMultiscope'] # pooled: ['VisualBehavior'], ['VisualBehaviorMultiscope'] # ['VisualBehaviorMultiscope'] # ['VisualBehaviorMultiscope', 'VisualBehaviorTask1B', 'VisualBehavior', 'VisualBehaviorMultiscope4areasx2d']
project_codes = ['VisualBehavior'], ['VisualBehaviorTask1B'], ['VisualBehaviorMultiscope'] # pooled project codes for the paper: ['VisualBehavior'], ['VisualBehaviorTask1B'], ['VisualBehaviorMultiscope'] # if making plots for multiple project codes: ['VisualBehavior'], ['VisualBehaviorMultiscope'] # if making plots for a single project code: ['VisualBehaviorTask1B'] # all project codes: 'VisualBehaviorMultiscope', 'VisualBehaviorTask1B', 'VisualBehavior', 'VisualBehaviorMultiscope4areasx2d'

dosavefig = 1 # 0

to_decode = 'current' # 'current': decode current image. # 'previous': decode previous image. # 'next': decode next image.
trial_type = 'changes' # 'baseline_vs_nobaseline' # 'omissions' # 'changes' # 'hits_vs_misses' # 'changes_vs_nochanges' # 'images'# what trials to use for SVM analysis # the population activity of these trials at time time_win will be used to decode the image identity of flashes that occurred at their time 0 (if to_decode='current') or 750ms before (if to_decode='previous'). # eg 'omissions' means to use omission-aligned traces # 'baseline_vs_nobaseline' # decode activity at each frame vs. baseline (ie the frame before omission unless use_spont_omitFrMinus1 = 1 (see below))
trial_type = 'changes' # 'baseline_vs_nobaseline' # 'omissions' # 'changes' # 'changes_vs_nochanges' # 'hits_vs_misses' # 'images'# what trials to use for SVM analysis # the population activity of these trials at time time_win will be used to decode the image identity of flashes that occurred at their time 0 (if to_decode='current') or 750ms before (if to_decode='previous'). # eg 'omissions' means to use omission-aligned traces # 'baseline_vs_nobaseline' # decode activity at each frame vs. baseline (ie the frame before omission unless use_spont_omitFrMinus1 = 1 (see below))
# Note: when trial_type is 'hits_vs_misses' or 'changes_vs_nochanges', to_decode will be 'current' and won't really make sense.
# in all other cases, we decode "to_decode" image from "trial_type", e.g. we decode 'current' image from 'changes' (ie change-aligned traces)

@@ -108,7 +110,6 @@
baseline_subtract = 0 #1 # subtract the baseline (CA average during baseline, ie before time 0) from the evoked CA (classification accuracy)

summary_which_comparison = [3, 4, 6] #'all' # an array of session numbers: [3, 4, 6] or the following strings: # 'novelty' # 'engagement' # 'all' # determines sessions to use for plotting summary of ophys stages in svm_images_plots_compare_ophys_stages.py # 'novelty' will use [1,3,4,6] # 'engagement' will use [1,2,3] # 'all' will use [1,2,3,4,5,6]
dosavefig = 1 # 0
fmt = '.pdf' # '.png' # '.svg'


@@ -350,12 +351,13 @@
svm_allMice_sessAvSd0 = copy.deepcopy(svm_allMice_sessAvSd)


######### set vars to make mouse-averaged plots for each ophys stage
######### set vars to make decoding timecourse plots, mouse-averaged, for each ophys stage
# also set summary_vars_all (a df that includes response amplitude, computed from svm_allMice_sessPooled) which will be used in svm_images_plots_compare_ophys_stages
exec(open('svm_images_plots_setVars_sumMice2.py').read())

# make mouse-averaged plots
exec(open('svm_images_plots_sumMice.py').read())
# it gets called in svm_images_plots_setVars_sumMice2.py
# exec(open('svm_images_plots_sumMice.py').read())


######### compare quantifications across ophys stages
@@ -460,6 +462,10 @@

else: # pooling data across multiple project codes

#####################################################################################
#%% Set svm_allMice_sessPooled_allprojects and svm_allMice_sessAvSd_allprojects

all_sess0_allprojects = []
svm_allMice_sessPooled_allprojects = []
svm_allMice_sessAvSd_allprojects = []

@@ -490,8 +496,6 @@

#%%
cols_each = colorOrder(num_planes)



#%% Set svm vars for each plane across all sessions (for each mouse)
if project_codes != ['VisualBehaviorMultiscope']: # remove area/layer pooled columns
@@ -533,7 +537,7 @@
columns = columns0

#####################################
#%% Set svm_this_plane_allsess
#%% Loads and concatenates all_sess from all cre lines into a df called all_sess0. Also sets svm_this_plane_allsess.
#####################################

exec(open('svm_images_plots_setVars2.py').read())
@@ -545,50 +549,48 @@
# note: svm_allMice_sessPooled will be used to set summary_vars_all, which is a key parameter in svm_images_plots_compare_ophys_stages.py
exec(open('svm_images_plots_setVars_sumMice.py').read())


all_sess0_allprojects.append(all_sess0)
svm_allMice_sessPooled_allprojects.append(svm_allMice_sessPooled)
svm_allMice_sessAvSd_allprojects.append(svm_allMice_sessAvSd)


#####################################
#%% Pool data from both project codes
#####################################

# svm_allMice_sessPooled0_vb.iloc[0]['av_test_shfl_allPlanes'].shape
# (1, 34, 38)
# svm_allMice_sessPooled0_vbm.iloc[0]['av_test_shfl_allPlanes'].shape
# (8, 21, 13)

# svm_allMice_sessPooled0_vb.iloc[0]['peak_amp_allPlanes'].shape
# (1, 34, 4)
# svm_allMice_sessPooled0_vbm.iloc[0]['peak_amp_allPlanes'].shape
# (8, 21, 4)



#%% turn the vars into a single long vector, independent of the area/depth


####################################################
#%% make svm_df which is a proper pandas table including svm response amplitude results for each experiment ##################
# these dfs will be used to make summary plots across experience levels, and also for doing stats.

# for each project code: concatenate data from all planes and sessions
pa_allpr = []
svm_df_allpr = []
resp_amp_sum_df_allpr = [] # we don't really use this; instead we set it in "svm_images_plots_setVars_sumMice3_resp_sum" for all project codes pooled (ie ave and sd of resp amp across all project codes)
for ipc in range(len(project_codes_all)):

# svm_allMice_sessPooled_allprojects[ipc].iloc[0]['peak_amp_allPlanes'].shape
pa_now = np.vstack(svm_allMice_sessPooled_allprojects[ipc].iloc[0]['peak_amp_allPlanes'])
pa_now.shape
project_code = project_codes_all[ipc]
all_sess0 = all_sess0_allprojects[ipc]
svm_allMice_sessPooled0 = svm_allMice_sessPooled_allprojects[ipc]

pa_allpr.append(pa_now)

exec(open('svm_images_plots_setVars_sumMice3_svmdf.py').read())

# now concatenate data from all projects
pa_pooled_projects = np.concatenate((pa_allpr), axis=0) # pooled_session_planes_projects x 4
pa_pooled_projects.shape
svm_df_allpr.append(svm_df)
resp_amp_sum_df_allpr.append(resp_amp_sum_df)


################## set resp_amp_sum_df: it includes the average of response amplitude across all experiments of all projects ##################

exec(open('svm_images_plots_setVars_sumMice3_resp_sum.py').read())


################## compare quantifications across experience levels ##################

exec(open('svm_images_plots_compare_experience_levels.py').read())


################## plot decoding traces for each experience level ##################
exec(open('svm_images_plots_compare_traces_experience_levels.py').read())

######################################################
#%% set vars and plot traces and quantifications for each ophys stage

# for each project code: set vars to make mouse-averaged plots
######################################################
#%% set summary_vars_all, a dataframe that includes response amplitude (computed from svm_allMice_sessPooled); it will be used in svm_images_plots_compare_ophys_stages.py

summary_vars_allpr = []
for ipc in range(len(project_codes_all)):

@@ -597,28 +599,10 @@

project_codes = project_codes_all[ipc]

# sets summary_vars_all, a dataframe that includes response amplitude (computed from svm_allMice_sessPooled); it will be used in svm_images_plots_compare_ophys_stages.py
# if len(project_codes_all)==1, it calls svm_images_plots_sumMice.py to make mouse-averaged plots for each ophys stage.
exec(open('svm_images_plots_setVars_sumMice2.py').read())

summary_vars_allpr.append(summary_vars_all)


# make mouse-averaged plots
# exec(open('svm_images_plots_sumMice.py').read())



################## make svm_df which is a proper pandas table including svm response amplitude results for each experiment ##################
# svm_df will be used to make summary plots across experience levels

exec(open('svm_images_plots_setVars_sumMice3_svmdf.py').read())


################## compare quantifications across ophys experience levels ##################

exec(open('svm_images_plots_compare_ophys_experience_levels.py').read())



################## compare quantifications across ophys stages ##################

@@ -632,4 +616,38 @@





#####################################
#%% Pool data from both project codes
#####################################

# svm_allMice_sessPooled0_vb.iloc[0]['av_test_shfl_allPlanes'].shape
# (1, 34, 38)
# svm_allMice_sessPooled0_vbm.iloc[0]['av_test_shfl_allPlanes'].shape
# (8, 21, 13)

# svm_allMice_sessPooled0_vb.iloc[0]['peak_amp_allPlanes'].shape
# (1, 34, 4)
# svm_allMice_sessPooled0_vbm.iloc[0]['peak_amp_allPlanes'].shape
# (8, 21, 4)


#%% turn the variables into a single long vector, independent of the area/depth
'''
# for each project code: concatenate data from all planes and sessions
pa_allpr = []
for ipc in range(len(project_codes_all)):
# svm_allMice_sessPooled_allprojects[ipc].iloc[0]['peak_amp_allPlanes'].shape
pa_now = np.vstack(svm_allMice_sessPooled_allprojects[ipc].iloc[0]['peak_amp_allPlanes']) # you are doing iloc[0] so you are taking data only from one session
print(pa_now.shape)
pa_allpr.append(pa_now)
# now concatenate data from all projects
pa_pooled_projects = np.concatenate((pa_allpr), axis=0) # pooled_session_planes_projects x 4
pa_pooled_projects.shape
'''

42 changes: 37 additions & 5 deletions visual_behavior/decoding_population/svm_images_plots_setVars2.py
@@ -2,7 +2,9 @@
# -*- coding: utf-8 -*-
"""
Gets called in svm_images_plots_setVars.py (Read the comments in that script for more info.)
Set svm_this_plane_allsess.
Loads and concatenates all_sess from all cre lines into a df called all_sess0
Also sets svm_this_plane_allsess.
Created on Tue Oct 20 13:56:00 2020
@author: farzaneh
@@ -164,9 +166,15 @@ def pool_sesss_areas_eachDepth(planes_allsess, y, num_depth=4):
# iblock = np.nan




#########################################################################
#########################################################################
#########################################################################
#%% Load all_sess dataframe for all cre lines, for a given block

#########################################################################
#########################################################################
#########################################################################

all_sess = pd.DataFrame()
for ia in range(len(allSessName)):
print(f'Loading: {allSessName[ia]}')
@@ -229,7 +237,29 @@ def pool_sesss_areas_eachDepth(planes_allsess, y, num_depth=4):
all_sess0[['cre', 'stage', 'experience_level', 'mouse_id', 'session_id', 'experiment_id']].groupby(['experience_level', 'cre']).count()


###################################################################
#%% Set time trace (timestamps for the decoding trace)
###################################################################

frame_dur = all_sess0['frame_dur'].mode().values[0]
print(f'frame duration: {frame_dur}')

if type(time_win)==str:
time_win = (frames_svm*frame_dur)[[0,-1]]

# set the entire time_trace for the flash-aligned traces, on which we applied frames_svm to get svm results.
samps_bef_time = (samps_bef+1) * frame_dur # 1 is added because below we use np.arange(0, -samps_bef_time, -frame_dur), so we get up to one value below samps_bef
samps_aft_time = samps_aft * frame_dur # frames_after_omission in svm_main # we trained the classifier until 30 frames after omission
time_trace0 = np.unique(np.concatenate((np.arange(0, -samps_bef_time, -frame_dur)[0:samps_bef+1], np.arange(0, samps_aft_time, frame_dur)[0:samps_aft])))

# set trace_time corresponding to svm traces
rt = np.arange(samps_bef+frames_svm[0] , min(len(time_trace0), samps_bef+frames_svm[-1]+1))
# rt = samps_bef+frames_svm
time_trace = time_trace0[rt]
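As a sanity check on the timestamp construction above, a small self-contained sketch; the values for `frame_dur`, `samps_bef`, `samps_aft`, and `frames_svm` are hypothetical stand-ins:

```python
import numpy as np

frame_dur = 0.032                      # hypothetical frame duration, sec
samps_bef, samps_aft = 15, 23          # frames before / after the alignment event
frames_svm = np.arange(-15, 23)        # frames used for SVM, rel. alignment

# mirror of the construction above: negative times, then non-negative times
samps_bef_time = (samps_bef + 1) * frame_dur
samps_aft_time = samps_aft * frame_dur
time_trace0 = np.unique(np.concatenate(
    (np.arange(0, -samps_bef_time, -frame_dur)[0:samps_bef + 1],
     np.arange(0, samps_aft_time, frame_dur)[0:samps_aft])))

# one timestamp per frame, with time 0 at the alignment event
assert len(time_trace0) == samps_bef + samps_aft
assert np.isclose(time_trace0[samps_bef], 0)

# restrict to the frames used by the SVM, as in the script above
rt = np.arange(samps_bef + frames_svm[0],
               min(len(time_trace0), samps_bef + frames_svm[-1] + 1))
time_trace = time_trace0[rt]
```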



###################################################################
#%% Set the stage and experience level for each session in all_sess

session_stage_df = pd.DataFrame([], columns=['session_id', 'stage', 'experience_level'])
@@ -494,7 +524,9 @@ def pool_sesss_areas_eachDepth(planes_allsess, y, num_depth=4):
#%% Set a number of useful variables
######################################################################################################

#%%
#%%
# below moved up
'''
frame_dur = all_sess['frame_dur'].mode().values[0]
print(f'frame duration: {frame_dur}')
@@ -510,7 +542,7 @@ def pool_sesss_areas_eachDepth(planes_allsess, y, num_depth=4):
rt = np.arange(samps_bef+frames_svm[0] , min(len(time_trace0), samps_bef+frames_svm[-1]+1))
# rt = samps_bef+frames_svm
time_trace = time_trace0[rt]

'''

xlim = [time_trace[0], time_trace[-1]] #[-1.2, 2.25] # [-13, 24]

@@ -0,0 +1,63 @@
"""
Gets called in svm_images_plots_setVars.py
Here, we use svm_df from all projects to set resp_amp_sum_df, a df that includes the mean and stdev of decoding magnitude (aka response amplitude) across all experiments of all sessions
Vars needed here are set in svm_images_plots_setVars_sumMice3_svmdf.py
Created on Fri Oct 29 22:02:05 2021
@author: farzaneh
"""

################################################################################################
### Create a dataframe: resp_amp_sum_df, that includes the mean and stdev of response amplitude across all experiments of all sessions
################################################################################################

svm_df_all = pd.concat(svm_df_allpr)
# svm_df_all = svm_df_allpr[2] # run the code below for a single project code
print(len(svm_df_all))

exp_level_all = svm_df_all['experience_levels'].unique()
cresdf = svm_df_all['cre_allPlanes'].unique()
resp_amp_sum_df = pd.DataFrame()

cnt = -1
for cre in cresdf: # cre = cresdf[0]
for i in range(len(exp_level_all)): # i=0
cnt = cnt+1

# svm_df for a given cre and experience level
thiscre = svm_df_all[svm_df_all['cre_allPlanes']==cre]
thiscre = thiscre[thiscre['experience_levels']==exp_level_all[i]]
print(len(thiscre))

depthav = thiscre['depth_allPlanes'].mean()
# areasu = thiscre['area_allPlanes'].unique()
ampall = np.vstack(thiscre['peak_amp_allPlanes_allExp']) # ampall.shape # exp x 4 # pooled_experiments x 4_trTsShCh
nexp = sum(~np.isnan(ampall[:,1]))

# testing data
testav = np.nanmean(ampall[:,1])
    testsd = np.nanstd(ampall[:,1]) / np.sqrt(nexp) # SEM over non-NaN experiments, consistent with shflsd below

# shuffled
shflav = np.nanmean(ampall[:,2])
shflsd = np.nanstd(ampall[:,2]) / np.sqrt(nexp) # ampall[:,2].shape[0]

# create the summary df
resp_amp_sum_df.at[cnt, 'cre'] = cre
resp_amp_sum_df.at[cnt, 'experience_level'] = exp_level_all[i]
resp_amp_sum_df.at[cnt, 'depth_av'] = depthav
resp_amp_sum_df.at[cnt, 'n_experiments'] = nexp

resp_amp_sum_df.at[cnt, 'test_av'] = testav
resp_amp_sum_df.at[cnt, 'test_sd'] = testsd
resp_amp_sum_df.at[cnt, 'shfl_av'] = shflav
resp_amp_sum_df.at[cnt, 'shfl_sd'] = shflsd

resp_amp_sum_df
# [areasu for x in resp_amp_sum_df['cre']]
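The mean/SEM aggregation in the loop above can also be expressed with a pandas groupby; a hedged sketch on synthetic data (the column names follow the script, but the cre lines, experience levels, and amplitude values are made up):

```python
import numpy as np
import pandas as pd

# synthetic stand-in for svm_df_all: one row per experiment (hypothetical values)
rng = np.random.default_rng(0)
svm_df_all = pd.DataFrame({
    'cre_allPlanes': np.repeat(['Slc17a7', 'Sst'], 6),
    'experience_levels': np.tile(['Familiar', 'Novel 1', 'Novel >1'], 4),
    'test_amp': rng.normal(0.6, 0.1, 12),   # decoding amplitude, testing data
})

def sem(x):
    # standard error of the mean over non-NaN experiments
    x = np.asarray(x, dtype=float)
    n = np.sum(~np.isnan(x))
    return np.nanstd(x) / np.sqrt(n)

# one row per (cre, experience level), as in resp_amp_sum_df above
resp_amp_sum_df = (svm_df_all
                   .groupby(['cre_allPlanes', 'experience_levels'])['test_amp']
                   .agg(test_av='mean', test_sd=sem, n_experiments='count')
                   .reset_index())
```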



@@ -1,7 +1,7 @@
"""
Gets called in svm_images_plots_setVars.py
Sets svm_df, a proper pandas table, that will be used to make summary plots for experience levels in svm_images_plots_compare_ophys_experience_levels.py
Here, we set svm_df, a proper pandas table, that will be used to make summary plots for experience levels in svm_images_plots_compare_ophys_experience_levels.py
Vars needed here are set in svm_images_plots_setVars_sumMice3_svmdf.py
@@ -10,7 +10,13 @@
"""



if project_code == ['VisualBehaviorMultiscope']:
num_planes = 8
else:
num_planes = 1


##########################################################################################
##########################################################################################
############# Create svm_df, a proper pandas table #######################################
@@ -24,14 +30,18 @@
def concatall(df, col):
# df = svm_allMice_sessPooled0.copy()
# col = 'av_test_data_allPlanes'
# df[col].iloc[0].shape # size: sess or planes x sess or planes x sess x time (it must have )
# df[col].iloc[0].shape # sess or planes x sess or planes x sess x time (it must have )

df = df.copy()

if np.ndim(df[col].iloc[0])==1: # data is for all sessions but only 1 plane; we need to replicate it so the size becomes planes x sessions
for i in range(df.shape[0]): #i=0
df[col].iloc[i] = [df[col].iloc[i][:] for j in range(8)] # planes x sess

if project_code == ['VisualBehaviorMultiscope']:
for i in range(df.shape[0]): #i=0
df[col].iloc[i] = [df[col].iloc[i][:] for j in range(8)] # planes x sess
else:
for i in range(df.shape[0]): #i=0
df[col].iloc[i] = df[col].iloc[i][np.newaxis,:] # 1 x sess

a = np.concatenate((df[col].iloc[0]))
# print(a.shape)
for i in np.arange(1, df.shape[0]): #i=0
@@ -53,12 +63,14 @@ def concatall(df, col):
for iplane in range(num_planes): #iplane=0
for isess in range(nsess): #isess=0
cnt = cnt + 1
svm_df.at[cnt, 'project_code'] = project_code
session_id = svm_allMice_sessPooled0['session_ids'].iloc[i][iplane, isess]
experiment_id = all_sess0[all_sess0['session_id']==session_id]['experiment_id'].iloc[iplane]

svm_df.at[cnt, 'session_id'] = session_id
svm_df.at[cnt, 'experiment_id'] = experiment_id
svm_df.at[cnt, 'session_labs'] = svm_allMice_sessPooled0['session_labs'].iloc[i][0]
svm_df.at[cnt, 'session_labs'] = svm_allMice_sessPooled0['session_labs'].iloc[i][0]

# svm_df.head(300)


@@ -91,23 +103,45 @@ def concatall(df, col):
svm_df['peak_amp_allPlanes_allExp'] = peak_amp_allPlanes_allExp

svm_df #.head(300)
print(np.shape(svm_df))

# svm_allMice_sessPooled0.keys()



"""
Gets called in svm_images_plots_setVars.py
Here, we use svm_df from all projects to set resp_amp_sum_df, a df that includes the mean and stdev of decoding magnitude (aka response amplitude) across all experiments of all sessions
Vars needed here are set in svm_images_plots_setVars_sumMice3_svmdf.py
Created on Fri Oct 29 22:02:05 2021
@author: farzaneh
"""



### Note: when pooling across projects, we don't really use the code below. It is called in a separate script, which sets resp_amp_sum_df for svm_df_allpr (ie svm_df pooled across all project codes)

################################################################################################
### Create a dataframe: resp_amp_sum_df, that includes the mean and stdev of response amplitude across all experiments of all sessions
################################################################################################

exp_level_all = svm_df['experience_levels'].unique()
cresdf = svm_df['cre_allPlanes'].unique()
resp_amp_sum_df = pd.DataFrame()

cnt = -1
for cre in cresdf: # cre = cresdf[0]
for i in range(len(exp_level_all)):
for i in range(len(exp_level_all)): # i=0
cnt = cnt+1

# svm_df for a given cre and experience level
thiscre = svm_df[svm_df['cre_allPlanes']==cre]
thiscre = thiscre[thiscre['experience_levels']==exp_level_all[i]]
print(len(thiscre))

depthav = thiscre['depth_allPlanes'].mean()
# areasu = thiscre['area_allPlanes'].unique()
@@ -398,7 +398,7 @@
# fgn = fgn + f'_block{iblock}'

if svm_blocks==-1:
word = 'engagement_'
word = 'engaged_disengaged_blocks_'
elif svm_blocks==-101:
word = 'only_engaged_'
elif ~np.isnan(svm_blocks):