# Doric.py

# -*- coding: utf-8 -*
import h5py
import numpy as np
import pdb
import tools.dataAnalysis as dA
import tools.createPreprocessingVisualization as createPreprocessingVisualization

# Directory handed to the preprocessing-visualization helper (hard-coded Windows path).
figDir =  r"C:\Analysis\preprocessingFigures"
# Module-level plotting helper, created at import time; preprocesFiberSignal calls
# cPV.plotFiberSignal(...) when its `plots` argument is truthy.
cPV = createPreprocessingVisualization.createPreprocessingVisualization(figDir)
def ish5dataset(item):
    """Return True when *item* is an instance of ``h5py.Dataset``."""
    dataset_type = h5py.Dataset
    return isinstance(item, dataset_type)


def h5printR(item, leading=''):
    """Recursively print the members of an HDF5 group.

    Datasets are printed as ``<leading><name>: <shape>``. Any other member is
    printed as ``<leading><name>`` and then descended into with two extra
    spaces of indentation.
    """
    nested_prefix = leading + '  '
    for name in item:
        member = item[name]
        if not ish5dataset(member):
            print(leading + name)
            h5printR(member, nested_prefix)
        else:
            print(leading + name + ': ' + str(member.shape))


# Print structure of a .doric file
def h5print(filename):
    """Print *filename*, then the indented tree of the HDF5 file's contents."""
    with h5py.File(filename, 'r') as handle:
        print(filename)
        h5printR(handle, leading='  ')


def h5read(filename, where):
    """Read one dataset and its attributes from an HDF5 file.

    Parameters
    ----------
    filename : path of the HDF5 (.doric) file, opened read-only.
    where : sequence of keys walked from the file root. Keys that resolve to a
        group are descended into; a key that resolves to a dataset is read.

    Returns
    -------
    (data, DataInfo)
        ``data`` is the dataset as a numpy array and ``DataInfo`` a dict of the
        dataset's attributes. If no key in ``where`` resolves to a dataset the
        result is ``([], {})``.

    Raises
    ------
    KeyError (from h5py) when a key in ``where`` does not exist.
    """
    data = []
    # Initialised up front: previously a path that ended on a group left this
    # name unbound and the return statement raised UnboundLocalError.
    DataInfo = {}
    with h5py.File(filename, 'r') as h:
        item = h
        for w in where:
            node = item[w]
            if ish5dataset(node):
                data = np.array(node)
                DataInfo = {atrib: node.attrs[atrib] for atrib in node.attrs}
            else:
                item = node

    return data, DataInfo


def h5getDatasetR(item, leading=''):
    """Collect every dataset-holding group below *item* into a flat list.

    For each child group of *item*, the first member decides how the group is
    treated: if it is a dataset, the group becomes one entry
    ``{'Name': <leading>_<key>, 'Data': [...]}`` whose ``'Data'`` list holds
    ``{'Name', 'Data', 'DataInfo'}`` dicts (array plus attributes) for every
    member; otherwise the group is searched recursively with the extended
    name prefix.
    """
    collected = []
    for key in item:
        group = item[key]
        group_name = leading + '_' + key
        # Peek at the first member to tell a leaf group from an intermediate one.
        first_member = list(group.keys())[0]
        if not ish5dataset(group[first_member]):
            collected.extend(h5getDatasetR(group, group_name))
            continue

        members = []
        for k in group:
            entry = group[k]
            members.append({
                'Name': k,
                'Data': np.array(entry),
                'DataInfo': {atrib: entry.attrs[atrib] for atrib in entry.attrs},
            })
        collected.append({'Name': group_name, 'Data': members})

    return collected


# Extract data from a .doric file
def ExtractDataAcquisition(filename):
    """Return the h5getDatasetR listing of the file's 'DataAcquisition' group.

    Entry names are prefixed with *filename*.
    """
    with h5py.File(filename, 'r') as handle:
        acquisition = handle['DataAcquisition']
        datasets = h5getDatasetR(acquisition, filename)
    return datasets
def ExtractDataDataProcessed(filename):
    """Return the h5getDatasetR listing of the file's 'DataProcessed' group.

    Entry names are prefixed with *filename*.
    """
    with h5py.File(filename, 'r') as handle:
        processed = handle['DataProcessed']
        datasets = h5getDatasetR(processed, filename)
    return datasets

def process_trial(trial_path, signalChanID, refChanId, trigger_channel, eventChanId):
    """Load signal, reference and (optionally) trigger traces of one trial.

    Reads ``Values``/``Time`` of the signal and reference channels from
    ``DataAcquisition/FPConsole/Signals/Series0001`` and estimates the
    sampling rate from the mean spacing of the signal timestamps.

    Parameters
    ----------
    trial_path : path of the .doric (HDF5) file.
    signalChanID, refChanId : names of the signal / reference channel groups.
    trigger_channel : name of the analog-out trigger channel, or None. When
        given, the trigger trace and its ``Modulation1`` settings are added
        and the signal/reference are linearly interpolated onto the trigger
        time base.
    eventChanId : attribute name under ``Configurations/FPConsole/Events/
        Series0001``, or None. The attribute is looked up but not returned.

    Returns
    -------
    dict with keys ``sampling_rate_filter``, ``Signal``, ``Time``,
    ``refSignal``, ``refTime``, ``sampling_rate`` and, when a trigger channel
    is given, ``Trigger``, ``TriggerTime``, ``triggerStartingDelay``,
    ``triggerTimeOn`` (both divided by 1000), ``triggerVoltage`` and
    ``triggerFreq``.
    """
    series_path = ['DataAcquisition', 'FPConsole', 'Signals', 'Series0001']
    data = {}
    print(trial_path)

    # The context manager closes the handle on every exit path; the previous
    # code never closed it. The leftover pdb.set_trace() breakpoints, which
    # stopped every call in the debugger, have been removed.
    with h5py.File(trial_path, 'r') as f:
        console_cfg = f['Configurations']['FPConsole']
        sampling_rateFilter = console_cfg['AIN01']['Settings'].attrs['GlobalSampleRate']

        Signal, _ = h5read(trial_path, series_path + [signalChanID, 'Values'])
        Time, _ = h5read(trial_path, series_path + [signalChanID, 'Time'])
        refSignal, _ = h5read(trial_path, series_path + [refChanId, 'Values'])
        refTime, _ = h5read(trial_path, series_path + [refChanId, 'Time'])

        # Sampling rate = 1 / mean interval between consecutive timestamps.
        sampling_rate = 1 / np.mean(np.diff(Time))

        data['sampling_rate_filter'] = sampling_rateFilter
        data['Signal'] = Signal
        data['Time'] = Time
        data['refSignal'] = refSignal
        data['refTime'] = refTime
        data['sampling_rate'] = sampling_rate

        if eventChanId is not None:
            # Kept as a lookup only (a missing attribute still raises KeyError);
            # the original code never stored the value in the result.
            _event = console_cfg['Events']['Series0001'].attrs[eventChanId]

        if trigger_channel is not None:
            channel_key = f'{trigger_channel}'
            Trigger, _ = h5read(trial_path, series_path + ['AnalogOut', channel_key])
            TriggerTime, _ = h5read(trial_path, series_path + ['AnalogOut', 'Time'])
            modulation = console_cfg[channel_key]['Modulations']['Modulation1'].attrs

            data['Trigger'] = Trigger
            data['TriggerTime'] = TriggerTime
            data['triggerStartingDelay'] = modulation['StartingDelay'] / 1000
            data['triggerTimeOn'] = modulation['TimeON'] / 1000
            data['triggerVoltage'] = modulation['StepsVoltage'][1]
            data['triggerFreq'] = modulation['Frequency']

            # Resample signal and reference onto the trigger time base so all
            # traces have the same length and can be plotted together.
            data['Signal'] = np.interp(TriggerTime, Time, Signal)
            data['Time'] = TriggerTime
            data['refSignal'] = np.interp(TriggerTime, refTime, refSignal)
            data['refTime'] = TriggerTime

    return data


def process_trialNewVersion(trial_path, signalChanID, refChanId, trigger_channel, eventChanId):
    """Load one trial from a newer-layout .doric file.

    Parameters
    ----------
    trial_path : path of the .doric (HDF5) file.
    signalChanID, refChanId : two-element sequences ``(group, dataset)``
        below ``DataAcquisition/FPConsole/Signals/Series0001``. The time
        vector is the ``Time`` dataset of the signal's group.
    trigger_channel : trigger channel name (string) or None. Names containing
        ``'DIO'`` are read from ``DigitalIO``; others from ``AnalogOut``, with
        a fall-back to ``DigitalIO/DIO01`` when the analog channel is missing.
        If the trigger trace sums to zero, the analog channel
        ``'AOUT' + trigger_channel[3:]`` is used instead.
    eventChanId : dataset name below ``DataAcquisition/FPConsole/Events/
        Series0001`` or None; stored under ``'event'`` when given.

    Returns
    -------
    dict with ``sampling_rate_filter``, ``Signal``, ``Time``, ``refSignal``,
    ``decimationFactor``, ``sampling_rate`` and, depending on the arguments,
    ``event``, ``Trigger``, ``TriggerTime``, ``triggerStartingDelay``,
    ``triggerTimeOn``, ``triggerFreq``, ``refTime`` and (analog fall-back
    only) ``triggerVoltage``. With a trigger, signal and reference are
    interpolated onto the trigger time base.

    Raises
    ------
    KeyError (from h5py) when a requested group, dataset or attribute is
    missing. Previously a missing signal dataset dropped into pdb instead.
    """
    data = {}
    print(trial_path)

    # `with` closes the handle on every exit path (it used to stay open).
    with h5py.File(trial_path, 'r') as f:
        console_cfg = f['Configurations']['FPConsole']
        sampling_rateFilter = console_cfg['AIN01']['Settings'].attrs['GlobalSampleRate']
        DecimactionFac = console_cfg['SavingSettings'].attrs['DecimationFactor']

        series = f['DataAcquisition']['FPConsole']['Signals']['Series0001']
        Signal = series[signalChanID[0]][signalChanID[1]][()]
        refSignal = series[refChanId[0]][refChanId[1]][()]
        Time = series[signalChanID[0]]['Time'][()]

        # Sampling rate = 1 / mean interval between consecutive timestamps.
        sampling_rate = 1 / np.mean(np.diff(Time))

        data['sampling_rate_filter'] = sampling_rateFilter
        data['Signal'] = Signal
        data['Time'] = Time
        data['refSignal'] = refSignal
        data['decimationFactor'] = DecimactionFac
        data['sampling_rate'] = sampling_rate

        if eventChanId is not None:
            data['event'] = f['DataAcquisition']['FPConsole']['Events']['Series0001'][eventChanId][()]

        # Everything trigger-related is guarded here; the zero-trigger check
        # used to run unconditionally and failed on an unbound `Trigger`
        # whenever trigger_channel was None.
        if trigger_channel is not None:
            if 'DIO' in trigger_channel:
                Trigger = series['DigitalIO'][trigger_channel][()]
                TriggerTime = series['DigitalIO']['Time'][()]
            else:
                try:
                    Trigger = series['AnalogOut'][trigger_channel][()]
                    TriggerTime = series['AnalogOut']['Time'][()]
                except KeyError:
                    # Analog channel not recorded: fall back to the first digital line.
                    Trigger = series['DigitalIO']['DIO01'][()]
                    TriggerTime = series['DigitalIO']['Time'][()]
            _store_trigger(data, f, trigger_channel, Trigger, TriggerTime,
                           Time, Signal, refSignal, DecimactionFac, include_voltage=False)

            if int(np.sum(Trigger)) == 0:
                # The selected trigger never fired; use the analog output instead.
                # NOTE(review): the [3:] slice maps 'DIO01' -> 'AOUT01', but an
                # 'AOUTxx' name becomes 'AOUTTxx' - confirm this path is only
                # meant for DIO channels.
                analog_channel = f'AOUT{trigger_channel[3:]}'
                Trigger = series['AnalogOut'][analog_channel][()]
                TriggerTime = series['AnalogOut']['Time'][()]
                _store_trigger(data, f, analog_channel, Trigger, TriggerTime,
                               Time, Signal, refSignal, DecimactionFac, include_voltage=True)

    return data


def _store_trigger(data, f, channel, Trigger, TriggerTime, Time, Signal, refSignal,
                   DecimactionFac, include_voltage):
    """Write the trigger trace, its Modulation1 settings and the signals
    interpolated onto the trigger time base into *data* (in place)."""
    modulation = f['Configurations']['FPConsole'][f'{channel}']['Modulations']['Modulation1'].attrs
    data['Trigger'] = Trigger
    data['TriggerTime'] = TriggerTime
    data['triggerStartingDelay'] = modulation['StartingDelay'] / 1000
    data['triggerTimeOn'] = modulation['TimeON'] / 1000
    if include_voltage:
        data['triggerVoltage'] = modulation['StepsVoltage'][1]
    data['triggerFreq'] = modulation['Frequency']
    # Interpolate so signal, reference and trigger share one time axis.
    data['Signal'] = np.interp(TriggerTime, Time, Signal)
    data['Time'] = TriggerTime
    data['refSignal'] = np.interp(TriggerTime, Time, refSignal)
    data['refTime'] = TriggerTime
    data['decimationFactor'] = DecimactionFac

def process_post(trial_path):
    """Read group-analysis results (PSTH and onset statistics) from a .doric file.

    For every motif under ``GroupAnalysis`` and each of the conditions
    ``GroupHealthy`` / ``GroupSick`` the function reads ``PerieventOnset`` and
    ``StatisticOnset`` of the ``AIN01xAOUT02-LockIn`` dF/F signal.

    Returns
    -------
    dict ``data[motif][condition]`` with keys
        ``PSTH``       - the ``PerieventOnset`` array as stored,
        ``PSTH_time``  - ``np.linspace(-5, 5, len(PSTH[1]))``,
        ``Baseline 1sec``, ``Event 1sec``, ``Baseline 5sec``, ``Event 5sec``
                       - entries 0..3 of ``StatisticOnset``.
    """
    lock_in = 'AIN01xAOUT02-LockIn'
    conditions = ['GroupHealthy', 'GroupSick']
    AUC_list = ['Baseline 1sec', 'Event 1sec', 'Baseline 5sec', 'Event 5sec']

    data = {}
    print(trial_path)
    # `with` closes the handle on every exit path (it used to stay open).
    with h5py.File(trial_path, 'r') as f:
        motif_list = list(f['GroupAnalysis'].keys())
        print(motif_list)
        for motif in motif_list:
            data[motif] = {}
            for condition in conditions:
                node = f['GroupAnalysis'][motif]['DFFSignals']['Series0001'][lock_in]['Signal' + lock_in][condition]
                PSTH = node['PerieventOnset'][()]
                stats = node['StatisticOnset'][()]

                entry = {
                    'PSTH': PSTH,
                    # Time axis is synthesised over a fixed -5..5 window with
                    # one point per sample of the second PSTH row.
                    'PSTH_time': np.linspace(-5, 5, len(PSTH[1])),
                }
                for a, AUC in enumerate(AUC_list):
                    entry[AUC] = stats[a]
                data[motif][condition] = entry

    return data


def process_post_trial(trial_path, plan_df,params,aniDic):
    """Add the processed dF/F trace and its time vector of one trial to *aniDic*.

    Parameters
    ----------
    trial_path : path of the .doric (HDF5) file.
    plan_df : unused; kept for interface compatibility.
    params : dict; ``params['motifList']`` is printed.
    aniDic : dict updated in place with ``'dFF'`` and ``'time'``.

    Returns
    -------
    The same *aniDic* object.
    """
    print('processing', trial_path, '.............................')
    # `with` closes the handle on every exit path (it used to stay open).
    with h5py.File(trial_path, 'r') as f:
        motif_list = params['motifList']
        print(motif_list)
        lock_in = f['DataProcessed']['FPConsole']['DFFSignals']['Series0001']['AIN01xAOUT02-LockIn']
        aniDic['dFF'] = lock_in['Values'][()]
        # Fixed copy-paste bug: 'time' used to be read from 'Values' as well,
        # so it duplicated the dF/F trace.
        # NOTE(review): assumes the group has a 'Time' dataset next to
        # 'Values', as the DataAcquisition channels do - confirm.
        aniDic['time'] = lock_in['Time'][()]

    return aniDic
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

import numpy as np
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt

def detect_and_remove_artifacts(signal, drop_threshold=-500, rise_threshold=500, plot=False):
    """
    Detect artifacts in a 1D signal and replace them by linear interpolation.

    A sample seeds an artifact when it lies below ``mean - 2.5 * std`` or above
    ``mean + 4 * std`` of the whole signal. Starting at each seed, samples are
    discarded going forward until the signal is back inside ``mean +/- std``.
    The discarded samples are then filled by linear interpolation (with
    extrapolation at the ends) from the remaining ones.

    Parameters:
    - signal: 1D numpy array representing the input signal.
    - drop_threshold: accepted for interface compatibility; currently unused.
    - rise_threshold: accepted for interface compatibility; currently unused.
    - plot: If True and at least one artifact was found, plots the original
      signal vs. the cleaned signal (default is False).

    Returns:
    - cleaned_signal: 1D numpy array with artifacts removed and gaps filled by interpolation.

    NOTE(review): only the seed and the samples after it are removed; the
    out-of-band samples leading up to a seed are kept. The original code
    searched backwards for the artifact start but never used the result, so
    that search has been dropped without changing the output.
    """
    mean_signal = np.mean(signal)
    std_signal = np.std(signal)

    # Seeds: samples far outside the overall distribution (asymmetric cutoffs).
    artifact_indices = np.where(
        (signal < (mean_signal - 2.5 * std_signal)) | (signal > (mean_signal + 4 * std_signal))
    )[0]

    # Samples outside mean +/- std; an artifact extends over a run of these.
    out_of_band = (signal < (mean_signal - std_signal)) | (signal > (mean_signal + std_signal))

    n_samples = len(signal)
    valid_indices = np.ones(n_samples, dtype=bool)
    for idx in artifact_indices:
        if not valid_indices[idx]:
            # Already covered by the run started at an earlier seed.
            continue
        end_idx = idx
        while end_idx < n_samples and out_of_band[end_idx]:
            valid_indices[end_idx] = False
            end_idx += 1

    # Interpolate across the discarded samples using the remaining ones.
    x_valid = np.where(valid_indices)[0]
    y_valid = signal[valid_indices]
    interpolator = interp1d(x_valid, y_valid, kind='linear', fill_value='extrapolate')
    cleaned_signal = interpolator(np.arange(n_samples))

    if plot and len(artifact_indices) > 0:
        plt.figure(figsize=(10, 5))
        plt.plot(signal, label='Original Signal', color='red', alpha=0.5)
        plt.plot(cleaned_signal, label='Cleaned Signal', color='blue', alpha=0.7)
        plt.xlabel('Time')
        plt.ylabel('Signal Amplitude')
        plt.title('Artifact Removal: Original vs Cleaned Signal')
        plt.legend()
        plt.show()

    return cleaned_signal

'''
get_zdFF.py calculates standardized dF/F signal based on calcium-independent 
and calcium-dependent signals commonly recorded using fiber photometry calcium imaging

October 2019 Ekaterina Martianova ekaterina.martianova.1@ulaval.ca 

Reference:
  (1) Martianova, E., Aronson, S., Proulx, C.D. Multi-Fiber Photometry 
      to Record Neural Activity in Freely Moving Animal. J. Vis. Exp. 
      (152), e60278, doi:10.3791/60278 (2019)
      https://www.jove.com/video/60278/multi-fiber-photometry-to-record-neural-activity-freely-moving

'''
import scipy.signal as signal
def downsample_data(aniDic, decimation_factor):
    """Decimate the 'Signal', 'refSignal', 'Trigger' and 'Time' entries of *aniDic*.

    Each entry is replaced in place by ``scipy.signal.decimate`` output
    (IIR anti-aliasing filter); the same dict is returned.
    """
    for trace_name in ('Signal', 'refSignal', 'Trigger', 'Time'):
        original_trace = aniDic[trace_name]
        aniDic[trace_name] = signal.decimate(original_trace, decimation_factor, ftype='iir')
    return aniDic

def downsample_dataTwoCols(aniDic, decimation_factor):
    """Decimate the two-colour entries of *aniDic* in place.

    The keys 'redChanSignal', 'redRefSignal', 'greenChanSignal',
    'greenRefSignal' and 'Time' are each replaced by ``scipy.signal.decimate``
    output (IIR anti-aliasing filter); the same dict is returned.
    """
    trace_names = ('redChanSignal', 'redRefSignal', 'greenChanSignal', 'greenRefSignal', 'Time')
    for trace_name in trace_names:
        original_trace = aniDic[trace_name]
        aniDic[trace_name] = signal.decimate(original_trace, decimation_factor, ftype='iir')
    return aniDic
def get_zdFF(reference, signal,time, smooth_win=8, remove=0, lambd=5e4, porder=2, itermax=300, plot=False):
    '''
    Calculates z-score dF/F signal based on fiber photometry calcium-independent
    and calcium-dependent signals and optionally plots raw signal vs dFF.

    Input
        reference: calcium-independent signal (usually 405-420 nm excitation), 1D array
        signal: calcium-dependent signal (usually 465-490 nm excitation for
                     green fluorescent proteins, or ~560 nm for red), 1D array
        time: time vector matching the input traces; only used for plotting
        smooth_win: window for moving average smooth, integer
        remove: number of samples dropped from the beginning of the traces
                (e.g. an initial steep slope), integer
        Inputs for airPLS:
        lambd: parameter that can be adjusted by user. The larger lambda is,
                the smoother the resulting background, z
        porder: forwarded to airPLS
        itermax: maximum iteration times
        plot: if True, plots raw signal and dFF

    Output
        zdFF - z-score dF/F, 1D numpy array of length len(signal) - remove
    '''
    from sklearn.linear_model import Lasso

    raw_signal = signal

    # Smooth both traces with a moving average.
    reference = smooth_signal(reference, smooth_win)
    signal = smooth_signal(signal, smooth_win)

    # Estimate the slowly varying baseline of each trace with airPLS.
    r_base = airPLS(reference, lambda_=lambd, porder=porder, itermax=itermax)
    s_base = airPLS(signal, lambda_=lambd, porder=porder, itermax=itermax)

    # Subtract the baseline and drop the first `remove` samples.
    reference = (reference[remove:] - r_base[remove:])
    signal = (signal[remove:] - s_base[remove:])
    time = time[remove:]

    # Standardize: centre on the median, scale by the standard deviation.
    reference = (reference - np.median(reference)) / np.std(reference)
    signal = (signal - np.median(signal)) / np.std(signal)

    # Fit the reference to the signal with a non-negative Lasso regression
    # and use the prediction as the aligned reference.
    lin = Lasso(alpha=0.0001, precompute=True, max_iter=1000,
                positive=True, random_state=9999, selection='random')
    n = len(reference)
    lin.fit(reference.reshape(n, 1), signal.reshape(n, 1))
    reference = lin.predict(reference.reshape(n, 1)).reshape(n, )

    zdFF = (signal - reference)

    if plot:
        plt.figure(figsize=(10, 5))

        # Raw signal. It is trimmed like `time`; plotting the untrimmed trace
        # raised a length-mismatch error whenever remove > 0.
        plt.subplot(2, 1, 1)
        plt.plot(time, raw_signal[remove:], label='Signal (Calcium-dependent)', color='green', alpha=0.6)
        plt.legend()
        plt.xlabel('Time (samples)')
        plt.ylabel('Normalized Signal')
        plt.title('Raw Signal vs Reference')
        # NOTE(review): marker positions (x=90, 91) are hard-coded.
        plt.axvline(x=90, color='red', linestyle='--', label='Removed')

        # z-scored dF/F.
        plt.subplot(2, 1, 2)
        plt.plot(time, zdFF, label='z-score dF/F', color='blue')
        plt.xlabel('Time (samples)')
        plt.ylabel('zdFF')
        plt.legend()
        plt.title('z-score dF/F')
        plt.axvline(x=90, color='red', linestyle='--', label='Removed')
        plt.axvline(x=91, color='red', linestyle='--', label='Removed')
        plt.tight_layout()
        plt.show()

    return zdFF


def smooth_signal(x, window_len=10, window='flat'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with reflected copies of itself
    (window_len - 1 samples each) so that edge transients are reduced.
    Adapted from https://scipy-cookbook.readthedocs.io/items/SignalSmooth.html

    input:
        x: the input signal, 1-D numpy array
        window_len: length of the smoothing window. Values below 3 return
                    ``x`` unchanged. With an even ``window_len`` the output
                    has the same length as ``x``; with an odd one it is one
                    sample longer.
        window: one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman';
                'flat' produces a moving average.

    output:
        the smoothed signal

    raises:
        ValueError if ``x`` is not 1-D, is shorter than ``window_len``, or
        ``window`` is not one of the supported names.
    """
    # These used to be `raise (ValueError, "...")`, which raises a tuple and
    # therefore surfaced as TypeError instead of the intended ValueError.
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad with mirrored copies of the start and end of the signal.
    s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]

    if window == 'flat':  # Moving average
        w = np.ones(window_len, 'd')
    else:
        # Look the window function up by name instead of using eval().
        w = getattr(np, window)(window_len)

    y = np.convolve(w / w.sum(), s, mode='valid')

    # Trim the padding introduced above.
    return y[(int(window_len / 2) - 1):-int(window_len / 2)]


'''
airPLS.py Copyright 2014 Renato Lombardo - renato.lombardo@unipa.it
Baseline correction using adaptive iteratively reweighted penalized least squares

This program is a translation in python of the R source code of airPLS version 2.0
by Yizeng Liang and Zhang Zhimin - https://code.google.com/p/airpls

Reference:
Z.-M. Zhang, S. Chen, and Y.-Z. Liang, Baseline correction using adaptive iteratively 
reweighted penalized least squares. Analyst 135 (5), 1138-1146 (2010).

Description from the original documentation:
Baseline drift always blurs or even swamps signals and deteriorates analytical 
results, particularly in multivariate analysis.  It is necessary to correct baseline 
drift to perform further data analysis. Simple or modified polynomial fitting has 
been found to be effective in some extent. However, this method requires user 
intervention and prone to variability especially in low signal-to-noise ratio 
environments. The proposed adaptive iteratively reweighted Penalized Least Squares
(airPLS) algorithm doesn't require any user intervention and prior information, 
such as detected peaks. It iteratively changes weights of sum squares errors (SSE) 
between the fitted baseline and original signals, and the weights of SSE are obtained 
adaptively using between previously fitted baseline and original signals. This 
baseline estimator is general, fast and flexible in fitting baseline.


LICENCE
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
'''

import numpy as np
from scipy.sparse import csc_matrix, eye, diags
from scipy.sparse.linalg import spsolve


def WhittakerSmooth(x, w, lambda_, differences=1):
    '''
    Penalized least squares algorithm for background fitting.

    Solves ``(W + lambda_ * D.T D) z = W x`` where ``W = diag(w)`` and ``D``
    is the first-order difference operator.

    input
        x: input data (i.e. chromatogram of spectrum), 1D
        w: weights, one per sample (zero where a point belongs to a peak)
        lambda_: parameter that can be adjusted by user. The larger lambda is,
                 the smoother the resulting background
        differences: accepted but not used - the penalty is always built from
                 first-order differences

    output
        the fitted background vector (1D numpy array)
    '''
    samples = np.asarray(x).ravel()
    m = samples.size
    identity = eye(m, format='csc')
    # numpy.diff() does not work on sparse matrices; subtracting shifted row
    # blocks of the identity gives the same first-difference operator.
    first_diff = identity[1:] - identity[:-1]
    weights = diags(w, 0, shape=(m, m))
    lhs = csc_matrix(weights + ((lambda_ * first_diff.T) @ first_diff))
    rhs = weights @ samples
    background = spsolve(lhs, rhs)
    return np.array(background)


def airPLS(x, lambda_=100, porder=1, itermax=15):
    '''
    Adaptive iteratively reweighted penalized least squares for baseline fitting.

    Repeatedly fits a Whittaker-smoothed background, then re-weights the
    samples: points on or above the fit get weight 0, points below it get an
    exponentially growing weight. Stops once the summed negative residual is
    below 0.1 % of ``sum(|x|)`` or after ``itermax`` iterations (a warning is
    printed in the latter case).

    input
        x: input data (i.e. chromatogram of spectrum), 1D numpy array
        lambda_: parameter that can be adjusted by user. The larger lambda is,
                 the smoother the resulting background, z
        porder: forwarded to WhittakerSmooth as its ``differences`` argument
        itermax: maximum number of iterations

    output
        the fitted background vector
    '''
    n_points = x.shape[0]
    w = np.ones(n_points)
    tolerance = 0.001
    for iteration in range(1, itermax + 1):
        z = WhittakerSmooth(x, w, lambda_, porder)
        d = x - z
        below_fit = d < 0
        dssn = np.abs(d[below_fit].sum())
        last_round = iteration == itermax
        if dssn < tolerance * (abs(x)).sum() or last_round:
            if last_round:
                print('WARING max iteration reached!')
            break
        # Residual >= 0: the point sits on a peak, so it is ignored next round.
        w[d >= 0] = 0
        w[below_fit] = np.exp(iteration * np.abs(d[below_fit]) / dssn)
        # Both end points share the weight derived from the largest negative residual.
        w[0] = np.exp(iteration * (d[below_fit]).max() / dssn)
        w[-1] = w[0]
    return z
def preprocesFiberSignal(signal, refSignal, time, trialInfos,experiment, plots, sampling_rate=60, highpass_freq=0.001, lowpass_freq=10):
    """Denoise, detrend and z-score a fiber-photometry signal.

    Steps:
      1. low-pass both traces (2nd-order Butterworth, zero-phase);
      2. high-pass the denoised traces (2nd-order Butterworth, zero-phase);
      3. fit a degree-16 polynomial over ``time`` to each high-passed trace;
      4. dF/F = (high-passed signal - fit) / fit, then z-score it.
    A linear regression of the detrended signal on the detrended reference
    is also computed; it only feeds the diagnostic dictionary handed to the
    plotting helper.

    Parameters
    ----------
    signal, refSignal : 1D arrays with the signal and reference traces.
    time : 1D array, time axis used for the polynomial fit.
    trialInfos, experiment : forwarded to ``cPV.plotFiberSignal`` when
        ``plots`` is truthy.
    plots : when truthy, the diagnostic figure is generated.
    sampling_rate : sampling rate used for the high-pass filter design.
    highpass_freq, lowpass_freq : accepted but currently not used.

    Returns
    -------
    1D array: z-scored dF/F.

    NOTE(review): the low-pass is hard-coded to 10 Hz at fs=60 (ignoring
    ``lowpass_freq`` and ``sampling_rate``) and the high-pass cutoff is
    hard-coded to 0.0001 Hz (ignoring ``highpass_freq``, whose default is
    0.001). The constants are kept so existing results do not change;
    confirm which values are intended before wiring the parameters in.
    """
    from scipy.signal import butter, filtfilt
    from scipy.stats import linregress

    def polynomial_fit(t, trace, degree=2):
        """Evaluate the least-squares polynomial of *trace* over *t* at *t*."""
        return np.polyval(np.polyfit(t, trace, degree), t)

    # Low-pass filter (see NOTE above about the hard-coded cutoff and rate).
    b_value, a_value = butter(2, 10, btype='low', fs=60)
    signal_denoised = filtfilt(b_value, a_value, signal)
    ref_denoised = filtfilt(b_value, a_value, refSignal)

    # High-pass filter (hard-coded cutoff, see NOTE above).
    b_h, a_h = butter(2, 0.0001, btype='high', fs=sampling_rate)
    signal_highpass = filtfilt(b_h, a_h, signal_denoised, padtype='even')
    ref_highpass = filtfilt(b_h, a_h, ref_denoised, padtype='even')

    # Polynomial trend of the high-passed traces.
    signal_expfit = polynomial_fit(time, signal_highpass, degree=16)
    ref_expfit = polynomial_fit(time, ref_highpass, degree=16)

    # Detrended traces and the reference-based estimate of the shared
    # component (slope * reference + intercept); used for diagnostics only.
    signal_detrended = signal_denoised - signal_expfit
    ref_detrended = ref_denoised - ref_expfit
    slope, intercept, r_value, p_value, std_err = linregress(x=ref_detrended, y=signal_detrended)
    sigMotion_est_motion = intercept + slope * ref_detrended
    sigCorrected = signal_detrended - sigMotion_est_motion
    F0 = np.mean(sigCorrected)

    dF_F = (signal_highpass - signal_expfit) / signal_expfit
    dF_F_zscore = (dF_F - np.mean(dF_F)) / np.std(dF_F)

    preProDict = {'signal': signal, 'refSignal': refSignal, 'time': time, 'sampling_rate': sampling_rate,
                  'signalDenoised': signal_denoised, 'signalExpFit': signal_expfit, 'refDenoised': ref_denoised,
                  'refExpFit': ref_expfit, 'signalDetrended': signal_detrended, 'refDetrended': ref_detrended,
                  'signalHighpass': signal_highpass, 'refHighpass': ref_highpass, 'slope': slope,
                  'intercept': intercept, 'r_value': r_value, 'p_value': p_value, 'std_err': std_err,
                  'sigMotion_est_motion': sigMotion_est_motion, 'sigCorrected': sigCorrected, 'F0':F0, 'dFF': dF_F, 'dFF_zscore':dF_F_zscore}

    # Generate the diagnostic figure.
    if plots:
        cPV.plotFiberSignal(trialInfos, experiment, preProDict)
    return dF_F_zscore


def preprocessFiberSignalWithBiexponential(signal, refSignal, time, trialInfos, experiment, plots, sampling_rate=60,
                                           highpass_freq=0.0001, lowpass_freq=10):
    """Detrend a fiber signal with biexponential fits and derive a dF/F trace.

    Processing order:
      1. Zero-phase 2nd-order Butterworth low-pass of both traces.
      2. Bounded biexponential fit (constant + slow + fast decay) of each
         low-passed trace against ``time``.
      3. The reference trace and its fit are rescaled by the ratio of the
         signal's min-max range to the reference's min-max range.
      4. Both traces are detrended by subtracting their fits, and the
         detrended signal is regressed on the detrended reference.
      5. The regression coefficients are applied to the *scaled reference
         fit* to build the motion estimate, which is subtracted from the
         detrended signal.
      6. A first-degree polynomial of the corrected signal against the scaled
         reference fit serves as the baseline for dF/F.

    Args:
        signal: raw signal trace (1-D array-like accepted by ``filtfilt``).
        refSignal: raw reference trace, same length as ``signal``.
        time: numpy array of time stamps used as the fit abscissa.
        trialInfos: forwarded untouched to ``cPV.plotFiberSignal``.
        experiment: forwarded untouched to ``cPV.plotFiberSignal``.
        plots: when truthy, ``cPV.plotFiberSignal`` is called with the results.
        sampling_rate: sampling frequency handed to ``butter`` as ``fs``.
        highpass_freq: accepted for interface compatibility; not used here.
        lowpass_freq: cut-off of the low-pass filter.

    Returns:
        dict holding the inputs, the intermediate traces, the regression
        statistics and the final ``'dFF'`` trace.
    """
    from scipy.optimize import curve_fit
    from scipy.signal import butter, filtfilt
    from scipy.stats import linregress

    def decay_model(t, offset, fast_amp, slow_amp, slow_tau, tau_ratio):
        '''Constant offset plus a slow and a fast exponential decay.'''
        # The fast time constant is expressed as a fraction of the slow one.
        fast_tau = slow_tau * tau_ratio
        return offset + slow_amp * np.exp(-t / slow_tau) + fast_amp * np.exp(-t / fast_tau)

    def fit_decay(trace, lowest, highest):
        '''Fit ``decay_model`` to ``trace`` and return the fitted curve.'''
        span = highest - lowest
        guess = [lowest, span / 10, span / 20, 600, 0.1]
        limits = ([0, 0, 0, 100, 0.01], [highest, highest / 2, highest / 4, 1200, 1])
        params, _ = curve_fit(decay_model, time, trace, p0=guess, bounds=limits, maxfev=10000)
        return decay_model(time, *params)

    # Low-pass both traces with the same zero-phase filter.
    lp_b, lp_a = butter(2, lowpass_freq, btype='low', fs=sampling_rate)
    sig_smooth = filtfilt(lp_b, lp_a, signal)
    ref_smooth = filtfilt(lp_b, lp_a, refSignal)

    ref_lo, ref_hi = np.min(ref_smooth), np.max(ref_smooth)
    sig_lo, sig_hi = np.min(sig_smooth), np.max(sig_smooth)

    # Biexponential trend of each trace; the initial guess and the bounds are
    # derived from the trace's own extrema.
    ref_trend = fit_decay(ref_smooth, ref_lo, ref_hi)
    sig_trend = fit_decay(sig_smooth, sig_lo, sig_hi)

    # Bring the reference (and its fit) onto the signal's amplitude range.
    ref_smooth_scaled = ref_smooth * (sig_hi - sig_lo) / (ref_hi - ref_lo)
    ref_trend_scaled = ref_trend * (sig_hi - sig_lo) / (ref_hi - ref_lo)

    sig_flat = sig_smooth - sig_trend
    ref_flat = ref_smooth_scaled - ref_trend_scaled

    # How much of the detrended signal is explained by the detrended reference.
    reg_slope, reg_intercept, r_value, p_value, std_err = linregress(x=ref_flat, y=sig_flat)

    # NOTE(review): the motion estimate is built from the scaled reference
    # *fit*, not from the detrended reference used in the regression --
    # confirm this is intended.
    motion_estimate = reg_intercept + reg_slope * ref_trend_scaled
    corrected = sig_flat - motion_estimate

    # Linear baseline of the corrected trace, used as the dF/F denominator.
    fit_slope, fit_intercept = np.polyfit(ref_trend_scaled, corrected, 1)
    baseline = fit_intercept + fit_slope * ref_trend_scaled
    dff = (corrected - baseline) / baseline

    # NOTE(review): 'intercept' is the baseline-fit intercept, whereas
    # r_value / p_value / std_err come from the regression above.
    preProDict = {
        'signal': signal,
        'refSignal': refSignal,
        'time': time,
        'sampling_rate': sampling_rate,
        'signalDenoised': sig_smooth,
        'signalExpFit': sig_trend,
        'refDenoised': ref_smooth,
        'refExpFit': ref_trend,
        'signalDetrended': sig_flat,
        'intercept': fit_intercept,
        'r_value': r_value,
        'p_value': p_value,
        'std_err': std_err,
        'sigMotion_est_motion': motion_estimate,
        'sigCorrected': corrected,
        'dFF': dff,
    }

    # Optional diagnostic figure.
    if plots:
        cPV.plotFiberSignal(trialInfos, experiment, preProDict)

    return preProDict
def process_trialDualVersion(trial_path, chanDict, eventChanId):
    """Load the green and red channel traces of one trial from a .doric file.

    Args:
        trial_path: path of the HDF5 (.doric) file to read.
        chanDict: mapping with the keys ``'green'`` and ``'red'``; each value
            is a dict with ``'folder'``, ``'chan'``, ``'refFolder'`` and
            ``'refChan'`` naming the groups/datasets under
            ``DataAcquisition/FPConsole/Signals/Series0001``.
        eventChanId: name of the dataset under
            ``DataAcquisition/FPConsole/Events/Series0001`` to load, or
            ``None`` to skip event loading.

    Returns:
        dict with ``'sampling_rate_filter'``, ``'greenChanSignal'``,
        ``'greenRefSignal'``, ``'decimationFactor'``, ``'redChanSignal'``,
        ``'redRefSignal'``, ``'Time'``, ``'sampling_rate'`` (reciprocal of the
        mean difference between consecutive ``Time`` values) and, only when
        ``eventChanId`` is not ``None``, ``'event'``.

    Raises:
        KeyError: if a required group, dataset, attribute or ``chanDict``
            entry is missing.
    """
    print(trial_path)

    # The context manager releases the HDF5 handle even when a lookup fails;
    # every value is materialised (``[()]`` / attribute read) before it closes.
    with h5py.File(trial_path, 'r') as f:
        console_cfg = f['Configurations']['FPConsole']
        decimation_factor = console_cfg['SavingSettings'].attrs['DecimationFactor']
        sampling_rate_filter = console_cfg['AIN02']['Settings'].attrs['GlobalSampleRate']

        series = f['DataAcquisition']['FPConsole']['Signals']['Series0001']
        green = chanDict['green']
        red = chanDict['red']

        try:
            green_signal = series[green['folder']][green['chan']][()]
            red_signal = series[red['folder']][red['chan']][()]
        except KeyError as err:
            # Fail loudly with context instead of dropping into a debugger,
            # which would stall any non-interactive run.
            raise KeyError(
                f"Could not read green/red channel signals from {trial_path}: {err}"
            ) from err

        red_ref = series[red['refFolder']][red['refChan']][()]
        green_ref = series[green['refFolder']][green['refChan']][()]
        time_stamps = series[green['refFolder']]['Time'][()]

        event = None
        if eventChanId is not None:
            event = f['DataAcquisition']['FPConsole']['Events']['Series0001'][eventChanId][()]

    # Effective sampling rate estimated from the recorded time stamps.
    sampling_rate = 1 / np.mean(np.diff(time_stamps))

    data = {
        'sampling_rate_filter': sampling_rate_filter,
        'greenChanSignal': green_signal,
        'greenRefSignal': green_ref,
        'decimationFactor': decimation_factor,
        'redChanSignal': red_signal,
        'redRefSignal': red_ref,
        'Time': time_stamps,
        'sampling_rate': sampling_rate,
    }
    if eventChanId is not None:
        data['event'] = event

    return data


def process_trialFlexVersion(trial_path, chanDict, eventChanId):
    """Load an arbitrary set of channel traces of one trial from a .doric file.

    Args:
        trial_path: path of the HDF5 (.doric) file to read.
        chanDict: mapping ``channel name -> dict`` where each dict has
            ``'folder'``, ``'chan'``, ``'refFolder'`` and ``'refChan'`` naming
            the groups/datasets under
            ``DataAcquisition/FPConsole/Signals/Series0001``.
        eventChanId: name of the dataset under
            ``DataAcquisition/FPConsole/Events/Series0001`` to load, or
            ``None`` to skip event loading.

    Returns:
        dict with, for every channel name ``<chan>`` in ``chanDict``,
        ``'<chan>ChanSignal'`` and ``'<chan>RefSignal'``; plus ``'Time'``
        (taken from the reference folder of the last channel iterated),
        ``'sampling_rate_filter'``, ``'sampling_rate'`` (reciprocal of the
        mean difference between consecutive ``Time`` values) and, only when
        ``eventChanId`` is not ``None``, ``'event'``.

    Raises:
        KeyError: if a required group, dataset, attribute or ``chanDict``
            entry is missing, or if ``chanDict`` is empty (no ``'Time'`` was
            loaded).
    """
    data = {}
    print(trial_path)

    # The context manager releases the HDF5 handle even when a lookup fails;
    # every value is materialised (``[()]`` / attribute read) before it closes.
    with h5py.File(trial_path, 'r') as f:
        sampling_rate_filter = f['Configurations']['FPConsole']['AIN02']['Settings'].attrs['GlobalSampleRate']
        series = f['DataAcquisition']['FPConsole']['Signals']['Series0001']

        for chan, spec in chanDict.items():
            try:
                data[f'{chan}ChanSignal'] = series[spec['folder']][spec['chan']][()]
                # The reference key is built per channel, so each channel's
                # reference trace lands under its own name.
                data[f'{chan}RefSignal'] = series[spec['refFolder']][spec['refChan']][()]
                data['Time'] = series[spec['refFolder']]['Time'][()]
            except KeyError as err:
                # Fail loudly with context instead of dropping into a debugger,
                # which would stall any non-interactive run.
                raise KeyError(
                    f"Could not read channel '{chan}' from {trial_path}: {err}"
                ) from err

        event = None
        if eventChanId is not None:
            event = f['DataAcquisition']['FPConsole']['Events']['Series0001'][eventChanId][()]

    # Effective sampling rate estimated from the recorded time stamps.
    sampling_rate = 1 / np.mean(np.diff(data['Time']))

    data['sampling_rate_filter'] = sampling_rate_filter
    data['sampling_rate'] = sampling_rate
    if eventChanId is not None:
        data['event'] = event

    return data