extract_ethovision_raw_columns.py

import os
import pandas as pd
import re
import pdb
# Folder containing the files
folder_path = r'C:\Analysis\fiberPhotometry\rawData\07-10-2024-sert-dopamine-release\velocities\half'


# Function to process each file
def process_file(file_path, trial_number):
    # Load the Excel file
    df = pd.read_excel(file_path, header=[40])
    #delete the first 40 rows
    # df = df.drop(df.index[:40])
    # pdb.set_trace()
    # Replace '-' with 0
    df.replace('-', 0, inplace=True)

    # # Drop line 42 (containing "cm/s" or "s") if it exists
    # if len(df) > 41:
    df.drop(index=0, inplace=True)
    #reset index
    df.reset_index(drop=True, inplace=True)

    # Extract only the relevant columns
    velocity = df['Velocity'] if 'Velocity' in df.columns else None
    recording = df['Recording time'] if 'Recording time' in df.columns else None
    # pdb.set_trace()
    if velocity is not None and recording is not None:
        # Create a new DataFrame with only 'Velocity' and 'Recording' columns
        result_df = df[['Velocity', 'Recording time']]

        # Save the new DataFrame to CSV
        output_filename = f'trial_{trial_number:04}.csv'
        output_path = os.path.join(folder_path, output_filename)
        result_df.to_csv(output_path, index=False)
        print(f"Processed and saved: {output_path}")
    else:
        print(f"Missing 'Velocity' or 'Recording' columns in {file_path}")


# Loop through all files in the folder
for filename in os.listdir(folder_path):
    if filename.startswith("Raw data") and filename.endswith(".xlsx"):
        # Extract trial number from filename
        match = re.search(r'Trial\s+(\d+)', filename)
        if match:
            trial_number = int(match.group(1))
            file_path = os.path.join(folder_path, filename)
            process_file(file_path, trial_number)