09_combineFiberMeasurements.Rmd

#This script pulls in the csvs that are output from Slicer, and extracts the relevant values

```{r setup}

#libraries
library('data.table')
library('tidyverse')
library('dplyr')
library('knitr')
library('stringr')
library('stringi')
library('varhandle')

#note: I first moved all csvs from the 3 directories (commissural, left, right) into a single directory, in bash.
#cd /projects/ncalarco/thesis/SPINS/Slicer/data/registered/FiberMeasurements

#set the knitr root directory once here, so the working directory is the same in every later chunk
#NOTE(review): this path starts with '/mnt/tigrlab', while the paths used in the next chunk start at '/projects' - confirm both resolve to the same directory
opts_knit$set(root.dir = '/mnt/tigrlab/projects/ncalarco/thesis/SPINS/Slicer/txt_outputs/05_FiberMeasurements_csvs')

```

```{r}

#directory that holds the csvs output from Slicer (one file per subject and hemisphere)
csv_dir <- '/projects/ncalarco/thesis/SPINS/Slicer/txt_outputs/05_FiberMeasurements_csvs'

#create a vector of the csvs in our directory (have 1335)
#note: 'pattern' is a regular expression (not a glob), so match the literal '.csv' extension at the end of the name
#note: full paths are returned, so reading the files does not depend on the current working directory
csv_paths <- list.files(path = csv_dir, pattern = "\\.csv$", full.names = TRUE)

#stop early with a clear message, rather than failing later on an empty table
if (length(csv_paths) == 0) {
  stop("no csv files found in ", csv_dir, call. = FALSE)
}

#make sure that the 'index' for rbindlist will be the filename (and not list index, or the full path)
csvs <- setNames(csv_paths, basename(csv_paths))

#read the csv files in as a single list
csv_list <- lapply(csvs, read.delim)

#combine the list elements into a single df, and make a filename column
df <- rbindlist(csv_list, idcol = "index", fill = TRUE)

#make a unique column for subject ID: the 11 characters right before the '.csv' extension
df$participant_id <- str_sub(df$index, -15, -5)

#make a column for file type: the second '_'-separated piece of the filename
#vapply guarantees a character vector, whatever the input looks like
df$hemisphere <- vapply(strsplit(df$index, "_"), function(x) x[2], character(1))

#make a column for tract: keep only what follows the last slash of 'Name'
df$region <- sub("^.*/", "", as.character(df$Name))

#remove the literal '.vtp' file extension, together with any trailing whitespace
df$region <- sub("\\.vtp[[:space:]]*$", "", df$region)

#identifier columns, and measurement columns, in the order wanted for the output
id_cols <- c("participant_id", "hemisphere", "region")
measure_cols <- c(
  "Num_Points",
  "Num_Fibers",
  "Mean_Length",
  "cluster_idx.Mean",
  "tensors.FractionalAnisotropy.Mean",
  "tensors.LinearMeasure.Mean",
  "tensors.MaxEigenvalue.Mean",
  "tensors.MeanDiffusivity.Mean",
  "tensors.MidEigenvalue.Mean",
  "tensors.MinEigenvalue.Mean",
  "tensors.PlanarMeasure.Mean",
  "tensors.RelativeAnisotropy.Mean",
  "tensors.SphericalMeasure.Mean",
  "tensors.Trace.Mean"
)

#reorder variables for clarity; selecting by name also drops 'index' and 'Name'
df <- as.data.frame(df)[, c(id_cols, measure_cols)]

#make sure that all the numeric columns are actually numeric
#columns are picked by name (not position), and columns that are already numeric are left untouched
to_numeric <- function(x) {
  if (is.numeric(x)) x else as.numeric(as.character(x))
}
df[measure_cols] <- lapply(df[measure_cols], to_numeric)

#delete every row that has a missing value (NA/NaN) in any column: not part of the region
df <- df[complete.cases(df), ]

#write.csv, with the date of the run in the file name
out_file <- paste0('/projects/ncalarco/thesis/SPINS/Slicer/txt_outputs/05_FiberMeasurements_', Sys.Date(), '.csv')
write.csv(df, out_file, row.names = FALSE)

```