forked from navonacalarco/Slicer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path09_combineFiberMeasurements.Rmd
80 lines (59 loc) · 2.59 KB
/
09_combineFiberMeasurements.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#This script is used to pull in csvs that are output from Slicer, and extract relevant values
```{r setup}
# packages (load order kept as-is, since it determines which functions mask which)
library("data.table")
library("tidyverse")
library("dplyr")
library("knitr")
library("stringr")
library("stringi")
library("varhandle")

# Note: beforehand, all csvs from the 3 directories (commissural, left, right)
# were moved into a single directory, in bash:
#   cd /projects/ncalarco/thesis/SPINS/Slicer/data/registered/FiberMeasurements

# Set the knitr root directory once here, so that it doesn't change in each chunk.
knitr::opts_knit$set(
  root.dir = "/mnt/tigrlab/projects/ncalarco/thesis/SPINS/Slicer/txt_outputs/05_FiberMeasurements_csvs"
)
```
```{r}
# Combine the per-file Slicer fiber measurement csvs into one data frame and
# write the result out as a single dated csv.

# locations: the input csvs live in a sub-directory of the txt_outputs directory
txt_outputs_dir <- "/projects/ncalarco/thesis/SPINS/Slicer/txt_outputs"
csv_dir <- file.path(txt_outputs_dir, "05_FiberMeasurements_csvs")

# create a vector of the csvs in our directory; `pattern` is a regular
# expression (not a shell glob), so match a literal ".csv" suffix
csvs <- list.files(path = csv_dir, pattern = "\\.csv$") #have 1335
if (length(csvs) == 0) {
  stop("no csv files found in ", csv_dir, call. = FALSE)
}

# make sure that the 'index' for rbind will be the filename (and not list index)
csvs <- setNames(csvs, csvs)

# read the files in as a single named list; the full path is built here so the
# read does not depend on the current working directory
# NOTE(review): read.delim() parses tab-separated text -- presumably the Slicer
# output is tab-delimited despite the .csv extension; confirm
csv_list <- lapply(csvs, function(f) read.delim(file.path(csv_dir, f)))

# combine the list elements into a single table; 'index' holds the filename,
# and fill = TRUE pads columns that are missing from some files with NA
df <- rbindlist(csv_list, idcol = "index", fill = TRUE)

# subject ID: the 11 characters directly before the 4-character ".csv" suffix
df$participant_id <- str_sub(df$index, -15, -5)

# hemisphere: the second "_"-delimited token of the filename
# (NA when the filename contains no underscore)
df$hemisphere <- vapply(
  strsplit(df$index, "_"),
  function(tokens) tokens[2],
  character(1)
)

# tract: the last "/"-delimited component of the 'Name' path
spl <- strsplit(as.character(df$Name), "/")
df$region <- vapply(
  spl,
  function(parts) paste(tail(parts, 1), collapse = "/"),
  character(1)
)
# remove the file extension; fixed = TRUE so "." is a literal dot, and the
# trailing space that follows ".vtp" in the raw values is removed with it
df$region <- gsub(".vtp ", "", df$region, fixed = TRUE)

# keep only the variables of interest, reordered for clarity
# (this also discards the helper columns 'index' and 'Name')
keep_cols <- c(
  "participant_id",
  "hemisphere",
  "region",
  "Num_Points",
  "Num_Fibers",
  "Mean_Length",
  "cluster_idx.Mean",
  "tensors.FractionalAnisotropy.Mean",
  "tensors.LinearMeasure.Mean",
  "tensors.MaxEigenvalue.Mean",
  "tensors.MeanDiffusivity.Mean",
  "tensors.MidEigenvalue.Mean",
  "tensors.MinEigenvalue.Mean",
  "tensors.PlanarMeasure.Mean",
  "tensors.RelativeAnisotropy.Mean",
  "tensors.SphericalMeasure.Mean",
  "tensors.Trace.Mean"
)
df <- as.data.frame(df)[keep_cols]

# make sure the measurement columns (Mean_Length through tensors.Trace.Mean)
# are numeric; going through character first converts factor labels rather
# than their integer codes. Num_Points and Num_Fibers are left as read.
measure_cols <- keep_cols[6:17]
df[measure_cols] <- lapply(
  df[measure_cols],
  function(column) as.numeric(as.character(column))
)

# drop every row that has a missing value (NA/NaN) in any column
# NOTE(review): the original intent was to drop rows that are entirely NaN
# (not part of the region); complete.cases() is stricter than that -- confirm
df <- df[complete.cases(df), ]

# write.csv, with the run date in the filename
out_file <- file.path(
  txt_outputs_dir,
  paste0("05_FiberMeasurements_", Sys.Date(), ".csv")
)
write.csv(df, out_file, row.names = FALSE)
```