-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcaret all reg.R
134 lines (114 loc) · 4.24 KB
/
caret all reg.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# title : caret all regression.R
# purpose : Run a list of caret regression models in parallel and compare R2 and RMSE
# producer : A. Chinilin
# address : Moscow. RSAU-MTAA
# load all libraries
library(doParallel)
library(caret)
library(dplyr)
library(DT)
models <- c("glmnet", "knn",
"pcaNNet", "pcr", "pls", "ranger", "qrf",
"cubist", "svmRadial", "xgbTree")
cl <- makeCluster(detectCores())
registerDoParallel(cl)
# compile cross-validation settings
ctrl1 <- trainControl(method = "repeatedcv", number = 5,
repeats = 10, allowParallel = TRUE)
# use lapply/loop to run everything
l <- lapply(models, function(i)
{cat("----------------------------------------------------","\n");
set.seed(1234); cat(i," <- done\n");
t <- train(C~., data = FD.spectra, (i), trControl = ctrl1,
preProcess = c("center", "scale"),
metric = "RMSE")
}
)
# use lapply to print the results
results <- lapply(1:length(l), function(i)
{cat(sprintf("%-20s",(models[i])));
cat(round(l[[i]]$results$Rsquared[which.min(l[[i]]$results$RMSE)],4),"\t");
cat(round(l[[i]]$results$RMSE[which.min(l[[i]]$results$RMSE)],4),"\t")
cat(l[[i]]$times$everything[3],"\n")
}
)
# stop the parallel processing and register sequential front-end
stopCluster(cl)
registerDoSEQ()
# preallocate data types
i = 1; MAX = length(l);
x1 <- character() # Name
x2 <- numeric() # R2
x3 <- numeric() # RMSE
x4 <- numeric() # time [s]
x5 <- character() # long model name
# fill data and check indexes and NA
for (i in 1:length(l)) {
x1[i] <- l[[i]]$method
x2[i] <- as.numeric(l[[i]]$results$Rsquared[which.min(l[[i]]$results$RMSE)])
x3[i] <- as.numeric(l[[i]]$results$RMSE[which.min(l[[i]]$results$RMSE)])
x4[i] <- as.numeric(l[[i]]$times$everything[3])
x5[i] <- l[[i]]$modelInfo$label
}
# coerce to data frame
df <- data.frame(x1,x2,x3,x4,x5, stringsAsFactors = FALSE)
# print all results to R-GUI
df
# call web browser output with sortable column names
datatable(df, options = list(
columnDefs = list(list(className = 'dt-left', targets = c(0,1,2,3,4,5))),
pageLength = MAX,
order = list(list(3, 'desc'))),
colnames = c('№', 'Name', 'R2', 'RMSE', 'time [s]', 'Model name'),
caption = paste('Regression results from "caret" list models'),
class = 'cell-border stripe') %>%
formatRound('x2', 3) %>%
formatRound('x3', 3) %>%
formatRound('x4', 3) %>%
formatStyle(3,
background = styleColorBar(x3, 'steelblue'),
backgroundSize = '100% 90%',
backgroundRepeat = 'no-repeat',
backgroundPosition = 'center',
fontWeight = 'bold'
)
# compile models and compare perfomance
model_list <- list(GLMNET = l[[1]], KNN = l[[2]], pcaNNet = l[[3]],
PCR = l[[4]], PLSR = l[[5]], RF = l[[6]],
QRF = l[[7]], Cubist = l[[8]], SVM = l[[9]], XGB = l[[10]])
results <- resamples(model_list)
summary(results)
# boxplot comparing results
bwplot(results, layout = c(3, 1)) # RMSE, MSE and R-squared
#-----------------------------------------------------------------------------#
# Run a list cross-validation methods with PCR method
cl <- makeCluster(detectCores())
registerDoParallel(cl)
# define all cross-validation methods
cvMethods <- c("boot", # bootstrap
"boot632", # 0.632 bootstrap
"LGOCV", # leave-one-group cross validation, variant of LOOCV for hierarchical data
"LOOCV", # leave-one-out cross validation, also known as jacknife
"cv", # cross validation
"repeatedcv" # repeated n-fold cross validation
)
# use R lapply function to loop through all CV methos with qrf
all <- lapply(cvMethods, function(x)
{set.seed(1234); print(x); tc <- trainControl(method=(x))
fit1 <- train(C~., data = FD.spectra,
preProcess = c("center", "scale"),
trControl = tc,
method = "pls") # may choose any of possible regression models
}
)
# stop cluster
stopCluster(cl)
registerDoSEQ()
# extract the used cvMethods
myNames <- lapply(1:6, function(x) all[[x]]$control$method)
# save results
results <- sapply(all, getTrainPerf)
# change column Names to cv methods
colnames(results) <- myNames
# get the results
results