-
Notifications
You must be signed in to change notification settings - Fork 0
/
Quiz3.R
97 lines (59 loc) · 2.15 KB
/
Quiz3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Questions 1-2: CART model on the cell segmentation data.
# All packages this section needs are attached up front.
library(AppliedPredictiveModeling)
library(caret)
library(rattle)
library(rpart)
library(rpart.plot)

data(segmentationOriginal)

# The data set carries its own train/test designation in the Case column.
set.seed(125)
training <- segmentationOriginal[segmentationOriginal$Case == "Train", ]
testing <- segmentationOriginal[segmentationOriginal$Case == "Test", ]

# Fit the tree through caret and show the splits it settled on.
Mod <- train(Class ~ ., method = "rpart", data = training)
print(Mod$finalModel)

# Base-graphics rendering of the final rpart tree, followed by the
# rattle rendering of the same tree.
plot(Mod$finalModel, uniform = TRUE, main = "Classification Tree")
text(Mod$finalModel, use.n = TRUE, all = TRUE, cex = 0.8)
fancyRpartPlot(Mod$finalModel)

# Predict on the held-out rows. The previous call, `predict(Mod, )`, had a
# dangling empty argument and never used the `testing` split built above.
predict(Mod, newdata = testing)

names(segmentationOriginal)
# Question 3
library(tree)
library(pgmm)
data(olive)

# Drop the first column so that Area is modelled from the remaining columns.
olive <- olive[, -1]

# One-row data frame holding the column means; it must be built while Area
# is still numeric, because colMeans() needs all-numeric input.
newdata <- as.data.frame(t(colMeans(olive)))

# Converting Area to a factor variable changes the output: the tree becomes
# a classification tree, so predict(..., type = "class") can be used to get
# an integral Area value (3).
# Leaving Area as a numeric variable results in a prediction of 2.875.
olive$Area <- factor(olive$Area)
Mod <- tree(Area ~ ., data = olive)
predict(Mod, newdata)

# plot.tree() selects the layout through `type`; `uniform = TRUE` is an
# rpart argument. The title is added with title() rather than relying on
# plot.tree() forwarding `main` -- NOTE(review): confirm against the tree docs.
plot(Mod, type = "uniform")
title(main = "Classification Tree")
text(Mod, all = TRUE, cex = 0.8)
# Question 4
library(ElemStatLearn)
data(SAheart)

# Draw half of the row indices without replacement as the training set.
# seq_len(nrow(...)) is the safe form of 1:dim(...)[1], and FALSE is spelled
# out because `F` is an ordinary, reassignable variable.
set.seed(8484)
train <- sample(seq_len(nrow(SAheart)), size = nrow(SAheart) / 2,
                replace = FALSE)
trainSA <- SAheart[train, ]
testSA <- SAheart[-train, ]

# Logistic regression of chd on the six listed predictors.
set.seed(13234)
ModO <- glm(chd ~ age + alcohol + obesity + tobacco + typea + ldl,
            data = trainSA, family = "binomial")
# Misclassification rate for thresholded predictions.
#
# values:     observed outcomes (compared against 0/1 labels).
# prediction: numeric scores; a score above 0.5 is labelled 1, otherwise 0.
#
# Returns the number of mismatches divided by length(values).
missClass <- function(values, prediction) {
  predicted_label <- as.numeric(prediction > 0.5)
  n_wrong <- sum(predicted_label != values)
  n_wrong / length(values)
}
# Misclassification rate on the test set
test_prob <- predict(ModO, newdata = testSA, type = "response")
missClass(testSA$chd, test_prob)

# Misclassification rate on the training set
train_prob <- predict(ModO, newdata = trainSA, type = "response")
missClass(trainSA$chd, train_prob)
# Question 5
library(ElemStatLearn)
# randomForest() is called directly below, so the package has to be attached.
library(randomForest)
data(vowel.train)
data(vowel.test)

# Treat the response as a class label in both data sets.
vowel.train$y <- factor(vowel.train$y)
vowel.test$y <- factor(vowel.test$y)

# Alternative fits kept for reference. The tuned randomForest() call used to
# run here and was immediately overwritten by the default fit below; it also
# advanced the random number stream, so the model actually used was not the
# one the seed pins down.
# ModX <- train(y ~ ., data = vowel.train, method = "rf", prox = TRUE)
# ModY <- randomForest(y ~ ., data = vowel.train, mtry = 3,
#                      importance = TRUE, na.action = na.omit)

# Seed immediately before the fit so the forest is reproducible.
set.seed(33833)
ModY <- randomForest(y ~ ., data = vowel.train)
varImp(ModY, useModel = TRUE)