-
Notifications
You must be signed in to change notification settings - Fork 0
/
minimumFunction.R
67 lines (56 loc) · 2.21 KB
/
minimumFunction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
### Minimum
#### Libraries ####
# Install naivebayes only when it is missing, so re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("naivebayes", quietly = TRUE)) {
  install.packages("naivebayes")
}
library(naivebayes)

#### Data ####
# The file has no header row, so read.csv names the columns V1, V2, ...;
# "?" is read as a missing value.
Mushrooms <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE, sep = ",", dec = ".", na.strings = c("?")
)
summary(Mushrooms)

##### Declare Variables
no_observations <- nrow(Mushrooms) # No. observations (8124)
# No. predictors (22) = No. variables (23) - dependent var. (1st column)
no_predictors <- ncol(Mushrooms) - 1
# 20% of the rows are held out for testing.
# NOTE: no seed is set before this draw, so the split differs between runs.
test_index <- sample(
  no_observations,
  size = as.integer(no_observations * 0.2),
  replace = FALSE
)
# Negative indices: everything except the test rows (remaining 80%).
training_index <- -test_index
measurevar <- "V1"
# Predictor names: every column except the first (the dependent variable).
preds <- colnames(Mushrooms)[-1]
#FUNCTION 1 : MeanError
# Estimate the mean misclassification rate of an already-fitted classifier.
#
# The model is not refitted here. The function draws `n_reps` random row
# subsets of `dataset`, predicts each subset with `model`, and averages the
# share of wrong predictions over the subsets.
#
# Args:
#   dataset:       data frame holding the predictors and the `target` column.
#   model:         fitted model usable as predict(model, newdata = ...).
#   n_reps:        number of random subsets to score (default 10).
#   test_fraction: share of the rows drawn for each subset (default 0.2).
#   target:        name of the column with the true class (default "V1").
#
# Returns:
#   The mean error rate (numeric scalar). It is printed and returned
#   invisibly.
meanError <- function(dataset, model, n_reps = 10, test_fraction = 0.2,
                      target = "V1") {
  stopifnot(
    length(n_reps) == 1, n_reps >= 1,
    length(test_fraction) == 1, test_fraction > 0, test_fraction <= 1,
    target %in% colnames(dataset)
  )

  # Fixed seed: every call scores its model on the same sequence of subsets,
  # so the error rates of different models are directly comparable. As a side
  # effect this resets the caller's RNG stream.
  set.seed(0)

  # Size the subsets from the data actually passed in, not from a global.
  n_obs <- nrow(dataset)
  test_size <- as.integer(n_obs * test_fraction)

  errors <- numeric(n_reps)
  for (rep_id in seq_len(n_reps)) {
    test_rows <- sample(n_obs, size = test_size, replace = FALSE)
    predicted <- predict(model, newdata = dataset[test_rows, , drop = FALSE])
    actual <- dataset[[target]][test_rows]
    # Compare labels element-wise. Reading the diagonal of table(pred, actual)
    # is only valid when both sides have identical, identically ordered
    # levels; otherwise mismatches can land on the diagonal.
    errors[rep_id] <- mean(
      as.character(predicted) != as.character(actual),
      na.rm = TRUE
    )
  }

  mean_error <- sum(errors) / n_reps
  print(mean_error)
  invisible(mean_error)
}
#FUNCTION 2: FIND MINIMUM COLUMN NAME
############ FIND MINIMUM / CALCULATE ERROR ###########
# Find the candidate predictor (set) whose naive Bayes model has the lowest
# mean error.
#
# For each element of `predictorList` a model "V1 ~ <element>" is fitted on
# the training rows of `dataset` and scored with meanError() on `dataset`.
#
# Args:
#   predictorList: character vector or list; each element is one candidate,
#                  either a single column name or a character vector of
#                  column names that are joined with " + ".
#   dataset:       data frame with the response "V1" and the predictors
#                  (defaults to the global `Mushrooms`).
#   trainingIndex: row index used to select the training rows of `dataset`
#                  (defaults to the global `training_index`).
#
# Returns:
#   The element of `predictorList` with the smallest mean error (the first
#   one on ties).
minimumCol <- function(predictorList, dataset = Mushrooms,
                       trainingIndex = training_index) {
  if (length(predictorList) == 0) {
    stop("predictorList must contain at least one predictor", call. = FALSE)
  }

  response <- "V1"
  # The training subset is the same for every candidate, so build it once.
  trainingData <- dataset[trainingIndex, ]

  # Iterate over the argument itself rather than a global predictor count,
  # so the function also works for lists of any other length.
  errorRates <- vapply(
    seq_along(predictorList),
    function(idx) {
      terms <- predictorList[[idx]]
      modelFormula <- as.formula(
        paste(response, paste(terms, collapse = " + "), sep = " ~ ")
      )
      fit <- naive_bayes(modelFormula, data = trainingData)
      meanError(dataset, fit)
    },
    numeric(1)
  )

  best <- which.min(errorRates)
  if (length(best) == 0) {
    stop("no candidate produced a usable error rate", call. = FALSE)
  }
  # Return from the list that was searched, not from the global `preds`.
  predictorList[[best]]
}
# Search all predictor columns one at a time; the name of the column whose
# model has the lowest mean error is returned (and auto-printed when this
# statement is evaluated at top level, e.g. under Rscript).
minimumCol(preds)