From 36b9d3d8489491785d5c4c7992bfedc99809df47 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Sat, 23 Mar 2019 18:23:04 -0700 Subject: [PATCH 01/26] Create laurenedits.Rmd This is my first commit, testing it out now --- laurenedits.Rmd | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 laurenedits.Rmd diff --git a/laurenedits.Rmd b/laurenedits.Rmd new file mode 100644 index 0000000..fb45c24 --- /dev/null +++ b/laurenedits.Rmd @@ -0,0 +1,86 @@ +--- +title: "R Notebook" +output: html_notebook +--- +```{r setup, include=FALSE} + +knitr::opts_chunk$set(echo = TRUE) +``` + +#laurenedits + +```{r} +data = read.csv("pokemon_alopez247.csv", header=T) +``` + +```{r} +summary(data) +``` + +```{r} +library(scatterplot3d) +attach(pokemon) +scatterplot3d(Defense,HP,hasGender, pch=16, highlight.3d=TRUE, + type="h", main="3D Scatterplot") +``` + +```{r} +library(scatterplot3d) +attach(pokemon) +scatterplot3d(HP,Defense,Pr_Male, pch=16, highlight.3d=TRUE, + type="h", main="3D Scatterplot") +``` + +```{r} +library(scatterplot3d) +attach(pokemon) +scatterplot3d(Defense,HP,Pr_Male, main="3D Scatterplot") +``` + +```{r} +library(gclus) +dta <- pokemon[c(5,6,7,8)] # data, numbers as column numbers +dta.r <- abs(cor(dta)) # correlation +dta.col <- dmat.color(dta.r) # colors +dta.o <- order.single(dta.r) +cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, +main="Variables Ordered and Colored by Correlation" ) +``` + +```{r} +library(neuralnet) +attach(pokemon) +scale() +NN <- neuralnet(HP~Defense, data=pokemon, hidden = 0) +sum((predict(NN, data.frame(Defense))~HP)^2) +``` + +shreeves@alumni.ubc.ca + +Cannot do this: +NN <- neuralnet(HP~hasGender, data=pokemon, hidden = 0) + as it is not numeric, but is binary + +Cannot do this: +NN = neauralnet(Number ~ Name + Type_1 + Type_2 + Total + HP + Attack + Defense + Sp_Attk + Sp_Def + Speed + Generation + isLegendary + Color + hasGender + Pr_Male + Egg_Group_1 + Egg_Group_2 + 
hasMegaEvolution + Height_m + Weight_kg + Catch_Rate + Body_Style, data=pokemon, hidden = 0) +plot(NN) as these are non-numeric responses + +```{r} +library(neuralnet) +trainNN = scaled[index,] +testNN = scaled[-index,] +set.seed(2) +NN = neauralnet(Number ~ Name + Type_1 + Type_2 + Total + HP + Attack + Defense + Sp_Attk + Sp_Def + Speed + Generation + isLegendary + Color + hasGender + Pr_Male + Egg_Group_1 + Egg_Group_2 + hasMegaEvolution + Height_m + Weight_kg + Catch_Rate + Body_Style) +plot(NN) +``` + + +**ERROR +```{r} +pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) +#remove NA +cleanPoke <- na.omit(pokemon) +pcPoke <- prcomp(as.matrix(cleanPoke[,c(6,8,16)]), scale. = TRUE) +summary(pcPoke) +``` + From 30c63faac931fc7a111a1b4faa7f4c4adbe4c318 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 25 Mar 2019 10:05:54 -0700 Subject: [PATCH 02/26] Update laurenedits.Rmd --- laurenedits.Rmd | 59 +++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/laurenedits.Rmd b/laurenedits.Rmd index fb45c24..4e5f574 100644 --- a/laurenedits.Rmd +++ b/laurenedits.Rmd @@ -47,40 +47,57 @@ cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, main="Variables Ordered and Colored by Correlation" ) ``` +**Running into errors here, will double check with TA ```{r} library(neuralnet) -attach(pokemon) -scale() -NN <- neuralnet(HP~Defense, data=pokemon, hidden = 0) -sum((predict(NN, data.frame(Defense))~HP)^2) -``` - -shreeves@alumni.ubc.ca - -Cannot do this: -NN <- neuralnet(HP~hasGender, data=pokemon, hidden = 0) - as it is not numeric, but is binary - -Cannot do this: -NN = neauralnet(Number ~ Name + Type_1 + Type_2 + Total + HP + Attack + Defense + Sp_Attk + Sp_Def + Speed + Generation + isLegendary + Color + hasGender + Pr_Male + Egg_Group_1 + Egg_Group_2 + hasMegaEvolution + Height_m + Weight_kg + Catch_Rate + Body_Style, data=pokemon, hidden = 0) -plot(NN) as these are non-numeric responses - 
-```{r} -library(neuralnet) +pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) +clean <- as.numeric(c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)) +cleanPoke <- na.omit(clean) +scaled <- scale(cleanPoke) trainNN = scaled[index,] testNN = scaled[-index,] -set.seed(2) +set.seed(27) NN = neauralnet(Number ~ Name + Type_1 + Type_2 + Total + HP + Attack + Defense + Sp_Attk + Sp_Def + Speed + Generation + isLegendary + Color + hasGender + Pr_Male + Egg_Group_1 + Egg_Group_2 + hasMegaEvolution + Height_m + Weight_kg + Catch_Rate + Body_Style) plot(NN) ``` -**ERROR ```{r} pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) -#remove NA +#remove NA from dataset cleanPoke <- na.omit(pokemon) pcPoke <- prcomp(as.matrix(cleanPoke[,c(6,8,16)]), scale. = TRUE) summary(pcPoke) ``` +```{r} +biplot(pcPoke) +``` + +```{r} +plot(pcPoke$x[,1:2]) +``` + +```{r} +plot(pcPoke$x[,1:2], type="n") +text(pcPoke$x[,1], pcPoke$x[,2], labels = 1:nrow(pokemon)) +``` + +```{r} +round(pcPoke$rotation[,1:2], 2) +``` + +```{r} +pokemon[order(pcPoke$x[,1], decreasing=TRUE)[1:4], 1:3] +``` + +```{r} +test1 <- hclust(dist(scale(pokemon[,-c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)]))) +plot(test1) +test2 <- hclust(dist(pcPoke$x)) +plot(test2) +``` + +```{r} +all.equal(dist(scale(pokemon[,-c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)])), dist(pcPoke$x), check.attributes = FALSE) +``` \ No newline at end of file From 10faada461b6e26a270609afa47eb005a1fd9792 Mon Sep 17 00:00:00 2001 From: "Lauren St. 
Clair" Date: Mon, 25 Mar 2019 16:26:28 -0700 Subject: [PATCH 03/26] Update laurenedits.Rmd --- laurenedits.Rmd | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/laurenedits.Rmd b/laurenedits.Rmd index 4e5f574..01f5e41 100644 --- a/laurenedits.Rmd +++ b/laurenedits.Rmd @@ -47,18 +47,31 @@ cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, main="Variables Ordered and Colored by Correlation" ) ``` -**Running into errors here, will double check with TA ```{r} -library(neuralnet) -pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) -clean <- as.numeric(c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)) -cleanPoke <- na.omit(clean) -scaled <- scale(cleanPoke) -trainNN = scaled[index,] -testNN = scaled[-index,] -set.seed(27) -NN = neauralnet(Number ~ Name + Type_1 + Type_2 + Total + HP + Attack + Defense + Sp_Attk + Sp_Def + Speed + Generation + isLegendary + Color + hasGender + Pr_Male + Egg_Group_1 + Egg_Group_2 + hasMegaEvolution + Height_m + Weight_kg + Catch_Rate + Body_Style) -plot(NN) +set.seed(763) +library(tree) +trainindex <- sample(1:nrow(pokemon), 721) +proftrain <- salaries[trainindex, ] +proftest <- salaries[-trainindex, ] +predictTotal <- tree(Total~., data=proftrain) +plot(predictTotal) +text(predictTotal, pretty=0) +predict(predictTotal, proftest) +mean((predict(predictTotal, proftest) - proftest$Total)^2) +summary(predictTotal) +``` + + +```{r} +library(nnet) +library(NeuralNetTools) +set.seed(53747958) +numeric_col <- c(5:12, 16, 20:21) +pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) +colnames(data)[22]<-"Pr_Male" +nnpokemon<-nnet(Pr_Male~., data = data, size = 1) +plotnet(nnpokemon) +mean(nnpokemon$residuals^2) ``` From 2711e8aa18fb30fa9a114558c3d084c512476805 Mon Sep 17 00:00:00 2001 From: "Lauren St. 
Clair" Date: Mon, 25 Mar 2019 22:04:23 -0700 Subject: [PATCH 04/26] Update laurenedits.Rmd --- laurenedits.Rmd | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/laurenedits.Rmd b/laurenedits.Rmd index 01f5e41..da05f41 100644 --- a/laurenedits.Rmd +++ b/laurenedits.Rmd @@ -17,13 +17,6 @@ data = read.csv("pokemon_alopez247.csv", header=T) summary(data) ``` -```{r} -library(scatterplot3d) -attach(pokemon) -scatterplot3d(Defense,HP,hasGender, pch=16, highlight.3d=TRUE, - type="h", main="3D Scatterplot") -``` - ```{r} library(scatterplot3d) attach(pokemon) @@ -61,17 +54,53 @@ mean((predict(predictTotal, proftest) - proftest$Total)^2) summary(predictTotal) ``` +Neural net testing isLegendary w/ all +```{r} +library(nnet) +library(NeuralNetTools) +set.seed(53747958) +numeric_col <- c(5:12, 16, 20:21) +pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) +colnames(data)[22]<-"isLegendary" +nnpokemon<-nnet(isLegendary~., data = data, size = 1) +plotnet(nnpokemon) +mean(nnpokemon$residuals^2) +``` +Neural net testing isLgendary w/ Defense+Pr_Male+Attack+HP ```{r} library(nnet) library(NeuralNetTools) set.seed(53747958) numeric_col <- c(5:12, 16, 20:21) pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) +trainset <- pokemon[1:505, ] +testset <- pokemon[506:721, ] colnames(data)[22]<-"Pr_Male" -nnpokemon<-nnet(Pr_Male~., data = data, size = 1) +nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=3,act.fct = "logistic", + linear.output = FALSE) plotnet(nnpokemon) -mean(nnpokemon$residuals^2) +``` + + +```{r} +test=data.frame(Defense,Pr_Male,Attack,HP) +Predict=compute(nnpokemon,test) +Predict$net.result +``` + +```{r} +prob <- Predict$net.result +pred <- ifelse(prob>0.5, 1, 0) +pred +``` + +```{r} +#Test the resulting output +temp_test <- subset(testset, select = c("Defense","Pr_Male", "Attack", "HP")) +head(temp_test) +nn.results <- compute(nnpokemon, temp_test) +results <- 
data.frame(actual = testset$isLegendary, prediction = nn.results$net.result) ``` @@ -79,7 +108,7 @@ mean(nnpokemon$residuals^2) pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) #remove NA from dataset cleanPoke <- na.omit(pokemon) -pcPoke <- prcomp(as.matrix(cleanPoke[,c(6,8,16)]), scale. = TRUE) +pcPoke <- prcomp(as.matrix(cleanPoke[,c(5:8,16)]), scale. = TRUE) summary(pcPoke) ``` From fd8ddce47e582f706ba7f4b0b9632c8b1c646c5b Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 25 Mar 2019 22:45:22 -0700 Subject: [PATCH 05/26] Update laurenedits.Rmd --- laurenedits.Rmd | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/laurenedits.Rmd b/laurenedits.Rmd index da05f41..8f0edf4 100644 --- a/laurenedits.Rmd +++ b/laurenedits.Rmd @@ -20,7 +20,7 @@ summary(data) ```{r} library(scatterplot3d) attach(pokemon) -scatterplot3d(HP,Defense,Pr_Male, pch=16, highlight.3d=TRUE, +sp <- scatterplot3d(HP,Defense,Pr_Male, pch=16, highlight.3d=TRUE, type="h", main="3D Scatterplot") ``` @@ -32,28 +32,16 @@ scatterplot3d(Defense,HP,Pr_Male, main="3D Scatterplot") ```{r} library(gclus) -dta <- pokemon[c(5,6,7,8)] # data, numbers as column numbers +dta <- pokemon[c(5:8)] # data, numbers as column numbers +#dta <- pokemon[c(5:8, 16)] dta.r <- abs(cor(dta)) # correlation dta.col <- dmat.color(dta.r) # colors +#dta.r[is.na(dta.r)] <- 0.5 dta.o <- order.single(dta.r) -cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, +cpairs(dta, dta.o, panel.csolors=dta.col, gap=.5, main="Variables Ordered and Colored by Correlation" ) ``` -```{r} -set.seed(763) -library(tree) -trainindex <- sample(1:nrow(pokemon), 721) -proftrain <- salaries[trainindex, ] -proftest <- salaries[-trainindex, ] -predictTotal <- tree(Total~., data=proftrain) -plot(predictTotal) -text(predictTotal, pretty=0) -predict(predictTotal, proftest) -mean((predict(predictTotal, proftest) - proftest$Total)^2) -summary(predictTotal) -``` - Neural net testing 
isLegendary w/ all ```{r} library(nnet) @@ -70,6 +58,7 @@ mean(nnpokemon$residuals^2) Neural net testing isLgendary w/ Defense+Pr_Male+Attack+HP ```{r} library(nnet) +library(neuralnet) library(NeuralNetTools) set.seed(53747958) numeric_col <- c(5:12, 16, 20:21) @@ -77,7 +66,7 @@ pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) trainset <- pokemon[1:505, ] testset <- pokemon[506:721, ] colnames(data)[22]<-"Pr_Male" -nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=3,act.fct = "logistic", +nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=2,act.fct = "logistic", linear.output = FALSE) plotnet(nnpokemon) ``` From 32bdfdf351c0a456e280c76db4f51ec7e7bf86af Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Wed, 27 Mar 2019 10:05:01 -0700 Subject: [PATCH 06/26] Update laurenedits.Rmd --- laurenedits.Rmd | 55 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/laurenedits.Rmd b/laurenedits.Rmd index 8f0edf4..467ffa1 100644 --- a/laurenedits.Rmd +++ b/laurenedits.Rmd @@ -3,7 +3,6 @@ title: "R Notebook" output: html_notebook --- ```{r setup, include=FALSE} - knitr::opts_chunk$set(echo = TRUE) ``` @@ -17,31 +16,48 @@ data = read.csv("pokemon_alopez247.csv", header=T) summary(data) ``` +#Basic 3d Scatterplots +##Evaluating variables, Total, Defense, and Pr_Male ```{r} library(scatterplot3d) attach(pokemon) -sp <- scatterplot3d(HP,Defense,Pr_Male, pch=16, highlight.3d=TRUE, +sp <- scatterplot3d(Total,Defense,Pr_Male, pch=16, highlight.3d=TRUE, type="h", main="3D Scatterplot") ``` +3d Scatterplot without line markers ```{r} library(scatterplot3d) attach(pokemon) -scatterplot3d(Defense,HP,Pr_Male, main="3D Scatterplot") +scatterplot3d(Total,Defense,Pr_Male, main="3D Scatterplot") +``` + +#Corelation +```{r} +library(gclus) +cleanPoke <- na.omit(pokemon) +dta <- cleanPoke[c(5:8, 16)] # data, numbers as column numbers +dta.r <- abs(cor(dta)) # 
correlation +#dta.r[is.na(dta.r)] <- 0.5 +dta.col <- dmat.color(dta.r) # colors +dta.o <- order.single(dta.r) +cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, +main="Variables Ordered and Colored by Correlation" ) ``` +It may be worth exploring how the correlation changes depending on what value is set to Pr_Male NA values when using "ta.r[is.na(dta.r)] <- 0.5." Below is the correlation jsy between the columns 5:8 for additional clarity. ```{r} library(gclus) +#cleanPoke <- na.omit(pokemon) dta <- pokemon[c(5:8)] # data, numbers as column numbers -#dta <- pokemon[c(5:8, 16)] dta.r <- abs(cor(dta)) # correlation dta.col <- dmat.color(dta.r) # colors -#dta.r[is.na(dta.r)] <- 0.5 dta.o <- order.single(dta.r) -cpairs(dta, dta.o, panel.csolors=dta.col, gap=.5, +cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, main="Variables Ordered and Colored by Correlation" ) ``` +#Neural Network, predicting isLegendary Neural net testing isLegendary w/ all ```{r} library(nnet) @@ -62,23 +78,27 @@ library(neuralnet) library(NeuralNetTools) set.seed(53747958) numeric_col <- c(5:12, 16, 20:21) +is.na(numeric_col) <- 0.5 pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) trainset <- pokemon[1:505, ] testset <- pokemon[506:721, ] -colnames(data)[22]<-"Pr_Male" -nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=2,act.fct = "logistic", +colnames(data)[22]<-"isLegendary" +nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=3,act.fct = "logistic", linear.output = FALSE) plotnet(nnpokemon) +nnpokemon$result.matrix ``` - +*DOUBLE CHECK ```{r} +attach(pokemon) test=data.frame(Defense,Pr_Male,Attack,HP) Predict=compute(nnpokemon,test) Predict$net.result ``` ```{r} +attach(pokemon) prob <- Predict$net.result pred <- ifelse(prob>0.5, 1, 0) pred @@ -89,10 +109,23 @@ pred temp_test <- subset(testset, select = c("Defense","Pr_Male", "Attack", "HP")) head(temp_test) nn.results <- compute(nnpokemon, temp_test) +results <- data.frame(actual = 
testset$isLegendary, predicted = nn.results$net.result) +results +``` + +```{r} results <- data.frame(actual = testset$isLegendary, prediction = nn.results$net.result) +results +``` + +*DOUBLE CHECK +```{r} +#mse <- sum((compute(nnpokemon, testset[,2:15])$net.result-test$Price)^2) +#mse ``` +#Principle Component Analysis ```{r} pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) #remove NA from dataset @@ -115,11 +148,13 @@ text(pcPoke$x[,1], pcPoke$x[,2], labels = 1:nrow(pokemon)) ``` ```{r} -round(pcPoke$rotation[,1:2], 2) +round(pcPoke$rotation[,1:5], 3) ``` ```{r} pokemon[order(pcPoke$x[,1], decreasing=TRUE)[1:4], 1:3] +plot(pcPoke, type="lines") +abline(a=1, b=0, col="blue", lwd=3) ``` ```{r} From b9f16f3bdb9cba5d817ee0b4891e4f3c0ecfddb7 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Wed, 27 Mar 2019 10:13:34 -0700 Subject: [PATCH 07/26] Create Scatters.Rmd --- Scatters.Rmd | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Scatters.Rmd diff --git a/Scatters.Rmd b/Scatters.Rmd new file mode 100644 index 0000000..5764f31 --- /dev/null +++ b/Scatters.Rmd @@ -0,0 +1,58 @@ +--- +title: "R Notebook" +output: html_notebook +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + + +```{r} +data = read.csv("pokemon_alopez247.csv", header=T) +``` + +```{r} +summary(data) +``` + +#Basic 3d Scatterplots +##Evaluating variables, Total, Defense, and Pr_Male +```{r} +library(scatterplot3d) +attach(pokemon) +sp <- scatterplot3d(Total,Defense,Pr_Male, pch=16, highlight.3d=TRUE, + type="h", main="3D Scatterplot") +``` + +3d Scatterplot without line markers +```{r} +library(scatterplot3d) +attach(pokemon) +scatterplot3d(Total,Defense,Pr_Male, main="3D Scatterplot") +``` + +#Corelation +```{r} +library(gclus) +cleanPoke <- na.omit(pokemon) +dta <- cleanPoke[c(5:8, 16)] # data, numbers as column numbers +dta.r <- abs(cor(dta)) # correlation +#dta.r[is.na(dta.r)] <- 0.5 
+dta.col <- dmat.color(dta.r) # colors +dta.o <- order.single(dta.r) +cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, +main="Variables Ordered and Colored by Correlation" ) +``` +It may be worth exploring how the correlation changes depending on what value is set to Pr_Male NA values when using "ta.r[is.na(dta.r)] <- 0.5." Below is the correlation jsy between the columns 5:8 for additional clarity. + +```{r} +library(gclus) +#cleanPoke <- na.omit(pokemon) +dta <- pokemon[c(5:8)] # data, numbers as column numbers +dta.r <- abs(cor(dta)) # correlation +dta.col <- dmat.color(dta.r) # colors +dta.o <- order.single(dta.r) +cpairs(dta, dta.o, panel.colors=dta.col, gap=.5, +main="Variables Ordered and Colored by Correlation" ) +``` From 70b6286fdc5e3ad80000c25e9c3e95840c50a1d8 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Wed, 27 Mar 2019 10:13:37 -0700 Subject: [PATCH 08/26] Create Principle.Rmd --- Principle.Rmd | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Principle.Rmd diff --git a/Principle.Rmd b/Principle.Rmd new file mode 100644 index 0000000..5c8b6ab --- /dev/null +++ b/Principle.Rmd @@ -0,0 +1,63 @@ +--- +title: "R Notebook" +output: html_notebook +--- + + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +#laurenedits + +```{r} +data = read.csv("pokemon_alopez247.csv", header=T) +``` + +```{r} +summary(data) +``` + + +#Principle Component Analysis +```{r} +pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) +#remove NA from dataset +cleanPoke <- na.omit(pokemon) +pcPoke <- prcomp(as.matrix(cleanPoke[,c(5:8,16)]), scale. 
= TRUE) +summary(pcPoke) +``` + +```{r} +biplot(pcPoke) +``` + +```{r} +plot(pcPoke$x[,1:2]) +``` + +```{r} +plot(pcPoke$x[,1:2], type="n") +text(pcPoke$x[,1], pcPoke$x[,2], labels = 1:nrow(pokemon)) +``` + +```{r} +round(pcPoke$rotation[,1:5], 3) +``` + +```{r} +pokemon[order(pcPoke$x[,1], decreasing=TRUE)[1:4], 1:3] +plot(pcPoke, type="lines") +abline(a=1, b=0, col="blue", lwd=3) +``` + +```{r} +test1 <- hclust(dist(scale(pokemon[,-c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)]))) +plot(test1) +test2 <- hclust(dist(pcPoke$x)) +plot(test2) +``` + +```{r} +all.equal(dist(scale(pokemon[,-c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)])), dist(pcPoke$x), check.attributes = FALSE) +``` \ No newline at end of file From aa8d1cdcfe4d6fccfcd10a8043d0c34f174a6c1f Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Wed, 27 Mar 2019 10:14:39 -0700 Subject: [PATCH 09/26] Create Neural.Rmd Running into trouble with creating the network with isLegendary w/ Defense + Pr_Male + Attack + HP. However, it was running fine last evening so I am unsure why it is running into trouble now. 
--- Neural.Rmd | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 Neural.Rmd diff --git a/Neural.Rmd b/Neural.Rmd new file mode 100644 index 0000000..ca5df26 --- /dev/null +++ b/Neural.Rmd @@ -0,0 +1,86 @@ +--- +title: "R Notebook" +output: html_notebook +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +#laurenedits + +```{r} +data = read.csv("pokemon_alopez247.csv", header=T) +``` + +```{r} +summary(data) +``` + + +#Neural Network, predicting isLegendary +Neural net testing isLegendary w/ all +```{r} +library(nnet) +library(NeuralNetTools) +set.seed(53747958) +numeric_col <- c(5:12, 16, 20:21) +pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) +colnames(data)[22]<-"isLegendary" +nnpokemon<-nnet(isLegendary~., data = data, size = 1) +plotnet(nnpokemon) +mean(nnpokemon$residuals^2) +``` + +Neural net testing isLgendary w/ Defense+Pr_Male+Attack+HP +```{r} +library(nnet) +library(neuralnet) +library(NeuralNetTools) +set.seed(53747958) +numeric_col <- c(5:12, 16, 20:21) +is.na(numeric_col) <- 0.5 +pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) +trainset <- pokemon[1:505, ] +testset <- pokemon[506:721, ] +colnames(data)[22]<-"isLegendary" +nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=3,act.fct = "logistic", + linear.output = FALSE) +plotnet(nnpokemon) +nnpokemon$result.matrix +``` +*DOUBLE CHECK + +```{r} +attach(pokemon) +test=data.frame(Defense,Pr_Male,Attack,HP) +Predict=compute(nnpokemon,test) +Predict$net.result +``` + +```{r} +attach(pokemon) +prob <- Predict$net.result +pred <- ifelse(prob>0.5, 1, 0) +pred +``` + +```{r} +#Test the resulting output +temp_test <- subset(testset, select = c("Defense","Pr_Male", "Attack", "HP")) +head(temp_test) +nn.results <- compute(nnpokemon, temp_test) +results <- data.frame(actual = testset$isLegendary, predicted = nn.results$net.result) +results +``` + +```{r} +results <- data.frame(actual = 
testset$isLegendary, prediction = nn.results$net.result) +results +``` + +*DOUBLE CHECK +```{r} +#mse <- sum((compute(nnpokemon, testset[,2:15])$net.result-test$Price)^2) +#mse +``` From 7d70975e588c5c6bae9553c9cc70fbb0f91d7de8 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Wed, 27 Mar 2019 12:19:13 -0700 Subject: [PATCH 10/26] Update Neural.Rmd --- Neural.Rmd | 67 +++++++++++++++--------------------------------------- 1 file changed, 18 insertions(+), 49 deletions(-) diff --git a/Neural.Rmd b/Neural.Rmd index ca5df26..c4086bf 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -13,74 +13,43 @@ knitr::opts_chunk$set(echo = TRUE) data = read.csv("pokemon_alopez247.csv", header=T) ``` -```{r} -summary(data) -``` - - -#Neural Network, predicting isLegendary -Neural net testing isLegendary w/ all ```{r} library(nnet) library(NeuralNetTools) -set.seed(53747958) -numeric_col <- c(5:12, 16, 20:21) -pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) -colnames(data)[22]<-"isLegendary" -nnpokemon<-nnet(isLegendary~., data = data, size = 1) -plotnet(nnpokemon) -mean(nnpokemon$residuals^2) -``` - -Neural net testing isLgendary w/ Defense+Pr_Male+Attack+HP -```{r} -library(nnet) library(neuralnet) -library(NeuralNetTools) set.seed(53747958) numeric_col <- c(5:12, 16, 20:21) -is.na(numeric_col) <- 0.5 pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) trainset <- pokemon[1:505, ] testset <- pokemon[506:721, ] colnames(data)[22]<-"isLegendary" -nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=3,act.fct = "logistic", - linear.output = FALSE) -plotnet(nnpokemon) -nnpokemon$result.matrix +nn <- neuralnet(isLegendary ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainset, hidden=4, linear.output=TRUE, threshold=0.01) +nn$result.matrix +plotnet(nn) ``` -*DOUBLE CHECK ```{r} -attach(pokemon) -test=data.frame(Defense,Pr_Male,Attack,HP) -Predict=compute(nnpokemon,test) -Predict$net.result +results <- data.frame(actual = testset$isLegendary, 
prediction = nn.results$net.result) +results ``` ```{r} -attach(pokemon) -prob <- Predict$net.result -pred <- ifelse(prob>0.5, 1, 0) -pred -``` +predval<- results[results$prediction.1>0.5,] +predval1<-vector() +for(i in predval[i,]) { + predval1[i]<-as.logical(FALSE) +} +predval1 -```{r} -#Test the resulting output -temp_test <- subset(testset, select = c("Defense","Pr_Male", "Attack", "HP")) -head(temp_test) -nn.results <- compute(nnpokemon, temp_test) -results <- data.frame(actual = testset$isLegendary, predicted = nn.results$net.result) -results ``` ```{r} -results <- data.frame(actual = testset$isLegendary, prediction = nn.results$net.result) -results +predicted=results$prediction * abs(diff(range(isLegendary))) + min(isLegendary) +actual=results$actual * abs(diff(range(isLegendary))) + min(isLegendary) +comparison=data.frame(predicted,actual) +deviation=((actual-predicted)/actual) +comparison=data.frame(predicted,actual,deviation) +accuracy=1-abs(mean(deviation)) +accuracy ``` -*DOUBLE CHECK -```{r} -#mse <- sum((compute(nnpokemon, testset[,2:15])$net.result-test$Price)^2) -#mse -``` From 565c78979cad42d2f684dfa508f5394d650ecf20 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Wed, 27 Mar 2019 12:19:17 -0700 Subject: [PATCH 11/26] Update laurenedits.Rmd --- laurenedits.Rmd | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/laurenedits.Rmd b/laurenedits.Rmd index 467ffa1..a9cb707 100644 --- a/laurenedits.Rmd +++ b/laurenedits.Rmd @@ -130,7 +130,7 @@ results pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE) #remove NA from dataset cleanPoke <- na.omit(pokemon) -pcPoke <- prcomp(as.matrix(cleanPoke[,c(5:8,16)]), scale. = TRUE) +pcPoke <- prcomp(as.matrix(cleanPoke[,c(6:11)]), scale. 
= TRUE) summary(pcPoke) ``` @@ -148,15 +148,16 @@ text(pcPoke$x[,1], pcPoke$x[,2], labels = 1:nrow(pokemon)) ``` ```{r} -round(pcPoke$rotation[,1:5], 3) +round(pcPoke$rotation[,1:2], 3) ``` ```{r} -pokemon[order(pcPoke$x[,1], decreasing=TRUE)[1:4], 1:3] +pokemon[order(pcPoke$x[,1], decreasing=TRUE)[1:4], 1:11] plot(pcPoke, type="lines") abline(a=1, b=0, col="blue", lwd=3) ``` + ```{r} test1 <- hclust(dist(scale(pokemon[,-c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)]))) plot(test1) From 27e23e916401e1b87e0d7c881f943c0a5f8f9163 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Wed, 27 Mar 2019 13:44:41 -0700 Subject: [PATCH 12/26] Update Neural.Rmd --- Neural.Rmd | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/Neural.Rmd b/Neural.Rmd index c4086bf..8a48729 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -23,33 +23,26 @@ pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) trainset <- pokemon[1:505, ] testset <- pokemon[506:721, ] colnames(data)[22]<-"isLegendary" -nn <- neuralnet(isLegendary ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainset, hidden=4, linear.output=TRUE, threshold=0.01) +nn <- neuralnet(isLegendary ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainset, hidden=3, linear.output=TRUE, threshold=0.008) nn$result.matrix plotnet(nn) ``` ```{r} -results <- data.frame(actual = testset$isLegendary, prediction = nn.results$net.result) +nn.result <- predict(nn,newdata=testset, type="class") +results <- data.frame(actual = testset$isLegendary, prediction = nn.result) results ``` ```{r} -predval<- results[results$prediction.1>0.5,] -predval1<-vector() -for(i in predval[i,]) { - predval1[i]<-as.logical(FALSE) +for(i in 1:nrow(results)){ + results[i,4]<-as.integer(results[i,2]) } -predval1 - +results[,4]<-as.logical(results[,4]) +results ``` ```{r} -predicted=results$prediction * abs(diff(range(isLegendary))) + min(isLegendary) -actual=results$actual * abs(diff(range(isLegendary))) + 
min(isLegendary) -comparison=data.frame(predicted,actual) -deviation=((actual-predicted)/actual) -comparison=data.frame(predicted,actual,deviation) -accuracy=1-abs(mean(deviation)) -accuracy +table(results$actual, results[,4]) ``` From 8c85156981c0f4388d5a95fc0df07b089982c760 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Sat, 30 Mar 2019 16:13:10 -0700 Subject: [PATCH 13/26] Update Neural.Rmd --- Neural.Rmd | 98 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/Neural.Rmd b/Neural.Rmd index 8a48729..1055e26 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -7,42 +7,100 @@ output: html_notebook knitr::opts_chunk$set(echo = TRUE) ``` -#laurenedits - ```{r} data = read.csv("pokemon_alopez247.csv", header=T) ``` + +#Neural Networks ```{r} +library(gclus) library(nnet) library(NeuralNetTools) -library(neuralnet) -set.seed(53747958) -numeric_col <- c(5:12, 16, 20:21) -pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) -trainset <- pokemon[1:505, ] -testset <- pokemon[506:721, ] -colnames(data)[22]<-"isLegendary" -nn <- neuralnet(isLegendary ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainset, hidden=3, linear.output=TRUE, threshold=0.008) -nn$result.matrix -plotnet(nn) +set.seed(1995) +spoke <- cbind(scale(trainset[,6:11]), factor(trainset$isLegendary)) +colnames(spoke)[7] <- "isLegendary" +spoke<-data.frame(spoke) +nnpoke <- nnet(factor(isLegendary)~., data=spoke, size=9) +table(trainset$isLegendary, predict(nnpoke, type="class")) +plotnet(nnpoke) +``` + +```{r} +spoke +``` + +```{r} +spoketest <- cbind(scale(testset[,6:11]), factor(testset$isLegendary)) +colnames(spoketest)[7] <- "isLegendary" +spoketest<-data.frame(spoketest) +table(spoketest$isLegendary, predict(nnpoke, newdata=spoketest, type="class")) +``` + +```{r} +attach(data) +trainsetg<-trainset[which(hasGender=='True'),] +testsetg<-testset[which(hasGender=='True'),] +trainsetg<-na.omit(trainsetg) +trainsetg 
+testsetg<-na.omit(testsetg) +testsetg +``` + +#Neural Net predicting Pr_Male +```{r} +set.seed(906534) +nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01) +plotnet(nnmale) +mse<-mean((compute(nnmale, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2) +mse ``` +Optimizing number of nodes in first layer ```{r} -nn.result <- predict(nn,newdata=testset, type="class") -results <- data.frame(actual = testset$isLegendary, prediction = nn.result) -results +for(i in 1:5){ + nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=c(i,3), threshold=0.01) + print(paste("Number of hidden layer variables in first layer:", i)) + print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2))) +} ``` +Optimizing number of nodes in second layer ```{r} -for(i in 1:nrow(results)){ - results[i,4]<-as.integer(results[i,2]) +for(i in 1:5){ + nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=c(4,i), threshold=0.01) + print(paste("Number of hidden layer variables in second layer:", i)) + print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2))) } -results[,4]<-as.logical(results[,4]) -results ``` +MSE with 2 hidden layers and 4 and 3 nodes: 0.04011755 +MSE with 1 hidden layer and 3 nodes: 0.03988659 + ```{r} -table(results$actual, results[,4]) +linmod<-lm(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg) +mean((predict(linmod,newdata=testsetg)-testsetg$Pr_Male)^2) ``` +This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes. 
+#Neural Net, predicting Generation +```{r} +set.seed(12345) +library(neuralnet) +nnGen <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=5, threshold=0.01) +plotnet(nnGen) +``` + +```{r} +mse<-mean((compute(nnGen, testsetg[,6:11])$net.result-testsetg$Generation)^2) +mse +``` +MSE without Pr_Male included, 5 nodes 1 hidden +1.349612 +MSE with Pr_Male included, 5 nodes 1 hidden +1.466631 + +```{r} +linmod<-lm(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg) +mean((predict(linmod,newdata=testsetg)-testsetg$Generation)^2) +``` From 33f30e13169c89e3d012031d7cadc6b78dbf748d Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Sat, 30 Mar 2019 16:50:58 -0700 Subject: [PATCH 14/26] Update Neural.Rmd --- Neural.Rmd | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/Neural.Rmd b/Neural.Rmd index 1055e26..de70e8c 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -1,5 +1,5 @@ --- -title: "R Notebook" +title: "Neural networks, Pokemon" output: html_notebook --- @@ -11,8 +11,20 @@ knitr::opts_chunk$set(echo = TRUE) data = read.csv("pokemon_alopez247.csv", header=T) ``` - #Neural Networks +```{r} +library(nnet) +library(NeuralNetTools) +library(neuralnet) +set.seed(53747958) +numeric_col <- c(5:12, 16, 20:21) +pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) +set.seed(1995) +train<-sample(1:nrow(pokemon),505) +testset<-poke[-train,] +trainset<-poke[train,] +``` + ```{r} library(gclus) library(nnet) @@ -50,6 +62,7 @@ testsetg #Neural Net predicting Pr_Male ```{r} set.seed(906534) +library(neuralnet) nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01) plotnet(nnmale) mse<-mean((compute(nnmale, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2) @@ -73,7 +86,6 @@ for(i in 1:5){ print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2))) } ``` - MSE 
with 2 hidden layers and 4 and 3 nodes: 0.04011755 MSE with 1 hidden layer and 3 nodes: 0.03988659 @@ -83,22 +95,22 @@ mean((predict(linmod,newdata=testsetg)-testsetg$Pr_Male)^2) ``` This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes. + #Neural Net, predicting Generation ```{r} -set.seed(12345) +set.seed(23453) library(neuralnet) -nnGen <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=5, threshold=0.01) +trainsetg<-trainset[which(hasGender=='True'),] +testsetg<-testset[which(hasGender=='True'),] +trainsetg<-na.omit(trainsetg) +testsetg<-na.omit(testsetg) +nnGen <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Pr_Male,data=trainsetg, hidden=4, threshold=0.01) plotnet(nnGen) -``` - -```{r} -mse<-mean((compute(nnGen, testsetg[,6:11])$net.result-testsetg$Generation)^2) +mse<-mean((compute(nnGen, testsetg[,c(6:11, 16)])$net.result-testsetg$Generation)^2) mse ``` -MSE without Pr_Male included, 5 nodes 1 hidden -1.349612 -MSE with Pr_Male included, 5 nodes 1 hidden -1.466631 + +@Barret, I cannot seem to get a misclassification table working for Generation, let me know if you can for whatever reason ```{r} linmod<-lm(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg) From 21abce0851b1727d15a6cb1579a1669c0d1edd9c Mon Sep 17 00:00:00 2001 From: "Lauren St. 
Clair" Date: Sun, 31 Mar 2019 00:37:47 -0700 Subject: [PATCH 15/26] Update Neural.Rmd --- Neural.Rmd | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/Neural.Rmd b/Neural.Rmd index de70e8c..d2ae832 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -19,7 +19,6 @@ library(neuralnet) set.seed(53747958) numeric_col <- c(5:12, 16, 20:21) pokemon[,numeric_col] <- scale(pokemon[,numeric_col]) -set.seed(1995) train<-sample(1:nrow(pokemon),505) testset<-poke[-train,] trainset<-poke[train,] @@ -62,6 +61,8 @@ testsetg #Neural Net predicting Pr_Male ```{r} set.seed(906534) +library(nnet) +library(NeuralNetTools) library(neuralnet) nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01) plotnet(nnmale) @@ -95,24 +96,44 @@ mean((predict(linmod,newdata=testsetg)-testsetg$Pr_Male)^2) ``` This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes. +#Neural net predicting hasGender +```{r} +library(gclus) +library(nnet) +library(NeuralNetTools) +set.seed(1995) +gpoke <- cbind(scale(trainset[,c(6:11, 20:22)]), factor(trainset$hasGender)) +colnames(gpoke)[10] <- "hasGender" +gpoke<-data.frame(gpoke) +nngend <- nnet(factor(hasGender)~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Catch_Rate + Height_m + Weight_kg, data=gpoke, size=11) +plotnet(nngend) +table(trainset$hasGender, predict(nngend, type="class")) +``` -#Neural Net, predicting Generation +#Neural net predicting Generation ```{r} -set.seed(23453) +library(gclus) +library(nnet) library(neuralnet) -trainsetg<-trainset[which(hasGender=='True'),] -testsetg<-testset[which(hasGender=='True'),] -trainsetg<-na.omit(trainsetg) -testsetg<-na.omit(testsetg) -nnGen <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Pr_Male,data=trainsetg, hidden=4, threshold=0.01) -plotnet(nnGen) -mse<-mean((compute(nnGen, testsetg[,c(6:11, 
16)])$net.result-testsetg$Generation)^2) +library(NeuralNetTools) +set.seed(19127395) +#nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Pr_Male,data=trainsetg, hidden=5, threshold=0.01) +nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=5, threshold=0.01) +plotnet(nnGeneration) +mse<-mean((compute(nnGeneration, testset[,6:11])$net.result-testset$Generation)^2) mse ``` -@Barret, I cannot seem to get a misclassification table working for Generation, let me know if you can for whatever reason +Optimizing number of nodes in first layer +```{r} +for(i in 1:5){ + nnmaletr <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01) + print(paste("Number of hidden layer variables in first layer:", i)) + print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2))) +} +``` ```{r} linmod<-lm(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg) mean((predict(linmod,newdata=testsetg)-testsetg$Generation)^2) -``` +``` \ No newline at end of file From 46297b81e8b03e64c0d1415482441b71a76f0258 Mon Sep 17 00:00:00 2001 From: "Lauren St. 
Clair" Date: Sun, 31 Mar 2019 13:08:53 -0700 Subject: [PATCH 16/26] Update Neural.Rmd --- Neural.Rmd | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Neural.Rmd b/Neural.Rmd index d2ae832..354698b 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -105,10 +105,18 @@ set.seed(1995) gpoke <- cbind(scale(trainset[,c(6:11, 20:22)]), factor(trainset$hasGender)) colnames(gpoke)[10] <- "hasGender" gpoke<-data.frame(gpoke) -nngend <- nnet(factor(hasGender)~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Catch_Rate + Height_m + Weight_kg, data=gpoke, size=11) +nngend <- nnet(factor(hasGender)~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Catch_Rate + Height_m + Weight_kg, data=gpoke, size=7) plotnet(nngend) table(trainset$hasGender, predict(nngend, type="class")) ``` +It appears that our neural net is efefctive in predicting hasGender. +1 hidden layer with 11 nodes appears to overfit the model, + 1 2 + False 45 0 + True 0 460 + +Instead 1 hidden layer with 7 nodes appears to have a reasonable misclassification without totally overfitting. + #Neural net predicting Generation ```{r} From 55f8eb275915b2b588c11aab142e4d14ba4199ac Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Sun, 31 Mar 2019 15:01:58 -0700 Subject: [PATCH 17/26] Update Neural.Rmd --- Neural.Rmd | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/Neural.Rmd b/Neural.Rmd index 354698b..2af2573 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -58,7 +58,7 @@ testsetg<-na.omit(testsetg) testsetg ``` -#Neural Net predicting Pr_Male +##Neural Net predicting Pr_Male ```{r} set.seed(906534) library(nnet) @@ -96,7 +96,7 @@ mean((predict(linmod,newdata=testsetg)-testsetg$Pr_Male)^2) ``` This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes. 
-#Neural net predicting hasGender +##Neural net predicting hasGender ```{r} library(gclus) library(nnet) @@ -105,20 +105,26 @@ set.seed(1995) gpoke <- cbind(scale(trainset[,c(6:11, 20:22)]), factor(trainset$hasGender)) colnames(gpoke)[10] <- "hasGender" gpoke<-data.frame(gpoke) -nngend <- nnet(factor(hasGender)~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Catch_Rate + Height_m + Weight_kg, data=gpoke, size=7) +nngend <- nnet(factor(hasGender)~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Catch_Rate + Height_m + Weight_kg, data=gpoke, size=5) plotnet(nngend) table(trainset$hasGender, predict(nngend, type="class")) ``` -It appears that our neural net is efefctive in predicting hasGender. +It appears that our neural net is effective in predicting hasGender, and also easy to generate an overfitted model for. 1 hidden layer with 11 nodes appears to overfit the model, 1 2 - False 45 0 + False 45 0 True 0 460 -Instead 1 hidden layer with 7 nodes appears to have a reasonable misclassification without totally overfitting. +Instead 1 hidden layer with 5 nodes appears to have a reasonable misclassification without totally overfitting. 
Let's try this on gspoketest, +```{r} +gpoketest <- cbind(scale(trainset[,c(6:11, 20:22)]), factor(testset$hasGender)) +colnames(gpoketest)[10] <- "hasGender" +gpoketest<-data.frame(gpoketest) +table(gpoketest$hasGender, predict(nngend, newdata=gpoketest, type="class")) +``` -#Neural net predicting Generation +##Neural net predicting Generation ```{r} library(gclus) library(nnet) @@ -128,7 +134,7 @@ set.seed(19127395) #nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Pr_Male,data=trainsetg, hidden=5, threshold=0.01) nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=5, threshold=0.01) plotnet(nnGeneration) -mse<-mean((compute(nnGeneration, testset[,6:11])$net.result-testset$Generation)^2) +mse<-mean((compute(nnGeneration, testsetg[,6:11])$net.result-testsetg$Generation)^2) mse ``` From 648f84085f789df41660d282eec3965e3ff8fe04 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Sun, 31 Mar 2019 17:53:14 -0700 Subject: [PATCH 18/26] Create PCAWithWH.Rmd --- PCAWithWH.Rmd | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 PCAWithWH.Rmd diff --git a/PCAWithWH.Rmd b/PCAWithWH.Rmd new file mode 100644 index 0000000..d3da6ca --- /dev/null +++ b/PCAWithWH.Rmd @@ -0,0 +1,80 @@ +--- +title: "R Notebook" +output: html_notebook +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +```{r} +pokemon<-read.csv("pokemon_alopez247.csv") +poke<-data.frame(pokemon) +``` + +```{r} +pcapoke <- prcomp(as.matrix(poke[,c(6:11, 20:21)]), scale.=TRUE) +summary(pcapoke) +biplot(pcapoke) +``` + + +```{r} +round(pcapoke$rotation[,1:2], 2) +``` + + +```{r} +poke[order(pcapoke$x[,1], decreasing=TRUE)[1:4] , c(1:11, 20:21)] +``` + +```{r} +poke[order(pcapoke$x[,2], decreasing=TRUE)[1:4] , c(1:11, 20:21)] +``` + +```{r} +poke[order(pcapoke$x[,1], decreasing=TRUE)[1:20],] +``` + +Majority are of the legendary type + 
+ +```{r} +lda.pred<-predict(leggenlda1,poke1) +lda.class<-lda.pred$class +table(lda.class,poke1$isLegendary) +``` + +Yea, this is still a naive classifier, NOT VERY USEFUL! + +Let's try a linear model, see if PC1 and PC2 are any good at predicting Pr_Male: + +```{r} +linmod<-lm(poke1$Pr_Male~pcgenleg[,1]+pcgenleg[,2]) +summary(linmod) +linmod<-lm(poke1$Pr_Male~pcgenleg[,1]) +summary(linmod) +plot(pcgenleg[,1],poke1$Pr_Male) +abline(linmod) +``` + +Ok, so the second model is statistically significant. So let's try to interpret this now. The intercept on this linear model is 0.55, which is already above 50%. Oh man that graph looks like garbage. I don't think PCA really did anything here... + + +```{r} +# linmod<-lm(poke1$Catch_Rate~pcgenleg[,1]+pcgenleg[,2]) +# summary(linmod) +linmod<-lm(poke1$Catch_Rate~pcgenleg[,1]) +summary(linmod) +plot(pcgenleg[,1],poke1$Catch_Rate) +abline(linmod) +``` + +Ok, now we're talking. So it looks like PC1 is correlated with the harder to catch Pokemon rather than the legendary ones. Probably a good time to start a new file, this is getting messy... + +```{r} +set.seed(1995) +train<-sample(1:nrow(poke),432) +poke.test<-poke[-train,] +poke.train<-poke[train,] +``` \ No newline at end of file From 1e3ee478cec2f2695646a9f97baf736dfd605c01 Mon Sep 17 00:00:00 2001 From: "Lauren St. 
Clair" Date: Sun, 31 Mar 2019 17:53:17 -0700 Subject: [PATCH 19/26] Update Neural.Rmd --- Neural.Rmd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Neural.Rmd b/Neural.Rmd index 2af2573..947961b 100644 --- a/Neural.Rmd +++ b/Neural.Rmd @@ -65,8 +65,10 @@ library(nnet) library(NeuralNetTools) library(neuralnet) nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01) +#nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Weight_kg + Height_m,data=trainsetg, hidden=5, threshold=0.01) plotnet(nnmale) mse<-mean((compute(nnmale, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2) +#mse<-mean((compute(nnmale, testsetg[,c(6:11, 20:21)])$net.result-testsetg$Pr_Male)^2) mse ``` @@ -91,7 +93,7 @@ MSE with 2 hidden layers and 4 and 3 nodes: 0.04011755 MSE with 1 hidden layer and 3 nodes: 0.03988659 ```{r} -linmod<-lm(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg) +linmod<-lm(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Weight_kg + Height_m,data=trainsetg) mean((predict(linmod,newdata=testsetg)-testsetg$Pr_Male)^2) ``` This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes. From 3c27235b834de7b73ffb067feef35ab58e1c3c21 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 1 Apr 2019 12:09:27 -0700 Subject: [PATCH 20/26] Create PCAWithWH.nb.html --- PCAWithWH.nb.html | 416 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 416 insertions(+) create mode 100644 PCAWithWH.nb.html diff --git a/PCAWithWH.nb.html b/PCAWithWH.nb.html new file mode 100644 index 0000000..46d1042 --- /dev/null +++ b/PCAWithWH.nb.html @@ -0,0 +1,416 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + +
pokemon<-read.csv("pokemon_alopez247.csv")
+poke<-data.frame(pokemon)
+ + + + + + +
pcapoke <- prcomp(as.matrix(poke[,c(6:11, 20:21)]), scale.=TRUE)
+summary(pcapoke)
+biplot(pcapoke)
+ + + + + + +
round(pcapoke$rotation[,1:2], 2)
+ + +
           PC1   PC2
+HP        0.36  0.00
+Attack    0.38 -0.02
+Defense   0.33 -0.41
+Sp_Atk    0.35  0.47
+Sp_Def    0.35  0.10
+Speed     0.23  0.68
+Height_m  0.40 -0.17
+Weight_kg 0.41 -0.34
+ + + + + + +
poke[order(pcapoke$x[,1], decreasing=TRUE)[1:4] , c(1:11, 20:21)]
+ + +
+ +
+ + + + + + +
poke[order(pcapoke$x[,2], decreasing=TRUE)[1:4] , c(1:11, 20:21)]
+ + +
+ +
+ + + + + + +
poke[order(pcapoke$x[,1], decreasing=TRUE)[1:20],]
+ + +
+ +
+ + + +

Majority are of the legendary type

+ + + +
lda.pred<-predict(leggenlda1,poke1)
+ + +
Error in predict(leggenlda1, poke1) : object 'leggenlda1' not found
+ + + +

Yea, this is still a naive classifier, NOT VERY USEFUL!

+

Let’s try a linear model, see if PC1 and PC2 are any good at predicting Pr_Male:

+ + + +
linmod<-lm(poke1$Pr_Male~pcgenleg[,1]+pcgenleg[,2])
+summary(linmod)
+linmod<-lm(poke1$Pr_Male~pcgenleg[,1])
+summary(linmod)
+plot(pcgenleg[,1],poke1$Pr_Male)
+abline(linmod)
+ + + +

Ok, so the second model is statistically significant. So let’s try to interpret this now. The intercept on this linear model is 0.55, which is already above 50%. Oh man that graph looks like garbage. I don’t think PCA really did anything here…

+ + + +
# linmod<-lm(poke1$Catch_Rate~pcgenleg[,1]+pcgenleg[,2])
+# summary(linmod)
+linmod<-lm(poke1$Catch_Rate~pcgenleg[,1])
+summary(linmod)
+plot(pcgenleg[,1],poke1$Catch_Rate)
+abline(linmod)
+ + + +

Ok, now we’re talking. So it looks like PC1 is correlated with the harder to catch Pokemon rather than the legendary ones. Probably a good time to start a new file, this is getting messy…

+ + + +
set.seed(1995)
+train<-sample(1:nrow(poke),432)
+poke.test<-poke[-train,]
+poke.train<-poke[train,]
+ + + +
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgpgYGB7cn0KcG9rZW1vbjwtcmVhZC5jc3YoInBva2Vtb25fYWxvcGV6MjQ3LmNzdiIpCnBva2U8LWRhdGEuZnJhbWUocG9rZW1vbikKYGBgCgpgYGB7cn0KcGNhcG9rZSA8LSBwcmNvbXAoYXMubWF0cml4KHBva2VbLGMoNjoxMSwgMjA6MjEpXSksIHNjYWxlLj1UUlVFKQpzdW1tYXJ5KHBjYXBva2UpCmJpcGxvdChwY2Fwb2tlKQpgYGAKCgpgYGB7cn0Kcm91bmQocGNhcG9rZSRyb3RhdGlvblssMToyXSwgMikKYGBgCgoKYGBge3J9CnBva2Vbb3JkZXIocGNhcG9rZSR4WywxXSwgZGVjcmVhc2luZz1UUlVFKVsxOjRdICwgYygxOjExLCAyMDoyMSldCmBgYAoKYGBge3J9CnBva2Vbb3JkZXIocGNhcG9rZSR4WywyXSwgZGVjcmVhc2luZz1UUlVFKVsxOjRdICwgYygxOjExLCAyMDoyMSldCmBgYAoKYGBge3J9CnBva2Vbb3JkZXIocGNhcG9rZSR4WywxXSwgZGVjcmVhc2luZz1UUlVFKVsxOjIwXSxdCmBgYAoKTWFqb3JpdHkgYXJlIG9mIHRoZSBsZWdlbmRhcnkgdHlwZQoKCmBgYHtyfQpsZGEucHJlZDwtcHJlZGljdChsZWdnZW5sZGExLHBva2UxKQpsZGEuY2xhc3M8LWxkYS5wcmVkJGNsYXNzCnRhYmxlKGxkYS5jbGFzcyxwb2tlMSRpc0xlZ2VuZGFyeSkKYGBgCgpZZWEsIHRoaXMgaXMgc3RpbGwgYSBuYWl2ZSBjbGFzc2lmaWVyLCBOT1QgVkVSWSBVU0VGVUwhCgpMZXQncyB0cnkgYSBsaW5lYXIgbW9kZWwsIHNlZSBpZiBQQzEgYW5kIFBDMiBhcmUgYW55IGdvb2QgYXQgcHJlZGljdGluZyBQcl9NYWxlOgoKYGBge3J9Cmxpbm1vZDwtbG0ocG9rZTEkUHJfTWFsZX5wY2dlbmxlZ1ssMV0rcGNnZW5sZWdbLDJdKQpzdW1tYXJ5KGxpbm1vZCkKbGlubW9kPC1sbShwb2tlMSRQcl9NYWxlfnBjZ2VubGVnWywxXSkKc3VtbWFyeShsaW5tb2QpCnBsb3QocGNnZW5sZWdbLDFdLHBva2UxJFByX01hbGUpCmFibGluZShsaW5tb2QpCmBgYAoKT2ssIHNvIHRoZSBzZWNvbmQgbW9kZWwgaXMgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudC4gU28gbGV0J3MgdHJ5IHRvIGludGVycHJldCB0aGlzIG5vdy4gVGhlIGludGVyY2VwdCBvbiB0aGlzIGxpbmVhciBtb2RlbCBpcyAwLjU1LCB3aGljaCBpcyBhbHJlYWR5IGFib3ZlIDUwJS4gT2ggbWFuIHRoYXQgZ3JhcGggbG9va3MgbGlrZSBnYXJiYWdlLiBJIGRvbid0IHRoaW5rIFBDQSByZWFsbHkgZGlkIGFueXRoaW5nIGhlcmUuLi4KCgpgYGB7cn0KIyBsaW5tb2Q8LWxtKHBva2UxJENhdGNoX1JhdGV+cGNnZW5sZWdbLDFdK3BjZ2VubGVnWywyXSkKIyBzdW1tYXJ5KGxpbm1vZCkKbGlubW9kPC1sbShwb2tlMSRDYXRjaF9SYXRlfnBjZ2VubGVnWywxXSkKc3VtbWFyeShsaW5tb2QpCnBsb3QocGNnZW5sZWdbLDFdLHBva2UxJENhdGNoX1JhdGUpCmFibGluZShsaW5tb2QpCmBgYAoKT2ssIG5vdyB3ZSdy
ZSB0YWxraW5nLiBTbyBpdCBsb29rcyBsaWtlIFBDMSBpcyBjb3JyZWxhdGVkIHdpdGggdGhlIGhhcmRlciB0byBjYXRjaCBQb2tlbW9uIHJhdGhlciB0aGFuIHRoZSBsZWdlbmRhcnkgb25lcy4gUHJvYmFibHkgYSBnb29kIHRpbWUgdG8gc3RhcnQgYSBuZXcgZmlsZSwgdGhpcyBpcyBnZXR0aW5nIG1lc3N5Li4uCgpgYGB7cn0Kc2V0LnNlZWQoMTk5NSkKdHJhaW48LXNhbXBsZSgxOm5yb3cocG9rZSksNDMyKQpwb2tlLnRlc3Q8LXBva2VbLXRyYWluLF0KcG9rZS50cmFpbjwtcG9rZVt0cmFpbixdCmBgYA==
+ + + +
+ + + + + + + + From cb4dd240691dab1e209fafbfd4f719e318aa33b7 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 1 Apr 2019 12:09:33 -0700 Subject: [PATCH 21/26] Create Neural.nb.html --- Neural.nb.html | 655 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 655 insertions(+) create mode 100644 Neural.nb.html diff --git a/Neural.nb.html b/Neural.nb.html new file mode 100644 index 0000000..14c5408 --- /dev/null +++ b/Neural.nb.html @@ -0,0 +1,655 @@ + + + + + + + + + + + + + +Neural networks, Pokemon + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + +
data = read.csv("pokemon_alopez247.csv", header=T)
+ + + +
+

Neural Networks

+ + + +
library(nnet)
+library(NeuralNetTools)
+library(neuralnet)
+set.seed(53747958)
+numeric_col <- c(5:12, 16, 20:21)
+pokemon[,numeric_col] <- scale(pokemon[,numeric_col])
+train<-sample(1:nrow(pokemon),505)
+testset<-poke[-train,]
+trainset<-poke[train,]
+ + + + + + +
library(gclus)
+library(nnet)
+library(NeuralNetTools)
+set.seed(1995)
+spoke <- cbind(scale(trainset[,6:11]), factor(trainset$isLegendary))
+colnames(spoke)[7] <- "isLegendary"
+spoke<-data.frame(spoke)
+nnpoke <- nnet(factor(isLegendary)~., data=spoke, size=9)
+ + +
# weights:  73
+initial  value 230.803477 
+iter  10 value 21.386442
+iter  20 value 10.225302
+iter  30 value 5.443762
+iter  40 value 5.124080
+iter  50 value 5.117497
+iter  60 value 5.096526
+iter  70 value 5.032397
+iter  80 value 4.957708
+iter  90 value 4.956312
+iter 100 value 4.953087
+final  value 4.953087 
+stopped after 100 iterations
+ + +
table(trainset$isLegendary, predict(nnpoke, type="class"))
+ + +
       
+          1   2
+  False 479   1
+  True    1  24
+ + +
plotnet(nnpoke)
+ + +

+ + + + + + +
spoke
+ + +
+ +
+ + + + + + +
spoketest <- cbind(scale(testset[,6:11]), factor(testset$isLegendary))
+colnames(spoketest)[7] <- "isLegendary"
+spoketest<-data.frame(spoketest)
+table(spoketest$isLegendary, predict(nnpoke, newdata=spoketest, type="class"))
+ + +
   
+      1   2
+  1 193   2
+  2  15   6
+ + + + + + +
attach(data)
+ + +
The following object is masked _by_ .GlobalEnv:
+
+    Pr_Male
+
+The following objects are masked from data (pos = 3):
+
+    Attack, Body_Style, Catch_Rate, Color, Defense, Egg_Group_1, Egg_Group_2, Generation, hasGender,
+    hasMegaEvolution, Height_m, HP, isLegendary, Name, Number, Pr_Male, Sp_Atk, Sp_Def, Speed,
+    Total, Type_1, Type_2, Weight_kg
+
+The following objects are masked from data (pos = 4):
+
+    Attack, Body_Style, Catch_Rate, Color, Defense, Egg_Group_1, Egg_Group_2, Generation, hasGender,
+    hasMegaEvolution, Height_m, HP, isLegendary, Name, Number, Pr_Male, Sp_Atk, Sp_Def, Speed,
+    Total, Type_1, Type_2, Weight_kg
+ + +
trainsetg<-trainset[which(hasGender=='True'),]
+testsetg<-testset[which(hasGender=='True'),]
+trainsetg<-na.omit(trainsetg)
+trainsetg
+ + +
+ +
+ + +
testsetg<-na.omit(testsetg)
+testsetg
+ + +
+ +
+ + + +
+

Neural Net predicting Pr_Male

+ + + +
set.seed(906534)
+library(nnet)
+library(NeuralNetTools)
+library(neuralnet)
+nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01)
+#nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Weight_kg + Height_m,data=trainsetg, hidden=5, threshold=0.01)
+plotnet(nnmale)
+ + +

+ + +
mse<-mean((compute(nnmale, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2)
+#mse<-mean((compute(nnmale, testsetg[,c(6:11, 20:21)])$net.result-testsetg$Pr_Male)^2)
+mse
+ + +
[1] 0.03509201
+ + + +

Optimizing number of nodes in first layer

+ + + +
for(i in 1:5){
+  nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=c(i,3), threshold=0.01)
+  print(paste("Number of hidden layer variables in first layer:", i))
+  print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2)))
+}
+ + +
[1] "Number of hidden layer variables in first layer: 1"
+[1] "MSE:  0.0350882335552937"
+[1] "Number of hidden layer variables in first layer: 2"
+[1] "MSE:  0.0350818416116905"
+[1] "Number of hidden layer variables in first layer: 3"
+[1] "MSE:  0.035248926798519"
+[1] "Number of hidden layer variables in first layer: 4"
+[1] "MSE:  0.0395926526903969"
+[1] "Number of hidden layer variables in first layer: 5"
+[1] "MSE:  0.0354457058825359"
+ + + +

Optimizing number of nodes in second layer

+ + + +
for(i in 1:5){
+  nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=c(4,i), threshold=0.01)
+  print(paste("Number of hidden layer variables in second layer:", i))
+  print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2)))
+}
+ + +
[1] "Number of hidden layer variables in second layer: 1"
+[1] "MSE:  0.0431050100780546"
+[1] "Number of hidden layer variables in second layer: 2"
+[1] "MSE:  0.0414010218086393"
+[1] "Number of hidden layer variables in second layer: 3"
+[1] "MSE:  0.0481998685468267"
+[1] "Number of hidden layer variables in second layer: 4"
+[1] "MSE:  0.0352346597808724"
+ + +
Algorithm did not converge in 1 of 1 repetition(s) within the stepmax.
+ + +
[1] "Number of hidden layer variables in second layer: 5"
+ + +
Error in cbind(1, pred) %*% weights[[num_hidden_layers + 1]] : 
+  requires numeric/complex matrix/vector arguments
+ + + +

MSE with 2 hidden layers and 4 and 3 nodes: 0.04011755 MSE with 1 hidden layer and 3 nodes: 0.03988659

+ + + +
linmod<-lm(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Weight_kg + Height_m,data=trainsetg)
+mean((predict(linmod,newdata=testsetg)-testsetg$Pr_Male)^2)
+ + +
[1] 0.03378086
+ + + +

This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes.

+
+
+

Neural net predicting hasGender

+ + + +
library(gclus)
+library(nnet)
+library(NeuralNetTools)
+set.seed(1995)
+gpoke <- cbind(scale(trainset[,c(6:11, 20:22)]), factor(trainset$hasGender))
+colnames(gpoke)[10] <- "hasGender"
+gpoke<-data.frame(gpoke)
+nngend <- nnet(factor(hasGender)~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Catch_Rate + Height_m + Weight_kg, data=gpoke, size=5)
+ + +
# weights:  56
+initial  value 525.566326 
+iter  10 value 111.914061
+iter  20 value 95.010360
+iter  30 value 79.510878
+iter  40 value 74.458633
+iter  50 value 69.719439
+iter  60 value 63.374675
+iter  70 value 62.255317
+iter  80 value 59.258941
+iter  90 value 57.722880
+iter 100 value 55.807885
+final  value 55.807885 
+stopped after 100 iterations
+ + +
plotnet(nngend)
+ + +

+ + +
table(trainset$hasGender, predict(nngend, type="class"))
+ + +
       
+          1   2
+  False  36   9
+  True   25 435
+ + + +

It appears that our neural net is effective in predicting hasGender, and also easy to generate an overfitted model for. 1 hidden layer with 11 nodes appears to overfit the model, 1 2 False 45 0 True 0 460

+

Instead 1 hidden layer with 5 nodes appears to have a reasonable misclassification without totally overfitting. Let’s try this on gspoketest,

+ + + +
gpoketest <- cbind(scale(trainset[,c(6:11, 20:22)]), factor(testset$hasGender))
+ + +
number of rows of result is not a multiple of vector length (arg 2)
+ + +
colnames(gpoketest)[10] <- "hasGender"
+gpoketest<-data.frame(gpoketest)
+table(gpoketest$hasGender, predict(nngend, newdata=gpoketest, type="class"))
+ + +
   
+      1   2
+  1  14  57
+  2  47 387
+ + + +
+
+

Neural net predicting Generation

+ + + +
library(gclus)
+library(nnet)
+library(neuralnet)
+library(NeuralNetTools)
+set.seed(19127395)
+#nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Pr_Male,data=trainsetg, hidden=5, threshold=0.01)
+nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=5, threshold=0.01)
+plotnet(nnGeneration)
+ + +

+ + +
mse<-mean((compute(nnGeneration, testsetg[,6:11])$net.result-testsetg$Generation)^2)
+mse
+ + +
[1] 2.756416
+ + + +

Optimizing number of nodes in first layer

+ + + +
for(i in 1:5){
+  nnmaletr <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01)
+  print(paste("Number of hidden layer variables in first layer:", i))
+  print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2)))
+}
+ + + + + + +
linmod<-lm(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg)
+mean((predict(linmod,newdata=testsetg)-testsetg$Generation)^2)
+ + +
[1] 2.834874
+ + +
+
+ +
LS0tCnRpdGxlOiAiTmV1cmFsIG5ldHdvcmtzLCBQb2tlbW9uIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCmBgYHtyfQpkYXRhID0gcmVhZC5jc3YoInBva2Vtb25fYWxvcGV6MjQ3LmNzdiIsIGhlYWRlcj1UKQpgYGAKCiNOZXVyYWwgTmV0d29ya3MKYGBge3J9CmxpYnJhcnkobm5ldCkKbGlicmFyeShOZXVyYWxOZXRUb29scykKbGlicmFyeShuZXVyYWxuZXQpCnNldC5zZWVkKDUzNzQ3OTU4KQpudW1lcmljX2NvbCA8LSBjKDU6MTIsIDE2LCAyMDoyMSkKcG9rZW1vblssbnVtZXJpY19jb2xdIDwtIHNjYWxlKHBva2Vtb25bLG51bWVyaWNfY29sXSkKdHJhaW48LXNhbXBsZSgxOm5yb3cocG9rZW1vbiksNTA1KQp0ZXN0c2V0PC1wb2tlWy10cmFpbixdCnRyYWluc2V0PC1wb2tlW3RyYWluLF0KYGBgCgpgYGB7cn0KbGlicmFyeShnY2x1cykKbGlicmFyeShubmV0KQpsaWJyYXJ5KE5ldXJhbE5ldFRvb2xzKQpzZXQuc2VlZCgxOTk1KQpzcG9rZSA8LSBjYmluZChzY2FsZSh0cmFpbnNldFssNjoxMV0pLCBmYWN0b3IodHJhaW5zZXQkaXNMZWdlbmRhcnkpKQpjb2xuYW1lcyhzcG9rZSlbN10gPC0gImlzTGVnZW5kYXJ5IgpzcG9rZTwtZGF0YS5mcmFtZShzcG9rZSkKbm5wb2tlIDwtIG5uZXQoZmFjdG9yKGlzTGVnZW5kYXJ5KX4uLCBkYXRhPXNwb2tlLCBzaXplPTkpCnRhYmxlKHRyYWluc2V0JGlzTGVnZW5kYXJ5LCBwcmVkaWN0KG5ucG9rZSwgdHlwZT0iY2xhc3MiKSkKcGxvdG5ldChubnBva2UpCmBgYAoKYGBge3J9CnNwb2tlCmBgYAoKYGBge3J9CnNwb2tldGVzdCA8LSBjYmluZChzY2FsZSh0ZXN0c2V0Wyw2OjExXSksIGZhY3Rvcih0ZXN0c2V0JGlzTGVnZW5kYXJ5KSkKY29sbmFtZXMoc3Bva2V0ZXN0KVs3XSA8LSAiaXNMZWdlbmRhcnkiCnNwb2tldGVzdDwtZGF0YS5mcmFtZShzcG9rZXRlc3QpCnRhYmxlKHNwb2tldGVzdCRpc0xlZ2VuZGFyeSwgcHJlZGljdChubnBva2UsIG5ld2RhdGE9c3Bva2V0ZXN0LCB0eXBlPSJjbGFzcyIpKQpgYGAKCmBgYHtyfQphdHRhY2goZGF0YSkKdHJhaW5zZXRnPC10cmFpbnNldFt3aGljaChoYXNHZW5kZXI9PSdUcnVlJyksXQp0ZXN0c2V0ZzwtdGVzdHNldFt3aGljaChoYXNHZW5kZXI9PSdUcnVlJyksXQp0cmFpbnNldGc8LW5hLm9taXQodHJhaW5zZXRnKQp0cmFpbnNldGcKdGVzdHNldGc8LW5hLm9taXQodGVzdHNldGcpCnRlc3RzZXRnCmBgYAoKIyNOZXVyYWwgTmV0IHByZWRpY3RpbmcgUHJfTWFsZQpgYGB7cn0Kc2V0LnNlZWQoOTA2NTM0KQpsaWJyYXJ5KG5uZXQpCmxpYnJhcnkoTmV1cmFsTmV0VG9vbHMpCmxpYnJhcnkobmV1cmFsbmV0KQpubm1hbGUgPC0gbmV1cmFsbmV0KFByX01hbGUgfiBBdHRhY2sgKyBEZWZlbnNlICsgSFAgKyBTcF9BdGsgKyBTcF9EZWYgKyBTcGVlZCxkYXRhPXRyYWluc2V0ZywgaGlkZGVuPTMsIHRocmVzaG9sZD0wLjAxKQojbm5tYWxl
IDwtIG5ldXJhbG5ldChQcl9NYWxlIH4gQXR0YWNrICsgRGVmZW5zZSArIEhQICsgU3BfQXRrICsgU3BfRGVmICsgU3BlZWQgKyBXZWlnaHRfa2cgKyBIZWlnaHRfbSxkYXRhPXRyYWluc2V0ZywgaGlkZGVuPTUsIHRocmVzaG9sZD0wLjAxKQpwbG90bmV0KG5ubWFsZSkKbXNlPC1tZWFuKChjb21wdXRlKG5ubWFsZSwgdGVzdHNldGdbLDY6MTFdKSRuZXQucmVzdWx0LXRlc3RzZXRnJFByX01hbGUpXjIpCiNtc2U8LW1lYW4oKGNvbXB1dGUobm5tYWxlLCB0ZXN0c2V0Z1ssYyg2OjExLCAyMDoyMSldKSRuZXQucmVzdWx0LXRlc3RzZXRnJFByX01hbGUpXjIpCm1zZQpgYGAKCk9wdGltaXppbmcgbnVtYmVyIG9mIG5vZGVzIGluIGZpcnN0IGxheWVyCmBgYHtyfQpmb3IoaSBpbiAxOjUpewogIG5ubWFsZXRyIDwtIG5ldXJhbG5ldChQcl9NYWxlIH4gQXR0YWNrICsgRGVmZW5zZSArIEhQICsgU3BfQXRrICsgU3BfRGVmICsgU3BlZWQsZGF0YT10cmFpbnNldGcsIGhpZGRlbj1jKGksMyksIHRocmVzaG9sZD0wLjAxKQogIHByaW50KHBhc3RlKCJOdW1iZXIgb2YgaGlkZGVuIGxheWVyIHZhcmlhYmxlcyBpbiBmaXJzdCBsYXllcjoiLCBpKSkKICBwcmludChwYXN0ZSgiTVNFOiAiLCBtZWFuKChjb21wdXRlKG5ubWFsZXRyLCB0ZXN0c2V0Z1ssNjoxMV0pJG5ldC5yZXN1bHQtdGVzdHNldGckUHJfTWFsZSleMikpKQp9CmBgYAoKT3B0aW1pemluZyBudW1iZXIgb2Ygbm9kZXMgaW4gc2Vjb25kIGxheWVyCmBgYHtyfQpmb3IoaSBpbiAxOjUpewogIG5ubWFsZXRyIDwtIG5ldXJhbG5ldChQcl9NYWxlIH4gQXR0YWNrICsgRGVmZW5zZSArIEhQICsgU3BfQXRrICsgU3BfRGVmICsgU3BlZWQsZGF0YT10cmFpbnNldGcsIGhpZGRlbj1jKDQsaSksIHRocmVzaG9sZD0wLjAxKQogIHByaW50KHBhc3RlKCJOdW1iZXIgb2YgaGlkZGVuIGxheWVyIHZhcmlhYmxlcyBpbiBzZWNvbmQgbGF5ZXI6IiwgaSkpCiAgcHJpbnQocGFzdGUoIk1TRTogIiwgbWVhbigoY29tcHV0ZShubm1hbGV0ciwgdGVzdHNldGdbLDY6MTFdKSRuZXQucmVzdWx0LXRlc3RzZXRnJFByX01hbGUpXjIpKSkKfQpgYGAKTVNFIHdpdGggMiBoaWRkZW4gbGF5ZXJzIGFuZCA0IGFuZCAzIG5vZGVzOiAwLjA0MDExNzU1Ck1TRSB3aXRoIDEgaGlkZGVuIGxheWVyIGFuZCAzIG5vZGVzOiAwLjAzOTg4NjU5CgpgYGB7cn0KbGlubW9kPC1sbShQcl9NYWxlIH4gQXR0YWNrICsgRGVmZW5zZSArIEhQICsgU3BfQXRrICsgU3BfRGVmICsgU3BlZWQgKyBXZWlnaHRfa2cgKyBIZWlnaHRfbSxkYXRhPXRyYWluc2V0ZykKbWVhbigocHJlZGljdChsaW5tb2QsbmV3ZGF0YT10ZXN0c2V0ZyktdGVzdHNldGckUHJfTWFsZSleMikKYGBgClRoaXMgaXMgcHJldHR5IGNsb3NlIHRvIG91ciBuZXVyYWwgbmV0IG1vZGVsZWQgYWJvdmUgd2hlbiB3ZSB1c2UgMSBoaWRkZW4gbGF5ZXIgYW5kIDMgbm9kZXMuIAoKIyNOZXVyYWwgbmV0IHByZWRpY3RpbmcgaGFzR2VuZGVyCmBgYHtyfQpsaWJyYXJ5KGdjbHVzKQpsaWJy
YXJ5KG5uZXQpCmxpYnJhcnkoTmV1cmFsTmV0VG9vbHMpCnNldC5zZWVkKDE5OTUpCmdwb2tlIDwtIGNiaW5kKHNjYWxlKHRyYWluc2V0WyxjKDY6MTEsIDIwOjIyKV0pLCBmYWN0b3IodHJhaW5zZXQkaGFzR2VuZGVyKSkKY29sbmFtZXMoZ3Bva2UpWzEwXSA8LSAiaGFzR2VuZGVyIgpncG9rZTwtZGF0YS5mcmFtZShncG9rZSkKbm5nZW5kIDwtIG5uZXQoZmFjdG9yKGhhc0dlbmRlcil+IEF0dGFjayArIERlZmVuc2UgKyBIUCArIFNwX0F0ayArIFNwX0RlZiArIFNwZWVkICsgQ2F0Y2hfUmF0ZSArIEhlaWdodF9tICsgV2VpZ2h0X2tnLCBkYXRhPWdwb2tlLCBzaXplPTUpCnBsb3RuZXQobm5nZW5kKQp0YWJsZSh0cmFpbnNldCRoYXNHZW5kZXIsIHByZWRpY3Qobm5nZW5kLCB0eXBlPSJjbGFzcyIpKQpgYGAKSXQgYXBwZWFycyB0aGF0IG91ciBuZXVyYWwgbmV0IGlzIGVmZmVjdGl2ZSBpbiBwcmVkaWN0aW5nIGhhc0dlbmRlciwgYW5kIGFsc28gZWFzeSB0byBnZW5lcmF0ZSBhbiBvdmVyZml0dGVkIG1vZGVsIGZvci4gCjEgaGlkZGVuIGxheWVyIHdpdGggMTEgbm9kZXMgYXBwZWFycyB0byBvdmVyZml0IHRoZSBtb2RlbCwgCiAgICAgICAgIDEgICAyCiAgRmFsc2UgNDUgICAwCiAgVHJ1ZSAgICAwIDQ2MAogIApJbnN0ZWFkIDEgaGlkZGVuIGxheWVyIHdpdGggNSBub2RlcyBhcHBlYXJzIHRvIGhhdmUgYSByZWFzb25hYmxlIG1pc2NsYXNzaWZpY2F0aW9uIHdpdGhvdXQgdG90YWxseSBvdmVyZml0dGluZy4gTGV0J3MgdHJ5IHRoaXMgb24gZ3Nwb2tldGVzdCwKCmBgYHtyfQpncG9rZXRlc3QgPC0gY2JpbmQoc2NhbGUodHJhaW5zZXRbLGMoNjoxMSwgMjA6MjIpXSksIGZhY3Rvcih0ZXN0c2V0JGhhc0dlbmRlcikpCmNvbG5hbWVzKGdwb2tldGVzdClbMTBdIDwtICJoYXNHZW5kZXIiCmdwb2tldGVzdDwtZGF0YS5mcmFtZShncG9rZXRlc3QpCnRhYmxlKGdwb2tldGVzdCRoYXNHZW5kZXIsIHByZWRpY3Qobm5nZW5kLCBuZXdkYXRhPWdwb2tldGVzdCwgdHlwZT0iY2xhc3MiKSkKYGBgCgojI05ldXJhbCBuZXQgcHJlZGljdGluZyBHZW5lcmF0aW9uCmBgYHtyfQpsaWJyYXJ5KGdjbHVzKQpsaWJyYXJ5KG5uZXQpCmxpYnJhcnkobmV1cmFsbmV0KQpsaWJyYXJ5KE5ldXJhbE5ldFRvb2xzKQpzZXQuc2VlZCgxOTEyNzM5NSkKI25uR2VuZXJhdGlvbiA8LSBuZXVyYWxuZXQoR2VuZXJhdGlvbiB+IEF0dGFjayArIERlZmVuc2UgKyBIUCArIFNwX0F0ayArIFNwX0RlZiArIFNwZWVkICsgUHJfTWFsZSxkYXRhPXRyYWluc2V0ZywgaGlkZGVuPTUsIHRocmVzaG9sZD0wLjAxKQpubkdlbmVyYXRpb24gPC0gbmV1cmFsbmV0KEdlbmVyYXRpb24gfiBBdHRhY2sgKyBEZWZlbnNlICsgSFAgKyBTcF9BdGsgKyBTcF9EZWYgKyBTcGVlZCxkYXRhPXRyYWluc2V0ZywgaGlkZGVuPTUsIHRocmVzaG9sZD0wLjAxKQpwbG90bmV0KG5uR2VuZXJhdGlvbikKbXNlPC1tZWFuKChjb21wdXRlKG5uR2VuZXJhdGlvbiwgdGVzdHNldGdbLDY6MTFdKSRuZXQucmVzdWx0
LXRlc3RzZXRnJEdlbmVyYXRpb24pXjIpCm1zZQpgYGAKCk9wdGltaXppbmcgbnVtYmVyIG9mIG5vZGVzIGluIGZpcnN0IGxheWVyCmBgYHtyfQpmb3IoaSBpbiAxOjUpewogIG5ubWFsZXRyIDwtIG5ldXJhbG5ldChHZW5lcmF0aW9uIH4gQXR0YWNrICsgRGVmZW5zZSArIEhQICsgU3BfQXRrICsgU3BfRGVmICsgU3BlZWQsZGF0YT10cmFpbnNldGcsIGhpZGRlbj0zLCB0aHJlc2hvbGQ9MC4wMSkKICBwcmludChwYXN0ZSgiTnVtYmVyIG9mIGhpZGRlbiBsYXllciB2YXJpYWJsZXMgaW4gZmlyc3QgbGF5ZXI6IiwgaSkpCiAgcHJpbnQocGFzdGUoIk1TRTogIiwgbWVhbigoY29tcHV0ZShubm1hbGV0ciwgdGVzdHNldGdbLDY6MTFdKSRuZXQucmVzdWx0LXRlc3RzZXRnJFByX01hbGUpXjIpKSkKfQpgYGAKCmBgYHtyfQpsaW5tb2Q8LWxtKEdlbmVyYXRpb24gfiBBdHRhY2sgKyBEZWZlbnNlICsgSFAgKyBTcF9BdGsgKyBTcF9EZWYgKyBTcGVlZCxkYXRhPXRyYWluc2V0ZykKbWVhbigocHJlZGljdChsaW5tb2QsbmV3ZGF0YT10ZXN0c2V0ZyktdGVzdHNldGckR2VuZXJhdGlvbileMikKYGBg
+ + + +
+ + + + + + + + From 923b778f03a68937d768e1ccf811455be0aa13c3 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 1 Apr 2019 12:11:10 -0700 Subject: [PATCH 22/26] Create AllModels.Rmd --- AllModels.Rmd | 759 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 759 insertions(+) create mode 100644 AllModels.Rmd diff --git a/AllModels.Rmd b/AllModels.Rmd new file mode 100644 index 0000000..b9160b2 --- /dev/null +++ b/AllModels.Rmd @@ -0,0 +1,759 @@ +--- +title: "DATA 311, All models" +author: "Barret Jackson, Emily Medema, Kat Lecha, Lauren St. Clair" +date: "March 30th, 2019" +output: pdf_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +```{r setup, include=FALSE} +pokemon<-read.csv("pokemon_alopez247.csv") +``` + +#Number of Pokemon per Type +```{r} +library(ggplot2) +type<-ggplot(pokemon, aes(pokemon$Type_1, fill = pokemon$Type_1)) + geom_histogram(stat="count", color = "black") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +type +``` + +Pokemon dataset split into training and testing sets +```{r} +set.seed(1995) +train<-sample(1:nrow(pokemon),432) +poke.test<-pokemon[-train,] +poke.train<-pokemon[train,] +``` + +#Linear Model +Linear model, Total as response and HP, Attack, and Defense are predictors: +```{r} +library(DAAG) +linmod <- lm(poke.train$Total~poke.train$HP+poke.train$Attack+poke.train$Defense) +summary(linmod) +#plot(linmod) +plot(poke.train$HP+poke.train$Attack+poke.train$Defense, poke.train$Total) +abline(linmod, h = 0.5, col = "red") +#mmmm tasty sig values +predicted<-predict(linmod, newdata=poke.test) +mean(linmod$residuals^2) +mean((poke.test$Total-predicted)^2) +``` + +#Clustering +Single, Average, and Complete linkage respectively modeled below +```{r} +eucdist<-dist(pokemon, method="euclidean") +clusPokemon<-hclust(eucdist, method = "single") +plot(clusPokemon) +clusPokemonAvg<-hclust(eucdist, method = "average") +plot(clusPokemonAvg) +clusComplete<-hclust(eucdist, 
method = "complete") +plot(clusComplete) +``` +We see that a complete linkage method appears to fit our dataset best. + +#Regression Tree, Total as response and HP, Attack, Defense, Sp_Atk, Sp_Def, and Speed as predictors +```{r} +library(tree) +poke<-data.frame(pokemon) +attach(poke) +pocl<-tree(Total~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke) +plot(pocl) +text(pocl) +``` + +Let's try pruning back our tree from above, +```{r} +cv.pocl<-cv.tree(pocl, FUN=prune.tree) +plot(cv.pocl,type="b") +p.pocl<-prune.tree(pocl,best=10) +plot(p.pocl) +text(p.pocl) +summary(p.pocl) +``` +We can see that the lowest is MSE is given with 12 nodes, suggesting that pruning may be unnecessary. + +#Random Forests +```{r} +library(randomForest) +set.seed(1995) +pokebag<-randomForest(Total~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke,mtry=6,importance=FALSE) +pokebag +``` + +Random forest where m = 3 +```{r} +pokeRF<-randomForest(Total~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke,mtry=3,importance=TRUE) +pokeRF +``` + +Let's see if we can predict if a Pokemon is legendary using the Total predictor. In using the predictor Total, we are under the assumption that legendary Pokemon have high totals. From looking at the data set this appears to be true. It also appears from the data set that legendary Pokemon do not have a gender expecept for a couple outliers. Let's see if this is the case, +```{r} +#https://www.kaggle.com/excaliburzero/predicting-legendary-pokemon +maxTotal<-order(pokemon$Total, decreasing = TRUE) +head(pokemon[maxTotal,]) +``` +It does appear that the Pokemon with the highest total are in fact of the legendary type. + +```{r} +library(ggplot2) +plot<-ggplot(pokemon, aes(x =Total, fill = isLegendary)) + geom_histogram() +plot +``` +From this graph we can see that the higher the total the more likely a pokemon is to be legendary. In fact, it appears that a pokemon is only legendary when it is above 650 in total and most likely legendary from around 550-625. 
+ +Let's now check the correlation between gender and legendary status, +```{r} +pokemon$hasGender<-factor(pokemon$hasGender) +plot2<-ggplot(pokemon, aes(x =hasGender, fill = isLegendary)) + geom_bar() +plot2 +``` +As our first assumption suggested, the plot too suggests that majority of legendary pokemon (isLegendary = TRUE), do not have a gender (hasGender = FALSE). + +Let's see if there are any linear relationships within our Pokemon dataset. Name and Number will be excluded from examination as these will likely have no effect on the data. + +#K-Means +```{r} +library(mclust) +library(cluster) +library(dplyr) +library(fpc) +pokeNum<-select_if(pokemon, is.numeric) +distPoke<-daisy(pokemon) +#distPoke<-daisy(pokeNum) +summary(distPoke) +pokeDist<-cmdscale(distPoke) +plot(pokeDist, type = "n") +text(pokeDist, rownames(pokeDist)) +set.seed(413) +clustore<-matrix(0, nrow = 721, ncol=25) +wsstore<-NULL +for(i in 1:10){ + km<-kmeans(pokeDist, i, nstart=10) + clustore[,i]<-km$cluster + wsstore[i]<-km$tot.withinss +} +plot(wsstore) +kPoke2<-kmeans(pokeDist, 7, nstart=25) +plot(pokeDist, col = kPoke2$cluster) +points(kPoke2$centers, col = 1:4, pch=8, cex=2) +out <- cbind(pokemon, clusterNum = kPoke2$cluster) +clusterGroups<-order(out$clusterNum, decreasing = TRUE) +head(out[clusterGroups,]) +``` + + +#KNN Classification +```{r} +library(class) +knnrun<-knn.cv(pokeDist, cl = poke.train$isLegendary, k = 5, prob = TRUE) +table(poke.train$isLegendary, knnrun) +``` + +#Linear Discriminant Analysis +```{r} +library(MASS) +library(MLmetrics) +poke.train$hasGender<-factor(poke.train$hasGender) +poke.train$isLegendary<-factor(poke.train$isLegendary) +pokelda<-lda(poke.train$isLegendary~poke.train$hasGender+poke.train$Total) +table(poke.train$isLegendary, predict(pokelda)$class) +Sensitivity(poke.train$isLegendary, predict(pokelda)$class) +Recall(poke.train$isLegendary, predict(pokelda)$class) #same as sensitivity +Precision(poke.train$isLegendary, predict(pokelda)$class) 
+Specificity(poke.train$isLegendary, predict(pokelda)$class) +F1_Score(poke.train$isLegendary, predict(pokelda)$class) +``` + +#QDA +```{r} +pokeqda<-qda(poke.train$isLegendary~poke.train$hasGender+poke.train$Total) +table(poke.train$isLegendary, predict(pokeqda)$class) +Sensitivity(poke.train$isLegendary, predict(pokelda)$class) +Recall(poke.train$isLegendary, predict(pokeqda)$class) #same as sensitivity +Precision(poke.train$isLegendary, predict(pokeqda)$class) +Specificity(poke.train$isLegendary, predict(pokeqda)$class) +F1_Score(poke.train$isLegendary, predict(pokeqda)$class) +``` + +#Logistic Regression +```{r} +simlog<-glm(factor(poke.train$isLegendary)~poke.train$hasGender+poke.train$Total, family = "binomial") +table(predict(simlog, type = "response")>0.5, poke.train$isLegendary) +``` + +```{r} +#https://www.kaggle.com/excaliburzero/predicting-legendary-pokemon +poke<-data.frame(pokemon) +pokeLegend<-poke[which(isLegendary=='True'),] +plot(Generation~isLegendary) +TheLegends<-as.data.frame(table(pokeLegend$Generation)) +colnames(TheLegends)<-c("Generation", "Legends") +TheLegends +summary(TheLegends) +plot<-ggplot(TheLegends, aes(Generation, Legends))+geom_bar(stat="identity") +plot +TheMan<-as.data.frame(table(pokeLegend$Type_1)) +colnames(TheMan)<-c("Type 1", "Legends") +TheMan +summary(TheMan) +plot(TheMan) +maxTotalL<-order(TheMan$Legends, decreasing = TRUE) +head(TheMan[maxTotalL,]) +#Of Type 2 +TheMyth<-as.data.frame(table(pokeLegend$Type_2)) +colnames(TheMyth)<-c("Type 2", "Legends") +TheMyth +summary(TheMyth) +plot(TheMyth) +maxTotalL2<-order(TheMyth$Legends, decreasing = TRUE) +head(TheMyth[maxTotalL2,]) +``` + +```{r} +poke<-data.frame(pokemon) +poke1<-poke[which(hasGender=='True'),] +attach(poke) +head(poke1) +``` + +Let's see if there is a relationshp between Score and Pr_Male, a predictor for the probability of gender according to male +```{r} +set.seed(983457) +pokeG<-tree(Pr_Male~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed + Total,data=poke1) 
+plot(pokeG) +text(pokeG, pretty=0) +cv.pokeG<-cv.tree(pokeG, FUN=prune.tree) +plot(cv.pokeG) +prunePokeG<-prune.tree(pokeG, best=12) +plot(prunePokeG) +text(prunePokeG, pretty=0) +``` + +#K-Means +```{r} +library(mclust) +library(cluster) +library(dplyr) +library(fpc) +pokeNum<-select_if(pokemon, is.numeric) +distPoke<-daisy(pokemon) +summary(distPoke) +pokeDist<-cmdscale(distPoke) +plot(pokeDist, type = "n") +text(pokeDist, rownames(pokeDist)) +set.seed(413) +clustore<-matrix(0, nrow = 721, ncol=25) +wsstore<-NULL +for(i in 1:10){ + km<-kmeans(pokeDist, i, nstart=10) + clustore[,i]<-km$cluster + wsstore[i]<-km$tot.withinss +} +plot(wsstore) +kPoke2<-kmeans(pokeDist, 7, nstart=25) +plot(pokeDist, col = kPoke2$cluster) +points(kPoke2$centers, col = 1:4, pch=8, cex=2) +out <- cbind(pokemon, clusterNum = kPoke2$cluster) +clusterGroups<-order(out$clusterNum, decreasing = TRUE) +out[clusterGroups,] +``` + +Ok, let's check out the mean for Total for each cluster + +```{r} +for(i in 1:7) { + print(paste("Mean for total for cluster ",i)) + print(mean(out[which(out$clusterNum==i),]$Total)) +} +``` + +Ok, how about the number of isLegendary in each cluster + +```{r} +for(i in 1:7) { + print(paste("Number of isLegendary for cluster ",i)) + legendTemp<-out[which(out$clusterNum==i),] + print(count(legendTemp,vars=isLegendary)) +} +``` + +Ok, that didnt' look great, but it looks like isLegendary==TRUE are mostly in clusters 2 and 4. + + +##Creating a subset with only pokemon that have a gender + +```{r} +pokemon<-read.csv("pokemon_alopez247.csv") +poke<-data.frame(pokemon) +``` + + +```{r} +attach(poke) +poke1<-poke[which(hasGender=='True'),] +head(poke1) +length(poke1[,1]) +``` + +##Trees on the new data set + +```{r} +attach(poke1) +library(tree) +pocl<-tree(Pr_Male~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke1) +plot(pocl) +text(pocl) +``` + +Ok, let's prune this tree down now... 
+ +```{r} +j<-sample(0,10000,100) +size<-{} +for(i in 1:100) { + set.seed(i) + cv.pocl<-cv.tree(pocl, FUN=prune.tree) + thing<-cv.pocl$size[which.min(cv.pocl$dev)] + size[i]<-thing +} +hist(size) +sort(table(size),decreasing=TRUE)[1:3] +``` + +```{r} +p.pocl<-prune.tree(pocl,best=3) +plot(p.pocl) +text(p.pocl) +summary(p.pocl) +``` + +Alright, let's use bagging now... + +```{r} +library(randomForest) +set.seed(1995) +pokebag<-randomForest(Pr_Male~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke1,mtry=6,importance=TRUE) +pokebag +varImpPlot(pokebag) +``` + +Well that didn't work out very well... + +How about random forest... + +```{r} +pokeRF<-randomForest(Pr_Male~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke1,mtry=3,importance=TRUE) +pokeRF +varImpPlot(pokeRF) +``` +Very slightly better, still not a lot of evidence that this model is any good. + + + +```{r} +pokemon<-read.csv("pokemon_alopez247.csv") +poke <- data.frame(pokemon, stringsAsFactors = TRUE) +poke[is.na(poke)] <- 0 +poke$isLegendary<-(as.integer(factor(poke$isLegendary))-1) +poke$hasGender<-(as.integer(factor(poke$hasGender))-1) +poke<-poke[,-c(1,2)] +set.seed(1995) +train<-sample(1:nrow(poke),432) +poke.train<-poke[train,] +poke.test<-poke[-train,] +``` + +##LDA +```{r} +#install.packages(MASS) +library(MASS) +pkmlda<- lda(poke$hasMegaEvolution~poke$hasGender+poke$Type_1+poke$Total+poke$Generation+poke$Pr_Male+poke$isLegendary, data=poke, CV=TRUE) +table(poke$hasMegaEvolution, pkmlda$class) +``` +```{r} +pkmlda<- lda(poke$isLegendary~poke$hasMegaEvolution+poke$Total+poke$hasGender+poke$Pr_Male, data=poke, CV=TRUE) +table(poke$hasMegaEvolution, pkmlda$class) +``` +```{r} +pkmlda<- lda(poke$isLegendary~poke$hasGender+poke$Pr_Male, data=poke, CV=TRUE) +table(poke$isLegendary, pkmlda$class) +``` + +```{r} +pkmlda<- lda(poke$isLegendary~poke$Type_1+poke$Type_2, data=poke, CV=TRUE) +table(poke$isLegendary, pkmlda$class) +``` + +##KNN +```{r} +poke<-data.frame(pokemon) +#remove unique identifiers 
+poke<-poke[,-c(1,2)] +``` +This block removes all NA values for Pr_Male. +```{r} +#the new dataset poke2 has all na values fr Pr_Male removed +poke2<-poke[which(hasGender=='True'),] +poke2 +``` +```{r} +for(j in 1:ncol(poke2)){ + if(!is.numeric(poke2[,j]) ){ + poke2[,j]<-(as.numeric(poke2[,j])) + } +} +poke2$isLegendary <- (poke2$isLegendary - 1) +poke2$hasMegaEvolution <- (poke2$hasMegaEvolution - 1) +poke2$hasGender <- (poke2$hasGender - 1) +``` + +```{r} +poke<-data.frame(pokemon) +#remove unique identifiers +poke<-poke[,-c(1,2)] +``` + +```{r} +for(j in 1:ncol(poke)){ + if(!is.numeric(poke[,j]) ){ + poke[,j]<-(as.numeric(poke[,j])) + } +} +poke$isLegendary <- (poke$isLegendary - 1) +poke$hasMegaEvolution <- (poke$hasMegaEvolution - 1) +poke$hasGender <- (poke$hasGender - 1) +``` + + +## Logistic Regression + +```{r} +library(class) +library(boot) +library("gclus") +# typeglm <- glm(poke.train$hasGender~poke.train$Type_1 + poke.train$Type_2, data=poke.train) +# typeglm +# predgend<- predict(typeglm, newdata = poke.test, type= "response") +# predgend +# predgend2<- predgend[c(1:289)] +# length(poke.test$hasGender) +# table(predgend2>0.5, poke.test$hasGender) +``` +isLegendary ~ hasGender + Catch_Rate + +```{r} +pokeglm<- glm(isLegendary ~ hasGender + Catch_Rate, family = "binomial", data = poke) +summary(pokeglm) +``` + +ok so the t test variable selection says all of the variables are important. +This might be Type 1 error??? (probs nah tbh why would a legendary pokemon need a gender or be easy to catch?) + + +Leave One Out Cross Validation! 
+```{r} +attach(poke2) +pokeglm <- list() +cv.mse <- NA +for(i in 1:nrow(poke)){ + cvisLeg <- poke$isLegendary[-i] + cvhasGend <- poke$hasGender[-i] + cvCatchR <- poke$Catch_Rate[-i] + + pokeglm[[i]]<- glm(cvisLeg ~ cvhasGend + cvCatchR, family = "binomial") + cv.mse[i] <- (predict(pokeglm[[i]], newdata = data.frame(poke$isLegendary[i])) - poke$isLegendary[i])^2 + +} +mean(cv.mse) +``` + +See what a regression tree looks like using total as the predictor and hp, attack, defense, sp_atk, sp_def, and speed as predictors. +```{r} +pokemon<-read.csv("pokemon_alopez247.csv") +library(tree) +poke<-data.frame(pokemon) +attach(poke) +pocl<-tree(Total~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke) +plot(pocl) +text(pocl) +``` + +Now let's try pruning it back + +```{r} +cv.pocl<-cv.tree(pocl, FUN=prune.tree) +plot(cv.pocl,type="b") +p.pocl<-prune.tree(pocl,best=10) +plot(p.pocl) +text(p.pocl) +summary(p.pocl) +``` + +Looks like pruning was unnecessary since the lowest MSE is with 12 nodes... + +How about with bagging... + +```{r} +library(randomForest) +set.seed(1995) +pokebag<-randomForest(Total~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke,mtry=6,importance=FALSE) +pokebag +``` +Random forest where m=3 +```{r} +pokeRF<-randomForest(Total~HP+Attack+Defense+Sp_Atk+Sp_Def+Speed,data=poke,mtry=3,importance=TRUE) +pokeRF +``` + + + +##PCA + +Alright, let's check out PCA on Pr_Male response with ...stats as predictors + +```{r} +head(poke) +``` + +```{r} +pcapoke <- prcomp(as.matrix(poke[,6:11]), scale.=TRUE) +summary(pcapoke) +biplot(pcapoke) +``` + +Ok cool, two principal components satisfy the Kaiser criterion. Let's take a look at which predictors influence these components... + +```{r} +round(pcapoke$rotation[,1:2], 2) +``` + +Ok, so looks like PC1 refers to kind of all around, balanced pokemon, and PC2 refers to slow defenders with bad HP? I don't think this model is all that great... But, let's see which pokemon each component is referring to. 
+ +```{r} +poke[order(pcapoke$x[,1], decreasing=TRUE)[1:4] , 1:11] +``` + +```{r} +poke[order(pcapoke$x[,2], decreasing=TRUE)[1:4] , 1:11] +``` + +The first component doesn't really seem to refer to much at all, just kind of all around generalists maybe. The totals are quite high though, so maybe these are the powerhouses? Wait, let's see how many of them are legendary... + +```{r} +poke[order(pcapoke$x[,1], decreasing=TRUE)[1:20],] +``` + + +The first 13 are legendary, this is a good sign. Let's see how PC1 correlates with isLegendary... + +```{r} +library(MASS) +pcleg<-data.frame(pcapoke$x) +pcleg[1:20,] +leglda <- lda(factor(poke$isLegendary)~PC1+PC2,data=pcleg) +leglda +``` + +I might just be high, but I'm pretty sure this indicates PC1 is a pretty good predictor for isLegendary. PC2 doesn't really seem to refer to anything here... + + +##Gender PCA + +Ok, now let's run PCA on the subset that has a gender + +```{r} +pcagenpoke <- prcomp(as.matrix(poke1[,6:11]), scale.=TRUE) +summary(pcagenpoke) +biplot(pcagenpoke) +``` + +Ok cool, two principal components satisfy the Kaiser criterion. Let's take a look at which predictors influence these components... + +```{r} +round(pcagenpoke$rotation[,1:2], 2) +``` +This is looking pretty similar to the full dataset! But, let's see which pokemon each component is referring to. + +```{r} +poke[order(pcagenpoke$x[,1], decreasing=TRUE)[1:4] , 1:11] +``` + + +```{r} +poke[order(pcagenpoke$x[,2], decreasing=TRUE)[1:4] , 1:11] +``` + +The first component doesn't really seem to refer to much at all, just kind of all around generalists maybe. The totals are quite high though, so maybe these are the powerhouses? Wait, let's see how many of them are legendary... + +```{r} +poke[order(pcagenpoke$x[,1], decreasing=TRUE)[1:20],] +``` + + +The first 13 are legendary, this is a good sign. Let's see how PC1 correlates with isLegendary... 
+ +```{r} +library(MASS) +pcgenleg<-data.frame(pcagenpoke$x) +pcgenleg[1:20,] +leggenlda <- lda(factor(poke1$isLegendary)~pcgenleg[,1]+pcgenleg[,2],data=pcgenleg) +leggenlda +``` + +Ok now let's look at a classification table: + +```{r} +lda.pred<-predict(leggenlda,poke1) +lda.class<-lda.pred$class +table(lda.class,poke1$isLegendary) +``` + +So LDA using PC1 and PC2 basically amounts to a naive classifier classifying everything "False" for isLegendary. Let's see if univariate LDA with PC1 only does any better. + +```{r} +leggenlda1 <- lda(factor(poke1$isLegendary)~pcgenleg[,1],data=pcgenleg) +leggenlda1 +``` + +```{r} +lda.pred<-predict(leggenlda1,poke1) +lda.class<-lda.pred$class +table(lda.class,poke1$isLegendary) +``` + +Yea, this is still a naive classifier, NOT VERY USEFUL! + +Let's try a linear model, see if PC1 and PC2 are any good at predicting Pr_Male: + +```{r} +linmod<-lm(poke1$Pr_Male~pcgenleg[,1]+pcgenleg[,2]) +summary(linmod) +linmod<-lm(poke1$Pr_Male~pcgenleg[,1]) +summary(linmod) +plot(pcgenleg[,1],poke1$Pr_Male) +abline(linmod) +``` + +Ok, so the second model is statistically significant. So let's try to interpret this now. The intercept on this linear model is 0.55, which is already above 50%. Oh man that graph looks like garbage. I don't think PCA really did anything here... + + +```{r} +# linmod<-lm(poke1$Catch_Rate~pcgenleg[,1]+pcgenleg[,2]) +# summary(linmod) +linmod<-lm(poke1$Catch_Rate~pcgenleg[,1]) +summary(linmod) +plot(pcgenleg[,1],poke1$Catch_Rate) +abline(linmod) +``` + +Ok, now we're talking. So it looks like PC1 is correlated with the harder to catch Pokemon rather than the legendary ones. Probably a good time to start a new file, this is getting messy... 
+ +```{r} +set.seed(1995) +train<-sample(1:nrow(poke),432) +poke.test<-poke[-train,] +poke.train<-poke[train,] +``` + +#Neural Networks +```{r} +library(gclus) +library(nnet) +library(NeuralNetTools) +set.seed(1995) +spoke <- cbind(scale(trainset[,6:11]), factor(trainset$isLegendary)) +colnames(spoke)[7] <- "isLegendary" +spoke<-data.frame(spoke) +nnpoke <- nnet(factor(isLegendary)~., data=spoke, size=5) +table(trainset$isLegendary, predict(nnpoke, type="class")) +plotnet(nnpoke) +``` + +```{r} +spoke +``` + +```{r} +spoketest <- cbind(scale(testset[,6:11]), factor(testset$isLegendary)) +colnames(spoketest)[7] <- "isLegendary" +spoketest<-data.frame(spoketest) +table(spoketest$isLegendary, predict(nnpoke, newdata=spoketest, type="class")) +``` + +```{r} +attach(data) +trainsetg<-trainset[which(hasGender=='True'),] +testsetg<-testset[which(hasGender=='True'),] +trainsetg<-na.omit(trainsetg) +trainsetg +testsetg<-na.omit(testsetg) +testsetg +``` + +#Neural Net predicting Pr_Male + +Optimizing number of nodes in first layer +```{r} +for(i in 1:5){ + nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=c(i,3), threshold=0.01) + print(paste("Number of hidden layer variables in first layer:", i)) + print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2))) +} +``` + +Optimizing number of nodes in second layer +```{r} +for(i in 1:5){ + nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=c(4,i), threshold=0.01) + print(paste("Number of hidden layer variables in second layer:", i)) + print(paste("MSE: ", mean((compute(nnmaletr, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2))) +} +``` + +```{r} +set.seed(906534) +nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=3, threshold=0.01) +plotnet(nnmale) +mse<-mean((compute(nnmale, testsetg[,6:11])$net.result-testsetg$Pr_Male)^2) +mse +``` +MSE 
with 2 hidden layers and 4 and 3 nodes: 0.04011755 +MSE with 1 hidden layer and 3 nodes: 0.03988659 + + +```{r} +linmod<-lm(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg) +mean((predict(linmod,newdata=testsetg)-testsetg$Pr_Male)^2) +``` +This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes. + +#Neural Net, predicting Generation +```{r} +set.seed(12345) +library(neuralnet) +nnGen <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,data=trainsetg, hidden=4, threshold=0.01) +plotnet(nnGen) +``` + +```{r} +mse<-mean((compute(nnGen, testsetg[,6:11])$net.result-testsetg$Generation)^2) +mse +``` +MSE without Pr_Male, 2 hidden 4 and 3 nodes +1.28986 +MSE without Pr_Male included, 5 nodes 1 hidden +1.349612 +MSE with Pr_Male included, 5 nodes 1 hidden +1.466631 + +```{r} +spoketest <- cbind(scale(testsetg[,6:11]), factor(testsetg$Generation)) +colnames(spoketest)[7] <- "Generation" +spoketest<-data.frame(spoketest) +table(spoketest$isLegendary, predict(nnGen, newdata=spoketest, type="class")) +``` From 949f71df25454bde2c1024a81d0fac7031db1c18 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 1 Apr 2019 12:28:36 -0700 Subject: [PATCH 23/26] Create Scatters.nb.html --- Scatters.nb.html | 363 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 Scatters.nb.html diff --git a/Scatters.nb.html b/Scatters.nb.html new file mode 100644 index 0000000..b6d4d9d --- /dev/null +++ b/Scatters.nb.html @@ -0,0 +1,363 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + +
data <- read.csv("pokemon_alopez247.csv", header = TRUE) # load the Pokemon table; TRUE is used because T can be reassigned
+ + + + + + +
summary(data)
+ + + +
+

Basic 3d Scatterplots

+
+

Evaluating variables, Total, Defense, and Pr_Male

+ + + +
library(scatterplot3d)
+# with() resolves Total/Defense/Pr_Male inside `pokemon` for this call only, so
+# nothing is attach()ed to the search path (repeated attach() calls stack
+# masked copies of every column).
+sp <- with(pokemon, scatterplot3d(Total, Defense, Pr_Male, pch = 16,
+  highlight.3d = TRUE, type = "h", main = "3D Scatterplot"))
+ + + +

3d Scatterplot without line markers

+ + + +
library(scatterplot3d)
+# Same three variables without type = "h"; with() scopes the column lookup to
+# `pokemon` instead of attach()ing it to the search path.
+with(pokemon, scatterplot3d(Total, Defense, Pr_Male, main = "3D Scatterplot"))
+ + + +
+
+
+

Correlation

+ + + +
library(gclus)
+cleanPoke <- na.omit(pokemon) # drop every row that has an NA in any column
+dta <- cleanPoke[c(5:8, 16)] # select columns by position: 5:8 plus 16 (presumably Pr_Male, per the note under this chunk -- confirm)
+dta.r <- abs(cor(dta)) # absolute pairwise correlations, so the sign is ignored
+#dta.r[is.na(dta.r)] <- 0.5
+dta.col <- dmat.color(dta.r) # panel colors derived from the correlation matrix
+dta.o <- order.single(dta.r) # variable ordering derived from the same matrix
+cpairs(dta, dta.o, panel.colors=dta.col, gap=.5,
+main="Variables Ordered and Colored by Correlation" )
+ + + +

It may be worth exploring how the correlation changes depending on which value is substituted for the NA entries of Pr_Male when using “dta.r[is.na(dta.r)] <- 0.5”. Below is the correlation just between columns 5:8 for additional clarity.

+ + + +
library(gclus)
+#cleanPoke <- na.omit(pokemon)
+dta <- pokemon[c(5:8)] # columns 5:8 of the full table; column 16 is left out, so the na.omit() step is skipped
+dta.r <- abs(cor(dta)) # absolute pairwise correlations, so the sign is ignored
+dta.col <- dmat.color(dta.r) # panel colors derived from the correlation matrix
+dta.o <- order.single(dta.r) # variable ordering derived from the same matrix
+cpairs(dta, dta.o, panel.colors=dta.col, gap=.5,
+main="Variables Ordered and Colored by Correlation" )
+ + +
+ +
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkKYGBgCgoKYGBge3J9CmRhdGEgPSByZWFkLmNzdigicG9rZW1vbl9hbG9wZXoyNDcuY3N2IiwgaGVhZGVyPVQpCmBgYAoKYGBge3J9CnN1bW1hcnkoZGF0YSkKYGBgCgojQmFzaWMgM2QgU2NhdHRlcnBsb3RzIAojI0V2YWx1YXRpbmcgdmFyaWFibGVzLCBUb3RhbCwgRGVmZW5zZSwgYW5kIFByX01hbGUKYGBge3J9CmxpYnJhcnkoc2NhdHRlcnBsb3QzZCkgCmF0dGFjaChwb2tlbW9uKSAKc3AgPC0gc2NhdHRlcnBsb3QzZChUb3RhbCxEZWZlbnNlLFByX01hbGUsIHBjaD0xNiwgaGlnaGxpZ2h0LjNkPVRSVUUsCiAgdHlwZT0iaCIsIG1haW49IjNEIFNjYXR0ZXJwbG90IikKYGBgCgozZCBTY2F0dGVycGxvdCB3aXRob3V0IGxpbmUgbWFya2VycwpgYGB7cn0KbGlicmFyeShzY2F0dGVycGxvdDNkKQphdHRhY2gocG9rZW1vbikKc2NhdHRlcnBsb3QzZChUb3RhbCxEZWZlbnNlLFByX01hbGUsIG1haW49IjNEIFNjYXR0ZXJwbG90IikKYGBgCgojQ29yZWxhdGlvbgpgYGB7cn0KbGlicmFyeShnY2x1cykKY2xlYW5Qb2tlIDwtIG5hLm9taXQocG9rZW1vbikKZHRhIDwtIGNsZWFuUG9rZVtjKDU6OCwgMTYpXSAjIGRhdGEsIG51bWJlcnMgYXMgY29sdW1uIG51bWJlcnMgCmR0YS5yIDwtIGFicyhjb3IoZHRhKSkgIyBjb3JyZWxhdGlvbgojZHRhLnJbaXMubmEoZHRhLnIpXSA8LSAwLjUKZHRhLmNvbCA8LSBkbWF0LmNvbG9yKGR0YS5yKSAjIGNvbG9ycwpkdGEubyA8LSBvcmRlci5zaW5nbGUoZHRhLnIpIApjcGFpcnMoZHRhLCBkdGEubywgcGFuZWwuY29sb3JzPWR0YS5jb2wsIGdhcD0uNSwKbWFpbj0iVmFyaWFibGVzIE9yZGVyZWQgYW5kIENvbG9yZWQgYnkgQ29ycmVsYXRpb24iICkKYGBgCkl0IG1heSBiZSB3b3J0aCBleHBsb3JpbmcgaG93IHRoZSBjb3JyZWxhdGlvbiBjaGFuZ2VzIGRlcGVuZGluZyBvbiB3aGF0IHZhbHVlIGlzIHNldCB0byBQcl9NYWxlIE5BIHZhbHVlcyB3aGVuIHVzaW5nICJ0YS5yW2lzLm5hKGR0YS5yKV0gPC0gMC41LiIgQmVsb3cgaXMgdGhlIGNvcnJlbGF0aW9uIGpzeSBiZXR3ZWVuIHRoZSBjb2x1bW5zIDU6OCBmb3IgYWRkaXRpb25hbCBjbGFyaXR5LiAKCmBgYHtyfQpsaWJyYXJ5KGdjbHVzKQojY2xlYW5Qb2tlIDwtIG5hLm9taXQocG9rZW1vbikKZHRhIDwtIHBva2Vtb25bYyg1OjgpXSAjIGRhdGEsIG51bWJlcnMgYXMgY29sdW1uIG51bWJlcnMgCmR0YS5yIDwtIGFicyhjb3IoZHRhKSkgIyBjb3JyZWxhdGlvbgpkdGEuY29sIDwtIGRtYXQuY29sb3IoZHRhLnIpICMgY29sb3JzCmR0YS5vIDwtIG9yZGVyLnNpbmdsZShkdGEucikgCmNwYWlycyhkdGEsIGR0YS5vLCBwYW5lbC5jb2xvcnM9ZHRhLmNvbCwgZ2FwPS41LAptYWluPSJWYXJpYWJsZXMgT3JkZXJlZCBhbmQgQ29sb3JlZCBieSBDb3JyZWxhdGlvbiIg
KQpgYGAK
+ + + +
+ + + + + + + + From a327e7ffb2dd4eb121e0f312aaa4488c07371631 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 1 Apr 2019 12:29:43 -0700 Subject: [PATCH 24/26] Update laurenedits.Rmd --- laurenedits.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/laurenedits.Rmd b/laurenedits.Rmd index a9cb707..ff165df 100644 --- a/laurenedits.Rmd +++ b/laurenedits.Rmd @@ -36,7 +36,7 @@ scatterplot3d(Total,Defense,Pr_Male, main="3D Scatterplot") ```{r} library(gclus) cleanPoke <- na.omit(pokemon) -dta <- cleanPoke[c(5:8, 16)] # data, numbers as column numbers +dta <- cleanPoke[c(6:8, 16)] # data, numbers as column numbers dta.r <- abs(cor(dta)) # correlation #dta.r[is.na(dta.r)] <- 0.5 dta.col <- dmat.color(dta.r) # colors From 8faa62fbf9f45af92b03dc439fb282eb8d1877c0 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 1 Apr 2019 12:29:46 -0700 Subject: [PATCH 25/26] Create laurenedits.nb.html --- laurenedits.nb.html | 571 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 571 insertions(+) create mode 100644 laurenedits.nb.html diff --git a/laurenedits.nb.html b/laurenedits.nb.html new file mode 100644 index 0000000..f2059f0 --- /dev/null +++ b/laurenedits.nb.html @@ -0,0 +1,571 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + +
+

laurenedits

+ + + +
data <- read.csv("pokemon_alopez247.csv", header = TRUE) # load the Pokemon table; TRUE is used because T can be reassigned
+ + + + + + +
summary(data)
+ + + +
+
+

Basic 3d Scatterplots

+
+

Evaluating variables, Total, Defense, and Pr_Male

+ + + +
library(scatterplot3d)
+# with() resolves Total/Defense/Pr_Male inside `pokemon` for this call only, so
+# nothing is attach()ed to the search path (repeated attach() calls stack
+# masked copies of every column).
+sp <- with(pokemon, scatterplot3d(Total, Defense, Pr_Male, pch = 16,
+  highlight.3d = TRUE, type = "h", main = "3D Scatterplot"))
+ + + +

3d Scatterplot without line markers

+ + + +
library(scatterplot3d)
+# Same three variables without type = "h"; with() scopes the column lookup to
+# `pokemon` instead of attach()ing it to the search path.
+with(pokemon, scatterplot3d(Total, Defense, Pr_Male, main = "3D Scatterplot"))
+ + + +
+
+
+

Correlation

+ + + +
library(gclus)
+cleanPoke <- na.omit(pokemon) # drop every row that has an NA in any column
+dta <- cleanPoke[c(6:8, 16)] # select columns by position: 6:8 (HP, Attack, Defense in the PCA loadings further down) plus 16 (presumably Pr_Male -- confirm)
+dta.r <- abs(cor(dta)) # absolute pairwise correlations, so the sign is ignored
+#dta.r[is.na(dta.r)] <- 0.5
+dta.col <- dmat.color(dta.r) # panel colors derived from the correlation matrix
+dta.o <- order.single(dta.r) # variable ordering derived from the same matrix
+cpairs(dta, dta.o, panel.colors=dta.col, gap=.5,
+main="Variables Ordered and Colored by Correlation" )
+ + +

+ + + +

It may be worth exploring how the correlation changes depending on which value is substituted for the NA entries of Pr_Male when using “dta.r[is.na(dta.r)] <- 0.5”. Below is the correlation just between columns 5:8 for additional clarity.

+ + + +
library(gclus)
+#cleanPoke <- na.omit(pokemon)
+dta <- pokemon[c(5:8)] # columns 5:8 of the full table; column 16 is left out, so the na.omit() step is skipped
+dta.r <- abs(cor(dta)) # absolute pairwise correlations, so the sign is ignored
+dta.col <- dmat.color(dta.r) # panel colors derived from the correlation matrix
+dta.o <- order.single(dta.r) # variable ordering derived from the same matrix
+cpairs(dta, dta.o, panel.colors=dta.col, gap=.5,
+main="Variables Ordered and Colored by Correlation" )
+ + +

+ + + +
+
+

Neural Network, predicting isLegendary

+

Neural net testing isLegendary w/ all

+ + + +
library(nnet)
+library(NeuralNetTools)
+set.seed(53747958)
+numeric_col <- c(5:12, 16, 20:21)
+pokemon[,numeric_col] <- scale(pokemon[,numeric_col])
+# The response column is already named isLegendary. The former
+# `colnames(data)[22] <- "isLegendary"` renamed column 22 (Catch_Rate in this
+# dataset's column order) and left two columns with the same name, so it is
+# dropped here.
+# NOTE(review): the scaling above is applied to `pokemon`, but the model is fit
+# on `data` -- confirm which data frame is intended.
+nnpokemon<-nnet(isLegendary~., data = data, size = 1)
+plotnet(nnpokemon)
+mean(nnpokemon$residuals^2)
+ + + +

Neural net testing isLegendary w/ Defense+Pr_Male+Attack+HP

+ + + +
library(nnet)
+library(neuralnet)
+library(NeuralNetTools)
+set.seed(53747958)
+numeric_col <- c(5:12, 16, 20:21)
+# Replace missing values in the numeric columns with 0.5 before scaling.
+# The former `is.na(numeric_col) <- 0.5` acted on the index vector rather than
+# the data, and index 0.5 truncates to 0, so it changed nothing.
+pokemon[,numeric_col][is.na(pokemon[,numeric_col])] <- 0.5
+pokemon[,numeric_col] <- scale(pokemon[,numeric_col])
+trainset <- pokemon[1:505, ]
+testset <- pokemon[506:721, ]
+# The former `colnames(data)[22] <- "isLegendary"` is removed: it renamed
+# column 22 of `data` (not the response) and `data` is not used by this model.
+# NOTE(review): the network is fit on all of `pokemon`, so the `testset` rows
+# are also training rows -- confirm whether `data = trainset` was intended.
+nnpokemon<-neuralnet(isLegendary~Defense+Pr_Male+Attack+HP,data=pokemon, hidden=3,act.fct = "logistic",
+                linear.output = FALSE)
+plotnet(nnpokemon)
+nnpokemon$result.matrix
+ + + +

*DOUBLE CHECK

+ + + +
# Predictor frame taken directly from `pokemon` by column name, so the data
+# frame does not have to be attach()ed to reach four columns.
+test <- pokemon[, c("Defense", "Pr_Male", "Attack", "HP")]
+# Feed every row through the fitted network and show the raw outputs.
+Predict <- compute(nnpokemon, test)
+Predict$net.result
+ + + + + + +
# Threshold the network outputs at 0.5 to get 0/1 labels.
+# attach(pokemon) was removed: this chunk only reads `Predict`.
+prob <- Predict$net.result
+pred <- ifelse(prob>0.5, 1, 0)
+pred
+ + + + + + +
#Test the resulting output
+# Keep only the four predictor columns named in the network's formula.
+temp_test <- subset(testset, select = c("Defense","Pr_Male", "Attack", "HP"))
+head(temp_test)
+# Run the testset rows through the fitted network.
+nn.results <- compute(nnpokemon, temp_test)
+# Side-by-side table of the recorded label and the raw network output.
+results <- data.frame(actual = testset$isLegendary, predicted = nn.results$net.result)
+results
+ + + + + + +
results <- data.frame(actual = testset$isLegendary, prediction = nn.results$net.result)
+# Same inputs as the `results` table built in the preceding chunk; only the second column's name differs (prediction vs. predicted).
+results
+ + + +

*DOUBLE CHECK

+ + + +
#mse <- sum((compute(nnpokemon, testset[,2:15])$net.result-test$Price)^2)
+#mse
+ + + +
+
+

Principal Component Analysis

+ + + +
pokemon <- read.csv("pokemon_alopez247.csv", stringsAsFactors = FALSE) # relative path, matching the other read.csv() calls for this file
+#remove NA from dataset
+cleanPoke <- na.omit(pokemon)
+# PCA on columns 6:11 (HP, Attack, Defense, Sp_Atk, Sp_Def, Speed per the
+# loadings printed further down), each scaled to unit variance.
+pcPoke <- prcomp(as.matrix(cleanPoke[,c(6:11)]), scale. = TRUE)
+summary(pcPoke)
+ + +
Importance of components:
+                          PC1    PC2    PC3    PC4     PC5     PC6
+Standard deviation     1.5292 1.0865 0.9572 0.8876 0.69979 0.53602
+Proportion of Variance 0.3898 0.1967 0.1527 0.1313 0.08162 0.04789
+Cumulative Proportion  0.3898 0.5865 0.7392 0.8705 0.95211 1.00000
+ + + + + + +
biplot(pcPoke)
+ + +

+ + + + + + +
plot(pcPoke$x[,1:2])
+ + +

+ + + + + + +
plot(pcPoke$x[,1:2], type="n")
+# Label each score with its row name from cleanPoke, the data pcPoke was fit on.
+# 1:nrow(pokemon) supplied more labels than there are points, which makes
+# text() recycle the coordinates and overprint the first points.
+text(pcPoke$x[,1], pcPoke$x[,2], labels = rownames(cleanPoke))
+ + +

+ + + + + + +
round(pcPoke$rotation[,1:2], 3)
+ + +
          PC1    PC2
+HP      0.414  0.080
+Attack  0.434  0.111
+Defense 0.365  0.621
+Sp_Atk  0.453 -0.361
+Sp_Def  0.464  0.114
+Speed   0.293 -0.673
+ + + + + + +
cleanPoke[order(pcPoke$x[,1], decreasing=TRUE)[1:4], 1:11] # index cleanPoke: order() positions refer to the rows pcPoke was fit on, not to rows of pokemon
+ + +
+ +
+ + +
plot(pcPoke, type = "lines")
+# Horizontal reference line at variance 1 (presumably the Kaiser cut-off for standardized components).
+abline(h = 1, col = "blue", lwd = 3)
+ + +

+ + + + + + +
test1 <- hclust(dist(scale(pokemon[,-c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)])))
+# ^ clusters all rows of `pokemon` on the scaled columns left after the exclusions (1, 5:11, 20:22).
+plot(test1)
+# Clusters the PC scores instead. pcPoke was fit on cleanPoke[, 6:11], so both the rows and the columns differ from test1.
+test2 <- hclust(dist(pcPoke$x))
+plot(test2)
+ + + + + + +
# Sanity check: a PCA that keeps every component is only a rotation of the
+# standardised input, so pairwise distances between the scores must match the
+# distances between the scaled inputs. Compare like with like by rebuilding
+# the scaled input from the rows (`cleanPoke`) and variables (the row names of
+# the rotation matrix) that the PCA actually used.
+pca_input <- scale(cleanPoke[, rownames(pcPoke$rotation)])
+all.equal(dist(pca_input), dist(pcPoke$x), check.attributes = FALSE)
+ + +
[1] TRUE
+ + +
+ +
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCiNsYXVyZW5lZGl0cwoKYGBge3J9CmRhdGEgPSByZWFkLmNzdigicG9rZW1vbl9hbG9wZXoyNDcuY3N2IiwgaGVhZGVyPVQpCmBgYAoKYGBge3J9CnN1bW1hcnkoZGF0YSkKYGBgCgojQmFzaWMgM2QgU2NhdHRlcnBsb3RzIAojI0V2YWx1YXRpbmcgdmFyaWFibGVzLCBUb3RhbCwgRGVmZW5zZSwgYW5kIFByX01hbGUKYGBge3J9CmxpYnJhcnkoc2NhdHRlcnBsb3QzZCkgCmF0dGFjaChwb2tlbW9uKSAKc3AgPC0gc2NhdHRlcnBsb3QzZChUb3RhbCxEZWZlbnNlLFByX01hbGUsIHBjaD0xNiwgaGlnaGxpZ2h0LjNkPVRSVUUsCiAgdHlwZT0iaCIsIG1haW49IjNEIFNjYXR0ZXJwbG90IikKYGBgCgozZCBTY2F0dGVycGxvdCB3aXRob3V0IGxpbmUgbWFya2VycwpgYGB7cn0KbGlicmFyeShzY2F0dGVycGxvdDNkKQphdHRhY2gocG9rZW1vbikKc2NhdHRlcnBsb3QzZChUb3RhbCxEZWZlbnNlLFByX01hbGUsIG1haW49IjNEIFNjYXR0ZXJwbG90IikKYGBgCgojQ29yZWxhdGlvbgpgYGB7cn0KbGlicmFyeShnY2x1cykKY2xlYW5Qb2tlIDwtIG5hLm9taXQocG9rZW1vbikKZHRhIDwtIGNsZWFuUG9rZVtjKDY6OCwgMTYpXSAjIGRhdGEsIG51bWJlcnMgYXMgY29sdW1uIG51bWJlcnMgCmR0YS5yIDwtIGFicyhjb3IoZHRhKSkgIyBjb3JyZWxhdGlvbgojZHRhLnJbaXMubmEoZHRhLnIpXSA8LSAwLjUKZHRhLmNvbCA8LSBkbWF0LmNvbG9yKGR0YS5yKSAjIGNvbG9ycwpkdGEubyA8LSBvcmRlci5zaW5nbGUoZHRhLnIpIApjcGFpcnMoZHRhLCBkdGEubywgcGFuZWwuY29sb3JzPWR0YS5jb2wsIGdhcD0uNSwKbWFpbj0iVmFyaWFibGVzIE9yZGVyZWQgYW5kIENvbG9yZWQgYnkgQ29ycmVsYXRpb24iICkKYGBgCkl0IG1heSBiZSB3b3J0aCBleHBsb3JpbmcgaG93IHRoZSBjb3JyZWxhdGlvbiBjaGFuZ2VzIGRlcGVuZGluZyBvbiB3aGF0IHZhbHVlIGlzIHNldCB0byBQcl9NYWxlIE5BIHZhbHVlcyB3aGVuIHVzaW5nICJ0YS5yW2lzLm5hKGR0YS5yKV0gPC0gMC41LiIgQmVsb3cgaXMgdGhlIGNvcnJlbGF0aW9uIGpzeSBiZXR3ZWVuIHRoZSBjb2x1bW5zIDU6OCBmb3IgYWRkaXRpb25hbCBjbGFyaXR5LiAKCmBgYHtyfQpsaWJyYXJ5KGdjbHVzKQojY2xlYW5Qb2tlIDwtIG5hLm9taXQocG9rZW1vbikKZHRhIDwtIHBva2Vtb25bYyg1OjgpXSAjIGRhdGEsIG51bWJlcnMgYXMgY29sdW1uIG51bWJlcnMgCmR0YS5yIDwtIGFicyhjb3IoZHRhKSkgIyBjb3JyZWxhdGlvbgpkdGEuY29sIDwtIGRtYXQuY29sb3IoZHRhLnIpICMgY29sb3JzCmR0YS5vIDwtIG9yZGVyLnNpbmdsZShkdGEucikgCmNwYWlycyhkdGEsIGR0YS5vLCBwYW5lbC5jb2xvcnM9ZHRhLmNvbCwgZ2FwPS41LAptYWluPSJWYXJpYWJsZXMgT3JkZXJlZCBhbmQgQ29sb3JlZCBieSBD
b3JyZWxhdGlvbiIgKQpgYGAKCiNOZXVyYWwgTmV0d29yaywgcHJlZGljdGluZyBpc0xlZ2VuZGFyeQpOZXVyYWwgbmV0IHRlc3RpbmcgaXNMZWdlbmRhcnkgdy8gYWxsCmBgYHtyfQpsaWJyYXJ5KG5uZXQpCmxpYnJhcnkoTmV1cmFsTmV0VG9vbHMpCnNldC5zZWVkKDUzNzQ3OTU4KQpudW1lcmljX2NvbCA8LSBjKDU6MTIsIDE2LCAyMDoyMSkKcG9rZW1vblssbnVtZXJpY19jb2xdIDwtIHNjYWxlKHBva2Vtb25bLG51bWVyaWNfY29sXSkKY29sbmFtZXMoZGF0YSlbMjJdPC0iaXNMZWdlbmRhcnkiCm5ucG9rZW1vbjwtbm5ldChpc0xlZ2VuZGFyeX4uLCBkYXRhID0gZGF0YSwgc2l6ZSA9IDEpCnBsb3RuZXQobm5wb2tlbW9uKQptZWFuKG5ucG9rZW1vbiRyZXNpZHVhbHNeMikKYGBgCgpOZXVyYWwgbmV0IHRlc3RpbmcgaXNMZ2VuZGFyeSB3LyBEZWZlbnNlK1ByX01hbGUrQXR0YWNrK0hQCmBgYHtyfQpsaWJyYXJ5KG5uZXQpCmxpYnJhcnkobmV1cmFsbmV0KQpsaWJyYXJ5KE5ldXJhbE5ldFRvb2xzKQpzZXQuc2VlZCg1Mzc0Nzk1OCkKbnVtZXJpY19jb2wgPC0gYyg1OjEyLCAxNiwgMjA6MjEpCmlzLm5hKG51bWVyaWNfY29sKSA8LSAwLjUKcG9rZW1vblssbnVtZXJpY19jb2xdIDwtIHNjYWxlKHBva2Vtb25bLG51bWVyaWNfY29sXSkKdHJhaW5zZXQgPC0gcG9rZW1vblsxOjUwNSwgXQp0ZXN0c2V0IDwtIHBva2Vtb25bNTA2OjcyMSwgXQpjb2xuYW1lcyhkYXRhKVsyMl08LSJpc0xlZ2VuZGFyeSIKbm5wb2tlbW9uPC1uZXVyYWxuZXQoaXNMZWdlbmRhcnl+RGVmZW5zZStQcl9NYWxlK0F0dGFjaytIUCxkYXRhPXBva2Vtb24sIGhpZGRlbj0zLGFjdC5mY3QgPSAibG9naXN0aWMiLAogICAgICAgICAgICAgICAgbGluZWFyLm91dHB1dCA9IEZBTFNFKQpwbG90bmV0KG5ucG9rZW1vbikKbm5wb2tlbW9uJHJlc3VsdC5tYXRyaXgKYGBgCipET1VCTEUgQ0hFQ0sKCmBgYHtyfQphdHRhY2gocG9rZW1vbikKdGVzdD1kYXRhLmZyYW1lKERlZmVuc2UsUHJfTWFsZSxBdHRhY2ssSFApClByZWRpY3Q9Y29tcHV0ZShubnBva2Vtb24sdGVzdCkKUHJlZGljdCRuZXQucmVzdWx0CmBgYAoKYGBge3J9CmF0dGFjaChwb2tlbW9uKQpwcm9iIDwtIFByZWRpY3QkbmV0LnJlc3VsdApwcmVkIDwtIGlmZWxzZShwcm9iPjAuNSwgMSwgMCkKcHJlZApgYGAKCmBgYHtyfQojVGVzdCB0aGUgcmVzdWx0aW5nIG91dHB1dAp0ZW1wX3Rlc3QgPC0gc3Vic2V0KHRlc3RzZXQsIHNlbGVjdCA9IGMoIkRlZmVuc2UiLCJQcl9NYWxlIiwgIkF0dGFjayIsICJIUCIpKQpoZWFkKHRlbXBfdGVzdCkKbm4ucmVzdWx0cyA8LSBjb21wdXRlKG5ucG9rZW1vbiwgdGVtcF90ZXN0KQpyZXN1bHRzIDwtIGRhdGEuZnJhbWUoYWN0dWFsID0gdGVzdHNldCRpc0xlZ2VuZGFyeSwgcHJlZGljdGVkID0gbm4ucmVzdWx0cyRuZXQucmVzdWx0KQpyZXN1bHRzCmBgYAoKYGBge3J9CnJlc3VsdHMgPC0gZGF0YS5mcmFtZShhY3R1YWwgPSB0ZXN0c2V0JGlzTGVnZW5kYXJ5LCBwcmVkaWN0aW9uID0g
bm4ucmVzdWx0cyRuZXQucmVzdWx0KQpyZXN1bHRzCmBgYAoKKkRPVUJMRSBDSEVDSwpgYGB7cn0KI21zZSA8LSBzdW0oKGNvbXB1dGUobm5wb2tlbW9uLCB0ZXN0c2V0WywyOjE1XSkkbmV0LnJlc3VsdC10ZXN0JFByaWNlKV4yKQojbXNlCmBgYAoKCiNQcmluY2lwbGUgQ29tcG9uZW50IEFuYWx5c2lzCmBgYHtyfQpwb2tlbW9uIDwtIHJlYWQuY3N2KCJ+L3Bva2Vtb25fYWxvcGV6MjQ3LmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKI3JlbW92ZSBOQSBmcm9tIGRhdGFzZXQKY2xlYW5Qb2tlIDwtIG5hLm9taXQocG9rZW1vbikKcGNQb2tlIDwtIHByY29tcChhcy5tYXRyaXgoY2xlYW5Qb2tlWyxjKDY6MTEpXSksIHNjYWxlLiA9IFRSVUUpCnN1bW1hcnkocGNQb2tlKQpgYGAKCmBgYHtyfQpiaXBsb3QocGNQb2tlKQpgYGAKCmBgYHtyfQpwbG90KHBjUG9rZSR4WywxOjJdKQpgYGAKCmBgYHtyfQpwbG90KHBjUG9rZSR4WywxOjJdLCB0eXBlPSJuIikKdGV4dChwY1Bva2UkeFssMV0sIHBjUG9rZSR4WywyXSwgbGFiZWxzID0gMTpucm93KHBva2Vtb24pKQpgYGAKCmBgYHtyfQpyb3VuZChwY1Bva2Ukcm90YXRpb25bLDE6Ml0sIDMpCmBgYAoKYGBge3J9CnBva2Vtb25bb3JkZXIocGNQb2tlJHhbLDFdLCBkZWNyZWFzaW5nPVRSVUUpWzE6NF0sIDE6MTFdCnBsb3QocGNQb2tlLCB0eXBlPSJsaW5lcyIpCmFibGluZShhPTEsIGI9MCwgY29sPSJibHVlIiwgbHdkPTMpCmBgYAoKCmBgYHtyfQp0ZXN0MSA8LSBoY2x1c3QoZGlzdChzY2FsZShwb2tlbW9uWywtYygyLCAzLCA0LCAxMiwgMTMsIDE0LCAxNSwgMTYsIDE3LCAxOCwgMTksIDIzKV0pKSkKcGxvdCh0ZXN0MSkKdGVzdDIgPC0gaGNsdXN0KGRpc3QocGNQb2tlJHgpKQpwbG90KHRlc3QyKQpgYGAKCmBgYHtyfQphbGwuZXF1YWwoZGlzdChzY2FsZShwb2tlbW9uWywtYygyLCAzLCA0LCAxMiwgMTMsIDE0LCAxNSwgMTYsIDE3LCAxOCwgMTksIDIzKV0pKSwgZGlzdChwY1Bva2UkeCksIGNoZWNrLmF0dHJpYnV0ZXMgPSBGQUxTRSkKYGBg
+ + + +
+ + + + + + + + From 8251441346683547dbdc52f9480bd08be1d21544 Mon Sep 17 00:00:00 2001 From: "Lauren St. Clair" Date: Mon, 1 Apr 2019 12:29:49 -0700 Subject: [PATCH 26/26] Create Principle.nb.html --- Principle.nb.html | 376 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 376 insertions(+) create mode 100644 Principle.nb.html diff --git a/Principle.nb.html b/Principle.nb.html new file mode 100644 index 0000000..eef21e2 --- /dev/null +++ b/Principle.nb.html @@ -0,0 +1,376 @@ + + + + + + + + + + + + + +R Notebook + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + +
+

laurenedits

+ + + +
# Load the Pokemon dataset from the working directory; the first row of the
+# file holds the column names.
+data <- read.csv("pokemon_alopez247.csv", header = TRUE)
+ + + + + + +
summary(data)  # per-column summary of the loaded data frame
+ + + +
+
+

Principal Component Analysis

+ + + +
# Principal component analysis on columns 5-8 and 16 of the Pokemon data.
+pokemon <- read.csv("~/pokemon_alopez247.csv", stringsAsFactors = FALSE)
+# na.omit() removes every row that has an NA in any column.
+cleanPoke <- na.omit(pokemon)
+pca_matrix <- as.matrix(cleanPoke[, c(5:8, 16)])
+# scale. = TRUE standardises each column to unit variance before the rotation.
+pcPoke <- prcomp(pca_matrix, scale. = TRUE)
+summary(pcPoke)
+ + + + + + +
biplot(pcPoke)  # biplot of the PCA: observation scores and variable loadings on the first two components
+ + + + + + +
plot(pcPoke$x[,1:2])  # scatter plot of the observations' scores on the first two principal components
+ + + + + + +
# Empty PC1-vs-PC2 canvas (type = "n"), then draw each observation as a label.
+plot(pcPoke$x[, 1:2], type = "n")
+# The labels must have exactly one entry per plotted score. pcPoke was fitted
+# on `cleanPoke`, so nrow(pokemon) can be larger than the number of scores;
+# text() would then recycle the coordinates and overplot wrong labels.
+# Prefer the row names carried through prcomp() (the original row ids) and
+# fall back to 1..n when the score matrix has no row names.
+point_labels <- rownames(pcPoke$x)
+if (is.null(point_labels)) {
+  point_labels <- seq_len(nrow(pcPoke$x))
+}
+text(pcPoke$x[, 1], pcPoke$x[, 2], labels = point_labels)
+ + + + + + +
round(pcPoke$rotation[,1:5], 3)  # variable loadings on PC1-PC5, rounded to 3 decimals
+ + + + + + +
# First 3 columns of the four observations with the largest PC1 scores.
+# order() returns positions within the score matrix, whose rows correspond to
+# `cleanPoke` (the data the PCA was fitted on), so that is the table to index.
+cleanPoke[order(pcPoke$x[, 1], decreasing = TRUE)[1:4], 1:3]
+# Scree plot of the component variances with a reference line at variance 1.
+plot(pcPoke, type = "lines")
+abline(a = 1, b = 0, col = "blue", lwd = 3)
+ + + + + + +
test1 <- hclust(dist(scale(pokemon[,-c(2, 3, 4, 12, 13, 14, 15, 16, 17, 18, 19, 23)])))  # cluster the standardised columns of `pokemon` left after dropping columns 2-4, 12-19 and 23
+plot(test1)  # dendrogram for test1
+test2 <- hclust(dist(pcPoke$x))  # same clustering on the PCA scores; NOTE(review): pcPoke was fitted on `cleanPoke`, so test1 and test2 may cover different rows/columns - confirm this is intended
+plot(test2)  # dendrogram for test2
+ + + + + + +
# Sanity check: a PCA that keeps every component is only a rotation of the
+# standardised input, so pairwise distances between the scores must match the
+# distances between the scaled inputs. Compare like with like by rebuilding
+# the scaled input from the rows (`cleanPoke`) and variables (the row names of
+# the rotation matrix) that the PCA actually used.
+pca_input <- scale(cleanPoke[, rownames(pcPoke$rotation)])
+all.equal(dist(pca_input), dist(pcPoke$x), check.attributes = FALSE)
+ + +
+ +
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpCmBgYAoKI2xhdXJlbmVkaXRzCgpgYGB7cn0KZGF0YSA9IHJlYWQuY3N2KCJwb2tlbW9uX2Fsb3BlejI0Ny5jc3YiLCBoZWFkZXI9VCkKYGBgCgpgYGB7cn0Kc3VtbWFyeShkYXRhKQpgYGAKCgojUHJpbmNpcGxlIENvbXBvbmVudCBBbmFseXNpcwpgYGB7cn0KcG9rZW1vbiA8LSByZWFkLmNzdigifi9wb2tlbW9uX2Fsb3BlejI0Ny5jc3YiLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCiNyZW1vdmUgTkEgZnJvbSBkYXRhc2V0CmNsZWFuUG9rZSA8LSBuYS5vbWl0KHBva2Vtb24pCnBjUG9rZSA8LSBwcmNvbXAoYXMubWF0cml4KGNsZWFuUG9rZVssYyg1OjgsMTYpXSksIHNjYWxlLiA9IFRSVUUpCnN1bW1hcnkocGNQb2tlKQpgYGAKCmBgYHtyfQpiaXBsb3QocGNQb2tlKQpgYGAKCmBgYHtyfQpwbG90KHBjUG9rZSR4WywxOjJdKQpgYGAKCmBgYHtyfQpwbG90KHBjUG9rZSR4WywxOjJdLCB0eXBlPSJuIikKdGV4dChwY1Bva2UkeFssMV0sIHBjUG9rZSR4WywyXSwgbGFiZWxzID0gMTpucm93KHBva2Vtb24pKQpgYGAKCmBgYHtyfQpyb3VuZChwY1Bva2Ukcm90YXRpb25bLDE6NV0sIDMpCmBgYAoKYGBge3J9CnBva2Vtb25bb3JkZXIocGNQb2tlJHhbLDFdLCBkZWNyZWFzaW5nPVRSVUUpWzE6NF0sIDE6M10KcGxvdChwY1Bva2UsIHR5cGU9ImxpbmVzIikKYWJsaW5lKGE9MSwgYj0wLCBjb2w9ImJsdWUiLCBsd2Q9MykKYGBgCgpgYGB7cn0KdGVzdDEgPC0gaGNsdXN0KGRpc3Qoc2NhbGUocG9rZW1vblssLWMoMiwgMywgNCwgMTIsIDEzLCAxNCwgMTUsIDE2LCAxNywgMTgsIDE5LCAyMyldKSkpCnBsb3QodGVzdDEpCnRlc3QyIDwtIGhjbHVzdChkaXN0KHBjUG9rZSR4KSkKcGxvdCh0ZXN0MikKYGBgCgpgYGB7cn0KYWxsLmVxdWFsKGRpc3Qoc2NhbGUocG9rZW1vblssLWMoMiwgMywgNCwgMTIsIDEzLCAxNCwgMTUsIDE2LCAxNywgMTgsIDE5LCAyMyldKSksIGRpc3QocGNQb2tlJHgpLCBjaGVjay5hdHRyaWJ1dGVzID0gRkFMU0UpCmBgYA==
+ + + +
+ + + + + + + +