emedema · emedema · Mar 24, 2019 · Mar 25, 2019 · Mar 25, 2019 · Mar 26, 2019
diff --git a/AllModels.Rmd b/AllModels.Rmd
diff --git a/Neural.Rmd b/Neural.Rmd
@@ -0,0 +1,155 @@
+---
+title: "Neural networks, Pokemon"
+output: html_notebook
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+```{r}
+# Load the Pokemon dataset; later chunks in this notebook refer to it as `data`.
+data <- read.csv("pokemon_alopez247.csv", header = TRUE)
+```
+
+# Neural Networks
+```{r}
+library(nnet)
+library(NeuralNetTools)
+library(neuralnet)
+set.seed(53747958)
+# `pokemon` and `poke` are not created anywhere else in this notebook, so build
+# both from the `data` frame read in the first chunk; otherwise knitting in a
+# fresh session stops with "object not found".
+pokemon <- data
+poke <- data.frame(pokemon)
+# Standardise the selected numeric columns of `pokemon`.
+# NOTE(review): the train/test split below is taken from `poke`, which is NOT
+# scaled here (the nnet chunks further down re-scale their own predictors).
+# Confirm whether the split was meant to come from the scaled frame.
+numeric_col <- c(5:12, 16, 20:21)
+pokemon[, numeric_col] <- scale(pokemon[, numeric_col])
+# Random sample of 505 row indices for training; the remaining rows are the
+# test set.
+train <- sample(seq_len(nrow(pokemon)), 505)
+testset <- poke[-train, ]
+trainset <- poke[train, ]
+```
+
+```{r}
+library(gclus)
+library(nnet)
+library(NeuralNetTools)
+set.seed(1995)
+# Training frame: standardised columns 6-11 plus the class label. cbind() on a
+# factor keeps its integer level codes, so isLegendary is stored as 1/2 and is
+# turned back into a factor inside the model formula.
+spoke <- data.frame(
+  cbind(scale(trainset[, 6:11]), isLegendary = factor(trainset$isLegendary))
+)
+# Single-hidden-layer network with 9 units.
+nnpoke <- nnet(factor(isLegendary) ~ ., data = spoke, size = 9)
+# Confusion table on the training data: true label vs fitted class.
+table(trainset$isLegendary, predict(nnpoke, type = "class"))
+plotnet(nnpoke)
+```
+
+```{r}
+# Display the training frame that the isLegendary network was fitted on.
+spoke
+```
+
+```{r}
+# Standardise the test predictors with the TRAINING set's centre and spread so
+# they are on the same scale the network was fitted on; scaling the test set by
+# its own mean/sd would shift the inputs relative to training.
+train_scaled <- scale(trainset[, 6:11])
+spoketest <- cbind(
+  scale(testset[, 6:11],
+        center = attr(train_scaled, "scaled:center"),
+        scale = attr(train_scaled, "scaled:scale")),
+  factor(testset$isLegendary)
+)
+colnames(spoketest)[7] <- "isLegendary"
+spoketest <- data.frame(spoketest)
+# Confusion table on the test data: true label (as level code) vs predicted
+# class.
+table(spoketest$isLegendary, predict(nnpoke, newdata = spoketest, type = "class"))
+```
+
+```{r}
+# Keep only the rows whose hasGender is "True". The condition has to be
+# evaluated on each subset's own column: a bare `hasGender` resolved through
+# attach(data) is the full-length column of the whole dataset, whose row
+# positions do not line up with the rows of trainset/testset.
+trainsetg <- trainset[which(trainset$hasGender == "True"), ]
+testsetg <- testset[which(testset$hasGender == "True"), ]
+# Drop any remaining rows that contain missing values, then display each set.
+trainsetg <- na.omit(trainsetg)
+trainsetg
+testsetg <- na.omit(testsetg)
+testsetg
+```
+
+## Neural Net predicting Pr_Male
+```{r}
+set.seed(906534)
+library(nnet)
+library(NeuralNetTools)
+library(neuralnet)
+# One hidden layer with 3 units, regressing Pr_Male on six covariates.
+nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,
+                    data = trainsetg, hidden = 3, threshold = 0.01)
+#nnmale <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Weight_kg + Height_m,data=trainsetg, hidden=5, threshold=0.01)
+plotnet(nnmale)
+# Select the test covariates by name, in the same order as the formula, rather
+# than by column position: older neuralnet releases feed the columns passed to
+# compute() to the input nodes positionally, so the order must match the model.
+male_covariates <- c("Attack", "Defense", "HP", "Sp_Atk", "Sp_Def", "Speed")
+# Mean squared error on the held-out gendered test set.
+mse <- mean((compute(nnmale, testsetg[, male_covariates])$net.result - testsetg$Pr_Male)^2)
+#mse<-mean((compute(nnmale, testsetg[,c(6:11, 20:21)])$net.result-testsetg$Pr_Male)^2)
+mse
+```
+
+Optimizing number of nodes in first layer
+```{r}
+# Test MSE of a two-hidden-layer network for first-layer sizes 1 to 5, with the
+# second layer fixed at 3 units.
+# Covariates are selected by name in formula order so the columns handed to
+# compute() line up with the model inputs regardless of column position.
+male_covariates <- c("Attack", "Defense", "HP", "Sp_Atk", "Sp_Def", "Speed")
+for (i in 1:5) {
+  nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,
+                        data = trainsetg, hidden = c(i, 3), threshold = 0.01)
+  print(paste("Number of hidden layer variables in first layer:", i))
+  print(paste("MSE: ", mean((compute(nnmaletr, testsetg[, male_covariates])$net.result - testsetg$Pr_Male)^2)))
+}
+```
+
+Optimizing number of nodes in second layer
+```{r}
+# Test MSE of a two-hidden-layer network for second-layer sizes 1 to 5, with
+# the first layer fixed at 4 units.
+# Covariates are selected by name in formula order so the columns handed to
+# compute() line up with the model inputs regardless of column position.
+male_covariates <- c("Attack", "Defense", "HP", "Sp_Atk", "Sp_Def", "Speed")
+for (i in 1:5) {
+  nnmaletr <- neuralnet(Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,
+                        data = trainsetg, hidden = c(4, i), threshold = 0.01)
+  print(paste("Number of hidden layer variables in second layer:", i))
+  print(paste("MSE: ", mean((compute(nnmaletr, testsetg[, male_covariates])$net.result - testsetg$Pr_Male)^2)))
+}
+```
+MSE with 2 hidden layers and 4 and 3 nodes: 0.04011755
+MSE with 1 hidden layer and 3 nodes: 0.03988659
+
+```{r}
+# Linear-regression baseline for Pr_Male, fitted on the gendered training set
+# and scored by mean squared error on the gendered test set.
+linmod <- lm(
+  Pr_Male ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Weight_kg + Height_m,
+  data = trainsetg
+)
+mean((predict(linmod, newdata = testsetg) - testsetg$Pr_Male)^2)
+```
+This is pretty close to our neural net modeled above when we use 1 hidden layer and 3 nodes. 
+
+## Neural net predicting hasGender
+```{r}
+library(gclus)
+library(nnet)
+library(NeuralNetTools)
+set.seed(1995)
+# Training frame: standardised columns 6-11 and 20-22 plus the class label.
+# cbind() keeps the factor's integer level codes, so hasGender is stored as 1/2
+# and is converted back to a factor inside the model formula.
+gpoke <- data.frame(
+  cbind(scale(trainset[, c(6:11, 20:22)]), hasGender = factor(trainset$hasGender))
+)
+# Single-hidden-layer network with 5 units.
+nngend <- nnet(
+  factor(hasGender) ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed +
+    Catch_Rate + Height_m + Weight_kg,
+  data = gpoke, size = 5
+)
+plotnet(nngend)
+# Confusion table on the training data: true label vs fitted class.
+table(trainset$hasGender, predict(nngend, type = "class"))
+```
+It appears that our neural net is effective in predicting hasGender, and also easy to generate an overfitted model for. 
+1 hidden layer with 11 nodes appears to overfit the model, 
+         1   2
+  False 45   0
+  True    0 460
+
+Instead, 1 hidden layer with 5 nodes appears to have a reasonable misclassification rate without totally overfitting. Let's try this on `gpoketest`:
+
+```{r}
+# Build the TEST frame from testset: pairing trainset's predictors with
+# testset's labels combines rows of two different samples (and different row
+# counts), so the resulting table would not evaluate the model.
+# The test predictors are standardised with the training set's centre and
+# spread so they are on the scale the network was fitted on.
+train_scaled_g <- scale(trainset[, c(6:11, 20:22)])
+gpoketest <- cbind(
+  scale(testset[, c(6:11, 20:22)],
+        center = attr(train_scaled_g, "scaled:center"),
+        scale = attr(train_scaled_g, "scaled:scale")),
+  factor(testset$hasGender)
+)
+colnames(gpoketest)[10] <- "hasGender"
+gpoketest <- data.frame(gpoketest)
+# Confusion table on the test data: true label (as level code) vs predicted
+# class.
+table(gpoketest$hasGender, predict(nngend, newdata = gpoketest, type = "class"))
+```
+
+## Neural net predicting Generation
+```{r}
+library(gclus)
+library(nnet)
+library(neuralnet)
+library(NeuralNetTools)
+set.seed(19127395)
+# One hidden layer with 5 units, regressing Generation on six covariates.
+#nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed + Pr_Male,data=trainsetg, hidden=5, threshold=0.01)
+nnGeneration <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,
+                          data = trainsetg, hidden = 5, threshold = 0.01)
+plotnet(nnGeneration)
+# Select the test covariates by name, in the same order as the formula, rather
+# than by column position: older neuralnet releases feed the columns passed to
+# compute() to the input nodes positionally, so the order must match the model.
+generation_covariates <- c("Attack", "Defense", "HP", "Sp_Atk", "Sp_Def", "Speed")
+# Mean squared error on the held-out gendered test set.
+mse <- mean((compute(nnGeneration, testsetg[, generation_covariates])$net.result - testsetg$Generation)^2)
+mse
+```
+
+Optimizing number of nodes in first layer
+```{r}
+# Test MSE of a single-hidden-layer Generation network for layer sizes 1 to 5.
+# Covariates are selected by name in formula order so the columns handed to
+# compute() line up with the model inputs regardless of column position.
+generation_covariates <- c("Attack", "Defense", "HP", "Sp_Atk", "Sp_Def", "Speed")
+for (i in 1:5) {
+  # The layer size must follow the loop index (a constant size refits the same
+  # architecture five times), and the error must be measured against
+  # Generation, the response this network is trained on.
+  nnmaletr <- neuralnet(Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,
+                        data = trainsetg, hidden = i, threshold = 0.01)
+  print(paste("Number of hidden layer variables in first layer:", i))
+  print(paste("MSE: ", mean((compute(nnmaletr, testsetg[, generation_covariates])$net.result - testsetg$Generation)^2)))
+}
+```
+
+```{r}
+# Linear-regression baseline for Generation, scored by mean squared error on
+# the gendered test set.
+linmod <- lm(
+  Generation ~ Attack + Defense + HP + Sp_Atk + Sp_Def + Speed,
+  data = trainsetg
+)
+mean((predict(linmod, newdata = testsetg) - testsetg$Generation)^2)
+```
diff --git a/Neural.nb.html b/Neural.nb.html
diff --git a/PCAWithWH.Rmd b/PCAWithWH.Rmd
@@ -0,0 +1,80 @@
+---
+title: "R Notebook"
+output: html_notebook
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+```{r}
+# Read the Pokemon CSV and keep a data-frame copy under the name `poke`, which
+# the rest of this notebook works on.
+pokemon <- read.csv("pokemon_alopez247.csv")
+poke <- data.frame(pokemon)
+```
+
+```{r}
+# PCA on columns 6-11 and 20-21 of `poke`, with each variable scaled to unit
+# variance; print the variance summary and draw the biplot.
+pcapoke <- prcomp(
+  as.matrix(poke[, c(6:11, 20:21)]),
+  scale. = TRUE
+)
+summary(pcapoke)
+biplot(pcapoke)
+```
+
+
+```{r}
+# Loadings of the first two principal components, rounded to 2 decimals.
+round(pcapoke$rotation[, 1:2], digits = 2)
+```
+
+
+```{r}
+# Four rows with the largest PC1 scores (columns 1-11 and 20-21 only).
+poke[order(pcapoke$x[, 1], decreasing = TRUE)[1:4], c(1:11, 20:21)]
+```
+
+```{r}
+# Four rows with the largest PC2 scores (columns 1-11 and 20-21 only).
+poke[order(pcapoke$x[, 2], decreasing = TRUE)[1:4], c(1:11, 20:21)]
+```
+
+```{r}
+# Twenty rows with the largest PC1 scores, all columns.
+poke[order(pcapoke$x[, 1], decreasing = TRUE)[1:20], ]
+```
+
+Majority are of the legendary type
+
+
+```{r}
+# Predict with the fitted model `leggenlda1` (presumably an LDA fit, given the
+# `$class` component used below) on `poke1`, then cross-tabulate the predicted
+# class against the observed isLegendary values.
+# NOTE(review): neither `leggenlda1` nor `poke1` is created in this notebook as
+# shown - presumably they come from another session or notebook; confirm
+# before knitting.
+lda.pred<-predict(leggenlda1,poke1)
+lda.class<-lda.pred$class
+table(lda.class,poke1$isLegendary)
+```
+
+Yea, this is still a naive classifier, NOT VERY USEFUL!
+
+Let's try a linear model, see if PC1 and PC2 are any good at predicting Pr_Male:
+
+```{r}
+# Regress Pr_Male on the first two columns of `pcgenleg`, then on the first
+# column only, and plot the single-predictor fit.
+# NOTE(review): `pcgenleg` and `poke1` are not created in this notebook as
+# shown - presumably they come from another session; confirm before knitting.
+linmod <- lm(poke1$Pr_Male ~ pcgenleg[, 1] + pcgenleg[, 2])
+summary(linmod)
+linmod <- lm(poke1$Pr_Male ~ pcgenleg[, 1])
+summary(linmod)
+plot(pcgenleg[, 1], poke1$Pr_Male)
+abline(linmod)
+```
+
+Ok, so the second model is statistically significant. So let's try to interpret this now. The intercept on this linear model is 0.55, which is already above 50%. Oh man that graph looks like garbage. I don't think PCA really did anything here...
+
+
+```{r}
+# Regress Catch_Rate on the first column of `pcgenleg` and plot the fit.
+# NOTE(review): `pcgenleg` and `poke1` are not created in this notebook as
+# shown - presumably they come from another session; confirm before knitting.
+# linmod<-lm(poke1$Catch_Rate~pcgenleg[,1]+pcgenleg[,2])
+# summary(linmod)
+linmod <- lm(poke1$Catch_Rate ~ pcgenleg[, 1])
+summary(linmod)
+plot(pcgenleg[, 1], poke1$Catch_Rate)
+abline(linmod)
+```
+
+Ok, now we're talking. So it looks like PC1 is correlated with the harder to catch Pokemon rather than the legendary ones. Probably a good time to start a new file, this is getting messy...
+
+```{r}
+# Reproducible split: 432 randomly sampled rows of `poke` for training, the
+# remaining rows for testing.
+set.seed(1995)
+train <- sample(seq_len(nrow(poke)), 432)
+poke.test <- poke[-train, ]
+poke.train <- poke[train, ]
+```
diff --git a/PCAWithWH.nb.html b/PCAWithWH.nb.html
diff --git a/Principle.Rmd b/Principle.Rmd
@@ -0,0 +1,63 @@
+---
+title: "R Notebook"
+output: html_notebook
+---
+
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+#laurenedits
+
+```{r}
+# Load the Pokemon dataset as `data` for the summary below.
+data <- read.csv("pokemon_alopez247.csv", header = TRUE)
+```
+
+```{r}
+# Per-column summary of the dataset as read.
+summary(data)
+```
+
+
+# Principal Component Analysis
+```{r}
+# Re-read the dataset with strings kept as character vectors. The same relative
+# path as the first chunk is used so the notebook does not also depend on a
+# second copy of the file in the user's home directory.
+pokemon <- read.csv("pokemon_alopez247.csv", stringsAsFactors = FALSE)
+# Remove rows containing NA from the dataset
+cleanPoke <- na.omit(pokemon)
+# PCA on columns 5-8 and 16, with each variable scaled to unit variance.
+pcPoke <- prcomp(as.matrix(cleanPoke[, c(5:8, 16)]), scale. = TRUE)
+summary(pcPoke)
+```
+
+```{r}
+# Biplot of the PCA fit.
+biplot(pcPoke)
+```
+
+```{r}
+# Scatter plot of the scores on the first two principal components.
+plot(pcPoke$x[,1:2])
+```
+
+```{r}
+# Set up an empty PC1/PC2 frame, then draw each observation's label at its
+# score position.
+plot(pcPoke$x[, 1:2], type = "n")
+# The scores come from cleanPoke (rows left after na.omit), so the labels must
+# be taken from cleanPoke too: a vector built from nrow(pokemon) is longer than
+# the set of plotted points whenever na.omit dropped rows. cleanPoke's row
+# names are the original row numbers in `pokemon`.
+text(pcPoke$x[, 1], pcPoke$x[, 2], labels = rownames(cleanPoke))
+```
+
+```{r}
+# Loadings of the first five principal components, rounded to 3 decimals.
+round(pcPoke$rotation[, 1:5], digits = 3)
+```
+
+```{r}
+# First three columns of the four observations with the largest PC1 scores.
+# pcPoke was fitted on cleanPoke, so positions in pcPoke$x index cleanPoke's
+# rows; using them on `pokemon` picks the wrong rows once na.omit has dropped
+# any.
+cleanPoke[order(pcPoke$x[, 1], decreasing = TRUE)[1:4], 1:3]
+# Line plot of the component variances with a horizontal reference line at 1.
+plot(pcPoke, type = "lines")
+abline(a = 1, b = 0, col = "blue", lwd = 3)
+```
+
+```{r}
+# Hierarchical clustering on the standardised `pokemon` columns that remain
+# after dropping columns 2-4, 12-19 and 23.
+test1 <- hclust(dist(scale(pokemon[, -c(2:4, 12:19, 23)])))
+plot(test1)
+# Hierarchical clustering on the principal-component scores.
+test2 <- hclust(dist(pcPoke$x))
+plot(test2)
+```
+
+```{r}
+# Compare the distance matrix of the standardised raw columns with the one
+# computed from the PCA scores.
+# NOTE(review): the first is built from `pokemon` and the second from scores of
+# `cleanPoke` on a different column set - confirm these are meant to be
+# comparable.
+all.equal(
+  dist(scale(pokemon[, -c(2:4, 12:19, 23)])),
+  dist(pcPoke$x),
+  check.attributes = FALSE
+)
+```
diff --git a/Principle.nb.html b/Principle.nb.html
diff --git a/Scatters.Rmd b/Scatters.Rmd
@@ -0,0 +1,58 @@
+---
+title: "R Notebook"
+output: html_notebook
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+
+```{r}
+# Load the Pokemon dataset as `data`.
+data <- read.csv("pokemon_alopez247.csv", header = TRUE)
+```
+
+```{r}
+# Per-column summary of the dataset as read.
+summary(data)
+```
+
+# Basic 3D Scatterplots
+## Evaluating variables Total, Defense, and Pr_Male
+```{r}
+library(scatterplot3d)
+# The dataset is loaded as `data` in this notebook (no `pokemon` object is
+# created here). with() resolves the column names inside `data` for this call
+# only, instead of attach() putting every column on the search path.
+# Vertical-line ("h") 3D scatter with filled points and depth highlighting.
+sp <- with(data, scatterplot3d(Total, Defense, Pr_Male, pch = 16, highlight.3d = TRUE,
+  type = "h", main = "3D Scatterplot"))
+```
+
+3d Scatterplot without line markers
+```{r}
+library(scatterplot3d)
+# The dataset is loaded as `data` in this notebook (no `pokemon` object is
+# created here). with() resolves the column names inside `data` for this call
+# only, instead of attach() putting every column on the search path.
+with(data, scatterplot3d(Total, Defense, Pr_Male, main = "3D Scatterplot"))
+```
+
+# Correlation
+```{r}
+library(gclus)
+# The dataset is loaded as `data` in this notebook (no `pokemon` object is
+# created here); drop rows containing NA before computing correlations.
+cleanPoke <- na.omit(data)
+dta <- cleanPoke[c(5:8, 16)] # data, numbers as column numbers
+dta.r <- abs(cor(dta)) # correlation
+#dta.r[is.na(dta.r)] <- 0.5
+dta.col <- dmat.color(dta.r) # colors
+# Variable ordering derived from the absolute correlations.
+dta.o <- order.single(dta.r)
+cpairs(dta, dta.o, panel.colors = dta.col, gap = .5,
+main = "Variables Ordered and Colored by Correlation")
+```
+It may be worth exploring how the correlation changes depending on what value is assigned to the Pr_Male NA values when using `dta.r[is.na(dta.r)] <- 0.5`. Below is the correlation just between columns 5:8 for additional clarity.
+
+```{r}
+library(gclus)
+# The dataset is loaded as `data` in this notebook (no `pokemon` object is
+# created here).
+#cleanPoke <- na.omit(data)
+dta <- data[c(5:8)] # data, numbers as column numbers
+dta.r <- abs(cor(dta)) # correlation
+dta.col <- dmat.color(dta.r) # colors
+# Variable ordering derived from the absolute correlations.
+dta.o <- order.single(dta.r)
+cpairs(dta, dta.o, panel.colors = dta.col, gap = .5,
+main = "Variables Ordered and Colored by Correlation")
+```
diff --git a/Scatters.nb.html b/Scatters.nb.html