-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathkickstarter_models.R
More file actions
65 lines (43 loc) · 1.62 KB
/
kickstarter_models.R
File metadata and controls
65 lines (43 loc) · 1.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Load the feature-engineered Kickstarter data and reduce it to complete rows
# without the free-text columns.
# NOTE(review): the working directory is a machine-specific path; the script
# only runs where this folder exists.
setwd("~/Columbia MSBA/Spring 2019/E4650 - Business Analytics/Project - Kickstarter Analysis/raw_data")
df <- read.csv(file = "df_kickstarterFE.csv", header = TRUE, sep = ",")

# Drop the first column (presumably a row index saved with the CSV -- confirm
# against the file).
df <- df[-1]

# Remove every row with an NA in any column; blurb and name are still present
# at this point, so rows missing either of them are dropped as well.
df <- na.omit(df)

# Discard the two text columns, then apply the NA filter once more.
df <- subset(df, select = -c(blurb, name))
df <- na.omit(df)

# Quick look: the first two rows and the number of remaining columns.
head(df, n = 2)
ncol(df)
library("glmnet")
library("leaps")
library("caret")
library("xlsx")
library("plyr")
library("tree")
library("ISLR")
library("pls")
################### Model Setups #############################
# Build the predictor matrix and response, then randomly assign each row to
# the training, test or validation set (expected shares 50% / 25% / 25%).

# Predictors: every column of df except column 9.
# NOTE(review): presumably column 9 is usd_pledged (the response) -- confirm,
# otherwise the response leaks into x.
x <- as.matrix(df[, -9])
y <- df$usd_pledged

# Fix the RNG seed so the split (and any later random step) is identical on
# every run; without it results cannot be reproduced or compared.
set.seed(1)

# Label each row 1 (train), 2 (test) or 3 (validation).
ss <- sample(1:3, size = nrow(df), replace = TRUE, prob = c(0.5, 0.25, 0.25))

# Logical row masks; they index df as well as x and y.
train <- ss == 1
test <- ss == 2
validation <- ss == 3

# Data-frame views of the three partitions.
df_train <- df[train, ]
df_test <- df[test, ]
df_validation <- df[validation, ]
################### Linear Model #############################
################### Logistic Model #############################
### Lasso ###
# L1-penalised (alpha = 1) Gaussian regression of y on x. The penalty is chosen
# by cross-validation on the training rows and the fitted model is scored on
# the validation rows. Despite the ".log" in the object names, this is a
# regression fit (family = "gaussian"), not a logistic one.

# Candidate penalties: 10^-3, 10^-2, ..., 10^3.
grid <- 10^(-3:3)

# Cross-validate over the grid, scoring each penalty by mean squared error.
cv.lasso.log <- cv.glmnet(
  x[train, ], y[train],
  family = "gaussian", alpha = 1, lambda = grid,
  standardize = TRUE, type.measure = "mse"
)
bestlam <- cv.lasso.log$lambda.min

# Refit on the training rows at the selected penalty and show coefficients.
lasso.log.mod <- glmnet(
  x[train, ], y[train],
  family = "gaussian", alpha = 1, lambda = bestlam, standardize = TRUE
)
coef(lasso.log.mod)

# Predicted response for the validation rows.
lasso.log.pred <- predict(
  lasso.log.mod,
  newx = x[validation, ], type = "response"
)

# The response is continuous, so exact-match comparisons (a contingency table
# or a misclassification rate) are meaningless here. Report the residual
# distribution and the validation mean squared error instead, matching the
# measure used during cross-validation.
lasso.log.resid <- y[validation] - as.vector(lasso.log.pred)
summary(lasso.log.resid)
lasso.log.error <- mean(lasso.log.resid^2)
lasso.log.error
################### KNN Model #############################
################### Clustering Model #############################
### Hierarchical ###
### K-Means ###
################### SVM Model #############################