-
Notifications
You must be signed in to change notification settings - Fork 0
/
2-models.R
63 lines (49 loc) · 1.96 KB
/
2-models.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
library(tidyverse)
library(broom) # To tidy model data
library(magrittr) # Includes exposition pipe %$% and tee pipe %T>%
library(printr) # Printing tables to console is neater
# Load the two raw inputs from serialized R objects. The paths are relative,
# so the script has to be run from the project root.
# Both tables are joined on their shared `id` column further down.
players <- readRDS("data/raw/players.Rds")
# NOTE(review): this object has the same name as base::rank(). Calls written
# as rank(...) still resolve to the function, but the name is easy to misread.
rank <- readRDS("data/raw/rank.Rds")
# ------------------------------------------------------------------------------
# Organize the data into a tidy, model-ready data frame
# Replace the numeric playlist id stored in `game_type` with a readable label.
# The id is used as a position in `game_type_labels`: positions 1-9 are NA
# placeholders, so only ids 10-13 receive a label and every other id ends up
# as NA. The result is a character vector (not a factor); that is enough for
# the grouping done later in the script.
game_type_labels <- c(
  rep(NA_character_, 9),
  "Ranked Duel", "Ranked Doubles", "Ranked Solo Standard", "Ranked Standard"
)
game_type_id <- rank$game_type
# A factor has to be read through its printed value: as.integer() on a factor
# returns the internal level code, not the id that is shown.
if (is.factor(game_type_id)) {
  game_type_id <- as.character(game_type_id)
}
game_type_id <- as.integer(game_type_id)
# Ids that are not valid positions are turned into NA explicitly. A zero index
# would silently shorten the result and a negative one would make the lookup
# fail, so neither may reach the subsetting step.
game_type_id[!game_type_id %in% seq_along(game_type_labels)] <- NA_integer_
rank$game_type <- game_type_labels[game_type_id]
# Build the long-format table used for modelling:
#   1. keep the season 5 rows of `rank` and attach the player columns by `id`;
#   2. keep the response (mmr), the identifying columns and the seven stats;
#   3. stack the stat columns into `stat` / `stat_value` pairs, leaving
#      mmr, game_type, id and name untouched on every row.
model_data <- filter(rank, season == 5) %>%
  inner_join(players, by = "id") %>%
  select(
    mmr, game_type, id, name,
    matches_played, shots, saves, mvps, goals, assists, wins
  ) %>%
  gather(key = stat, value = stat_value, -c(mmr, game_type, id, name))
# Hold out a random 20% of the rows so the models can be checked against
# observations they were not fitted on. This is a single train/test split,
# not k-fold cross-validation.
# NOTE(review): no seed is set in this script, so the split differs from run
# to run; call set.seed() beforehand if the results must be reproducible.
n <- nrow(model_data)
# seq_len() stays correct for an empty table, and floor() spells out the
# truncation that sample() would otherwise apply to a fractional size.
s <- sample(seq_len(n), floor(n * 0.8))
train <- model_data[s, ]
# Take the complement by position instead of `-s`: with an empty `s`,
# `model_data[-s, ]` selects no rows at all rather than all of them.
test <- model_data[setdiff(seq_len(n), s), ]
# Fit one simple linear regression of mmr on stat_value for every
# (game_type, stat) combination found in the training rows.
# The named arguments to do() become list-columns, one row per group:
#   mod      - the fitted lm object
#   original - the slice of `train` that the model was fitted on
mmr_models <- do(
  group_by(train, game_type, stat),
  mod = lm(mmr ~ stat_value, data = .),
  original = (.)
)
# ------------------------------------------------------------------------------
# Use broom to investigate models
# Linear models output stats for three observational units:
# 1. Model
# 2. Coefficient
# 3. Sample
# Following the "tidy" methodology, we will need
# to create three tables to hold the data
## Helper: run one broom verb (glance, tidy, ...) on every fitted model and
## stack the results, keeping game_type and stat as identifying columns.
## NOTE(review): the rowwise form `glance(mmr_models, mod)` is not available
## in current broom releases; mapping over the list-column works on old and
## new versions alike. The result is an ungrouped tibble.
tidy_each_model <- function(models, verb) {
  models %>%
    ungroup() %>%
    mutate(result = map(mod, verb)) %>%
    select(game_type, stat, result) %>%
    unnest(result)
}
## Summary statistics (model-level stats):
mmr_ss <- tidy_each_model(mmr_models, glance) %T>% print()
## Coefficients (component-level stats):
## - All coefficients are significant. This is expected,
## because all stats increase with time
mmr_coef <- tidy_each_model(mmr_models, tidy) %T>% print()
## Predictions (observation-level stats):
## Every model was fitted on a single (game_type, stat) slice, so it may only
## be scored on the held-out rows of that same slice. Passing the whole test
## set to each model would feed it stat_value readings of other stats.
test_by_model <- test %>%
  group_by(game_type, stat) %>%
  do(newdata = (.)) %>%
  ungroup()
mmr_pred <- mmr_models %>%
  ungroup() %>%
  select(game_type, stat, mod) %>%
  # Slices without held-out rows, or without a fitted model, drop out here.
  inner_join(test_by_model, by = c("game_type", "stat")) %$%
  map2(mod, newdata, function(model, held_out) {
    augment(model, newdata = held_out)
  }) %>%
  bind_rows() %T>%
  print()