Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance rewrite, i.e. nflseedR 2.0 #39

Merged
merged 48 commits into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
d3071da
gotta include the environment to be able to evaluate expressions
mrcaseb Dec 9, 2022
16cfa51
nflseedR version and finish time are useful sims params
mrcaseb Dec 9, 2022
adaf81c
avoid future and dplyr in the combining loops
mrcaseb Dec 9, 2022
1e4705d
drop readr dependency and use data.table to load the fake schedule
mrcaseb Dec 9, 2022
bd764fb
start message improvements
mrcaseb Dec 9, 2022
ed5ec00
dev version and dependency updates
mrcaseb Dec 9, 2022
abe9edd
namespace
mrcaseb Dec 9, 2022
72e9618
stop > cli_abort
mrcaseb Dec 9, 2022
259b9b3
reshape args
mrcaseb Dec 9, 2022
fd01220
transition to nflreadr
mrcaseb Dec 9, 2022
263a920
it's nowhere used anymore
mrcaseb Dec 9, 2022
fea1110
replace glue_collapse calls with cli
mrcaseb Dec 9, 2022
3e6876c
drop glue
mrcaseb Dec 9, 2022
ff6521b
drop crayon
mrcaseb Dec 9, 2022
33109f9
drop curl and use nflreadr to load schedules
mrcaseb Dec 9, 2022
bea8661
more message improvements
mrcaseb Dec 9, 2022
0fa683f
avoid pipe
mrcaseb Dec 9, 2022
52dd49a
it's better to test this stuff
mrcaseb Dec 9, 2022
a93f6b4
fix broken tail method in vignette
mrcaseb Dec 9, 2022
e7c0356
news bullets
mrcaseb Dec 9, 2022
e2b352e
news update
mrcaseb Dec 9, 2022
19cdfba
avoid repeated double_games calls
mrcaseb Dec 15, 2022
f4fe2d6
catch data.tables
mrcaseb Dec 15, 2022
17bea19
import data.table, avoid function maksing through dplyr
mrcaseb Dec 15, 2022
9210660
double games lives somewhere else
mrcaseb Dec 15, 2022
3794072
create nflseedR environment
mrcaseb Dec 15, 2022
ca1ac70
drop tidyr increment version heavily because it's major
mrcaseb Dec 15, 2022
d24eebb
redocument
mrcaseb Dec 15, 2022
d0d2208
rewrite double games and h2h in data.table
mrcaseb Dec 15, 2022
7c18262
gotta left_join and drop NAs because of the new structure of h2h
mrcaseb Dec 15, 2022
8d37e23
make process games available in pkg namespace and create a data.table…
mrcaseb Dec 15, 2022
47df85f
rewrite simulation aggregation in data.table
mrcaseb Dec 15, 2022
e4de132
make simulate week a separate function
mrcaseb Dec 15, 2022
0aaeafc
new simulate week
mrcaseb Dec 15, 2022
8f0c189
avoid multiple double_games calls
mrcaseb Dec 15, 2022
abaef2e
deprecate teams argument
mrcaseb Dec 15, 2022
275989b
compute teams from games with data.table
mrcaseb Dec 15, 2022
fcd98cd
incorporate h2h from input or load it from environment
mrcaseb Dec 15, 2022
73ae8b4
run tests from local games file and test h2h
mrcaseb Dec 16, 2022
ab0226d
import lifecycle and redocument
mrcaseb Dec 16, 2022
794ac25
avoid special data.table character
mrcaseb Dec 16, 2022
c4e2ccf
avoid namespace problems in summary method
mrcaseb Dec 16, 2022
a137950
more NSE notes wtf
mrcaseb Dec 16, 2022
695a2d8
Merge branch 'master' into dt-rewrite
mrcaseb Dec 20, 2022
f47d672
code format
mrcaseb Dec 20, 2022
3b43a28
merge master
mrcaseb Dec 20, 2022
aabbb0b
fix conflicts
mrcaseb Dec 20, 2022
2f36a01
Merge branch 'master' into dt-rewrite
mrcaseb Aug 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: nflseedR
Title: Functions to Efficiently Simulate and Evaluate NFL Seasons
Version: 1.2.0
Version: 1.2.0.9001
Authors@R: c(
person("Lee", "Sharpe", role = c("aut", "cph")),
person("Sebastian", "Carl", , "[email protected]", role = c("cre", "aut"))
Expand All @@ -19,13 +19,13 @@ Imports:
furrr,
future,
gsubfn,
lifecycle,
magrittr,
nflreadr (>= 1.1.3),
progressr,
purrr,
rlang,
tibble,
tidyr
tibble
Suggests:
gt,
knitr,
Expand Down
24 changes: 20 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,27 @@ export(fmt_pct_special)
export(load_schedules)
export(load_sharpe_games)
export(simulate_nfl)
import(dplyr)
import(data.table)
import(gsubfn)
importFrom(cli,symbol)
importFrom(data.table,fread)
importFrom(data.table,rbindlist)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,inner_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,row_number)
importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(furrr,furrr_options)
importFrom(furrr,future_map)
importFrom(future,plan)
Expand All @@ -22,4 +38,4 @@ importFrom(purrr,pluck)
importFrom(rlang,inform)
importFrom(stats,rnorm)
importFrom(tibble,is_tibble)
importFrom(tidyr,pivot_longer)
importFrom(tibble,tibble)
24 changes: 16 additions & 8 deletions R/compute_conference_seeds.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,15 @@ compute_conference_seeds <- function(teams,
)
}

if (!is_tibble(teams)) teams <- teams$standings
h2h_flag <- FALSE

if (!is_tibble(teams)){
if(is.null(h2h) & any(names(teams) == "h2h")){
h2h <- teams$h2h
h2h_flag <- TRUE
}
teams <- teams$standings
}

if (!any((names(teams) %in% "div_rank")) | !is.data.frame(teams)) {
cli::cli_abort(
Expand All @@ -57,15 +65,15 @@ compute_conference_seeds <- function(teams,
}

if(is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE){
cli::cli_abort(
"You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
Did you forget to pass the {.val h2h} data frame? It is computed with \\
the function {.fn compute_division_ranks}."
)
# cli::cli_abort(
# "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
# Did you forget to pass the {.val h2h} data frame? It is computed with \\
# the function {.fn compute_division_ranks}."
# )
}
if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE)

teams <- teams %>%
mutate(conf_rank = NA_real_)
teams$conf_rank <- NA_real_

# seed loop
for (seed_num in seq_len(playoff_seeds))
Expand Down
155 changes: 62 additions & 93 deletions R/compute_division_ranks.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,8 @@
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{result}{Equals home score - away score.}
#' }
#' @param teams This parameter is optional. If it is \code{NULL} the function
#' will compute it internally, otherwise it has to be a data frame of all teams
#' contained in the \code{games} data frame repeated for each simulation ID
#' (\code{sim}). The following variables are required:
#' \describe{
#' \item{sim}{A simulation ID. Normally 1 - n simulated seasons.}
#' \item{team}{Team abbreviation of the team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{conf}{Conference abbreviation of the team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{division}{Division of the team (please see
#' \code{\link{divisions}} for valid division names).}
#' }
#' @param teams `r lifecycle::badge("deprecated")` This argument is no longer
#' supported. Instead, the functions computes it internally.
#' @param tiebreaker_depth A single value equal to 1, 2, or 3. The default is 3. The
#' value controls the depth of tiebreakers that shall be applied. The deepest
#' currently implemented tiebreaker is strength of schedule. The following
Expand Down Expand Up @@ -66,7 +55,7 @@
#' options(old)
#' }
compute_division_ranks <- function(games,
teams = NULL,
teams = lifecycle::deprecated(),
tiebreaker_depth = 3,
.debug = FALSE,
h2h = NULL) {
Expand Down Expand Up @@ -94,104 +83,87 @@ compute_division_ranks <- function(games,
)
}

if (is.null(teams)) { # compute teams df from games df
pivot_games <- games %>%
select(sim, home_team, away_team) %>%
pivot_longer(cols = c("home_team", "away_team"), values_to = "team") %>%
select(sim, team)

teams <- bind_rows(
data.frame(team = unique(games$away_team)),
data.frame(team = unique(games$home_team))
) %>%
distinct() %>%
left_join(nflseedR::divisions %>% select(-"sdiv"), by = "team") %>%
left_join(pivot_games, by = "team") %>%
select(sim, everything()) %>%
distinct() %>%
arrange(division, team, sim)
if (lifecycle::is_present(teams)) {
lifecycle::deprecate_warn(
when = "2.0.0",
what = "compute_division_ranks(teams)",# = 'is computed internally')"
details = "The function computes the corresponding data internally."
)
}

# double games
games_doubled <- double_games(games)

# record of each team
report("Calculating team data")
teams <- teams %>%
inner_join(games_doubled, by = c("sim", "team")) %>%
filter(game_type == "REG") %>%
group_by(sim, conf, division, team) %>%
summarize(
games = n(),
wins = sum(outcome),
true_wins = sum(outcome == 1),
losses = sum(outcome == 0),
ties = sum(outcome == 0.5)
) %>%
ungroup()
# record of each team
setDT(games_doubled)
team_records <-
merge(
games_doubled["REG", on = "game_type"],
data.table(nflseedR::divisions)[,sdiv:=NULL],
by = "team",
sort = FALSE
)[,
list(
games = .N,
wins = sum(outcome),
true_wins = sum(outcome == 1),
losses = sum(outcome == 0),
ties = sum(outcome == 0.5),
win_pct = sum(outcome) / .N
), by = c("sim", "conf", "division", "team")]

# add in tiebreaker info
teams <- teams %>%
inner_join(games_doubled, by = c("sim", "team")) %>%
filter(game_type == "REG") %>%
inner_join(teams,
by = c("sim" = "sim", "opp" = "team"),
suffix = c("", "_opp")
) %>%
mutate(
win_pct = wins / games,
div_game = ifelse(division == division_opp, 1, 0),
conf_game = ifelse(conf == conf_opp, 1, 0)
teams <- team_records %>%
merge(
games_doubled["REG", on = "game_type"],
by = c("sim", "team"),
sort = FALSE
) %>%
group_by(sim, conf, division, team, games, wins, true_wins, losses, ties, win_pct) %>%
summarize(
div_pct = ifelse(sum(div_game) == 0, 0.5,
sum(div_game * outcome) / sum(div_game)
),
conf_pct = ifelse(sum(conf_game) == 0, 0.5,
sum(conf_game * outcome) / sum(conf_game)
),
sov = ifelse(sum(outcome == 1) == 0, 0,
sum(wins_opp * (outcome == 1)) /
sum(games_opp * (outcome == 1))
),
sos = sum(wins_opp) / sum(games_opp)
) %>%
ungroup()
merge(
team_records,
by.x = c("sim", "opp"),
by.y = c("sim", "team"),
suffixes = c("", "_opp"),
sort = FALSE
)
teams[, div_game := fifelse(division == division_opp, 1, 0)]
teams[, conf_game := fifelse(conf == conf_opp, 1, 0)]
teams <- teams[, list(
div_pct = fifelse(
sum(div_game) == 0, 0.5,
sum(div_game * outcome) / sum(div_game)
),
conf_pct = fifelse(
sum(conf_game) == 0, 0.5,
sum(conf_game * outcome) / sum(conf_game)
),
sov = fifelse(
sum(outcome == 1) == 0, 0,
sum(wins_opp * (outcome == 1)) / sum(games_opp * (outcome == 1))
),
sos = sum(wins_opp) / sum(games_opp)
), by = c("sim", "conf", "division", "team", "games", "wins",
"true_wins", "losses", "ties", "win_pct")][order(sim, conf, division, team)]

# below only if there are tiebreakers
if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) {
report("Calculating head to head")
h2h <- teams %>%
select(sim, team) %>%
inner_join(teams %>% select(sim, team),
by = "sim", suffix = c("", "_opp")
) %>%
rename(opp = team_opp) %>%
arrange(sim, team, opp) %>%
left_join(games_doubled %>% filter(game_type == "REG"),
by = c("sim", "team", "opp")
) %>%
group_by(sim, team, opp) %>%
summarize(
h2h_games = sum(!is.na(outcome)),
h2h_wins = sum(outcome, na.rm = TRUE),
h2h_played = ifelse(h2h_games > 0, 1, 0)
) %>%
ungroup()
h2h <- compute_h2h(games_doubled)
}

#### FIND DIVISION RANKS ####

# initialize division rank
teams <- teams %>%
mutate(div_rank = NA_real_)
teams$div_rank <- NA_real_

# determine division ranks
dr <- 0
while (any(is.na(teams$div_rank))) {
# increment division rank
dr <- dr + 1
if(dr > 4){
cli::cli_abort("Aborting because division rank computation entered infinite loop!")
}
report("Calculating division rank #{dr}")

# update teams with this rank
Expand All @@ -210,10 +182,7 @@ compute_division_ranks <- function(games,
select(-new_rank)
}

max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE)

teams <- teams %>%
mutate(max_reg_week = max_reg_week)
teams$max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE)

list(
"standings" = tibble::as_tibble(teams),
Expand Down
38 changes: 23 additions & 15 deletions R/compute_draft_order.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,15 @@ compute_draft_order <- function(teams,
)
}

if (!is_tibble(teams)) teams <- teams$standings
h2h_flag <- FALSE

if (!is_tibble(teams)){
if(is.null(h2h) & any(names(teams) == "h2h")){
h2h <- teams$h2h
h2h_flag <- TRUE
}
teams <- teams$standings
}

required_vars <- c(
"sim",
Expand Down Expand Up @@ -74,13 +82,15 @@ compute_draft_order <- function(teams,
}

if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) {
cli::cli_abort(
"You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
Did you forget to pass the {.val h2h} data frame? It is computed with \\
the function {.fn compute_division_ranks}."
)
# cli::cli_abort(
# "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
# Did you forget to pass the {.val h2h} data frame? It is computed with \\
# the function {.fn compute_division_ranks}."
# )
}

if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE)

games <- strip_nflverse_attributes(games)

if (any(is.na(teams$exit))){
Expand Down Expand Up @@ -116,10 +126,12 @@ compute_draft_order <- function(teams,
# playoff weeks
for (week_num in first_playoff_week:week_max) {

# record losers
teams <- games %>%
week_games_doubled <- games %>%
filter(week == week_num) %>%
double_games() %>%
double_games()

# record losers
teams <- week_games_doubled %>%
filter(outcome == 0) %>%
select(sim, team, outcome) %>%
right_join(teams, by = c("sim", "team")) %>%
Expand All @@ -129,9 +141,7 @@ compute_draft_order <- function(teams,
# if super bowl, record winner
if (any(playoff_teams$conf == "SB")) {
# super bowl winner exit is +1 to SB week
teams <- games %>%
filter(week == week_num) %>%
double_games() %>%
teams <- week_games_doubled %>%
filter(outcome == 1) %>%
select(sim, team, outcome) %>%
right_join(teams, by = c("sim", "team")) %>%
Expand All @@ -140,9 +150,7 @@ compute_draft_order <- function(teams,
}

# filter to winners or byes
playoff_teams <- games %>%
filter(week == week_num) %>%
double_games() %>%
playoff_teams <- week_games_doubled %>%
right_join(playoff_teams, by = c("sim", "team")) %>%
filter(is.na(result) | result > 0) %>%
select(sim, conf, seed, team) %>%
Expand Down
Loading
Loading