Skip to content

Commit

Permalink
Merge pull request #39 from nflverse/dt-rewrite
Browse files Browse the repository at this point in the history
Performance rewrite, i.e. nflseedR 2.0
  • Loading branch information
mrcaseb authored Aug 7, 2023
2 parents 51641bf + 2f36a01 commit b249dbd
Show file tree
Hide file tree
Showing 21 changed files with 706 additions and 497 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: nflseedR
Title: Functions to Efficiently Simulate and Evaluate NFL Seasons
Version: 1.2.0
Version: 1.2.0.9001
Authors@R: c(
person("Lee", "Sharpe", role = c("aut", "cph")),
person("Sebastian", "Carl", , "[email protected]", role = c("cre", "aut"))
Expand All @@ -19,13 +19,13 @@ Imports:
furrr,
future,
gsubfn,
lifecycle,
magrittr,
nflreadr (>= 1.1.3),
progressr,
purrr,
rlang,
tibble,
tidyr
tibble
Suggests:
gt,
knitr,
Expand Down
24 changes: 20 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,27 @@ export(fmt_pct_special)
export(load_schedules)
export(load_sharpe_games)
export(simulate_nfl)
import(dplyr)
import(data.table)
import(gsubfn)
importFrom(cli,symbol)
importFrom(data.table,fread)
importFrom(data.table,rbindlist)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,inner_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,row_number)
importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(furrr,furrr_options)
importFrom(furrr,future_map)
importFrom(future,plan)
Expand All @@ -22,4 +38,4 @@ importFrom(purrr,pluck)
importFrom(rlang,inform)
importFrom(stats,rnorm)
importFrom(tibble,is_tibble)
importFrom(tidyr,pivot_longer)
importFrom(tibble,tibble)
24 changes: 16 additions & 8 deletions R/compute_conference_seeds.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,15 @@ compute_conference_seeds <- function(teams,
)
}

if (!is_tibble(teams)) teams <- teams$standings
h2h_flag <- FALSE

if (!is_tibble(teams)){
if(is.null(h2h) & any(names(teams) == "h2h")){
h2h <- teams$h2h
h2h_flag <- TRUE
}
teams <- teams$standings
}

if (!any((names(teams) %in% "div_rank")) | !is.data.frame(teams)) {
cli::cli_abort(
Expand All @@ -57,15 +65,15 @@ compute_conference_seeds <- function(teams,
}

if(is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE){
cli::cli_abort(
"You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
Did you forget to pass the {.val h2h} data frame? It is computed with \\
the function {.fn compute_division_ranks}."
)
# cli::cli_abort(
# "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
# Did you forget to pass the {.val h2h} data frame? It is computed with \\
# the function {.fn compute_division_ranks}."
# )
}
if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE)

teams <- teams %>%
mutate(conf_rank = NA_real_)
teams$conf_rank <- NA_real_

# seed loop
for (seed_num in seq_len(playoff_seeds))
Expand Down
155 changes: 62 additions & 93 deletions R/compute_division_ranks.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,8 @@
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{result}{Equals home score - away score.}
#' }
#' @param teams This parameter is optional. If it is \code{NULL} the function
#' will compute it internally, otherwise it has to be a data frame of all teams
#' contained in the \code{games} data frame repeated for each simulation ID
#' (\code{sim}). The following variables are required:
#' \describe{
#' \item{sim}{A simulation ID. Normally 1 - n simulated seasons.}
#' \item{team}{Team abbreviation of the team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{conf}{Conference abbreviation of the team (please see
#' \code{\link{divisions}} for valid team abbreviations).}
#' \item{division}{Division of the team (please see
#' \code{\link{divisions}} for valid division names).}
#' }
#' @param teams `r lifecycle::badge("deprecated")` This argument is no longer
#' supported. Instead, the functions computes it internally.
#' @param tiebreaker_depth A single value equal to 1, 2, or 3. The default is 3. The
#' value controls the depth of tiebreakers that shall be applied. The deepest
#' currently implemented tiebreaker is strength of schedule. The following
Expand Down Expand Up @@ -66,7 +55,7 @@
#' options(old)
#' }
compute_division_ranks <- function(games,
teams = NULL,
teams = lifecycle::deprecated(),
tiebreaker_depth = 3,
.debug = FALSE,
h2h = NULL) {
Expand Down Expand Up @@ -94,104 +83,87 @@ compute_division_ranks <- function(games,
)
}

if (is.null(teams)) { # compute teams df from games df
pivot_games <- games %>%
select(sim, home_team, away_team) %>%
pivot_longer(cols = c("home_team", "away_team"), values_to = "team") %>%
select(sim, team)

teams <- bind_rows(
data.frame(team = unique(games$away_team)),
data.frame(team = unique(games$home_team))
) %>%
distinct() %>%
left_join(nflseedR::divisions %>% select(-"sdiv"), by = "team") %>%
left_join(pivot_games, by = "team") %>%
select(sim, everything()) %>%
distinct() %>%
arrange(division, team, sim)
if (lifecycle::is_present(teams)) {
lifecycle::deprecate_warn(
when = "2.0.0",
what = "compute_division_ranks(teams)",# = 'is computed internally')"
details = "The function computes the corresponding data internally."
)
}

# double games
games_doubled <- double_games(games)

# record of each team
report("Calculating team data")
teams <- teams %>%
inner_join(games_doubled, by = c("sim", "team")) %>%
filter(game_type == "REG") %>%
group_by(sim, conf, division, team) %>%
summarize(
games = n(),
wins = sum(outcome),
true_wins = sum(outcome == 1),
losses = sum(outcome == 0),
ties = sum(outcome == 0.5)
) %>%
ungroup()
# record of each team
setDT(games_doubled)
team_records <-
merge(
games_doubled["REG", on = "game_type"],
data.table(nflseedR::divisions)[,sdiv:=NULL],
by = "team",
sort = FALSE
)[,
list(
games = .N,
wins = sum(outcome),
true_wins = sum(outcome == 1),
losses = sum(outcome == 0),
ties = sum(outcome == 0.5),
win_pct = sum(outcome) / .N
), by = c("sim", "conf", "division", "team")]

# add in tiebreaker info
teams <- teams %>%
inner_join(games_doubled, by = c("sim", "team")) %>%
filter(game_type == "REG") %>%
inner_join(teams,
by = c("sim" = "sim", "opp" = "team"),
suffix = c("", "_opp")
) %>%
mutate(
win_pct = wins / games,
div_game = ifelse(division == division_opp, 1, 0),
conf_game = ifelse(conf == conf_opp, 1, 0)
teams <- team_records %>%
merge(
games_doubled["REG", on = "game_type"],
by = c("sim", "team"),
sort = FALSE
) %>%
group_by(sim, conf, division, team, games, wins, true_wins, losses, ties, win_pct) %>%
summarize(
div_pct = ifelse(sum(div_game) == 0, 0.5,
sum(div_game * outcome) / sum(div_game)
),
conf_pct = ifelse(sum(conf_game) == 0, 0.5,
sum(conf_game * outcome) / sum(conf_game)
),
sov = ifelse(sum(outcome == 1) == 0, 0,
sum(wins_opp * (outcome == 1)) /
sum(games_opp * (outcome == 1))
),
sos = sum(wins_opp) / sum(games_opp)
) %>%
ungroup()
merge(
team_records,
by.x = c("sim", "opp"),
by.y = c("sim", "team"),
suffixes = c("", "_opp"),
sort = FALSE
)
teams[, div_game := fifelse(division == division_opp, 1, 0)]
teams[, conf_game := fifelse(conf == conf_opp, 1, 0)]
teams <- teams[, list(
div_pct = fifelse(
sum(div_game) == 0, 0.5,
sum(div_game * outcome) / sum(div_game)
),
conf_pct = fifelse(
sum(conf_game) == 0, 0.5,
sum(conf_game * outcome) / sum(conf_game)
),
sov = fifelse(
sum(outcome == 1) == 0, 0,
sum(wins_opp * (outcome == 1)) / sum(games_opp * (outcome == 1))
),
sos = sum(wins_opp) / sum(games_opp)
), by = c("sim", "conf", "division", "team", "games", "wins",
"true_wins", "losses", "ties", "win_pct")][order(sim, conf, division, team)]

# below only if there are tiebreakers
if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) {
report("Calculating head to head")
h2h <- teams %>%
select(sim, team) %>%
inner_join(teams %>% select(sim, team),
by = "sim", suffix = c("", "_opp")
) %>%
rename(opp = team_opp) %>%
arrange(sim, team, opp) %>%
left_join(games_doubled %>% filter(game_type == "REG"),
by = c("sim", "team", "opp")
) %>%
group_by(sim, team, opp) %>%
summarize(
h2h_games = sum(!is.na(outcome)),
h2h_wins = sum(outcome, na.rm = TRUE),
h2h_played = ifelse(h2h_games > 0, 1, 0)
) %>%
ungroup()
h2h <- compute_h2h(games_doubled)
}

#### FIND DIVISION RANKS ####

# initialize division rank
teams <- teams %>%
mutate(div_rank = NA_real_)
teams$div_rank <- NA_real_

# determine division ranks
dr <- 0
while (any(is.na(teams$div_rank))) {
# increment division rank
dr <- dr + 1
if(dr > 4){
cli::cli_abort("Aborting because division rank computation entered infinite loop!")
}
report("Calculating division rank #{dr}")

# update teams with this rank
Expand All @@ -210,10 +182,7 @@ compute_division_ranks <- function(games,
select(-new_rank)
}

max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE)

teams <- teams %>%
mutate(max_reg_week = max_reg_week)
teams$max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE)

list(
"standings" = tibble::as_tibble(teams),
Expand Down
38 changes: 23 additions & 15 deletions R/compute_draft_order.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,15 @@ compute_draft_order <- function(teams,
)
}

if (!is_tibble(teams)) teams <- teams$standings
h2h_flag <- FALSE

if (!is_tibble(teams)){
if(is.null(h2h) & any(names(teams) == "h2h")){
h2h <- teams$h2h
h2h_flag <- TRUE
}
teams <- teams$standings
}

required_vars <- c(
"sim",
Expand Down Expand Up @@ -74,13 +82,15 @@ compute_draft_order <- function(teams,
}

if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) {
cli::cli_abort(
"You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
Did you forget to pass the {.val h2h} data frame? It is computed with \\
the function {.fn compute_division_ranks}."
)
# cli::cli_abort(
# "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\
# Did you forget to pass the {.val h2h} data frame? It is computed with \\
# the function {.fn compute_division_ranks}."
# )
}

if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE)

games <- strip_nflverse_attributes(games)

if (any(is.na(teams$exit))){
Expand Down Expand Up @@ -116,10 +126,12 @@ compute_draft_order <- function(teams,
# playoff weeks
for (week_num in first_playoff_week:week_max) {

# record losers
teams <- games %>%
week_games_doubled <- games %>%
filter(week == week_num) %>%
double_games() %>%
double_games()

# record losers
teams <- week_games_doubled %>%
filter(outcome == 0) %>%
select(sim, team, outcome) %>%
right_join(teams, by = c("sim", "team")) %>%
Expand All @@ -129,9 +141,7 @@ compute_draft_order <- function(teams,
# if super bowl, record winner
if (any(playoff_teams$conf == "SB")) {
# super bowl winner exit is +1 to SB week
teams <- games %>%
filter(week == week_num) %>%
double_games() %>%
teams <- week_games_doubled %>%
filter(outcome == 1) %>%
select(sim, team, outcome) %>%
right_join(teams, by = c("sim", "team")) %>%
Expand All @@ -140,9 +150,7 @@ compute_draft_order <- function(teams,
}

# filter to winners or byes
playoff_teams <- games %>%
filter(week == week_num) %>%
double_games() %>%
playoff_teams <- week_games_doubled %>%
right_join(playoff_teams, by = c("sim", "team")) %>%
filter(is.na(result) | result > 0) %>%
select(sim, conf, seed, team) %>%
Expand Down
Loading

0 comments on commit b249dbd

Please sign in to comment.