diff --git a/DESCRIPTION b/DESCRIPTION index 0d03cdb..7d2951b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: nflseedR Title: Functions to Efficiently Simulate and Evaluate NFL Seasons -Version: 1.2.0 +Version: 1.2.0.9001 Authors@R: c( person("Lee", "Sharpe", role = c("aut", "cph")), person("Sebastian", "Carl", , "mrcaseb@gmail.com", role = c("cre", "aut")) @@ -19,13 +19,13 @@ Imports: furrr, future, gsubfn, + lifecycle, magrittr, nflreadr (>= 1.1.3), progressr, purrr, rlang, - tibble, - tidyr + tibble Suggests: gt, knitr, diff --git a/NAMESPACE b/NAMESPACE index 440a7d0..548daa9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,11 +8,27 @@ export(fmt_pct_special) export(load_schedules) export(load_sharpe_games) export(simulate_nfl) -import(dplyr) +import(data.table) import(gsubfn) importFrom(cli,symbol) -importFrom(data.table,fread) -importFrom(data.table,rbindlist) +importFrom(dplyr,arrange) +importFrom(dplyr,bind_rows) +importFrom(dplyr,case_when) +importFrom(dplyr,everything) +importFrom(dplyr,filter) +importFrom(dplyr,group_by) +importFrom(dplyr,inner_join) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,n) +importFrom(dplyr,pull) +importFrom(dplyr,rename) +importFrom(dplyr,right_join) +importFrom(dplyr,row_number) +importFrom(dplyr,select) +importFrom(dplyr,slice) +importFrom(dplyr,summarize) +importFrom(dplyr,ungroup) importFrom(furrr,furrr_options) importFrom(furrr,future_map) importFrom(future,plan) @@ -22,4 +38,4 @@ importFrom(purrr,pluck) importFrom(rlang,inform) importFrom(stats,rnorm) importFrom(tibble,is_tibble) -importFrom(tidyr,pivot_longer) +importFrom(tibble,tibble) diff --git a/R/compute_conference_seeds.R b/R/compute_conference_seeds.R index 2992177..a97c9aa 100644 --- a/R/compute_conference_seeds.R +++ b/R/compute_conference_seeds.R @@ -47,7 +47,15 @@ compute_conference_seeds <- function(teams, ) } - if (!is_tibble(teams)) teams <- teams$standings + h2h_flag <- FALSE + + if (!is_tibble(teams)){ + if(is.null(h2h) & any(names(teams) == "h2h")){ + h2h <- teams$h2h + h2h_flag <- TRUE + } + teams <- teams$standings + } if (!any((names(teams) %in% "div_rank")) | !is.data.frame(teams)) { cli::cli_abort( @@ -57,15 +65,15 @@ compute_conference_seeds <- function(teams, } if(is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE){ - cli::cli_abort( - "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ - Did you forget to pass the {.val h2h} data frame? It is computed with \\ - the function {.fn compute_division_ranks}." - ) + # cli::cli_abort( + # "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ + # Did you forget to pass the {.val h2h} data frame? It is computed with \\ + # the function {.fn compute_division_ranks}." + # ) } + if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE) - teams <- teams %>% - mutate(conf_rank = NA_real_) + teams$conf_rank <- NA_real_ # seed loop for (seed_num in seq_len(playoff_seeds)) diff --git a/R/compute_division_ranks.R b/R/compute_division_ranks.R index 0c07947..05680da 100644 --- a/R/compute_division_ranks.R +++ b/R/compute_division_ranks.R @@ -12,19 +12,8 @@ #' \code{\link{divisions}} for valid team abbreviations).} #' \item{result}{Equals home score - away score.} #' } -#' @param teams This parameter is optional. If it is \code{NULL} the function -#' will compute it internally, otherwise it has to be a data frame of all teams -#' contained in the \code{games} data frame repeated for each simulation ID -#' (\code{sim}). The following variables are required: -#' \describe{ -#' \item{sim}{A simulation ID. Normally 1 - n simulated seasons.} -#' \item{team}{Team abbreviation of the team (please see -#' \code{\link{divisions}} for valid team abbreviations).} -#' \item{conf}{Conference abbreviation of the team (please see -#' \code{\link{divisions}} for valid team abbreviations).} -#' \item{division}{Division of the team (please see -#' \code{\link{divisions}} for valid division names).} -#' } +#' @param teams `r lifecycle::badge("deprecated")` This argument is no longer +#' supported. Instead, the functions computes it internally. #' @param tiebreaker_depth A single value equal to 1, 2, or 3. The default is 3. The #' value controls the depth of tiebreakers that shall be applied. The deepest #' currently implemented tiebreaker is strength of schedule. The following @@ -66,7 +55,7 @@ #' options(old) #' } compute_division_ranks <- function(games, - teams = NULL, + teams = lifecycle::deprecated(), tiebreaker_depth = 3, .debug = FALSE, h2h = NULL) { @@ -94,104 +83,87 @@ compute_division_ranks <- function(games, ) } - if (is.null(teams)) { # compute teams df from games df - pivot_games <- games %>% - select(sim, home_team, away_team) %>% - pivot_longer(cols = c("home_team", "away_team"), values_to = "team") %>% - select(sim, team) - - teams <- bind_rows( - data.frame(team = unique(games$away_team)), - data.frame(team = unique(games$home_team)) - ) %>% - distinct() %>% - left_join(nflseedR::divisions %>% select(-"sdiv"), by = "team") %>% - left_join(pivot_games, by = "team") %>% - select(sim, everything()) %>% - distinct() %>% - arrange(division, team, sim) + if (lifecycle::is_present(teams)) { + lifecycle::deprecate_warn( + when = "2.0.0", + what = "compute_division_ranks(teams)",# = 'is computed internally')" + details = "The function computes the corresponding data internally." + ) } # double games games_doubled <- double_games(games) - # record of each team report("Calculating team data") - teams <- teams %>% - inner_join(games_doubled, by = c("sim", "team")) %>% - filter(game_type == "REG") %>% - group_by(sim, conf, division, team) %>% - summarize( - games = n(), - wins = sum(outcome), - true_wins = sum(outcome == 1), - losses = sum(outcome == 0), - ties = sum(outcome == 0.5) - ) %>% - ungroup() + # record of each team + setDT(games_doubled) + team_records <- + merge( + games_doubled["REG", on = "game_type"], + data.table(nflseedR::divisions)[,sdiv:=NULL], + by = "team", + sort = FALSE + )[, + list( + games = .N, + wins = sum(outcome), + true_wins = sum(outcome == 1), + losses = sum(outcome == 0), + ties = sum(outcome == 0.5), + win_pct = sum(outcome) / .N + ), by = c("sim", "conf", "division", "team")] # add in tiebreaker info - teams <- teams %>% - inner_join(games_doubled, by = c("sim", "team")) %>% - filter(game_type == "REG") %>% - inner_join(teams, - by = c("sim" = "sim", "opp" = "team"), - suffix = c("", "_opp") - ) %>% - mutate( - win_pct = wins / games, - div_game = ifelse(division == division_opp, 1, 0), - conf_game = ifelse(conf == conf_opp, 1, 0) + teams <- team_records %>% + merge( + games_doubled["REG", on = "game_type"], + by = c("sim", "team"), + sort = FALSE ) %>% - group_by(sim, conf, division, team, games, wins, true_wins, losses, ties, win_pct) %>% - summarize( - div_pct = ifelse(sum(div_game) == 0, 0.5, - sum(div_game * outcome) / sum(div_game) - ), - conf_pct = ifelse(sum(conf_game) == 0, 0.5, - sum(conf_game * outcome) / sum(conf_game) - ), - sov = ifelse(sum(outcome == 1) == 0, 0, - sum(wins_opp * (outcome == 1)) / - sum(games_opp * (outcome == 1)) - ), - sos = sum(wins_opp) / sum(games_opp) - ) %>% - ungroup() + merge( + team_records, + by.x = c("sim", "opp"), + by.y = c("sim", "team"), + suffixes = c("", "_opp"), + sort = FALSE + ) + teams[, div_game := fifelse(division == division_opp, 1, 0)] + teams[, conf_game := fifelse(conf == conf_opp, 1, 0)] + teams <- teams[, list( + div_pct = fifelse( + sum(div_game) == 0, 0.5, + sum(div_game * outcome) / sum(div_game) + ), + conf_pct = fifelse( + sum(conf_game) == 0, 0.5, + sum(conf_game * outcome) / sum(conf_game) + ), + sov = fifelse( + sum(outcome == 1) == 0, 0, + sum(wins_opp * (outcome == 1)) / sum(games_opp * (outcome == 1)) + ), + sos = sum(wins_opp) / sum(games_opp) + ), by = c("sim", "conf", "division", "team", "games", "wins", + "true_wins", "losses", "ties", "win_pct")][order(sim, conf, division, team)] # below only if there are tiebreakers if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) { - report("Calculating head to head") - h2h <- teams %>% - select(sim, team) %>% - inner_join(teams %>% select(sim, team), - by = "sim", suffix = c("", "_opp") - ) %>% - rename(opp = team_opp) %>% - arrange(sim, team, opp) %>% - left_join(games_doubled %>% filter(game_type == "REG"), - by = c("sim", "team", "opp") - ) %>% - group_by(sim, team, opp) %>% - summarize( - h2h_games = sum(!is.na(outcome)), - h2h_wins = sum(outcome, na.rm = TRUE), - h2h_played = ifelse(h2h_games > 0, 1, 0) - ) %>% - ungroup() + h2h <- compute_h2h(games_doubled) } #### FIND DIVISION RANKS #### # initialize division rank - teams <- teams %>% - mutate(div_rank = NA_real_) + teams$div_rank <- NA_real_ # determine division ranks dr <- 0 while (any(is.na(teams$div_rank))) { # increment division rank dr <- dr + 1 + if(dr > 4){ + cli::cli_abort("Aborting because division rank computation entered infinite loop!") + } report("Calculating division rank #{dr}") # update teams with this rank @@ -210,10 +182,7 @@ compute_division_ranks <- function(games, select(-new_rank) } - max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE) - - teams <- teams %>% - mutate(max_reg_week = max_reg_week) + teams$max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE) list( "standings" = tibble::as_tibble(teams), diff --git a/R/compute_draft_order.R b/R/compute_draft_order.R index a865ebe..a0dd079 100644 --- a/R/compute_draft_order.R +++ b/R/compute_draft_order.R @@ -43,7 +43,15 @@ compute_draft_order <- function(teams, ) } - if (!is_tibble(teams)) teams <- teams$standings + h2h_flag <- FALSE + + if (!is_tibble(teams)){ + if(is.null(h2h) & any(names(teams) == "h2h")){ + h2h <- teams$h2h + h2h_flag <- TRUE + } + teams <- teams$standings + } required_vars <- c( "sim", @@ -74,13 +82,15 @@ compute_draft_order <- function(teams, } if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) { - cli::cli_abort( - "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ - Did you forget to pass the {.val h2h} data frame? It is computed with \\ - the function {.fn compute_division_ranks}." - ) + # cli::cli_abort( + # "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ + # Did you forget to pass the {.val h2h} data frame? It is computed with \\ + # the function {.fn compute_division_ranks}." + # ) } + if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE) + games <- strip_nflverse_attributes(games) if (any(is.na(teams$exit))){ @@ -116,10 +126,12 @@ compute_draft_order <- function(teams, # playoff weeks for (week_num in first_playoff_week:week_max) { - # record losers - teams <- games %>% + week_games_doubled <- games %>% filter(week == week_num) %>% - double_games() %>% + double_games() + + # record losers + teams <- week_games_doubled %>% filter(outcome == 0) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -129,9 +141,7 @@ compute_draft_order <- function(teams, # if super bowl, record winner if (any(playoff_teams$conf == "SB")) { # super bowl winner exit is +1 to SB week - teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + teams <- week_games_doubled %>% filter(outcome == 1) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -140,9 +150,7 @@ compute_draft_order <- function(teams, } # filter to winners or byes - playoff_teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + playoff_teams <- week_games_doubled %>% right_join(playoff_teams, by = c("sim", "team")) %>% filter(is.na(result) | result > 0) %>% select(sim, conf, seed, team) %>% diff --git a/R/conference_tiebreaker.R b/R/conference_tiebreaker.R index 69b2ac7..c93dce0 100644 --- a/R/conference_tiebreaker.R +++ b/R/conference_tiebreaker.R @@ -47,16 +47,16 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { ) %>% rename(opp = team_opp) %>% filter(team != opp) %>% - inner_join(h2h, by = c("sim", "team", "opp")) %>% + left_join(h2h, by = c("sim", "team", "opp")) %>% group_by( sim, conf, division, div_winner, div_best_left, team, conf_pct, sov, sos, tied_teams ) %>% summarize(value = case_when( - max(tied_teams) < min_tied ~ NA_real_, # not enough tied teams - sum(h2h_games) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team - sum(h2h_wins) == 0 ~ -1, # got swept by other tied teams - sum(h2h_wins) == (max(tied_teams) - 1) ~ 1, # swept other tied teams + max(tied_teams, na.rm = TRUE) < min_tied ~ NA_real_, # not enough tied teams + sum(h2h_games, na.rm = TRUE) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team + sum(h2h_wins, na.rm = TRUE) == 0 ~ -1, # got swept by other tied teams + sum(h2h_wins, na.rm = TRUE) == (max(tied_teams) - 1) ~ 1, # swept other tied teams TRUE ~ 0, # split vs. other tied teams )) %>% ungroup() %>% @@ -81,7 +81,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { # common games if (isTRUE(.debug)) report("CONF ({min_tied}): Common Record") list[u, tied] <- tied %>% - inner_join(h2h, by = c("sim", "team")) %>% + left_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% group_by(sim, conf, opp) %>% mutate(common = (tied_teams == n())) %>% diff --git a/R/division_tiebreaker.R b/R/division_tiebreaker.R index b873061..d26788c 100644 --- a/R/division_tiebreaker.R +++ b/R/division_tiebreaker.R @@ -31,12 +31,12 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { ) %>% rename(opp = team_opp) %>% filter(team != opp) %>% - inner_join(h2h, by = c("sim", "team", "opp")) %>% + left_join(h2h, by = c("sim", "team", "opp")) %>% group_by(sim, division, team, div_pct, conf_pct, sov, sos, tied_teams) %>% summarize(value = case_when( - max(tied_teams) < min_tied ~ NA_real_, - sum(h2h_games) == 0 ~ 0.5, - TRUE ~ sum(h2h_wins) / sum(h2h_games) + max(tied_teams, na.rm = TRUE) < min_tied ~ NA_real_, + sum(h2h_games, na.rm = TRUE) == 0 ~ 0.5, + TRUE ~ sum(h2h_wins, na.rm = TRUE) / sum(h2h_games, na.rm = TRUE) )) %>% ungroup() %>% process_div_ties(u, r) @@ -60,7 +60,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { # common games if (isTRUE(.debug)) report("DIV ({min_tied}): Common Record") list[u, tied] <- tied %>% - inner_join(h2h, by = c("sim", "team")) %>% + left_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% group_by(sim, division, opp) %>% mutate(common = (tied_teams == n())) %>% diff --git a/R/draft_tiebreaker.R b/R/draft_tiebreaker.R index 001b83c..218d714 100644 --- a/R/draft_tiebreaker.R +++ b/R/draft_tiebreaker.R @@ -64,12 +64,12 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { inner_join(tied %>% select(sim, team), by = c("sim"), suffix = c("", "_opp")) %>% rename(opp = team_opp) %>% filter(team != opp) %>% - inner_join(h2h, by = c("sim", "team", "opp")) %>% + left_join(h2h, by = c("sim", "team", "opp")) %>% group_by(sim, team, sov, tied_teams) %>% summarize(value = case_when( - sum(h2h_games) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team - sum(h2h_wins) == 0 ~ -1, # got swept by other tied teams - sum(h2h_wins) == (max(tied_teams) - 1) ~ 1, # swept other tied teams + sum(h2h_games, na.rm = TRUE) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team + sum(h2h_wins, na.rm = TRUE) == 0 ~ -1, # got swept by other tied teams + sum(h2h_wins, na.rm = TRUE) == (max(tied_teams) - 1) ~ 1, # swept other tied teams TRUE ~ 0, # won some, lost others )) %>% ungroup() %>% @@ -82,7 +82,7 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { # common games if (isTRUE(.debug)) report("DRAFT: Common Record") list[u, tied] <- tied %>% - inner_join(h2h, by = c("sim", "team")) %>% + left_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% group_by(sim, opp) %>% mutate(common = (tied_teams == n())) %>% diff --git a/R/environment_data.R b/R/environment_data.R new file mode 100644 index 0000000..3a3645e --- /dev/null +++ b/R/environment_data.R @@ -0,0 +1,63 @@ +# this makes it so there's two rows per game (one/team) +#' @import data.table +double_games <- function(g){ + setDT(g) + away <- g[,list(sim, game_type, week, team = away_team, opp = home_team, result = -result)] + home <- g[,list(sim, game_type, week, team = home_team, opp = away_team, result)] + out <- rbind(away, home) + out[, outcome := fcase( + is.na(result), NA_real_, + result > 0, 1, + result < 0, 0, + default = 0.5 + )] + tibble::as_tibble(out) +} + +#' @import data.table +compute_h2h <- function(gd, update = TRUE){ + if (!".h2h" %in% ls(envir = .nflseedR_env, all.names = !update)){ + report("Calculating head to head") + setDT(gd, key = c("sim", "team", "opp")) + out <- gd[game_type == "REG", list( + h2h_games = .N, + h2h_wins = sum(outcome), + h2h_played = fifelse(.N > 0, 1, 0) + ), by = c("sim", "team", "opp")] + assign(".h2h", out, envir = .nflseedR_env) + } + get(".h2h", envir = .nflseedR_env) +} + + +# Unused data.frame variants ---------------------------------------------- + +# double_games.data.frame <- function(g) { +# g1 <- g %>% +# select(sim, game_type, week, away_team, home_team, result) %>% +# rename(team = away_team, opp = home_team) %>% +# mutate(result = -1 * result) +# g2 <- g %>% +# select(sim, game_type, week, away_team, home_team, result) %>% +# rename(team = home_team, opp = away_team) +# g <- bind_rows(g1, g2) %>% +# mutate(outcome = case_when( +# result > 0 ~ 1, +# result < 0 ~ 0, +# result == 0 ~ 0.5, +# TRUE ~ NA_real_ +# )) +# g +# } + +# compute_h2h.data.frame <- function(gd) { +# gd %>% +# filter(game_type == "REG") %>% +# group_by(sim, team, opp) %>% +# summarize( +# h2h_games = n(), +# h2h_wins = sum(outcome, na.rm = TRUE), +# h2h_played = ifelse(h2h_games > 0, 1, 0) +# ) %>% +# ungroup() +# } diff --git a/R/nflseedR-package.R b/R/nflseedR-package.R index 88b1831..b4121b4 100644 --- a/R/nflseedR-package.R +++ b/R/nflseedR-package.R @@ -4,10 +4,12 @@ # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start -#' @import dplyr +#' @import data.table #' @import gsubfn +#' @importFrom dplyr select mutate rename left_join inner_join n arrange group_by +#' @importFrom dplyr ungroup filter case_when summarize pull right_join everything +#' @importFrom dplyr slice bind_rows row_number #' @importFrom cli symbol -#' @importFrom data.table rbindlist fread #' @importFrom furrr future_map furrr_options #' @importFrom future plan #' @importFrom magrittr %>% @@ -15,7 +17,6 @@ #' @importFrom purrr pluck #' @importFrom rlang inform #' @importFrom stats rnorm -#' @importFrom tibble is_tibble -#' @importFrom tidyr pivot_longer +#' @importFrom tibble is_tibble tibble ## usethis namespace: end NULL diff --git a/R/silence_tidy_eval_notes.R b/R/silence_tidy_eval_notes.R index 349e9e9..b16eb0c 100644 --- a/R/silence_tidy_eval_notes.R +++ b/R/silence_tidy_eval_notes.R @@ -73,4 +73,7 @@ conf_rank <- team_logo_espn <- nfc_team <- losses <- + sdiv <- + count <- + location <- NULL diff --git a/R/sim_helper.R b/R/sim_helper.R index 757b108..86de852 100644 --- a/R/sim_helper.R +++ b/R/sim_helper.R @@ -42,124 +42,17 @@ simulate_round <- function(sim_round, stop("`playoff_seeds` must be between 1 and ",max_seeds) } - # function to simulate a week - simulate_week <- function(teams, games, week_num, test_week, ...) { - - # recall old data for comparison - old_teams <- teams - old_games <- games %>% - rename(.old_result = result) - - # estimate and simulate games - return_value <- process_games(teams, games, week_num, ...) - - # testing? - if (!is.null(test_week) && week_num == test_week) { - return(return_value) - } - - # did we get the right data back? - problems <- c() - if (typeof(return_value) != "list") { - problems[length(problems) + 1] <- "the returned value was not a list" - } else { - if (!("teams" %in% names(return_value))) { - problems[length(problems) + 1] <- "`teams` was not in the returned list" - } else { - teams <- return_value$teams - if (!is_tibble(teams)) { - problems[length(problems) + 1] <- "`teams` was not a tibble" - } else { - if (nrow(teams) != nrow(old_teams)) { - problems[length(problems) + 1] <- paste( - "`teams` changed from", nrow(old_teams), "to", - nrow(teams), "rows", - collapse = " " - ) - } - for (cname in colnames(old_teams)) { - if (!(cname %in% colnames(teams))) { - problems[length(problems) + 1] <- paste( - "`teams` column `", cname, "` was removed" - ) - } - } - } - } - if (!("games" %in% names(return_value))) { - problems[length(problems) + 1] <- "`games` was not in the returned list" - } else { - games <- return_value$games - if (!is_tibble(games)) { - problems[length(problems) + 1] <- "`games` was not a tibble" - } else { - if (nrow(games) != nrow(old_games)) { - problems[length(problems) + 1] <- paste( - "`games` changed from", nrow(old_games), "to", - nrow(games), "rows", - collapse = " " - ) - } - for (cname in colnames(old_games)) { - if (!(cname %in% colnames(games)) && cname != ".old_result") { - problems[length(problems) + 1] <- paste( - "`teams` column `", cname, "` was removed" - ) - } - } - } - } - } - - # report data structure problems - problems <- paste(problems, collapse = ", ") - if (problems != "") { - stop( - "During Week ", week_num, ", your `process_games()` function had the ", - "following issues: ", problems, ". " - ) - } - - # identify improper results values - problems <- old_games %>% - inner_join(games, by = intersect(colnames(old_games), colnames(games))) %>% - mutate(problem = case_when( - week == week_num & is.na(result) ~ - "a result from the current week is missing", - week != week_num & !is.na(.old_result) & is.na(result) ~ - "a known result outside the current week was blanked out", - week != week_num & is.na(.old_result) & !is.na(result) ~ - "a result outside the current week was entered", - week != week_num & .old_result != result ~ - "a known result outside the current week was updated", - !is.na(.old_result) & is.na(result) ~ - "a known result was blanked out", - !is.na(result) & result == 0 & game_type != "REG" ~ - "a playoff game resulted in a tie (had result == 0)", - TRUE ~ NA_character_ - )) %>% - filter(!is.na(problem)) %>% - pull(problem) %>% - unique() %>% - paste(collapse = ", ") - - # report result value problems - if (problems != "") { - stop( - "During Week ", week_num, ", your `process_games()` function had the", - "following issues: ", problems, ". Make sure you only change results ", - "when week == week_num & is.na(result)" - ) - } - - return(list(teams = teams, games = games)) - } - # simulate remaining regular season games for (week_num in weeks_to_sim) { return_value <- - simulate_week(teams, games, week_num, test_week, ...) + simulate_week(teams = teams, + games = games, + week_num = week_num, + process_games = process_games, + test_week = test_week, + .debug = .debug, + ...) if (!is.null(test_week) && week_num == test_week) { return(return_value) } @@ -266,16 +159,24 @@ simulate_round <- function(sim_round, # process any new games return_value <- - simulate_week(teams, games, week_num, test_week, ...) + simulate_week(teams = teams, + games = games, + week_num = week_num, + process_games = process_games, + test_week = test_week, + .debug = .debug, + ...) if (!is.null(test_week) && week_num == test_week) { return(return_value) } list[teams, games] <- return_value - # record losers - teams <- games %>% + week_games_doubled <- games %>% filter(week == week_num) %>% - double_games() %>% + double_games() + + # record losers + teams <- week_games_doubled %>% filter(outcome == 0) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -285,9 +186,7 @@ simulate_round <- function(sim_round, # if super bowl, record winner if (any(playoff_teams$conf == "SB")) { # super bowl winner exit is +1 to SB week - teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + teams <- week_games_doubled %>% filter(outcome == 1) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -296,9 +195,7 @@ simulate_round <- function(sim_round, } # filter to winners or byes - playoff_teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + playoff_teams <- week_games_doubled %>% right_join(playoff_teams, by = c("sim", "team")) %>% filter(is.na(result) | result > 0) %>% select(sim, conf, seed, team) %>% @@ -332,7 +229,7 @@ simulate_round <- function(sim_round, } else { if (!is_tibble(teams)) teams <- teams$standings teams$draft_order <- NA_real_ - teams <- teams %>% + teams <- tibble::as_tibble(teams) %>% dplyr::select( dplyr::any_of(c( "sim", "team", "conf", "division", "games", @@ -346,3 +243,123 @@ simulate_round <- function(sim_round, list("teams" = teams, "games" = games) } + +# function to simulate a week +simulate_week <- function(teams, + games, + week_num, + process_games, + test_week = NULL, + .debug = FALSE, + ...) { + # estimate and simulate games + return_value <- process_games(teams, games, week_num, ...) + + # testing? + if (!is.null(test_week) && week_num == test_week) { + return(return_value) + } + + if(isTRUE(.debug) && FALSE){ + # recall old data for comparison + old_teams <- teams + old_games <- games %>% + rename(.old_result = result) + # did we get the right data back? + # currently, we will catch a maximum of 9 problems. Allocate the vector + problems <- vector("character", length = 9L) + i <- 0 + if (typeof(return_value) != "list") { + problems[i + 1] <- "the returned value was not a list" + } else { + if (!("teams" %in% names(return_value))) { + problems[i + 2] <- "`teams` was not in the returned list" + } else { + teams <- return_value$teams + if (!is_tibble(teams)) { + problems[i + 3] <- "`teams` was not a tibble" + } else { + if (nrow(teams) != nrow(old_teams)) { + problems[i + 4] <- paste( + "`teams` changed from", nrow(old_teams), "to", + nrow(teams), "rows", + collapse = " " + ) + } + for (cname in colnames(old_teams)) { + if (!(cname %in% colnames(teams))) { + problems[i + 5] <- paste( + "`teams` column `", cname, "` was removed" + ) + } + } + } + } + if (!("games" %in% names(return_value))) { + problems[i + 6] <- "`games` was not in the returned list" + } else { + games <- return_value$games + if (!is_tibble(games)) { + problems[i + 7] <- "`games` was not a tibble" + } else { + if (nrow(games) != nrow(old_games)) { + problems[i + 8] <- paste( + "`games` changed from", nrow(old_games), "to", + nrow(games), "rows", + collapse = " " + ) + } + for (cname in colnames(old_games)) { + if (!(cname %in% colnames(games)) && cname != ".old_result") { + problems[i + 9] <- paste( + "`teams` column `", cname, "` was removed" + ) + } + } + } + } + } + + # report data structure problems + problems <- problems[problems != ""] + if (length(problems)) { + cli::cli_abort( + "During Week {week_num}, your {.code process_games} function had the \\ + following issues: {problems}." + ) + } + + # identify improper results values + problems <- old_games %>% + inner_join(games, by = intersect(colnames(old_games), colnames(games))) %>% + mutate(problem = case_when( + week == week_num & is.na(result) ~ + "a result from the current week is missing", + week != week_num & !is.na(.old_result) & is.na(result) ~ + "a known result outside the current week was blanked out", + week != week_num & is.na(.old_result) & !is.na(result) ~ + "a result outside the current week was entered", + week != week_num & .old_result != result ~ + "a known result outside the current week was updated", + !is.na(.old_result) & is.na(result) ~ + "a known result was blanked out", + !is.na(result) & result == 0 & game_type != "REG" ~ + "a playoff game resulted in a tie (had result == 0)", + TRUE ~ NA_character_ + )) %>% + filter(!is.na(problem)) %>% + pull(problem) %>% + unique() + + # report result value problems + if (problems != "") { + cli::cli_abort( + "During Week {week_num}, your {.code process_games} function had the \\ + following issues: {problems}. Make sure you only change results \\ + when {.code week == week_num} & {.code is.na(result)}" + ) + } + } + + list("teams" = return_value$teams, "games" = return_value$games) +} diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index 11c491b..17022f2 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -131,109 +131,7 @@ simulate_nfl <- function(nfl_season = NULL, # Define simple estimate and simulate functions - if (is.null(process_games)) { - process_games <- function(teams, games, week_num, ...) { - # teams = teams data - # games = games data - # - # this example estimates at PK/0 and 50% - # estimate = is the median spread expected (positive = home team favored) - # wp = is the probability of the team winning the game - # - # only simulate games through week week_num - # only simulate games with is.na(result) - # result = how many points home team won by - - # round out (away from zero) - round_out <- function(x) { - x[!is.na(x) & x < 0] <- floor(x[!is.na(x) & x < 0]) - x[!is.na(x) & x > 0] <- ceiling(x[!is.na(x) & x > 0]) - return(x) - } - - # get elo if not in teams data already - if (!("elo" %in% colnames(teams))) { - args <- list(...) - if ("elo" %in% names(args)) { - # pull from custom arguments - teams <- teams %>% - dplyr::inner_join(args$elo %>% select(team, elo), by = c("team" = "team")) - } else { - # start everyone at a random default elo - ratings <- tibble( - team = unique(teams$team), - elo = rnorm(length(unique(team)), 1500, 150) - ) - teams <- teams %>% - dplyr::inner_join(ratings, by = "team") - } - } - - # pull ratings from teams data - ratings <- teams %>% select(sim, team, elo) - - # mark estimate, wp, and result for games - games <- games %>% - dplyr::inner_join(ratings, by = c("sim" = "sim", "away_team" = "team")) %>% - dplyr::rename(away_elo = elo) %>% - dplyr::inner_join(ratings, by = c("sim" = "sim", "home_team" = "team")) %>% - dplyr::rename(home_elo = elo) %>% - dplyr::mutate( - elo_diff = home_elo - away_elo, - elo_diff = elo_diff + ifelse(location == "Home", 20, 0), - elo_diff = elo_diff + (home_rest - away_rest) / 7 * 25, - elo_diff = elo_diff * ifelse(game_type == "REG", 1, 1.2), - wp = 1 / (10^(-elo_diff / 400) + 1), - estimate = elo_diff / 25, - result = case_when( - is.na(result) & week == week_num ~ - as.integer(round_out(rnorm(n(), estimate, 13))), - TRUE ~ as.integer(result) - ), - outcome = case_when( - is.na(result) ~ NA_real_, - result > 0 ~ 1, - result < 0 ~ 0, - TRUE ~ 0.5 - ), - elo_input = case_when( - is.na(result) ~ NA_real_, - result > 0 ~ elo_diff * 0.001 + 2.2, - result < 0 ~ -elo_diff * 0.001 + 2.2, - TRUE ~ 1.0, - ), - elo_mult = log(pmax(abs(result), 1) + 1.0) * 2.2 / elo_input, - elo_shift = 20 * elo_mult * (outcome - wp) - ) %>% - dplyr::select( - -away_elo, -home_elo, -elo_diff, -wp, -estimate, - -outcome, -elo_input, -elo_mult - ) - - # apply elo shifts - teams <- teams %>% - dplyr::left_join(games %>% - filter(week == week_num) %>% - select(sim, away_team, elo_shift), - by = c("sim" = "sim", "team" = "away_team") - ) %>% - dplyr::mutate(elo = elo - ifelse(!is.na(elo_shift), elo_shift, 0)) %>% - dplyr::select(-elo_shift) %>% - dplyr::left_join(games %>% - filter(week == week_num) %>% - select(sim, home_team, elo_shift), - by = c("sim" = "sim", "team" = "home_team") - ) %>% - dplyr::mutate(elo = elo + ifelse(!is.na(elo_shift), elo_shift, 0)) %>% - dplyr::select(-elo_shift) - - # remove elo shift - games <- games %>% - dplyr::select(-elo_shift) - - return(list(teams = teams, games = games)) - } - } + if (is.null(process_games)) process_games <- default_process_games # Catch invalid input @@ -401,51 +299,68 @@ simulate_nfl <- function(nfl_season = NULL, # and conf columns if(sb_exit < 20) sb_exit <- NA_real_ - overall <- all_teams %>% - group_by(conf, division, team) %>% - summarize( - wins = mean(wins), - playoff = mean(!is.na(seed)), - div1 = mean(div_rank == 1), - seed1 = mean(!is.na(seed) & seed == 1), - won_conf = mean(exit >= sb_exit - 1), - won_sb = mean(exit == sb_exit), - draft1 = mean(draft_order == 1), - draft5 = mean(draft_order <= 5) - ) %>% - ungroup() + overall <- all_teams[, list( + wins = mean(wins), + playoff = mean(!is.na(seed)), + div1 = mean(div_rank == 1), + seed1 = mean(!is.na(seed) & seed == 1), + won_conf = mean(exit >= sb_exit - 1), + won_sb = mean(exit == sb_exit), + draft1 = mean(draft_order == 1), + draft5 = mean(draft_order <= 5) + ), keyby = c("conf", "division", "team")] + + # take all teams and repeat them for each half win and repeat this for each + # simulation. The length of the half win sequence equals 2 * games + 1 + team_vec <- rep( + sort(unique(all_teams$team)), + each = (max(all_teams$games) * 2 + 1) * length(unique(all_teams$sim)) + ) - team_wins <- - tibble( - team = rep(sort(unique(all_teams$team)), each = max(all_teams$games) * 2 + 1), - wins = rep(seq(0, max(all_teams$games), 0.5), length(unique(all_teams$team))) - ) %>% - inner_join( - all_teams %>% select(team, true_wins), - by = c("team") - ) %>% - group_by(team, wins) %>% - summarize( - over_prob = mean(true_wins > wins), - under_prob = mean(true_wins < wins) - ) %>% - ungroup() - - game_summary <- - all_games %>% - group_by(game_type, week, away_team, home_team) %>% - summarise( - away_wins = sum(result < 0), - home_wins = sum(result > 0), - ties = sum(result == 0), - result = mean(result), - # != number of simulations in the postseason - games_played = away_wins + home_wins + ties, - away_percentage = (away_wins + 0.5 * ties) / games_played, - home_percentage = (home_wins + 0.5 * ties) / games_played - ) %>% - ungroup() %>% - arrange(week) + # Create the win sequence vector and repeat every win for every sim + # Take this and repeat it for every team + wins_vec <- rep( + seq(0, max(all_teams$games), 0.5), + each = length(unique(all_teams$sim)) + ) %>% + rep(length(unique(all_teams$team))) + + # create sequence of sims and repeat it for every half win and for every team + sims_vec <- rep( + sort(unique(all_teams$sim)), + (max(all_teams$games) * 2 + 1) * length(unique(all_teams$team)) + ) + + team_wins <- data.table( + sim = sims_vec, + team = team_vec, + wins = wins_vec, + key = c("team", "wins") + ) %>% + merge( + all_teams[,list(sim, team, true_wins)], + by = c("sim", "team"), + sort = FALSE + ) + + team_wins <- team_wins[,list( + over_prob = mean(true_wins > wins), + under_prob = mean(true_wins < wins) + ), keyby = c("team", "wins")] + + + ## Game Summary + game_summary <- all_games[,list( + away_wins = sum(result < 0), + home_wins = sum(result > 0), + ties = sum(result == 0), + result = mean(result) + ), keyby = c("game_type", "week", "away_team", "home_team")] + game_summary[, games_played := away_wins + home_wins + ties] + game_summary[,`:=`( + away_percentage = (away_wins + 0.5 * ties) / games_played, + home_percentage = (home_wins + 0.5 * ties) / games_played + )] report("DONE!") @@ -453,11 +368,11 @@ simulate_nfl <- function(nfl_season = NULL, out <- structure( list( - "teams" = all_teams, - "games" = all_games, - "overall" = overall, - "team_wins" = team_wins, - "game_summary" = game_summary, + "teams" = tibble::as_tibble(all_teams), + "games" = tibble::as_tibble(all_games), + "overall" = tibble::as_tibble(overall), + "team_wins" = tibble::as_tibble(team_wins), + "game_summary" = tibble::as_tibble(game_summary), "sim_params" = list( "nfl_season" = nfl_season, "playoff_seeds" = playoff_seeds, @@ -480,3 +395,217 @@ simulate_nfl <- function(nfl_season = NULL, out } + + +default_process_games <- function(teams, games, week_num, ...) { + # teams = teams data + # games = games data + # + # this example estimates at PK/0 and 50% + # estimate = is the median spread expected (positive = home team favored) + # wp = is the probability of the team winning the game + # + # only simulate games through week week_num + # only simulate games with is.na(result) + # result = how many points home team won by + + # round out (away from zero) + round_out <- function(x) { + x[!is.na(x) & x < 0] <- floor(x[!is.na(x) & x < 0]) + x[!is.na(x) & x > 0] <- ceiling(x[!is.na(x) & x > 0]) + return(x) + } + + # get elo if not in teams data already + if (!("elo" %in% colnames(teams))) { + args <- list(...) + if ("elo" %in% names(args)) { + # pull from custom arguments + teams <- teams %>% + dplyr::inner_join(args$elo %>% select(team, elo), by = c("team" = "team")) + } else { + # start everyone at a random default elo + ratings <- tibble( + team = unique(teams$team), + elo = rnorm(length(unique(team)), 1500, 150) + ) + teams <- teams %>% + dplyr::inner_join(ratings, by = "team") + } + } + + # pull ratings from teams data + ratings <- teams %>% select(sim, team, elo) + + # mark estimate, wp, and result for games + games <- games %>% + dplyr::inner_join(ratings, by = c("sim" = "sim", "away_team" = "team")) %>% + dplyr::rename(away_elo = elo) %>% + dplyr::inner_join(ratings, by = c("sim" = "sim", "home_team" = "team")) %>% + dplyr::rename(home_elo = elo) %>% + dplyr::mutate( + elo_diff = home_elo - away_elo, + elo_diff = elo_diff + ifelse(location == "Home", 20, 0), + elo_diff = elo_diff + (home_rest - away_rest) / 7 * 25, + elo_diff = elo_diff * ifelse(game_type == "REG", 1, 1.2), + wp = 1 / (10^(-elo_diff / 400) + 1), + estimate = elo_diff / 25, + result = case_when( + is.na(result) & week == week_num ~ + as.integer(round_out(rnorm(n(), estimate, 13))), + TRUE ~ as.integer(result) + ), + outcome = case_when( + is.na(result) ~ NA_real_, + result > 0 ~ 1, + result < 0 ~ 0, + TRUE ~ 0.5 + ), + elo_input = case_when( + is.na(result) ~ NA_real_, + result > 0 ~ elo_diff * 0.001 + 2.2, + result < 0 ~ -elo_diff * 0.001 + 2.2, + TRUE ~ 1.0, + ), + elo_mult = log(pmax(abs(result), 1) + 1.0) * 2.2 / elo_input, + elo_shift = 20 * elo_mult * (outcome - wp) + ) %>% + dplyr::select( + -away_elo, -home_elo, -elo_diff, -wp, -estimate, + -outcome, -elo_input, -elo_mult + ) + + # apply elo shifts + teams <- teams %>% + dplyr::left_join(games %>% + filter(week == week_num) %>% + select(sim, away_team, elo_shift), + by = c("sim" = "sim", "team" = "away_team") + ) %>% + dplyr::mutate(elo = elo - ifelse(!is.na(elo_shift), elo_shift, 0)) %>% + dplyr::select(-elo_shift) %>% + dplyr::left_join(games %>% + filter(week == week_num) %>% + select(sim, home_team, elo_shift), + by = c("sim" = "sim", "team" = "home_team") + ) %>% + dplyr::mutate(elo = elo + ifelse(!is.na(elo_shift), elo_shift, 0)) %>% + dplyr::select(-elo_shift) + + # remove elo shift + games <- games %>% + dplyr::select(-elo_shift) + + return(list(teams = teams, games = games)) +} + +# rewritten in data.table +default_process_games_dt <- function(teams, games, week_num, ...) { + cli::cli_progress_step( + "Compute week {.val #{week_num}}" + ) + # teams = teams data + # games = games data + # + # this example estimates at PK/0 and 50% + # estimate = is the median spread expected (positive = home team favored) + # wp = is the probability of the team winning the game + # + # only simulate games through week week_num + # only simulate games with is.na(result) + # result = how many points home team won by + + # round out (away from zero) + round_out <- function(x) { + x[!is.na(x) & x < 0] <- floor(x[!is.na(x) & x < 0]) + x[!is.na(x) & x > 0] <- ceiling(x[!is.na(x) & x > 0]) + return(x) + } + + setDT(games, key = c("sim", "week")) + setDT(teams, key = c("sim", "team")) + + # get elo if not in teams data already + if (!("elo" %in% colnames(teams))) { + args <- list(...) + if ("elo" %in% names(args)) { + # pull from custom arguments + ratings <- setDT(args$elo, key = "team") + teams <- merge(teams, ratings[,list(team, elo)]) + } else { + # start everyone at a random default elo + ratings <- data.table( + team = unique(teams$team), + elo = rnorm(length(unique(teams$team)), 1500, 150), + key = "team" + ) + teams <- merge(teams, ratings) + } + } + + # merge elo values to home and away teams + games <- merge(x = games, y = teams[,list(sim, team, away_elo = elo)], + by.x = c("sim", "away_team"), + by.y = c("sim", "team"), + sort = FALSE) + games <- merge(x = games, y = teams[,list(sim, team, home_elo = elo)], + by.x = c("sim", "home_team"), + by.y = c("sim", "team"), + sort = FALSE) + + # create elo diff + games[, elo_diff := home_elo - away_elo + (home_rest - away_rest) / 7 * 25] + # adjust elo diff for location = HOME + games["Home", elo_diff := elo_diff + 20, on = "location"] + # adjust elo_diff for game type = REG + games["REG", elo_diff := elo_diff * 1.2, on = "game_type"] + # create wp and estimate + games[, `:=`(wp = 1 / (10^(-elo_diff / 400) + 1), + estimate = elo_diff / 25)] + # adjust result in current week + games[week_num == week & is.na(result), + result := as.integer(round_out(rnorm(.N, estimate, 13)))] + # compute elo shift + games[, `:=`( + outcome = fcase( + is.na(result), NA_real_, + result > 0, 1, + result < 0, 0, + default = 0.5 + ), + elo_input = fcase( + is.na(result), NA_real_, + result > 0, elo_diff * 0.001 + 2.2, + result < 0, -elo_diff * 0.001 + 2.2, + default = 1.0 + ) + )] + games[, elo_mult := log(pmax(abs(result), 1) + 1.0) * 2.2 / elo_input] + games[, elo_shift := 20 * elo_mult * (outcome - wp)] + + # drop irrelevant columns + drop_cols <- c("away_elo", "home_elo", "elo_diff", "wp", "estimate", + "outcome", "elo_input", "elo_mult") + games[, (drop_cols) := NULL] + + # apply away team elo shift + away_teams <- games[list(week_num), + list(sim, team = away_team, elo_shift = -elo_shift), + on = "week"] + teams <- merge(teams, away_teams, by = c("sim", "team"), all = TRUE) + teams[!is.na(elo_shift), elo := elo + elo_shift] + teams[, elo_shift := NULL] + + # apply home team elo shift + home_teams <- games[list(week_num), + list(sim, team = home_team, elo_shift), + on = "week"] + teams <- merge(teams, home_teams, by = c("sim", "team"), all = TRUE) + teams[!is.na(elo_shift), elo := elo + elo_shift] + teams[, elo_shift := NULL] + + # remove elo shift + games[, elo_shift := NULL] + + list("teams" = teams, "games" = games) +} diff --git a/R/summary_nflseedR.R b/R/summary_nflseedR.R index f151515..f5905c4 100644 --- a/R/summary_nflseedR.R +++ b/R/summary_nflseedR.R @@ -44,9 +44,9 @@ summary.nflseedR_simulation <- function(object, ...){ ) data <- object$overall %>% - mutate( + dplyr::mutate( division = gsub("AFC |NFC ", "", division), - division = case_when( + division = dplyr::case_when( division == "East" ~ "E A S T", division == "North" ~ "N O R T H", division == "South" ~ "S O U T H", @@ -63,23 +63,23 @@ summary.nflseedR_simulation <- function(object, ...){ hide_me <- names(column_is_empty[column_is_empty == FALSE]) afc <- data %>% - filter(conf == "AFC") %>% - select(-conf) %>% - arrange(division, desc(wins), desc(playoff)) + dplyr::filter(conf == "AFC") %>% + dplyr::select(-conf) %>% + dplyr::arrange(division, dplyr::desc(wins), dplyr::desc(playoff)) names(afc) <- paste0("afc_", names(afc)) nfc <- data %>% - filter(conf == "NFC") %>% - select(-conf) %>% - arrange(division, desc(wins), desc(playoff)) + dplyr::filter(conf == "NFC") %>% + dplyr::select(-conf) %>% + dplyr::arrange(division, dplyr::desc(wins), dplyr::desc(playoff)) names(nfc) <- paste0("nfc_", names(nfc)) - tbl <- bind_cols(afc, nfc) + tbl <- dplyr::bind_cols(afc, nfc) tbl %>% - group_by(afc_division) %>% + dplyr::group_by(afc_division) %>% gt::gt() %>% # see below table_theme() %>% @@ -164,13 +164,13 @@ summary.nflseedR_simulation <- function(object, ...){ locations = gt::cells_body(gt::ends_with("team")), fn = function(x){ url <- data.frame(team_abbr = x) %>% - left_join( + dplyr::left_join( nflreadr::load_teams() %>% - filter(!team_abbr %in% c("LAR", "OAK", "SD", "STL")) %>% - select(team_abbr, team_logo_espn), + dplyr::filter(!team_abbr %in% c("LAR", "OAK", "SD", "STL")) %>% + dplyr::select(team_abbr, team_logo_espn), by = "team_abbr" ) %>% - pull(team_logo_espn) + dplyr::pull(team_logo_espn) gt::web_image(url = url, height = 30) }) %>% gt::tab_source_note("nflseedR") %>% @@ -307,5 +307,3 @@ table_colors_negative <- c("white", "#FFF2DFFF", "#FFDFB2FF", "#FFCC7FFF", "#FFB74CFF", "#FFA626FF", "#FF9800FF", "#FA8C00FF", "#F47B00FF", "#EE6C00FF", "#E55100FF" ) - - diff --git a/R/utils.R b/R/utils.R index 94028e1..751795b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -7,25 +7,6 @@ report <- function(msg, .cli_fct(c(format(Sys.time(), '%H:%M:%S'), " | ", msg), ..., .envir = .envir) } -# this makes it so there's two rows per game (one/team) -double_games <- function(g) { - g1 <- g %>% - select(sim, game_type, week, away_team, home_team, result) %>% - rename(team = away_team, opp = home_team) %>% - mutate(result = -1 * result) - g2 <- g %>% - select(sim, game_type, week, away_team, home_team, result) %>% - rename(team = home_team, opp = away_team) - g <- bind_rows(g1, g2) %>% - mutate(outcome = case_when( - result > 0 ~ 1, - result < 0 ~ 0, - result == 0 ~ 0.5, - TRUE ~ NA_real_ - )) - return(g) -} - is_single_digit_numeric <- function(x) is.numeric(x) && length(x) == 1L && !is.na(x) # Identify sessions with sequential future resolving diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..b0b1bf0 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1 @@ +.nflseedR_env <- new.env(parent = emptyenv()) diff --git a/man/compute_division_ranks.Rd b/man/compute_division_ranks.Rd index 3a60e00..aeed601 100644 --- a/man/compute_division_ranks.Rd +++ b/man/compute_division_ranks.Rd @@ -6,7 +6,7 @@ \usage{ compute_division_ranks( games, - teams = NULL, + teams = lifecycle::deprecated(), tiebreaker_depth = 3, .debug = FALSE, h2h = NULL @@ -26,19 +26,8 @@ following variables are required: \item{result}{Equals home score - away score.} }} -\item{teams}{This parameter is optional. If it is \code{NULL} the function -will compute it internally, otherwise it has to be a data frame of all teams -contained in the \code{games} data frame repeated for each simulation ID -(\code{sim}). The following variables are required: -\describe{ -\item{sim}{A simulation ID. Normally 1 - n simulated seasons.} -\item{team}{Team abbreviation of the team (please see -\code{\link{divisions}} for valid team abbreviations).} -\item{conf}{Conference abbreviation of the team (please see -\code{\link{divisions}} for valid team abbreviations).} -\item{division}{Division of the team (please see -\code{\link{divisions}} for valid division names).} -}} +\item{teams}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This argument is no longer +supported. Instead, the functions computes it internally.} \item{tiebreaker_depth}{A single value equal to 1, 2, or 3. The default is 3. The value controls the depth of tiebreakers that shall be applied. The deepest diff --git a/tests/testthat/games.rds b/tests/testthat/games.rds new file mode 100644 index 0000000..817b3d7 Binary files /dev/null and b/tests/testthat/games.rds differ diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index acf6528..9d6c618 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -1,9 +1,16 @@ -load_test_games <- function(){ - g <- nflseedR::load_sharpe_games() +test_dir <- getwd() - if (!nrow(g) > 0) return(tibble::tibble()) +load_test_games <- function(dir = test_dir){ + readRDS(file.path(dir, "games.rds")) +} - g %>% - dplyr::filter(season %in% 2014:2019) %>% - dplyr::select(sim = season, game_type, week, away_team, home_team, result) +load_reference <- function(type = c("div", "conf", "draft"), + dir = test_dir){ + type <- match.arg(type) + file_name <- switch (type, + "div" = "reference_div_ranks.rds", + "conf" = "reference_conf_seeds.rds", + "draft" = "reference_draft_order.rds", + ) + readRDS(file.path(dir, file_name)) } diff --git a/tests/testthat/test-ranks_seeds_draftorder.R b/tests/testthat/test-ranks_seeds_draftorder.R index 58d42b9..2e1947e 100644 --- a/tests/testthat/test-ranks_seeds_draftorder.R +++ b/tests/testthat/test-ranks_seeds_draftorder.R @@ -1,40 +1,35 @@ -source("helpers.R") - test_that("compute_division_ranks() works for multiple seasons", { g <- load_test_games() - skip_if_not(nrow(g) > 0, message = NULL) - ref <- readRDS("reference_div_ranks.rds") + div_ranks <- compute_division_ranks(g) + div_ranks <- div_ranks$standings %>% strip_nflverse_attributes() - div_ranks <- g %>% - compute_division_ranks() + exp <- load_reference("div") - expect_identical(div_ranks$standings, ref) + expect_identical(div_ranks, exp) }) test_that("compute_conference_seeds() works for multiple seasons", { g <- load_test_games() - skip_if_not(nrow(g) > 0, message = NULL) - - ref <- readRDS("reference_conf_seeds.rds") - conf_seeds <- g %>% compute_division_ranks() %>% - compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) + compute_conference_seeds(playoff_seeds = 6) + conf_seeds <- conf_seeds$standings %>% strip_nflverse_attributes() + + exp <- load_reference("conf") - expect_identical(conf_seeds$standings, ref) + expect_identical(conf_seeds, exp) }) test_that("compute_draft_order() works for multiple seasons", { g <- load_test_games() - skip_if_not(nrow(g) > 0, message = NULL) - - ref <- readRDS("reference_draft_order.rds") - draft_order <- g %>% compute_division_ranks() %>% - compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) %>% - compute_draft_order(games = g, h2h = .$h2h) + compute_conference_seeds(playoff_seeds = 6) %>% + compute_draft_order(games = g) + draft_order <- strip_nflverse_attributes(draft_order) + + exp <- load_reference("draft") - expect_identical(draft_order, ref) + expect_identical(draft_order, exp) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 157d666..e48a6fe 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -26,7 +26,7 @@ test_that("double_games works", { result = 33 ) - d <- data.frame( + d <- data.table( sim = c(2020, 2020), game_type = c("REG", "REG"), week = c(16L, 16L), @@ -34,6 +34,30 @@ test_that("double_games works", { opp = c("XYZ", "ABC"), result = c(-33, 33), outcome = c(0, 1) - ) + ) %>% tibble::as_tibble() expect_identical(double_games(g), d) }) + +test_that("h2h works", { + g <- data.frame( + sim = 2020, + game_type = "REG", + week = 16L, + away_team = "ABC", + home_team = "XYZ", + result = 33 + ) + + h2h <- compute_h2h(double_games(g)) + + exp <- data.table( + sim = c(2020, 2020), + team = c("ABC", "XYZ"), + opp = c("XYZ", "ABC"), + h2h_games = 1L, + h2h_wins = c(0, 1), + h2h_played = 1, + key = c("sim", "team", "opp") + ) + expect_identical(h2h, exp) +})