From d3071dada88ae63b97b4bb43134635ffb3049863 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 11:40:58 +0100 Subject: [PATCH 01/44] gotta include the environment to be able to evaluate expressions --- R/utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index 6cd0a85..84d90f9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,7 +1,7 @@ # progress report using rlang to avoid usethis dependency -report <- function(msg, .cli_fct = cli::cli_alert_info) { - .cli_fct(c(format(Sys.time(), '%H:%M:%S'), " | ", msg)) +report <- function(msg, ..., .cli_fct = cli::cli_alert_info, .envir = parent.frame()) { + .cli_fct(c(format(Sys.time(), '%H:%M:%S'), " | ", msg), ..., .envir = .envir) } sim_info <- function(msg) { From 16cfa51a620eb6c4a1f6f3687a7b6a3035380b08 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 11:41:37 +0100 Subject: [PATCH 02/44] nflseedR version and finish time are useful sims params --- R/simulate_nfl.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index df4753d..61015c1 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -448,7 +448,9 @@ simulate_nfl <- function(nfl_season = NULL, "sims_per_round" = sims_per_round, ".debug" = .debug, "print_summary" = print_summary, - "sim_include" = sim_include + "sim_include" = sim_include, + "nflseedR_version" = utils::packageVersion("nflseedR"), + "finished_at" = Sys.time() ) ), class = "nflseedR_simulation" From adaf81cda9ffe1953efd9f140f437d13845c8755 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 11:43:13 +0100 Subject: [PATCH 03/44] avoid future and dplyr in the combining loops this is a big speed bump because dplyr bind_rows is slow at binding lists and data.table is a reverse dependency anyways --- R/simulate_nfl.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index 61015c1..f250530 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -358,16 +358,24 @@ simulate_nfl <- function(nfl_season = NULL, if (isTRUE(.debug)) eval(run) else suppressMessages(eval(run)) if (!is.null(test_week)) { - report(glue( - "Aborting and returning your `process_games` function's results from Week {test_week}" - )) + report( + "Aborting and returning your {.code process_games} function's \\ + results from Week {test_week}" + , wrap = TRUE + ) return(all[[1]]) } report("Combining simulation data") - all_teams <- furrr::future_map_dfr(all, ~ .x$teams) - all_games <- furrr::future_map_dfr(all, ~ .x$games) + # `all` is a list of rounds where every round is containing the dataframes + # "teams" and "games". We loop over the list with purrr (that's not really bad + # because the length of the loop only is the number of rounds) but don't + # convert to a dataframe/tibble because dplyr::bind_rows() is too slow. + # Instead, we bind with data.table afterwards, it's a reverse dependency + # through nflreadr anyways. + all_teams <- data.table::rbindlist(purrr::map(all, ~ .x$teams)) + all_games <- data.table::rbindlist(purrr::map(all, ~ .x$games)) report("Aggregating across simulations") From 1e4705de46f61efb9acc792488609d998e87cb73 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 11:43:45 +0100 Subject: [PATCH 04/44] drop readr dependency and use data.table to load the fake schedule --- R/simulate_nfl.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index f250530..bd493b2 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -278,11 +278,10 @@ simulate_nfl <- function(nfl_season = NULL, { fn <- glue::glue("https://github.com/nflverse/nfldata/blob/master/fake_schedule_{nfl_season}.csv?raw=true") tryCatch({ - options(readr.num_columns = 0) - schedule <- readr::read_csv(fn) - sim_info(glue::glue("No actual schedule exists for {nfl_season}, using fake schedule with correct opponents")) + schedule <- data.table::fread(fn) + cli::cli_alert_info("No actual schedule exists for {.val {nfl_season}}, using fake schedule with correct opponents.") }, error = function(cond) { - stop("Unable to locate a schedule for ", nfl_season) + cli::cli_abort("Unable to locate a schedule for {.val {nfl_season}}") }) } From bd764fb49a1c746954677e6bfa1d48b809fc4449 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 11:44:02 +0100 Subject: [PATCH 05/44] start message improvements --- R/simulate_nfl.R | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index bd493b2..2335ef1 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -322,14 +322,20 @@ simulate_nfl <- function(nfl_season = NULL, } if (sim_rounds > 1 && is_sequential()) { - sim_info(c( - "Computation in multiple rounds can be accelerated with parallel processing.", - "You should consider calling a `future::plan()`. Please see the function documentation for further information.", - "Will go on sequentially..." - )) + cli::cli_inform(c( + "i" = "Computation in multiple rounds can be accelerated + with parallel processing.", + "i" = "You should consider calling a {.code future::plan()}. + Please see the function documentation for further information.", + "i" = "Will go on sequentially..." + ), wrap = TRUE + ) } - report(glue("Beginning simulation of {simulations} seasons in {sim_rounds} {ifelse(sim_rounds == 1, 'round', 'rounds')}")) + report( + "Beginning simulation of {.val {simulations}} season{?s} \\ + in {.val {sim_rounds}} round{?s}" + ) p <- progressr::progressor(along = seq_len(sim_rounds)) @@ -434,6 +440,8 @@ simulate_nfl <- function(nfl_season = NULL, ungroup() %>% arrange(week) + report("DONE!") + if (isTRUE(print_summary)) print(overall) out <- structure( From ed5ec00e722f9807c65b4b4b6d4e23c6a42636ee Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 11:45:01 +0100 Subject: [PATCH 06/44] dev version and dependency updates we need data.table for quick list binding and can use it to replace csv download with readr --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5e23309..158538d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: nflseedR Title: Functions to Efficiently Simulate and Evaluate NFL Seasons -Version: 1.1.0.9001 +Version: 1.1.0.9002 Authors@R: c(person(given = "Lee", family = "Sharpe", @@ -21,6 +21,7 @@ Imports: cli, crayon, curl, + data.table, dplyr, furrr, future, @@ -30,7 +31,6 @@ Imports: nflreadr (>= 1.1.3), progressr, purrr, - readr, rlang, tibble, tidyr From abe9edd7f1b773fe4bb4bee40ab57e371cb631d4 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 11:59:54 +0100 Subject: [PATCH 07/44] namespace --- NAMESPACE | 3 ++- R/nflseedR-package.R | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3a2643d..48e35d3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,9 +10,10 @@ import(dplyr) import(gsubfn) importFrom(cli,symbol) importFrom(crayon,red) +importFrom(data.table,fread) +importFrom(data.table,rbindlist) importFrom(furrr,furrr_options) importFrom(furrr,future_map) -importFrom(furrr,future_map_dfr) importFrom(future,plan) importFrom(glue,glue) importFrom(glue,glue_collapse) diff --git a/R/nflseedR-package.R b/R/nflseedR-package.R index 7058f4f..06984d9 100644 --- a/R/nflseedR-package.R +++ b/R/nflseedR-package.R @@ -8,7 +8,8 @@ #' @import gsubfn #' @importFrom cli symbol #' @importFrom crayon red -#' @importFrom furrr future_map future_map_dfr furrr_options +#' @importFrom data.table rbindlist fread +#' @importFrom furrr future_map furrr_options #' @importFrom future plan #' @importFrom glue glue glue_collapse #' @importFrom magrittr %>% From 72e96186c47b1f8e45035777ff290d1c0e8b602a Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 12:09:59 +0100 Subject: [PATCH 08/44] stop > cli_abort --- R/simulate_nfl.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index 2335ef1..5f4f0ac 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -242,18 +242,19 @@ simulate_nfl <- function(nfl_season = NULL, is_single_digit_numeric(simulations), is_single_digit_numeric(sims_per_round) )) { - stop( - "One or more of the parameters `nfl_season`, `tiebreaker_depth`, `test_week`, ", - "`simulations` and `sims_per_round` are not single digit numeric values!" + cli::cli_abort( + "One or more of the parameters {.arg nfl_season}, {.arg tiebreaker_depth}, \\ + {.arg test_week}, {.arg simulations} and {.arg sims_per_round} are not \\ + single digit numeric values!" ) } if (!is.function(process_games)) { - stop("The parameter `process_games` has to be a function!") + cli::cli_abort("The parameter {.arg process_games} has to be a function!") } if (nfl_season < 2002) { - stop("The earliest season that can be simulated is 2002.") + cli::cli_abort("The earliest season that can be simulated is 2002.") } #### LOAD DATA #### From 259b9b34d1d7769c30510ba07b8402fcd116427e Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 12:10:12 +0100 Subject: [PATCH 09/44] reshape args --- R/utils.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 84d90f9..e762910 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,6 +1,9 @@ # progress report using rlang to avoid usethis dependency -report <- function(msg, ..., .cli_fct = cli::cli_alert_info, .envir = parent.frame()) { +report <- function(msg, + ..., + .cli_fct = cli::cli_alert_info, + .envir = parent.frame()) { .cli_fct(c(format(Sys.time(), '%H:%M:%S'), " | ", msg), ..., .envir = .envir) } From fd01220e471c782fcea2086afc77ade9f0bdf605 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 12:10:36 +0100 Subject: [PATCH 10/44] transition to nflreadr --- R/simulate_nfl.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index 5f4f0ac..3390d86 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -261,7 +261,7 @@ simulate_nfl <- function(nfl_season = NULL, # load games data report("Loading games data") - schedule <- load_sharpe_games() %>% + schedule <- nflreadr::load_schedules() %>% select( season, game_type, week, away_team, home_team, away_rest, home_rest, location, result From 263a920e258e39a588714c67d8105f6c645c64b3 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 13:35:22 +0100 Subject: [PATCH 11/44] it's nowhere used anymore --- R/utils.R | 11 ----------- tests/testthat/test-utils.R | 3 +-- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/R/utils.R b/R/utils.R index e762910..31fc1b4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -7,17 +7,6 @@ report <- function(msg, .cli_fct(c(format(Sys.time(), '%H:%M:%S'), " | ", msg), ..., .envir = .envir) } -sim_info <- function(msg) { - rlang::inform( - paste0( - crayon::yellow(cli::symbol$info), - " ", - msg, - collapse = "\n" - ) - ) -} - # this makes it so there's two rows per game (one/team) double_games <- function(g) { g1 <- g %>% diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index f931f83..175fc9c 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -14,10 +14,9 @@ test_that("is_sequential works", { expect_false(is_sequential()) }) -test_that("sim_info and report works", { +test_that("report works", { skip_on_cran() - expect_message(sim_info("this is a message")) expect_message(report("this is a message")) }) From fea11100efe97496f445a431749ce1d671b42364 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 13:39:10 +0100 Subject: [PATCH 12/44] replace glue_collapse calls with cli --- R/compute_division_ranks.R | 8 +++----- R/compute_draft_order.R | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/R/compute_division_ranks.R b/R/compute_division_ranks.R index f4250ba..8bc168e 100644 --- a/R/compute_division_ranks.R +++ b/R/compute_division_ranks.R @@ -86,11 +86,9 @@ compute_division_ranks <- function(games, ) if (!sum(names(games) %in% required_vars, na.rm = TRUE) >= 6 | !is.data.frame(games)) { - stop( - "The argument `games` has to be a data frame including ", - "all of the following variables: ", - glue_collapse(required_vars, sep = ", ", last = " and "), - "!" + cli::cli_abort( + "The argument {.arg games} has to be a data frame including \\ + all of the following variables: {.val {required_vars}}!" ) } diff --git a/R/compute_draft_order.R b/R/compute_draft_order.R index 4633a02..2eb27e9 100644 --- a/R/compute_draft_order.R +++ b/R/compute_draft_order.R @@ -53,11 +53,9 @@ compute_draft_order <- function(teams, ) if (!sum(names(games) %in% required_vars, na.rm = TRUE) >= 6 | !is.data.frame(games)) { - stop( - "The argument `games` has to be a data frame including ", - "all of the following variables: ", - glue_collapse(required_vars, sep = ", ", last = " and "), - "!" + cli::cli_abort( + "The argument {.arg games} has to be a data frame including \\ + all of the following variables: {.val {required_vars}}!" ) } From 3e6876cf57bb8a59469a7a9d57c5e325918ce0e0 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 13:45:45 +0100 Subject: [PATCH 13/44] drop glue --- DESCRIPTION | 20 +++++++------------- NAMESPACE | 2 -- R/conference_tiebreaker.R | 12 ++++++------ R/division_tiebreaker.R | 12 ++++++------ R/draft_tiebreaker.R | 10 +++++----- R/nflseedR-package.R | 1 - R/simulate_nfl.R | 6 +++++- data-raw/dependency-helper.R | 3 +-- 8 files changed, 30 insertions(+), 36 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 158538d..7063800 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,12 @@ Package: nflseedR -Title: Functions to Efficiently Simulate and Evaluate NFL - Seasons +Title: Functions to Efficiently Simulate and Evaluate NFL Seasons Version: 1.1.0.9002 -Authors@R: - c(person(given = "Lee", - family = "Sharpe", - role = c("aut", "cph")), - person(given = "Sebastian", - family = "Carl", - role = c("cre", "aut"), - email = "mrcaseb@gmail.com")) -Description: A set of functions to simulate National Football - League seasons including the sophisticated tie-breaking procedures. +Authors@R: c( + person("Lee", "Sharpe", role = c("aut", "cph")), + person("Sebastian", "Carl", , "mrcaseb@gmail.com", role = c("cre", "aut")) + ) +Description: A set of functions to simulate National Football League + seasons including the sophisticated tie-breaking procedures. License: MIT + file LICENSE URL: https://nflseedr.com, https://github.com/nflverse/nflseedR BugReports: https://github.com/nflverse/nflseedR/issues @@ -25,7 +20,6 @@ Imports: dplyr, furrr, future, - glue, gsubfn, magrittr, nflreadr (>= 1.1.3), diff --git a/NAMESPACE b/NAMESPACE index 48e35d3..17d4dfe 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,8 +15,6 @@ importFrom(data.table,rbindlist) importFrom(furrr,furrr_options) importFrom(furrr,future_map) importFrom(future,plan) -importFrom(glue,glue) -importFrom(glue,glue_collapse) importFrom(magrittr,"%>%") importFrom(progressr,progressor) importFrom(purrr,pluck) diff --git a/R/conference_tiebreaker.R b/R/conference_tiebreaker.R index 5996648..69b2ac7 100644 --- a/R/conference_tiebreaker.R +++ b/R/conference_tiebreaker.R @@ -24,7 +24,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { # if not all division winners, reduce to best per division if (tied %>% filter(div_winner) %>% nrow() == 0) { - if (isTRUE(.debug)) report(glue("CONF ({min_tied}): Best-in-division reduction")) + if (isTRUE(.debug)) report("CONF ({min_tied}): Best-in-division reduction") list[u, tied] <- tied %>% group_by(sim, conf, division) %>% mutate(value = case_when( @@ -39,7 +39,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { } # head-to-head sweep - if (isTRUE(.debug)) report(glue("CONF ({min_tied}): Head-to-head Sweep")) + if (isTRUE(.debug)) report("CONF ({min_tied}): Head-to-head Sweep") list[u, tied] <- tied %>% inner_join(tied %>% select(sim, conf, team, div_winner, div_best_left, win_pct), by = c("sim", "conf", "div_winner", "div_best_left", "win_pct"), @@ -66,7 +66,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # conference record - if (isTRUE(.debug)) report(glue("CONF ({min_tied}): Conference Record")) + if (isTRUE(.debug)) report("CONF ({min_tied}): Conference Record") list[u, tied] <- tied %>% mutate(value = case_when( tied_teams < min_tied ~ NA_real_, @@ -79,7 +79,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # common games - if (isTRUE(.debug)) report(glue("CONF ({min_tied}): Common Record")) + if (isTRUE(.debug)) report("CONF ({min_tied}): Common Record") list[u, tied] <- tied %>% inner_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% @@ -100,7 +100,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # strength of victory - if (isTRUE(.debug)) report(glue("CONF ({min_tied}): Strength of Victory")) + if (isTRUE(.debug)) report("CONF ({min_tied}): Strength of Victory") list[u, tied] <- tied %>% mutate(value = case_when( tied_teams < min_tied ~ NA_real_, @@ -112,7 +112,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # strength of schedule - if (isTRUE(.debug)) report(glue("CONF ({min_tied}): Strength of Schedule")) + if (isTRUE(.debug)) report("CONF ({min_tied}): Strength of Schedule") list[u, tied] <- tied %>% mutate(value = case_when( tied_teams < min_tied ~ NA_real_, diff --git a/R/division_tiebreaker.R b/R/division_tiebreaker.R index 3608e95..b873061 100644 --- a/R/division_tiebreaker.R +++ b/R/division_tiebreaker.R @@ -23,7 +23,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # head-to-head - if (isTRUE(.debug)) report(glue("DIV ({min_tied}): Head-to-head")) + if (isTRUE(.debug)) report("DIV ({min_tied}): Head-to-head") list[u, tied] <- tied %>% inner_join(tied %>% select(sim, division, team, win_pct), by = c("sim", "division", "win_pct"), @@ -45,7 +45,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # division record - if (isTRUE(.debug)) report(glue("DIV ({min_tied}): Division Record")) + if (isTRUE(.debug)) report("DIV ({min_tied}): Division Record") list[u, tied] <- tied %>% mutate(value = case_when( tied_teams < min_tied ~ NA_real_, @@ -58,7 +58,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # common games - if (isTRUE(.debug)) report(glue("DIV ({min_tied}): Common Record")) + if (isTRUE(.debug)) report("DIV ({min_tied}): Common Record") list[u, tied] <- tied %>% inner_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% @@ -78,7 +78,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # conference record - if (isTRUE(.debug)) report(glue("DIV ({min_tied}): Conference Record")) + if (isTRUE(.debug)) report("DIV ({min_tied}): Conference Record") list[u, tied] <- tied %>% mutate(value = case_when( tied_teams < min_tied ~ NA_real_, @@ -90,7 +90,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # strength of victory - if (isTRUE(.debug)) report(glue("DIV ({min_tied}): Strength of Victory")) + if (isTRUE(.debug)) report("DIV ({min_tied}): Strength of Victory") list[u, tied] <- tied %>% mutate(value = case_when( tied_teams < min_tied ~ NA_real_, @@ -102,7 +102,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # strength of schedule - if (isTRUE(.debug)) report(glue("DIV ({min_tied}): Strength of Schedule")) + if (isTRUE(.debug)) report("DIV ({min_tied}): Strength of Schedule") list[u, tied] <- tied %>% mutate(value = case_when( tied_teams < min_tied ~ NA_real_, diff --git a/R/draft_tiebreaker.R b/R/draft_tiebreaker.R index 9974731..001b83c 100644 --- a/R/draft_tiebreaker.R +++ b/R/draft_tiebreaker.R @@ -20,7 +20,7 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # divisional tiebreakers - if (isTRUE(.debug)) report(glue("DRAFT: Divisional Rank")) + if (isTRUE(.debug)) report("DRAFT: Divisional Rank") list[u, tied] <- tied %>% group_by(sim) %>% mutate(value = case_when( @@ -34,7 +34,7 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # conference tiebreakers - if (isTRUE(.debug)) report(glue("DRAFT: Conference Rank")) + if (isTRUE(.debug)) report("DRAFT: Conference Rank") list[u, tied] <- tied %>% mutate( div_winner = NA, # we don't care about div winners here @@ -59,7 +59,7 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # head-to-head sweep - if (isTRUE(.debug)) report(glue("DRAFT: Head-to-head Sweep")) + if (isTRUE(.debug)) report("DRAFT: Head-to-head Sweep") list[u, tied] <- tied %>% inner_join(tied %>% select(sim, team), by = c("sim"), suffix = c("", "_opp")) %>% rename(opp = team_opp) %>% @@ -80,7 +80,7 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # common games - if (isTRUE(.debug)) report(glue("DRAFT: Common Record")) + if (isTRUE(.debug)) report("DRAFT: Common Record") list[u, tied] <- tied %>% inner_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% @@ -100,7 +100,7 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { if (tied %>% filter(tied_teams >= min_tied) %>% nrow() == 0) next # strength of victory - if (isTRUE(.debug)) report(glue("DRAFT: Strength of Victory")) + if (isTRUE(.debug)) report("DRAFT: Strength of Victory") list[u, tied] <- tied %>% mutate(value = sov) %>% process_draft_ties(u, r) diff --git a/R/nflseedR-package.R b/R/nflseedR-package.R index 06984d9..8b6c9d5 100644 --- a/R/nflseedR-package.R +++ b/R/nflseedR-package.R @@ -11,7 +11,6 @@ #' @importFrom data.table rbindlist fread #' @importFrom furrr future_map furrr_options #' @importFrom future plan -#' @importFrom glue glue glue_collapse #' @importFrom magrittr %>% #' @importFrom progressr progressor #' @importFrom purrr pluck diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index 3390d86..a94b5f9 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -277,7 +277,11 @@ simulate_nfl <- function(nfl_season = NULL, if (nrow(schedule) == 0) { - fn <- glue::glue("https://github.com/nflverse/nfldata/blob/master/fake_schedule_{nfl_season}.csv?raw=true") + fn <- paste0( + "https://github.com/nflverse/nfldata/blob/master/fake_schedule_", + nfl_season, + ".csv?raw=true" + ) tryCatch({ schedule <- data.table::fread(fn) cli::cli_alert_info("No actual schedule exists for {.val {nfl_season}}, using fake schedule with correct opponents.") diff --git a/data-raw/dependency-helper.R b/data-raw/dependency-helper.R index 93ef887..213157a 100644 --- a/data-raw/dependency-helper.R +++ b/data-raw/dependency-helper.R @@ -5,7 +5,6 @@ imports <- c( "crayon", "cli", "dplyr", - "glue", "gsubfn", "purrr", "tidyr", @@ -13,7 +12,7 @@ imports <- c( "furrr", "progressr", "future", - "readr" + "data.table" ) purrr::walk(imports, usethis::use_package, "Imports") usethis::use_tidy_description() From ff6521b3a2e3da9b9ae58bd2b5a52e1ab5e75522 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 13:48:13 +0100 Subject: [PATCH 14/44] drop crayon --- DESCRIPTION | 1 - NAMESPACE | 1 - R/nflseedR-package.R | 1 - data-raw/dependency-helper.R | 1 - 4 files changed, 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7063800..927e288 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,6 @@ Depends: R (>= 3.5.0) Imports: cli, - crayon, curl, data.table, dplyr, diff --git a/NAMESPACE b/NAMESPACE index 17d4dfe..3618dc6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,7 +9,6 @@ export(simulate_nfl) import(dplyr) import(gsubfn) importFrom(cli,symbol) -importFrom(crayon,red) importFrom(data.table,fread) importFrom(data.table,rbindlist) importFrom(furrr,furrr_options) diff --git a/R/nflseedR-package.R b/R/nflseedR-package.R index 8b6c9d5..88b1831 100644 --- a/R/nflseedR-package.R +++ b/R/nflseedR-package.R @@ -7,7 +7,6 @@ #' @import dplyr #' @import gsubfn #' @importFrom cli symbol -#' @importFrom crayon red #' @importFrom data.table rbindlist fread #' @importFrom furrr future_map furrr_options #' @importFrom future plan diff --git a/data-raw/dependency-helper.R b/data-raw/dependency-helper.R index 213157a..0844eda 100644 --- a/data-raw/dependency-helper.R +++ b/data-raw/dependency-helper.R @@ -2,7 +2,6 @@ imports <- c( "magrittr", "rlang", - "crayon", "cli", "dplyr", "gsubfn", From 33109f93828743a345db17cec1b11183b04f8230 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 13:56:54 +0100 Subject: [PATCH 15/44] drop curl and use nflreadr to load schedules --- DESCRIPTION | 1 - NAMESPACE | 1 + R/load_sharpe_games.R | 17 ++++++----------- man/{load_sharpe_games.Rd => load_schedules.Rd} | 17 +++++++++++++++-- 4 files changed, 22 insertions(+), 14 deletions(-) rename man/{load_sharpe_games.Rd => load_schedules.Rd} (93%) diff --git a/DESCRIPTION b/DESCRIPTION index 927e288..2ef0a42 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,6 @@ Depends: R (>= 3.5.0) Imports: cli, - curl, data.table, dplyr, furrr, diff --git a/NAMESPACE b/NAMESPACE index 3618dc6..32fdc03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(summary,nflseedR_simulation) export(compute_conference_seeds) export(compute_division_ranks) export(compute_draft_order) +export(load_schedules) export(load_sharpe_games) export(simulate_nfl) import(dplyr) diff --git a/R/load_sharpe_games.R b/R/load_sharpe_games.R index cebf8b2..3836036 100644 --- a/R/load_sharpe_games.R +++ b/R/load_sharpe_games.R @@ -5,6 +5,8 @@ #' function is a convenient helper to download the file into memory without #' having to remember the correct url. #' +#' @inheritDotParams nflreadr::load_schedules +#' @seealso The internally called function [nflreadr::load_schedules()] #' @examples #' \donttest{ #' games <- load_sharpe_games() @@ -90,15 +92,8 @@ #' \item{stadium}{Name of the stadium.} #' } #' @export -load_sharpe_games <- function(){ - fetched <- try(curl::curl_fetch_memory("https://github.com/nflverse/nfldata/blob/master/data/games.rds?raw=true"), silent = TRUE) - if (inherits(fetched, "try-error") || fetched$status_code != 200) return(tibble::tibble()) - read_raw_rds(fetched$content) -} +load_schedules <- function(...) nflreadr::load_schedules(...) -read_raw_rds <- function(raw) { - con <- gzcon(rawConnection(raw)) - ret <- readRDS(con) - close(con) - return(ret) -} +#' @export +#' @rdname load_schedules +load_sharpe_games <- load_schedules diff --git a/man/load_sharpe_games.Rd b/man/load_schedules.Rd similarity index 93% rename from man/load_sharpe_games.Rd rename to man/load_schedules.Rd index e6b1f9c..a8571dd 100644 --- a/man/load_sharpe_games.Rd +++ b/man/load_schedules.Rd @@ -1,10 +1,20 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/load_sharpe_games.R -\name{load_sharpe_games} +\name{load_schedules} +\alias{load_schedules} \alias{load_sharpe_games} \title{Load Lee Sharpe's Games File} \usage{ -load_sharpe_games() +load_schedules(...) + +load_sharpe_games(...) +} +\arguments{ +\item{...}{ + Arguments passed on to \code{\link[nflreadr:load_schedules]{nflreadr::load_schedules}} + \describe{ + \item{\code{seasons}}{a numeric vector of seasons to return, default \code{TRUE} returns all available data.} + }} } \value{ A data frame containing the following variables for all NFL games @@ -95,3 +105,6 @@ future::plan("sequential") } } } +\seealso{ +The internally called function \code{\link[nflreadr:load_schedules]{nflreadr::load_schedules()}} +} From bea866105983d89c9007cdd919463fb12f06e85f Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 15:41:28 +0100 Subject: [PATCH 16/44] more message improvements and games data attributes modification for consistency --- R/compute_conference_seeds.R | 26 +++++++++------ R/compute_division_ranks.R | 13 +++++--- R/compute_draft_order.R | 35 +++++++++++--------- R/utils.R | 9 +++++ tests/testthat/test-ranks_seeds_draftorder.R | 18 +++++----- 5 files changed, 60 insertions(+), 41 deletions(-) diff --git a/R/compute_conference_seeds.R b/R/compute_conference_seeds.R index 7d52bd1..b7e7f56 100644 --- a/R/compute_conference_seeds.R +++ b/R/compute_conference_seeds.R @@ -39,25 +39,26 @@ compute_conference_seeds <- function(teams, playoff_seeds = 7) { # catch invalid input if (!isTRUE(tiebreaker_depth %in% 1:3)) { - stop( - "The argument `tiebreaker_depth` has to be", - "a single value in the range of 1-3!" + cli::cli_abort( + "The argument {.arg tiebreaker_depth} has to be \\ + a single value in the range of 1-3!" ) } if (!is_tibble(teams)) teams <- teams$standings if (!any((names(teams) %in% "div_rank")) | !is.data.frame(teams)) { - stop( - "The argument `teams` has to be a data frame including ", - "the variable `div_rank` as computed by `compute_division_ranks()`!" + cli::cli_abort( + "The argument {.arg teams} has to be a data frame including \\ + the variable {.val div_rank} as computed by {.fn compute_division_ranks}!" ) } if(is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE){ - stop("You asked for tiebreakers but the argument `h2h` is NULL. ", - "Did you forget to pass the `h2h` data frame? It is computed with the ", - "function `compute_division_ranks()`." + cli::cli_abort( + "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ + Did you forget to pass the {.val h2h} data frame? It is computed with \\ + the function {.fn compute_division_ranks}." ) } @@ -67,7 +68,7 @@ compute_conference_seeds <- function(teams, # seed loop for (seed_num in seq_len(playoff_seeds)) { - report(paste0("Calculating seed #", seed_num)) + report("Calculating seed #{seed_num}") # find teams at this seed update <- teams %>% @@ -96,5 +97,8 @@ compute_conference_seeds <- function(teams, mutate(exit = ifelse(is.na(seed), max_reg_week, NA_real_)) %>% select(-max_reg_week) - return(list(standings = teams, h2h = h2h)) + list( + "standings" = tibble::as_tibble(teams), + "h2h" = tibble::as_tibble(h2h) + ) } diff --git a/R/compute_division_ranks.R b/R/compute_division_ranks.R index 8bc168e..1dda54d 100644 --- a/R/compute_division_ranks.R +++ b/R/compute_division_ranks.R @@ -70,9 +70,9 @@ compute_division_ranks <- function(games, h2h = NULL) { # catch invalid input if (!isTRUE(tiebreaker_depth %in% 1:3)) { - stop( - "The argument `tiebreaker_depth` has to be", - "a single value in the range of 1-3!" + cli::cli_abort( + "The argument {.arg tiebreaker_depth} has to be \\ + a single value in the range of 1-3!" ) } @@ -190,7 +190,7 @@ compute_division_ranks <- function(games, while (any(is.na(teams$div_rank))) { # increment division rank dr <- dr + 1 - report(paste0("Calculating division rank #", dr)) + report("Calculating division rank #{dr}") # update teams with this rank update <- teams %>% @@ -213,5 +213,8 @@ compute_division_ranks <- function(games, teams <- teams %>% mutate(max_reg_week = max_reg_week) - return(list(standings = teams, h2h = h2h)) + list( + "standings" = tibble::as_tibble(teams), + "h2h" = tibble::as_tibble(h2h) + ) } diff --git a/R/compute_draft_order.R b/R/compute_draft_order.R index 2eb27e9..7f167e2 100644 --- a/R/compute_draft_order.R +++ b/R/compute_draft_order.R @@ -60,25 +60,27 @@ compute_draft_order <- function(teams, } if (!any(games$game_type %in% "SB")) { - stop( - "Can't compute draft order for an incomplete season. It looks like the ", - "`games` data frame is missing the game_type 'SB'!" + cli::cli_abort( + "Can't compute draft order for an incomplete season. It looks like the \\ + {.arg games} dataframe is missing the game_type {.val SB}!" ) } else if (any(is.na(games$result[games$game_type == "SB"]))){ - stop( - "Can't compute draft order for an incomplete season. It looks like the ", - "`games` data frame is missing the result for game_type 'SB'!" + cli::cli_abort( + "Can't compute draft order for an incomplete season. It looks like the \\ + {.arg games} dataframe is missing the result for game_type {.val SB}!" ) } if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) { - stop( - "You asked for tiebreakers but the argument `h2h` is NULL. ", - "Did you forget to pass the `h2h` data frame? It is computed with the ", - "function `compute_division_ranks()`." + cli::cli_abort( + "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ + Did you forget to pass the {.val h2h} data frame? It is computed with \\ + the function {.fn compute_division_ranks}." ) } + games <- strip_nflverse_attributes(games) + if (any(is.na(teams$exit))){ # week tracker week_num <- games %>% @@ -171,7 +173,7 @@ compute_draft_order <- function(teams, for (do_num in rev(seq_len(max_do_num))) { # progress - report(paste0("Calculating draft order #", do_num)) + report("Calculating draft order #{do_num}") # teams we can update update <- teams %>% @@ -193,12 +195,13 @@ compute_draft_order <- function(teams, # playoff error? if (any(is.na(teams$draft_order))) { - stop( - "The playoff games did not function normally. Make sure that either `fresh_season` ", - "or `fresh_playoffs` to `TRUE`, or have playoff_seeds match the correct number of ", - "seeds for the season being simulated." + cli::cli_abort( + "The playoff games did not function normally. Make sure to set either \\ + {.arg fresh_season} or {.arg fresh_playoffs} to {.val TRUE}, or have \\ + {.arg playoff_seeds} match the correct number of seeds for the season \\ + being simulated." ) } - return(teams) + tibble::as_tibble(teams) } diff --git a/R/utils.R b/R/utils.R index 31fc1b4..2e53ab9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -30,3 +30,12 @@ is_single_digit_numeric <- function(x) is.numeric(x) && length(x) == 1L && !is.n # Identify sessions with sequential future resolving is_sequential <- function() inherits(future::plan(), "sequential") + +# strip nflverse attributes from games dataframe as they are misleading +# .internal.selfref is a data.table attribute that is not necessary in this case +strip_nflverse_attributes <- function(df){ + input_attrs <- attributes(df) |> names() + input_remove <- input_attrs[grepl("nflverse|.internal.selfref", input_attrs)] + attributes(df)[input_remove] <- NULL + df +} diff --git a/tests/testthat/test-ranks_seeds_draftorder.R b/tests/testthat/test-ranks_seeds_draftorder.R index b8d48f2..58d42b9 100644 --- a/tests/testthat/test-ranks_seeds_draftorder.R +++ b/tests/testthat/test-ranks_seeds_draftorder.R @@ -7,9 +7,9 @@ test_that("compute_division_ranks() works for multiple seasons", { ref <- readRDS("reference_div_ranks.rds") div_ranks <- g %>% - nflseedR::compute_division_ranks() + compute_division_ranks() - expect_identical(ref, div_ranks$standings) + expect_identical(div_ranks$standings, ref) }) test_that("compute_conference_seeds() works for multiple seasons", { @@ -19,10 +19,10 @@ test_that("compute_conference_seeds() works for multiple seasons", { ref <- readRDS("reference_conf_seeds.rds") conf_seeds <- g %>% - nflseedR::compute_division_ranks() %>% - nflseedR::compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) + compute_division_ranks() %>% + compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) - expect_identical(ref, conf_seeds$standings) + expect_identical(conf_seeds$standings, ref) }) test_that("compute_draft_order() works for multiple seasons", { @@ -32,9 +32,9 @@ test_that("compute_draft_order() works for multiple seasons", { ref <- readRDS("reference_draft_order.rds") draft_order <- g %>% - nflseedR::compute_division_ranks() %>% - nflseedR::compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) %>% - nflseedR::compute_draft_order(games = g, h2h = .$h2h) + compute_division_ranks() %>% + compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) %>% + compute_draft_order(games = g, h2h = .$h2h) - expect_identical(ref, draft_order) + expect_identical(draft_order, ref) }) From 0fa683ff1fc31be2725c9315555d65bc5b45c1f1 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 15:43:53 +0100 Subject: [PATCH 17/44] avoid pipe --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 2e53ab9..94028e1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -34,7 +34,7 @@ is_sequential <- function() inherits(future::plan(), "sequential") # strip nflverse attributes from games dataframe as they are misleading # .internal.selfref is a data.table attribute that is not necessary in this case strip_nflverse_attributes <- function(df){ - input_attrs <- attributes(df) |> names() + input_attrs <- names(attributes(df)) input_remove <- input_attrs[grepl("nflverse|.internal.selfref", input_attrs)] attributes(df)[input_remove] <- NULL df From 52dd49a13792893874a6abfcb4e91046d5145850 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 15:50:21 +0100 Subject: [PATCH 18/44] it's better to test this stuff --- tests/testthat/test-utils.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 175fc9c..157d666 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,6 +1,4 @@ test_that("is_single_digit_numeric works", { - skip_on_cran() - expect_true(is_single_digit_numeric(1234)) expect_false(is_single_digit_numeric(c(1, 2, 3, 4))) }) @@ -15,14 +13,10 @@ test_that("is_sequential works", { }) test_that("report works", { - skip_on_cran() - expect_message(report("this is a message")) }) test_that("double_games works", { - skip_on_cran() - g <- data.frame( sim = 2020, game_type = "REG", From a93f6b47255a1f6c12e03859209711a1071d4e71 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 16:08:15 +0100 Subject: [PATCH 19/44] fix broken tail method in vignette --- vignettes/articles/nflseedR.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/articles/nflseedR.Rmd b/vignettes/articles/nflseedR.Rmd index 24a1609..24e65d3 100644 --- a/vignettes/articles/nflseedR.Rmd +++ b/vignettes/articles/nflseedR.Rmd @@ -45,7 +45,7 @@ options(warn = -1) ```{r} games <- nflseedR::load_sharpe_games() -games %>% utils::tail(20) %>% knitr::kable() +games %>% dplyr::slice_tail(n = 20) %>% knitr::kable() ``` This pulls game information from the games.rds file (equivalent to the games.csv file) from From e7c035631a8f1c60b775b871a54fbb5a74cc5b37 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 16:18:17 +0100 Subject: [PATCH 20/44] news bullets --- NEWS.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 76e169b..503123a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,9 @@ # nflseedR (development version) -* `simulate_nfl()` gained the new argument `sim_include` to allow more access to what is actually being simulated. This makes it possible skip playoff simulation or the (possibly heavy) computation of draft order. -* The `summary` method `summary.nflseedR_simulation()` now hides columns where all values are `NA`. This is useful if `simulate_nfl()` skips the postseason or draftorder. The method also reformats the number of simulations in the subtitle, e.g. from "10000" to "10k" (this requires scales >= 1.2.0, but it is a good idea to update scales anyways). +* `simulate_nfl()` gained the new argument `sim_include` to allow more access to what is actually being simulated. This makes it possible skip playoff simulation or the (possibly heavy) computation of draft order. (#34) +* The `summary` method `summary.nflseedR_simulation()` now hides columns where all values are `NA`. This is useful if `simulate_nfl()` skips the postseason or draft order. The method also reformats the number of simulations in the subtitle, e.g. from "10000" to "10k" (this requires scales >= 1.2.0, but it is a good idea to update scales anyways). (#35) +* `simulate_nfl()` now uses data.table to combine simulation rounds data. This is a significant performance improvement. (#36) +* Lots of internal improvements to reduce package dependencies and messaging. (#36) # nflseedR 1.1.0 From e2b352ea3e8684854312e2106f6e7a9a7203e7c1 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 9 Dec 2022 16:21:42 +0100 Subject: [PATCH 21/44] news update --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 503123a..506b38f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * `simulate_nfl()` gained the new argument `sim_include` to allow more access to what is actually being simulated. This makes it possible skip playoff simulation or the (possibly heavy) computation of draft order. (#34) * The `summary` method `summary.nflseedR_simulation()` now hides columns where all values are `NA`. This is useful if `simulate_nfl()` skips the postseason or draft order. The method also reformats the number of simulations in the subtitle, e.g. from "10000" to "10k" (this requires scales >= 1.2.0, but it is a good idea to update scales anyways). (#35) -* `simulate_nfl()` now uses data.table to combine simulation rounds data. This is a significant performance improvement. (#36) +* `simulate_nfl()` now uses data.table to combine simulation rounds data. This is a significant performance improvement. The returned list `"sim_params"` now includes the package version of nflseedR (for debugging) and the current system time when the simulation was finished. (#36) * Lots of internal improvements to reduce package dependencies and messaging. (#36) # nflseedR 1.1.0 From 19cdfba9aa31eb5c6fc41b810e7af62376d34f28 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:24:56 +0100 Subject: [PATCH 22/44] avoid repeated double_games calls --- R/sim_helper.R | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/R/sim_helper.R b/R/sim_helper.R index 757b108..c3335b4 100644 --- a/R/sim_helper.R +++ b/R/sim_helper.R @@ -266,16 +266,24 @@ simulate_round <- function(sim_round, # process any new games return_value <- - simulate_week(teams, games, week_num, test_week, ...) + simulate_week(teams = teams, + games = games, + week_num = week_num, + process_games = process_games, + test_week = test_week, + .debug = .debug, + ...) if (!is.null(test_week) && week_num == test_week) { return(return_value) } list[teams, games] <- return_value - # record losers - teams <- games %>% + week_games_doubled <- games %>% filter(week == week_num) %>% - double_games() %>% + double_games() + + # record losers + teams <- week_games_doubled %>% filter(outcome == 0) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -285,9 +293,7 @@ simulate_round <- function(sim_round, # if super bowl, record winner if (any(playoff_teams$conf == "SB")) { # super bowl winner exit is +1 to SB week - teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + teams <- week_games_doubled %>% filter(outcome == 1) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -296,9 +302,7 @@ simulate_round <- function(sim_round, } # filter to winners or byes - playoff_teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + playoff_teams <- week_games_doubled %>% right_join(playoff_teams, by = c("sim", "team")) %>% filter(is.na(result) | result > 0) %>% select(sim, conf, seed, team) %>% From f4fe2d68f8ccefcf633876f9fb705687e065b10c Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:25:23 +0100 Subject: [PATCH 23/44] catch data.tables --- R/sim_helper.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/sim_helper.R b/R/sim_helper.R index c3335b4..8760a5a 100644 --- a/R/sim_helper.R +++ b/R/sim_helper.R @@ -336,7 +336,7 @@ simulate_round <- function(sim_round, } else { if (!is_tibble(teams)) teams <- teams$standings teams$draft_order <- NA_real_ - teams <- teams %>% + teams <- tibble::as_tibble(teams) %>% dplyr::select( dplyr::any_of(c( "sim", "team", "conf", "division", "games", From 17bea19c236f5b524b25328355801a0009e869cc Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:25:55 +0100 Subject: [PATCH 24/44] import data.table, avoid function maksing through dplyr drop tidyr --- R/nflseedR-package.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/nflseedR-package.R b/R/nflseedR-package.R index 88b1831..b4121b4 100644 --- a/R/nflseedR-package.R +++ b/R/nflseedR-package.R @@ -4,10 +4,12 @@ # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start -#' @import dplyr +#' @import data.table #' @import gsubfn +#' @importFrom dplyr select mutate rename left_join inner_join n arrange group_by +#' @importFrom dplyr ungroup filter case_when summarize pull right_join everything +#' @importFrom dplyr slice bind_rows row_number #' @importFrom cli symbol -#' @importFrom data.table rbindlist fread #' @importFrom furrr future_map furrr_options #' @importFrom future plan #' @importFrom magrittr %>% @@ -15,7 +17,6 @@ #' @importFrom purrr pluck #' @importFrom rlang inform #' @importFrom stats rnorm -#' @importFrom tibble is_tibble -#' @importFrom tidyr pivot_longer +#' @importFrom tibble is_tibble tibble ## usethis namespace: end NULL From 921066064241d61e1cca6535f4c6f280cd881b26 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:26:13 +0100 Subject: [PATCH 25/44] double games lives somewhere else --- R/utils.R | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/R/utils.R b/R/utils.R index 94028e1..751795b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -7,25 +7,6 @@ report <- function(msg, .cli_fct(c(format(Sys.time(), '%H:%M:%S'), " | ", msg), ..., .envir = .envir) } -# this makes it so there's two rows per game (one/team) -double_games <- function(g) { - g1 <- g %>% - select(sim, game_type, week, away_team, home_team, result) %>% - rename(team = away_team, opp = home_team) %>% - mutate(result = -1 * result) - g2 <- g %>% - select(sim, game_type, week, away_team, home_team, result) %>% - rename(team = home_team, opp = away_team) - g <- bind_rows(g1, g2) %>% - mutate(outcome = case_when( - result > 0 ~ 1, - result < 0 ~ 0, - result == 0 ~ 0.5, - TRUE ~ NA_real_ - )) - return(g) -} - is_single_digit_numeric <- function(x) is.numeric(x) && length(x) == 1L && !is.na(x) # Identify sessions with sequential future resolving From 37940728e661a550c779a25004fbda1d782c5abf Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:26:28 +0100 Subject: [PATCH 26/44] create nflseedR environment --- R/zzz.R | 1 + 1 file changed, 1 insertion(+) create mode 100644 R/zzz.R diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..b0b1bf0 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1 @@ +.nflseedR_env <- new.env(parent = emptyenv()) From ca1ac703b6cf04e533f600f3c48d4f6213af3cea Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:27:01 +0100 Subject: [PATCH 27/44] drop tidyr increment version heavily because it's major --- DESCRIPTION | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2ef0a42..802bed9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: nflseedR Title: Functions to Efficiently Simulate and Evaluate NFL Seasons -Version: 1.1.0.9002 +Version: 1.1.0.9100 Authors@R: c( person("Lee", "Sharpe", role = c("aut", "cph")), person("Sebastian", "Carl", , "mrcaseb@gmail.com", role = c("cre", "aut")) @@ -24,8 +24,7 @@ Imports: progressr, purrr, rlang, - tibble, - tidyr + tibble Suggests: gt, knitr, @@ -36,4 +35,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 From d24eebb0d3b8e8a86b72dced63c0bb41f48571af Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:27:19 +0100 Subject: [PATCH 28/44] redocument --- NAMESPACE | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 32fdc03..52b7e5e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,11 +7,27 @@ export(compute_draft_order) export(load_schedules) export(load_sharpe_games) export(simulate_nfl) -import(dplyr) +import(data.table) import(gsubfn) importFrom(cli,symbol) -importFrom(data.table,fread) -importFrom(data.table,rbindlist) +importFrom(dplyr,arrange) +importFrom(dplyr,bind_rows) +importFrom(dplyr,case_when) +importFrom(dplyr,everything) +importFrom(dplyr,filter) +importFrom(dplyr,group_by) +importFrom(dplyr,inner_join) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,n) +importFrom(dplyr,pull) +importFrom(dplyr,rename) +importFrom(dplyr,right_join) +importFrom(dplyr,row_number) +importFrom(dplyr,select) +importFrom(dplyr,slice) +importFrom(dplyr,summarize) +importFrom(dplyr,ungroup) importFrom(furrr,furrr_options) importFrom(furrr,future_map) importFrom(future,plan) @@ -21,4 +37,4 @@ importFrom(purrr,pluck) importFrom(rlang,inform) importFrom(stats,rnorm) importFrom(tibble,is_tibble) -importFrom(tidyr,pivot_longer) +importFrom(tibble,tibble) From d0d2208dea1c49d7730552720923dcb3f7980b67 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:28:41 +0100 Subject: [PATCH 29/44] rewrite double games and h2h in data.table and save h2h in pkg environment h2h has a new structure and is now only holding games that are listed in double games --- R/environment_data.R | 63 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 R/environment_data.R diff --git a/R/environment_data.R b/R/environment_data.R new file mode 100644 index 0000000..3a3645e --- /dev/null +++ b/R/environment_data.R @@ -0,0 +1,63 @@ +# this makes it so there's two rows per game (one/team) +#' @import data.table +double_games <- function(g){ + setDT(g) + away <- g[,list(sim, game_type, week, team = away_team, opp = home_team, result = -result)] + home <- g[,list(sim, game_type, week, team = home_team, opp = away_team, result)] + out <- rbind(away, home) + out[, outcome := fcase( + is.na(result), NA_real_, + result > 0, 1, + result < 0, 0, + default = 0.5 + )] + tibble::as_tibble(out) +} + +#' @import data.table +compute_h2h <- function(gd, update = TRUE){ + if (!".h2h" %in% ls(envir = .nflseedR_env, all.names = !update)){ + report("Calculating head to head") + setDT(gd, key = c("sim", "team", "opp")) + out <- gd[game_type == "REG", list( + h2h_games = .N, + h2h_wins = sum(outcome), + h2h_played = fifelse(.N > 0, 1, 0) + ), by = c("sim", "team", "opp")] + assign(".h2h", out, envir = .nflseedR_env) + } + get(".h2h", envir = .nflseedR_env) +} + + +# Unused data.frame variants ---------------------------------------------- + +# double_games.data.frame <- function(g) { +# g1 <- g %>% +# select(sim, game_type, week, away_team, home_team, result) %>% +# rename(team = away_team, opp = home_team) %>% +# mutate(result = -1 * result) +# g2 <- g %>% +# select(sim, game_type, week, away_team, home_team, result) %>% +# rename(team = home_team, opp = away_team) +# g <- bind_rows(g1, g2) %>% +# mutate(outcome = case_when( +# result > 0 ~ 1, +# result < 0 ~ 0, +# result == 0 ~ 0.5, +# TRUE ~ NA_real_ +# )) +# g +# } + +# compute_h2h.data.frame <- function(gd) { +# gd %>% +# filter(game_type == "REG") %>% +# group_by(sim, team, opp) %>% +# summarize( +# h2h_games = n(), +# h2h_wins = sum(outcome, na.rm = TRUE), +# h2h_played = ifelse(h2h_games > 0, 1, 0) +# ) %>% +# ungroup() +# } From 7c18262751e5a5bac2649c7835ac78452bcc14db Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:29:25 +0100 Subject: [PATCH 30/44] gotta left_join and drop NAs because of the new structure of h2h --- R/conference_tiebreaker.R | 12 ++++++------ R/division_tiebreaker.R | 10 +++++----- R/draft_tiebreaker.R | 10 +++++----- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/conference_tiebreaker.R b/R/conference_tiebreaker.R index 69b2ac7..c93dce0 100644 --- a/R/conference_tiebreaker.R +++ b/R/conference_tiebreaker.R @@ -47,16 +47,16 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { ) %>% rename(opp = team_opp) %>% filter(team != opp) %>% - inner_join(h2h, by = c("sim", "team", "opp")) %>% + left_join(h2h, by = c("sim", "team", "opp")) %>% group_by( sim, conf, division, div_winner, div_best_left, team, conf_pct, sov, sos, tied_teams ) %>% summarize(value = case_when( - max(tied_teams) < min_tied ~ NA_real_, # not enough tied teams - sum(h2h_games) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team - sum(h2h_wins) == 0 ~ -1, # got swept by other tied teams - sum(h2h_wins) == (max(tied_teams) - 1) ~ 1, # swept other tied teams + max(tied_teams, na.rm = TRUE) < min_tied ~ NA_real_, # not enough tied teams + sum(h2h_games, na.rm = TRUE) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team + sum(h2h_wins, na.rm = TRUE) == 0 ~ -1, # got swept by other tied teams + sum(h2h_wins, na.rm = TRUE) == (max(tied_teams) - 1) ~ 1, # swept other tied teams TRUE ~ 0, # split vs. other tied teams )) %>% ungroup() %>% @@ -81,7 +81,7 @@ break_conference_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { # common games if (isTRUE(.debug)) report("CONF ({min_tied}): Common Record") list[u, tied] <- tied %>% - inner_join(h2h, by = c("sim", "team")) %>% + left_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% group_by(sim, conf, opp) %>% mutate(common = (tied_teams == n())) %>% diff --git a/R/division_tiebreaker.R b/R/division_tiebreaker.R index b873061..d26788c 100644 --- a/R/division_tiebreaker.R +++ b/R/division_tiebreaker.R @@ -31,12 +31,12 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { ) %>% rename(opp = team_opp) %>% filter(team != opp) %>% - inner_join(h2h, by = c("sim", "team", "opp")) %>% + left_join(h2h, by = c("sim", "team", "opp")) %>% group_by(sim, division, team, div_pct, conf_pct, sov, sos, tied_teams) %>% summarize(value = case_when( - max(tied_teams) < min_tied ~ NA_real_, - sum(h2h_games) == 0 ~ 0.5, - TRUE ~ sum(h2h_wins) / sum(h2h_games) + max(tied_teams, na.rm = TRUE) < min_tied ~ NA_real_, + sum(h2h_games, na.rm = TRUE) == 0 ~ 0.5, + TRUE ~ sum(h2h_wins, na.rm = TRUE) / sum(h2h_games, na.rm = TRUE) )) %>% ungroup() %>% process_div_ties(u, r) @@ -60,7 +60,7 @@ break_division_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { # common games if (isTRUE(.debug)) report("DIV ({min_tied}): Common Record") list[u, tied] <- tied %>% - inner_join(h2h, by = c("sim", "team")) %>% + left_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% group_by(sim, division, opp) %>% mutate(common = (tied_teams == n())) %>% diff --git a/R/draft_tiebreaker.R b/R/draft_tiebreaker.R index 001b83c..218d714 100644 --- a/R/draft_tiebreaker.R +++ b/R/draft_tiebreaker.R @@ -64,12 +64,12 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { inner_join(tied %>% select(sim, team), by = c("sim"), suffix = c("", "_opp")) %>% rename(opp = team_opp) %>% filter(team != opp) %>% - inner_join(h2h, by = c("sim", "team", "opp")) %>% + left_join(h2h, by = c("sim", "team", "opp")) %>% group_by(sim, team, sov, tied_teams) %>% summarize(value = case_when( - sum(h2h_games) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team - sum(h2h_wins) == 0 ~ -1, # got swept by other tied teams - sum(h2h_wins) == (max(tied_teams) - 1) ~ 1, # swept other tied teams + sum(h2h_games, na.rm = TRUE) < (max(tied_teams) - 1) ~ 0, # didn't play vs. each other tied team + sum(h2h_wins, na.rm = TRUE) == 0 ~ -1, # got swept by other tied teams + sum(h2h_wins, na.rm = TRUE) == (max(tied_teams) - 1) ~ 1, # swept other tied teams TRUE ~ 0, # won some, lost others )) %>% ungroup() %>% @@ -82,7 +82,7 @@ break_draft_ties <- function(u, r, h2h, tb_depth, .debug = FALSE) { # common games if (isTRUE(.debug)) report("DRAFT: Common Record") list[u, tied] <- tied %>% - inner_join(h2h, by = c("sim", "team")) %>% + left_join(h2h, by = c("sim", "team")) %>% filter(h2h_played == 1) %>% group_by(sim, opp) %>% mutate(common = (tied_teams == n())) %>% From 8d37e233832c5f5b6e807f437cde23b6158f1028 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:30:25 +0100 Subject: [PATCH 31/44] make process games available in pkg namespace and create a data.table variant --- R/simulate_nfl.R | 317 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 214 insertions(+), 103 deletions(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index a94b5f9..f0297a2 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -129,109 +129,7 @@ simulate_nfl <- function(nfl_season = NULL, # Define simple estimate and simulate functions - if (is.null(process_games)) { - process_games <- function(teams, games, week_num, ...) { - # teams = teams data - # games = games data - # - # this example estimates at PK/0 and 50% - # estimate = is the median spread expected (positive = home team favored) - # wp = is the probability of the team winning the game - # - # only simulate games through week week_num - # only simulate games with is.na(result) - # result = how many points home team won by - - # round out (away from zero) - round_out <- function(x) { - x[!is.na(x) & x < 0] <- floor(x[!is.na(x) & x < 0]) - x[!is.na(x) & x > 0] <- ceiling(x[!is.na(x) & x > 0]) - return(x) - } - - # get elo if not in teams data already - if (!("elo" %in% colnames(teams))) { - args <- list(...) - if ("elo" %in% names(args)) { - # pull from custom arguments - teams <- teams %>% - dplyr::inner_join(args$elo %>% select(team, elo), by = c("team" = "team")) - } else { - # start everyone at a random default elo - ratings <- tibble( - team = unique(teams$team), - elo = rnorm(length(unique(team)), 1500, 150) - ) - teams <- teams %>% - dplyr::inner_join(ratings, by = "team") - } - } - - # pull ratings from teams data - ratings <- teams %>% select(sim, team, elo) - - # mark estimate, wp, and result for games - games <- games %>% - dplyr::inner_join(ratings, by = c("sim" = "sim", "away_team" = "team")) %>% - dplyr::rename(away_elo = elo) %>% - dplyr::inner_join(ratings, by = c("sim" = "sim", "home_team" = "team")) %>% - dplyr::rename(home_elo = elo) %>% - dplyr::mutate( - elo_diff = home_elo - away_elo, - elo_diff = elo_diff + ifelse(location == "Home", 20, 0), - elo_diff = elo_diff + (home_rest - away_rest) / 7 * 25, - elo_diff = elo_diff * ifelse(game_type == "REG", 1, 1.2), - wp = 1 / (10^(-elo_diff / 400) + 1), - estimate = elo_diff / 25, - result = case_when( - is.na(result) & week == week_num ~ - as.integer(round_out(rnorm(n(), estimate, 13))), - TRUE ~ as.integer(result) - ), - outcome = case_when( - is.na(result) ~ NA_real_, - result > 0 ~ 1, - result < 0 ~ 0, - TRUE ~ 0.5 - ), - elo_input = case_when( - is.na(result) ~ NA_real_, - result > 0 ~ elo_diff * 0.001 + 2.2, - result < 0 ~ -elo_diff * 0.001 + 2.2, - TRUE ~ 1.0, - ), - elo_mult = log(pmax(abs(result), 1) + 1.0) * 2.2 / elo_input, - elo_shift = 20 * elo_mult * (outcome - wp) - ) %>% - dplyr::select( - -away_elo, -home_elo, -elo_diff, -wp, -estimate, - -outcome, -elo_input, -elo_mult - ) - - # apply elo shifts - teams <- teams %>% - dplyr::left_join(games %>% - filter(week == week_num) %>% - select(sim, away_team, elo_shift), - by = c("sim" = "sim", "team" = "away_team") - ) %>% - dplyr::mutate(elo = elo - ifelse(!is.na(elo_shift), elo_shift, 0)) %>% - dplyr::select(-elo_shift) %>% - dplyr::left_join(games %>% - filter(week == week_num) %>% - select(sim, home_team, elo_shift), - by = c("sim" = "sim", "team" = "home_team") - ) %>% - dplyr::mutate(elo = elo + ifelse(!is.na(elo_shift), elo_shift, 0)) %>% - dplyr::select(-elo_shift) - - # remove elo shift - games <- games %>% - dplyr::select(-elo_shift) - - return(list(teams = teams, games = games)) - } - } + if (is.null(process_games)) process_games <- default_process_games # Catch invalid input @@ -478,3 +376,216 @@ simulate_nfl <- function(nfl_season = NULL, out } + + +default_process_games <- function(teams, games, week_num, ...) { + # teams = teams data + # games = games data + # + # this example estimates at PK/0 and 50% + # estimate = is the median spread expected (positive = home team favored) + # wp = is the probability of the team winning the game + # + # only simulate games through week week_num + # only simulate games with is.na(result) + # result = how many points home team won by + + # round out (away from zero) + round_out <- function(x) { + x[!is.na(x) & x < 0] <- floor(x[!is.na(x) & x < 0]) + x[!is.na(x) & x > 0] <- ceiling(x[!is.na(x) & x > 0]) + return(x) + } + + # get elo if not in teams data already + if (!("elo" %in% colnames(teams))) { + args <- list(...) + if ("elo" %in% names(args)) { + # pull from custom arguments + teams <- teams %>% + dplyr::inner_join(args$elo %>% select(team, elo), by = c("team" = "team")) + } else { + # start everyone at a random default elo + ratings <- tibble( + team = unique(teams$team), + elo = rnorm(length(unique(team)), 1500, 150) + ) + teams <- teams %>% + dplyr::inner_join(ratings, by = "team") + } + } + + # pull ratings from teams data + ratings <- teams %>% select(sim, team, elo) + + # mark estimate, wp, and result for games + games <- games %>% + dplyr::inner_join(ratings, by = c("sim" = "sim", "away_team" = "team")) %>% + dplyr::rename(away_elo = elo) %>% + dplyr::inner_join(ratings, by = c("sim" = "sim", "home_team" = "team")) %>% + dplyr::rename(home_elo = elo) %>% + dplyr::mutate( + elo_diff = home_elo - away_elo, + elo_diff = elo_diff + ifelse(location == "Home", 20, 0), + elo_diff = elo_diff + (home_rest - away_rest) / 7 * 25, + elo_diff = elo_diff * ifelse(game_type == "REG", 1, 1.2), + wp = 1 / (10^(-elo_diff / 400) + 1), + estimate = elo_diff / 25, + result = case_when( + is.na(result) & week == week_num ~ + as.integer(round_out(rnorm(n(), estimate, 13))), + TRUE ~ as.integer(result) + ), + outcome = case_when( + is.na(result) ~ NA_real_, + result > 0 ~ 1, + result < 0 ~ 0, + TRUE ~ 0.5 + ), + elo_input = case_when( + is.na(result) ~ NA_real_, + result > 0 ~ elo_diff * 0.001 + 2.2, + result < 0 ~ -elo_diff * 0.001 + 2.2, + TRUE ~ 1.0, + ), + elo_mult = log(pmax(abs(result), 1) + 1.0) * 2.2 / elo_input, + elo_shift = 20 * elo_mult * (outcome - wp) + ) %>% + dplyr::select( + -away_elo, -home_elo, -elo_diff, -wp, -estimate, + -outcome, -elo_input, -elo_mult + ) + + # apply elo shifts + teams <- teams %>% + dplyr::left_join(games %>% + filter(week == week_num) %>% + select(sim, away_team, elo_shift), + by = c("sim" = "sim", "team" = "away_team") + ) %>% + dplyr::mutate(elo = elo - ifelse(!is.na(elo_shift), elo_shift, 0)) %>% + dplyr::select(-elo_shift) %>% + dplyr::left_join(games %>% + filter(week == week_num) %>% + select(sim, home_team, elo_shift), + by = c("sim" = "sim", "team" = "home_team") + ) %>% + dplyr::mutate(elo = elo + ifelse(!is.na(elo_shift), elo_shift, 0)) %>% + dplyr::select(-elo_shift) + + # remove elo shift + games <- games %>% + dplyr::select(-elo_shift) + + return(list(teams = teams, games = games)) +} + +# rewritten in data.table +default_process_games_dt <- function(teams, games, week_num, ...) { + cli::cli_progress_step( + "Compute week {.val #{week_num}}" + ) + # teams = teams data + # games = games data + # + # this example estimates at PK/0 and 50% + # estimate = is the median spread expected (positive = home team favored) + # wp = is the probability of the team winning the game + # + # only simulate games through week week_num + # only simulate games with is.na(result) + # result = how many points home team won by + + # round out (away from zero) + round_out <- function(x) { + x[!is.na(x) & x < 0] <- floor(x[!is.na(x) & x < 0]) + x[!is.na(x) & x > 0] <- ceiling(x[!is.na(x) & x > 0]) + return(x) + } + + setDT(games, key = c("sim", "week")) + setDT(teams, key = c("sim", "team")) + + # get elo if not in teams data already + if (!("elo" %in% colnames(teams))) { + args <- list(...) + if ("elo" %in% names(args)) { + # pull from custom arguments + ratings <- setDT(args$elo, key = "team") + teams <- merge(teams, ratings[,.(team, elo)]) + } else { + # start everyone at a random default elo + ratings <- data.table( + team = unique(teams$team), + elo = rnorm(length(unique(teams$team)), 1500, 150), + key = "team" + ) + teams <- merge(teams, ratings) + } + } + + # merge elo values to home and away teams + games <- merge(x = games, y = teams[,.(sim, team, away_elo = elo)], + by.x = c("sim", "away_team"), + by.y = c("sim", "team"), + sort = FALSE) + games <- merge(x = games, y = teams[,.(sim, team, home_elo = elo)], + by.x = c("sim", "home_team"), + by.y = c("sim", "team"), + sort = FALSE) + + # create elo diff + games[, elo_diff := home_elo - away_elo + (home_rest - away_rest) / 7 * 25] + # adjust elo diff for location = HOME + games["Home", elo_diff := elo_diff + 20, on = "location"] + # adjust elo_diff for game type = REG + games["REG", elo_diff := elo_diff * 1.2, on = "game_type"] + # create wp and estimate + games[, `:=`(wp = 1 / (10^(-elo_diff / 400) + 1), + estimate = elo_diff / 25)] + # adjust result in current week + games[week_num == week & is.na(result), + result := as.integer(round_out(rnorm(.N, estimate, 13)))] + # compute elo shift + games[, `:=`( + outcome = fcase( + is.na(result), NA_real_, + result > 0, 1, + result < 0, 0, + default = 0.5 + ), + elo_input = fcase( + is.na(result), NA_real_, + result > 0, elo_diff * 0.001 + 2.2, + result < 0, -elo_diff * 0.001 + 2.2, + default = 1.0 + ) + )] + games[, elo_mult := log(pmax(abs(result), 1) + 1.0) * 2.2 / elo_input] + games[, elo_shift := 20 * elo_mult * (outcome - wp)] + + # drop irrelevant columns + drop_cols <- c("away_elo", "home_elo", "elo_diff", "wp", "estimate", + "outcome", "elo_input", "elo_mult") + games[, (drop_cols) := NULL] + + # apply away team elo shift + away_teams <- games[.(week_num), + .(sim, team = away_team, elo_shift = -elo_shift), + on = "week"] + teams <- merge(teams, away_teams, by = c("sim", "team"), all = TRUE) + teams[!is.na(elo_shift), elo := elo + elo_shift] + teams[, elo_shift := NULL] + + # apply home team elo shift + home_teams <- games[.(week_num), + .(sim, team = home_team, elo_shift), + on = "week"] + teams <- merge(teams, home_teams, by = c("sim", "team"), all = TRUE) + teams[!is.na(elo_shift), elo := elo + elo_shift] + teams[, elo_shift := NULL] + + # remove elo shift + games[, elo_shift := NULL] + + list("teams" = teams, "games" = games) From 47df85f8fd072ac7f4d4fa68f3244a3dcfcc14e0 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:31:21 +0100 Subject: [PATCH 32/44] rewrite simulation aggregation in data.table --- R/simulate_nfl.R | 116 +++++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 49 deletions(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index f0297a2..50f05ee 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -297,51 +297,68 @@ simulate_nfl <- function(nfl_season = NULL, # and conf columns if(sb_exit < 20) sb_exit <- NA_real_ - overall <- all_teams %>% - group_by(conf, division, team) %>% - summarize( - wins = mean(wins), - playoff = mean(!is.na(seed)), - div1 = mean(div_rank == 1), - seed1 = mean(!is.na(seed) & seed == 1), - won_conf = mean(exit >= sb_exit - 1), - won_sb = mean(exit == sb_exit), - draft1 = mean(draft_order == 1), - draft5 = mean(draft_order <= 5) - ) %>% - ungroup() + overall <- all_teams[, list( + wins = mean(wins), + playoff = mean(!is.na(seed)), + div1 = mean(div_rank == 1), + seed1 = mean(!is.na(seed) & seed == 1), + won_conf = mean(exit >= sb_exit - 1), + won_sb = mean(exit == sb_exit), + draft1 = mean(draft_order == 1), + draft5 = mean(draft_order <= 5) + ), keyby = c("conf", "division", "team")] + + # take all teams and repeat them for each half win and repeat this for each + # simulation. The length of the half win sequence equals 2 * games + 1 + team_vec <- rep( + sort(unique(all_teams$team)), + each = (max(all_teams$games) * 2 + 1) * length(unique(all_teams$sim)) + ) - team_wins <- - tibble( - team = rep(sort(unique(all_teams$team)), each = max(all_teams$games) * 2 + 1), - wins = rep(seq(0, max(all_teams$games), 0.5), length(unique(all_teams$team))) - ) %>% - inner_join( - all_teams %>% select(team, true_wins), - by = c("team") - ) %>% - group_by(team, wins) %>% - summarize( - over_prob = mean(true_wins > wins), - under_prob = mean(true_wins < wins) - ) %>% - ungroup() - - game_summary <- - all_games %>% - group_by(game_type, week, away_team, home_team) %>% - summarise( - away_wins = sum(result < 0), - home_wins = sum(result > 0), - ties = sum(result == 0), - result = mean(result), - # != number of simulations in the postseason - games_played = away_wins + home_wins + ties, - away_percentage = (away_wins + 0.5 * ties) / games_played, - home_percentage = (home_wins + 0.5 * ties) / games_played - ) %>% - ungroup() %>% - arrange(week) + # Create the win sequence vector and repeat every win for every sim + # Take this and repeat it for every team + wins_vec <- rep( + seq(0, max(all_teams$games), 0.5), + each = length(unique(all_teams$sim)) + ) %>% + rep(length(unique(all_teams$team))) + + # create sequence of sims and repeat it for every half win and for every team + sims_vec <- rep( + sort(unique(all_teams$sim)), + (max(all_teams$games) * 2 + 1) * length(unique(all_teams$team)) + ) + + team_wins <- data.table( + sim = sims_vec, + team = team_vec, + wins = wins_vec, + key = c("team", "wins") + ) %>% + merge( + all_teams[,list(sim, team, true_wins)], + by = c("sim", "team"), + sort = FALSE + ) + + team_wins <- team_wins[,list( + over_prob = mean(true_wins > wins), + under_prob = mean(true_wins < wins) + ), keyby = c("team", "wins")] + + + ## Game Summary + game_summary <- all_games[,list( + away_wins = sum(result < 0), + home_wins = sum(result > 0), + ties = sum(result == 0), + result = mean(result) + ), keyby = c("game_type", "week", "away_team", "home_team")] + game_summary[, games_played := away_wins + home_wins + ties] + game_summary[,`:=`( + away_percentage = (away_wins + 0.5 * ties) / games_played, + home_percentage = (home_wins + 0.5 * ties) / games_played + )] report("DONE!") @@ -349,11 +366,11 @@ simulate_nfl <- function(nfl_season = NULL, out <- structure( list( - "teams" = all_teams, - "games" = all_games, - "overall" = overall, - "team_wins" = team_wins, - "game_summary" = game_summary, + "teams" = tibble::as_tibble(all_teams), + "games" = tibble::as_tibble(all_games), + "overall" = tibble::as_tibble(overall), + "team_wins" = tibble::as_tibble(team_wins), + "game_summary" = tibble::as_tibble(game_summary), "sim_params" = list( "nfl_season" = nfl_season, "playoff_seeds" = playoff_seeds, @@ -589,3 +606,4 @@ default_process_games_dt <- function(teams, games, week_num, ...) { games[, elo_shift := NULL] list("teams" = teams, "games" = games) +} From e4de13235b921894307b035d1812a43f59cd4e5e Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:31:51 +0100 Subject: [PATCH 33/44] make simulate week a separate function --- R/sim_helper.R | 121 +++---------------------------------------------- 1 file changed, 7 insertions(+), 114 deletions(-) diff --git a/R/sim_helper.R b/R/sim_helper.R index 8760a5a..0c87212 100644 --- a/R/sim_helper.R +++ b/R/sim_helper.R @@ -42,124 +42,17 @@ simulate_round <- function(sim_round, stop("`playoff_seeds` must be between 1 and ",max_seeds) } - # function to simulate a week - simulate_week <- function(teams, games, week_num, test_week, ...) { - - # recall old data for comparison - old_teams <- teams - old_games <- games %>% - rename(.old_result = result) - - # estimate and simulate games - return_value <- process_games(teams, games, week_num, ...) - - # testing? - if (!is.null(test_week) && week_num == test_week) { - return(return_value) - } - - # did we get the right data back? - problems <- c() - if (typeof(return_value) != "list") { - problems[length(problems) + 1] <- "the returned value was not a list" - } else { - if (!("teams" %in% names(return_value))) { - problems[length(problems) + 1] <- "`teams` was not in the returned list" - } else { - teams <- return_value$teams - if (!is_tibble(teams)) { - problems[length(problems) + 1] <- "`teams` was not a tibble" - } else { - if (nrow(teams) != nrow(old_teams)) { - problems[length(problems) + 1] <- paste( - "`teams` changed from", nrow(old_teams), "to", - nrow(teams), "rows", - collapse = " " - ) - } - for (cname in colnames(old_teams)) { - if (!(cname %in% colnames(teams))) { - problems[length(problems) + 1] <- paste( - "`teams` column `", cname, "` was removed" - ) - } - } - } - } - if (!("games" %in% names(return_value))) { - problems[length(problems) + 1] <- "`games` was not in the returned list" - } else { - games <- return_value$games - if (!is_tibble(games)) { - problems[length(problems) + 1] <- "`games` was not a tibble" - } else { - if (nrow(games) != nrow(old_games)) { - problems[length(problems) + 1] <- paste( - "`games` changed from", nrow(old_games), "to", - nrow(games), "rows", - collapse = " " - ) - } - for (cname in colnames(old_games)) { - if (!(cname %in% colnames(games)) && cname != ".old_result") { - problems[length(problems) + 1] <- paste( - "`teams` column `", cname, "` was removed" - ) - } - } - } - } - } - - # report data structure problems - problems <- paste(problems, collapse = ", ") - if (problems != "") { - stop( - "During Week ", week_num, ", your `process_games()` function had the ", - "following issues: ", problems, ". " - ) - } - - # identify improper results values - problems <- old_games %>% - inner_join(games, by = intersect(colnames(old_games), colnames(games))) %>% - mutate(problem = case_when( - week == week_num & is.na(result) ~ - "a result from the current week is missing", - week != week_num & !is.na(.old_result) & is.na(result) ~ - "a known result outside the current week was blanked out", - week != week_num & is.na(.old_result) & !is.na(result) ~ - "a result outside the current week was entered", - week != week_num & .old_result != result ~ - "a known result outside the current week was updated", - !is.na(.old_result) & is.na(result) ~ - "a known result was blanked out", - !is.na(result) & result == 0 & game_type != "REG" ~ - "a playoff game resulted in a tie (had result == 0)", - TRUE ~ NA_character_ - )) %>% - filter(!is.na(problem)) %>% - pull(problem) %>% - unique() %>% - paste(collapse = ", ") - - # report result value problems - if (problems != "") { - stop( - "During Week ", week_num, ", your `process_games()` function had the", - "following issues: ", problems, ". Make sure you only change results ", - "when week == week_num & is.na(result)" - ) - } - - return(list(teams = teams, games = games)) - } - # simulate remaining regular season games for (week_num in weeks_to_sim) { return_value <- - simulate_week(teams, games, week_num, test_week, ...) + simulate_week(teams = teams, + games = games, + week_num = week_num, + process_games = process_games, + test_week = test_week, + .debug = .debug, + ...) if (!is.null(test_week) && week_num == test_week) { return(return_value) } From 0aaeafc161a0e1a85d41ae294cef23e07918582d Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:33:20 +0100 Subject: [PATCH 34/44] new simulate week looks for problems only if .debug = TRUE allocates the problem vector for more speed uses cli to output problems needs some adjustments for data.table output --- R/sim_helper.R | 120 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/R/sim_helper.R b/R/sim_helper.R index 0c87212..86de852 100644 --- a/R/sim_helper.R +++ b/R/sim_helper.R @@ -243,3 +243,123 @@ simulate_round <- function(sim_round, list("teams" = teams, "games" = games) } + +# function to simulate a week +simulate_week <- function(teams, + games, + week_num, + process_games, + test_week = NULL, + .debug = FALSE, + ...) { + # estimate and simulate games + return_value <- process_games(teams, games, week_num, ...) + + # testing? + if (!is.null(test_week) && week_num == test_week) { + return(return_value) + } + + if(isTRUE(.debug) && FALSE){ + # recall old data for comparison + old_teams <- teams + old_games <- games %>% + rename(.old_result = result) + # did we get the right data back? + # currently, we will catch a maximum of 9 problems. Allocate the vector + problems <- vector("character", length = 9L) + i <- 0 + if (typeof(return_value) != "list") { + problems[i + 1] <- "the returned value was not a list" + } else { + if (!("teams" %in% names(return_value))) { + problems[i + 2] <- "`teams` was not in the returned list" + } else { + teams <- return_value$teams + if (!is_tibble(teams)) { + problems[i + 3] <- "`teams` was not a tibble" + } else { + if (nrow(teams) != nrow(old_teams)) { + problems[i + 4] <- paste( + "`teams` changed from", nrow(old_teams), "to", + nrow(teams), "rows", + collapse = " " + ) + } + for (cname in colnames(old_teams)) { + if (!(cname %in% colnames(teams))) { + problems[i + 5] <- paste( + "`teams` column `", cname, "` was removed" + ) + } + } + } + } + if (!("games" %in% names(return_value))) { + problems[i + 6] <- "`games` was not in the returned list" + } else { + games <- return_value$games + if (!is_tibble(games)) { + problems[i + 7] <- "`games` was not a tibble" + } else { + if (nrow(games) != nrow(old_games)) { + problems[i + 8] <- paste( + "`games` changed from", nrow(old_games), "to", + nrow(games), "rows", + collapse = " " + ) + } + for (cname in colnames(old_games)) { + if (!(cname %in% colnames(games)) && cname != ".old_result") { + problems[i + 9] <- paste( + "`teams` column `", cname, "` was removed" + ) + } + } + } + } + } + + # report data structure problems + problems <- problems[problems != ""] + if (length(problems)) { + cli::cli_abort( + "During Week {week_num}, your {.code process_games} function had the \\ + following issues: {problems}." + ) + } + + # identify improper results values + problems <- old_games %>% + inner_join(games, by = intersect(colnames(old_games), colnames(games))) %>% + mutate(problem = case_when( + week == week_num & is.na(result) ~ + "a result from the current week is missing", + week != week_num & !is.na(.old_result) & is.na(result) ~ + "a known result outside the current week was blanked out", + week != week_num & is.na(.old_result) & !is.na(result) ~ + "a result outside the current week was entered", + week != week_num & .old_result != result ~ + "a known result outside the current week was updated", + !is.na(.old_result) & is.na(result) ~ + "a known result was blanked out", + !is.na(result) & result == 0 & game_type != "REG" ~ + "a playoff game resulted in a tie (had result == 0)", + TRUE ~ NA_character_ + )) %>% + filter(!is.na(problem)) %>% + pull(problem) %>% + unique() + + # report result value problems + if (problems != "") { + cli::cli_abort( + "During Week {week_num}, your {.code process_games} function had the \\ + following issues: {problems}. Make sure you only change results \\ + when {.code week == week_num} & {.code is.na(result)}" + ) + } + } + + list("teams" = return_value$teams, "games" = return_value$games) +} From 8f0c1892c46bb68712f2504722c5ee2f696f57a8 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:33:46 +0100 Subject: [PATCH 35/44] avoid multiple double_games calls --- R/compute_draft_order.R | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/R/compute_draft_order.R b/R/compute_draft_order.R index 7f167e2..0aad495 100644 --- a/R/compute_draft_order.R +++ b/R/compute_draft_order.R @@ -114,10 +114,12 @@ compute_draft_order <- function(teams, # playoff weeks for (week_num in first_playoff_week:week_max) { - # record losers - teams <- games %>% + week_games_doubled <- games %>% filter(week == week_num) %>% - double_games() %>% + double_games() + + # record losers + teams <- week_games_doubled %>% filter(outcome == 0) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -127,9 +129,7 @@ compute_draft_order <- function(teams, # if super bowl, record winner if (any(playoff_teams$conf == "SB")) { # super bowl winner exit is +1 to SB week - teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + teams <- week_games_doubled %>% filter(outcome == 1) %>% select(sim, team, outcome) %>% right_join(teams, by = c("sim", "team")) %>% @@ -138,9 +138,7 @@ compute_draft_order <- function(teams, } # filter to winners or byes - playoff_teams <- games %>% - filter(week == week_num) %>% - double_games() %>% + playoff_teams <- week_games_doubled %>% right_join(playoff_teams, by = c("sim", "team")) %>% filter(is.na(result) | result > 0) %>% select(sim, conf, seed, team) %>% From abaef2ec08e47aa2280dd2a2093264c535b94a0f Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:35:22 +0100 Subject: [PATCH 36/44] deprecate teams argument --- R/compute_division_ranks.R | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/R/compute_division_ranks.R b/R/compute_division_ranks.R index 1dda54d..64b88e0 100644 --- a/R/compute_division_ranks.R +++ b/R/compute_division_ranks.R @@ -12,19 +12,8 @@ #' \code{\link{divisions}} for valid team abbreviations).} #' \item{result}{Equals home score - away score.} #' } -#' @param teams This parameter is optional. If it is \code{NULL} the function -#' will compute it internally, otherwise it has to be a data frame of all teams -#' contained in the \code{games} data frame repeated for each simulation ID -#' (\code{sim}). The following variables are required: -#' \describe{ -#' \item{sim}{A simulation ID. Normally 1 - n simulated seasons.} -#' \item{team}{Team abbreviation of the team (please see -#' \code{\link{divisions}} for valid team abbreviations).} -#' \item{conf}{Conference abbreviation of the team (please see -#' \code{\link{divisions}} for valid team abbreviations).} -#' \item{division}{Division of the team (please see -#' \code{\link{divisions}} for valid division names).} -#' } +#' @param teams `r lifecycle::badge("deprecated")` This argument is no longer +#' supported. Instead, the functions computes it internally. #' @param tiebreaker_depth A single value equal to 1, 2, or 3. The default is 3. The #' value controls the depth of tiebreakers that shall be applied. The deepest #' currently implemented tiebreaker is strength of schedule. The following @@ -64,7 +53,7 @@ #' options(old) #' } compute_division_ranks <- function(games, - teams = NULL, + teams = lifecycle::deprecated(), tiebreaker_depth = 3, .debug = FALSE, h2h = NULL) { From 275989b20d3bbd239f5f0d06a2f4a59e354d42c0 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:36:01 +0100 Subject: [PATCH 37/44] compute teams from games with data.table --- R/compute_division_ranks.R | 136 ++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 79 deletions(-) diff --git a/R/compute_division_ranks.R b/R/compute_division_ranks.R index 64b88e0..8a13f06 100644 --- a/R/compute_division_ranks.R +++ b/R/compute_division_ranks.R @@ -81,104 +81,85 @@ compute_division_ranks <- function(games, ) } - if (is.null(teams)) { # compute teams df from games df - pivot_games <- games %>% - select(sim, home_team, away_team) %>% - pivot_longer(cols = c("home_team", "away_team"), values_to = "team") %>% - select(sim, team) - - teams <- bind_rows( - data.frame(team = unique(games$away_team)), - data.frame(team = unique(games$home_team)) - ) %>% - distinct() %>% - left_join(nflseedR::divisions %>% select(-"sdiv"), by = "team") %>% - left_join(pivot_games, by = "team") %>% - select(sim, everything()) %>% - distinct() %>% - arrange(division, team, sim) + if (lifecycle::is_present(teams)) { + lifecycle::deprecate_warn( + when = "2.0.0", + what = "compute_division_ranks(teams)",# = 'is computed internally')" + details = "The function computes the corresponding data internally." + ) } # double games games_doubled <- double_games(games) - # record of each team report("Calculating team data") - teams <- teams %>% - inner_join(games_doubled, by = c("sim", "team")) %>% - filter(game_type == "REG") %>% - group_by(sim, conf, division, team) %>% - summarize( - games = n(), - wins = sum(outcome), - true_wins = sum(outcome == 1), - losses = sum(outcome == 0), - ties = sum(outcome == 0.5) - ) %>% - ungroup() + # record of each team + setDT(games_doubled) + team_records <- + merge( + games_doubled["REG", on = "game_type"], + data.table(nflseedR::divisions)[,sdiv:=NULL], + by = "team", + sort = FALSE + )[, + list( + games = .N, + wins = sum(outcome), + true_wins = sum(outcome == 1), + losses = sum(outcome == 0), + ties = sum(outcome == 0.5), + win_pct = sum(outcome) / .N + ), by = c("sim", "conf", "division", "team")] # add in tiebreaker info - teams <- teams %>% - inner_join(games_doubled, by = c("sim", "team")) %>% - filter(game_type == "REG") %>% - inner_join(teams, - by = c("sim" = "sim", "opp" = "team"), - suffix = c("", "_opp") - ) %>% - mutate( - win_pct = wins / games, - div_game = ifelse(division == division_opp, 1, 0), - conf_game = ifelse(conf == conf_opp, 1, 0) - ) %>% - group_by(sim, conf, division, team, games, wins, true_wins, losses, ties, win_pct) %>% - summarize( - div_pct = ifelse(sum(div_game) == 0, 0.5, - sum(div_game * outcome) / sum(div_game) - ), - conf_pct = ifelse(sum(conf_game) == 0, 0.5, - sum(conf_game * outcome) / sum(conf_game) - ), - sov = ifelse(sum(outcome == 1) == 0, 0, - sum(wins_opp * (outcome == 1)) / - sum(games_opp * (outcome == 1)) - ), - sos = sum(wins_opp) / sum(games_opp) + teams <- team_records %>% + merge( + games_doubled["REG", on = "game_type"], + by = c("sim", "team"), + sort = FALSE ) %>% - ungroup() + merge( + team_records, + by.x = c("sim", "opp"), + by.y = c("sim", "team"), + suffixes = c("", "_opp"), + sort = FALSE + ) + teams[, div_game := fifelse(division == division_opp, 1, 0)] + teams[, conf_game := fifelse(conf == conf_opp, 1, 0)] + teams <- teams[, list( + div_pct = fifelse(sum(div_game) == 0, 0.5, + sum(div_game * outcome) / sum(div_game) + ), + conf_pct = fifelse(sum(conf_game) == 0, 0.5, + sum(conf_game * outcome) / sum(conf_game) + ), + sov = fifelse(sum(outcome == 1) == 0, 0, + sum(wins_opp * (outcome == 1)) / + sum(games_opp * (outcome == 1)) + ), + sos = sum(wins_opp) / sum(games_opp) + ), by = c("sim", "conf", "division", "team", "games", "wins", + "true_wins", "losses", "ties", "win_pct")][order(sim, conf, division, team)] # below only if there are tiebreakers if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) { - report("Calculating head to head") - h2h <- teams %>% - select(sim, team) %>% - inner_join(teams %>% select(sim, team), - by = "sim", suffix = c("", "_opp") - ) %>% - rename(opp = team_opp) %>% - arrange(sim, team, opp) %>% - left_join(games_doubled %>% filter(game_type == "REG"), - by = c("sim", "team", "opp") - ) %>% - group_by(sim, team, opp) %>% - summarize( - h2h_games = sum(!is.na(outcome)), - h2h_wins = sum(outcome, na.rm = TRUE), - h2h_played = ifelse(h2h_games > 0, 1, 0) - ) %>% - ungroup() + h2h <- compute_h2h(games_doubled) } #### FIND DIVISION RANKS #### # initialize division rank - teams <- teams %>% - mutate(div_rank = NA_real_) + teams$div_rank <- NA_real_ # determine division ranks dr <- 0 while (any(is.na(teams$div_rank))) { # increment division rank dr <- dr + 1 + if(dr > 4){ + cli::cli_abort("Aborting because division rank computation entered infinite loop!") + } report("Calculating division rank #{dr}") # update teams with this rank @@ -197,10 +178,7 @@ compute_division_ranks <- function(games, select(-new_rank) } - max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE) - - teams <- teams %>% - mutate(max_reg_week = max_reg_week) + teams$max_reg_week <- max(games$week[games$game_type == "REG"], na.rm = TRUE) list( "standings" = tibble::as_tibble(teams), From fcd98cdbe01e455ed3a955e72d5d2088053a2852 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Thu, 15 Dec 2022 18:40:03 +0100 Subject: [PATCH 38/44] incorporate h2h from input or load it from environment --- R/compute_conference_seeds.R | 24 ++++++++++++++++-------- R/compute_draft_order.R | 22 ++++++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/R/compute_conference_seeds.R b/R/compute_conference_seeds.R index b7e7f56..20aa98c 100644 --- a/R/compute_conference_seeds.R +++ b/R/compute_conference_seeds.R @@ -45,7 +45,15 @@ compute_conference_seeds <- function(teams, ) } - if (!is_tibble(teams)) teams <- teams$standings + h2h_flag <- FALSE + + if (!is_tibble(teams)){ + if(is.null(h2h) & any(names(teams) == "h2h")){ + h2h <- teams$h2h + h2h_flag <- TRUE + } + teams <- teams$standings + } if (!any((names(teams) %in% "div_rank")) | !is.data.frame(teams)) { cli::cli_abort( @@ -55,15 +63,15 @@ compute_conference_seeds <- function(teams, } if(is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE){ - cli::cli_abort( - "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ - Did you forget to pass the {.val h2h} data frame? It is computed with \\ - the function {.fn compute_division_ranks}." - ) + # cli::cli_abort( + # "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ + # Did you forget to pass the {.val h2h} data frame? It is computed with \\ + # the function {.fn compute_division_ranks}." + # ) } + if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE) - teams <- teams %>% - mutate(conf_rank = NA_real_) + teams$conf_rank <- NA_real_ # seed loop for (seed_num in seq_len(playoff_seeds)) diff --git a/R/compute_draft_order.R b/R/compute_draft_order.R index 0aad495..71d31bd 100644 --- a/R/compute_draft_order.R +++ b/R/compute_draft_order.R @@ -41,7 +41,15 @@ compute_draft_order <- function(teams, ) } - if (!is_tibble(teams)) teams <- teams$standings + h2h_flag <- FALSE + + if (!is_tibble(teams)){ + if(is.null(h2h) & any(names(teams) == "h2h")){ + h2h <- teams$h2h + h2h_flag <- TRUE + } + teams <- teams$standings + } required_vars <- c( "sim", @@ -72,13 +80,15 @@ compute_draft_order <- function(teams, } if (is.null(h2h) & tiebreaker_depth > TIEBREAKERS_NONE) { - cli::cli_abort( - "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ - Did you forget to pass the {.val h2h} data frame? It is computed with \\ - the function {.fn compute_division_ranks}." - ) + # cli::cli_abort( + # "You asked for tiebreakers but the argument {.arg h2h} is {.val NULL}. \\ + # Did you forget to pass the {.val h2h} data frame? It is computed with \\ + # the function {.fn compute_division_ranks}." + # ) } + if(isFALSE(h2h_flag)) h2h <- compute_h2h(NULL, update = FALSE) + games <- strip_nflverse_attributes(games) if (any(is.na(teams$exit))){ From 73ae8b4c06dc88e47743ef5a124f11e57010b43c Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 16 Dec 2022 16:57:56 +0100 Subject: [PATCH 39/44] run tests from local games file and test h2h --- tests/testthat/games.rds | Bin 0 -> 7556 bytes tests/testthat/helpers.R | 19 ++++++---- tests/testthat/test-ranks_seeds_draftorder.R | 35 ++++++++----------- tests/testthat/test-utils.R | 28 +++++++++++++-- 4 files changed, 54 insertions(+), 28 deletions(-) create mode 100644 tests/testthat/games.rds diff --git a/tests/testthat/games.rds b/tests/testthat/games.rds new file mode 100644 index 0000000000000000000000000000000000000000..817b3d7d86b2b5d61d53181a30470162a0963ed8 GIT binary patch literal 7556 zcmZ8_XE+=Tw=Ie2z4s8otM`d6iC$ue&gh~9qxTw#i82VmAPhkgEu;4qZIod2(MIoW z^j^+;zI*Pu&vW;`z1G@$J^Ro8v7W{g;{2x`@8`_wSG?(%sG4K5*)i7Urd7 z5&=&{%R2!!&sZ4fH8*143d%GfO`*?IR^0Eag0e1xvPv@h*X%@NJ8VKbkm7ZyJ;m$m z)B4}mrQD^FU)+rRaujd&Fd=76<&&ir?HNTSB?bwk6h=9tO%@4lGB?M-`{K5`_Iq*T zddyj`wvp8RO|VcLhpC(Mp99aiH@jbW{E)jnDfqVImzZ-o^RrpqTB){8 zF7)SFYdM2O9k$! zeR~4TfT6J4IAdTnGM zhYGt)dxf3Y{rDu6=3{LtY#~&}KSD8#$KNT5!q`-oEs4Q8_ugf%4R9CVVJh*FMHjBI zEB;YGNw=MLv}V^KvaUZyO=~TvJNgyc2xW#z%0+erhpvZf1ixuM5^GbM{-JhA6k4rq48BS zP^u{DM?Et>+0G}o0?3DV@-`BpExhjwUF582ELoLRo_#CDbsh(pQWLvd;YYSg>5pHH zUUKSBQ99Y#WGBZsc(O3C2ogm@%Iv%GAVRAQ1(mj4f#{yK^)nwYT1LkN5YLGVKER_Z z{rjKp#puu0m1P!dx9Zw=dzxE|Rpq;VEKf?d0O`m^M1`Gv=E(Du^tKzxElGb!Z&$ieuYj19?# zc#&A1o1wjrV*x0ttQni}FFn>(z8zi-*aW=Iq^zKX_qGV&C=}}?i z`sF40cEPmjI|cB!`m0Xu&L3j+wwRhYqN@JlHzPK$cGsS(h3V8SaEkIsu-jfa>8osj z2{A;+CRUvcPr26P7nn*oLi^5Am`rO*oYN)rez^?Es*`WIF>e$W;LQ8mN7T=))$rJi z40&hfaFc)~L>*X&p+Dv?G*_behdDUP=UDZXNoa(J{#;IniBz?hos*WUW#HA3EiI?W zq&P+-*!FETaZ9j1{mciTj1P?or3U2saB%r~`o4|ZK2g?tL%%MMfB9M{WE5Lq>{UVJqwA0bDy1#`7xx?@M` zu#4|eQvQ*&azEWlE2K|nRfprPuahDt&G>V6%@sWDx}JsiAV*ztULd8}9xA(gqC+xe zjtBm86+m>N?qN=wMybAWG>x1c_N8)sHLvuCa6Ye7dh{Q(rw8AG&F5jqzJ98KHcvq} zO3!Bm)w+xlj`@yN)WXeMBTUl(5d)hHuXX7Zhq37oY%wYxkE11()EJSg{~!;lzQHwT zrH(kREfHhRy4jVTsV3tYIAwqjOnU5zR$8j`;*#0#oIIutNlJ@VBFFpc$Y3$^r8>UJ zdd`E+aMuBG)XVeEjGU99aYoh#b^SPeGpgXGSlWbFuDHfy)X^YEE(HzoKG|f<>+?{B zhgYtgnmQxZfT;}J4{kBBF7FFlo?sox#(x7oAe+e6@jwdcfdcvvgF9rO1J@gQpuXov ztsK7izmE|T)K+^>gn7QGEd7~h(CCYlDRa{vky)d=boXKG zU1p7#IK-BO!2rZ&f8Msdt6Sdt^T$*l?wQMXZCJlC=ck4p&4F}-e6@h8(W0#{v~D_I zcEB$^e=w#oG^2j*V7L|Wx9mj{J}%gu)3f>3P_idJXXRLIpv*iq2$!W{NwM%4e7So7 zwHt54=SM!vewCqGqdz6cWzU#JhoP)?+{z6h{KPZpX~W6&i*gai6-~%a;Fev~-$vA& zV+&M~5Pp8q4t?U*KvF?dIR{4T<- zS0^P-BIqZdTO#9*S3m5Ap{QOOy#5R?i33;`aHBWIrrnV7XC>B8!10&ZdfI9XdP+Hm zuV`uPsY&{i?-#W!@#|x5L>4^bHHM3xV|>CJvQ)Ihn-PLhOKal1GgmrgQFX5`$Nn{0otbD5flAlO;f%P*4QyYzCWYYiN?uvP#eEoR3gx5S@PS;}>Q z*RkFLGY!GFe*Xqv_G z-4Dw}3qFH2OJf>CbW;QF#DxyuI%OGgyaolM-^uI+F6A@a(5(qLP_tzJUB|<5EKrC9 zUI`8idUht-2Z6ZV%fi1F*~T_HomPMrszG}Da4+wMEaZY-oD7-psjZ%vZ7a>!>hCRl z+F(f?Fns$!h}OwVl~zdX9sfzgCjuJrn%2jwZi8)&i#TccnV_1Z4BHGHc%?x3v(;IE zFhq(NhmX$1N7cIEgX?K^WnUcHP`>n42u;IAYCM@|+Eig*7kw(2JTvEevEUNvJ6BH< z^0!oH7LMCXWQ1)JKT1qfhgoBd8r6cWoHsk)ofG!Ax|dxm8@=Sg_Jdn_GxY zgf+7}ROnE++`>vWwdG`I)YoX|P+)#Rq~B){y`(Zb{Q-SnYP)^+U|EOP$K=mRcyvHat1rZe|rQ^`|?526vJgX0-yRp8wJfmPE*UCga-C z`AIS_>OcyT@t*CX8t-*}aBPIUDe|<%lcL00d;RIi^So>B7`VB%=Ie>W7UaMT-@-sF3%S_q)g6;5bQ>3y zfHZR&f(327k|iO`Zl9O1Zj>-D0DkENaZv%UyS>TGGA>vvPXt} z=pa;N{k1s#6BtPaGk}KSi?Fks#HZ7zJ8yJnyu64_)L0@x3Lg(7Jg|0?^V1pXL~IM2 z@+8791Km$15(afyIJuOuQ&mSWv!4OWpVGlJ*Ph zZt|&J++<#7R0UGz*;d%~@ITXysTeI{A|@3<51JDLNm0@@A8Y9^4rV`9f2&n_+WUH2 zQ4CQwQ!*9*NV73c=)MIpfEib6*v2IaUj30-7s`b^n|I7CHtzH$;Gc8M6g>LM6znhr z?OQq!HMZgHSLmgo@_bTQsqZH${yhBDLEKTI~DsqWyJk^T^ciswQm7UbKE^V@5s0gQ81m(?XR zQBeYW`-7smIRu|rZlsR}@MWk-DyuOs zxTVJ)FX2%L&e*`>Scdk;G{x zLk4upoYsz~YUZr2Xj`e+h!d@3ev;%mE=yob%~Jj8;axn3>z9FMeOMCmDeUwZQdlgFf?kdLOwJ zR`r#)qf?)8S;T0j3tm}G`kzg6y;`M%*Q>An=ih?lbygX<(E~3;nXa~IhWWn)9zG*w zuYk$e;7Xij+NEr&?5{H86i5 zZ{z&B>!Go)X9?JJ&X^V~$}aGd$=jcA$caiy7FhEem0RaPtX&_P$${KnahrpTqw&id zsy?y25Z}D9O^l7tiQ9f=IOt1nrq;f!Xs#3#(@>V`e?;l(7=|eH^`v?lJY?ZRqE)}9 z;UgN~D@7Oob=o`qV=Y-Fvi~RO`O9PvN2ZDz>MnUTCM9_RcLBQjZ!v5y>L`meqzhvH zerU1$l~Ks=VhOMNdw!#&%{aq=U#Mz&P1)Clx^0sMCU6gt_P zuhg{}y?RB(9WJO=|3iT}K9v+M%pc`81;ArGx>BC)5_{m-Cc0iV{39Dnb zWgYm;>!35YG2Xn!!OTzzQ6%EpsAiy8=15|7%X7?^%FwF}nXVOP zhgGM|YMj3IT87&@e{A;M;YI7)2iI-gIeCwE7iYd<@tu|uE?a&7mM;hBHV|A{1L$mC zY+MP^79K!uNOc`=y-TYT^RdeDzsy?^zWw{dj?AP#`^akXwU@TO0KLKSB`{a~K{VEZ zrG5g&|3QmfX*O+8_=OIL7A;M}cL!QXt;;-_az{+%ofJ0cQJN@AL?e4~%khb^B5P|A z+t3*AM^IWmn8x7-=rp1N{KtUCxm-K5Oo!6ra$paoDJVs9Y1a-7sG&=ckS%j0{YS4- zm!zk?U2GNC@dFpxRouP9Y&eEPnaUo-v3Ej-9FFn(0Qu`k6v-OSFVkK#Epj>o_nP%@ z65|;Gw!t2IH%E`y({1vskP!hXGmS&ukX`9h^Q_o2LcQU<^+#1hn>z^t6*dVEJ%+aX z#SK^Wg(2cD8f9WaMub)EF4KK(VvExxJ=_I@#lL(Ta8`026q18p{J@j+&E-b5QLmukz3ia}86%P9Eet;=<+g?YoqTNBYp{lJ@}*c?l)5l=$0c!KHp7dKUpx zxoC8Fx!P`>NxL;-Zodgr+Ma~ox4}S8Z^JP!nAFO#SEd&bufz#=bxcIjCbH|S2{UC@cL(Lly!!U;&hsuv3RFJFIkX-JpYiY^5{)Pa zqnvN0!I9etc<#uh0wzKQg@1W4v0vJRDVn+`hivZp8|P7wfrvxno^6?PMQAPQS#z044q=C=&)c(x%4KK4#H~i+U zsQk$M<2zZS2b+xh&6x0;@1D&jxBixr->3zFgzY!J4@x^1Ih|cX%RHf0lO7>bD3+BE z+I}~BKKm;0iQD!Gg$q1&6QL38V+@OPuqi5(Do|Y#E_$`aOs4j!)RLKZxL80Qzv)h| zJ&bXU^SE?vBOTemvaXE^d%h5Ad;xr~NE5Z6ehA11Fa@dFw{s{mG9+NNkl^@xwq+Zb z!#f@MATvoudawz%(}&OZWg)!LNk*-!d>{Wk5l#t}^FtN2Dji?@H2DKtP74;Y*onMv zqLG1lULf|140*qa2xm0XE!;leK)q^|CO1JF@Gx&NE_ZP{ct;o#I_xRSmt_fr(b`?1G!+D2zffNi(dSH%s-hAe4=k!uKq!{D5up;(H9& zvNV*%cUif%tu3_-N_g+o(}j-C)v)hfwOtOW(Q4veT6>%zX2d-s_tyGG5X#vm{XBb* zzRa>~CmIP12KT_p zZ_y-}r|lQlC05;RteKo>DzE*c`~0}Yc(Zj355`0m{LfLPtA_7&*TAUazMy5$zt>xdJEID6eUom}`OC^VGn1 zwOZcyL4`R;vcsk~gjj@Rc;L^S-!lcVR#hkH9ufUI2owl5e51e~r(7VqPWjQ^r^2#Hh8JOMY?0bTlcn$g8)TWHen>VTMY+12$vL&E^99f z@lrHehMWlTHw#+=PXajV)Fk*`fG0&?eCpINa#y>fX3leJ6NtHa=}Us05iy@HO0l{(Jj81W%lCSG4`&$6eP0&a#}MVJ zV{a|BK;{$p8cQN7Pj{u=&|JL8a|&83NQCcCXBV)y6feRV;1t9Mk(&bfyNeeo_W`+f zrmL(|^3PBSDvdGNv9qOLpvlYn=a2Ov?Nv@cKVgdqKC3>+1=pdq>)!t}tc@ZFexkHJ z!@oUAAz7ZMe9qVw@jQz>cC$I^q~Ge83`|^yI@5tJ9=K%x`P(N zfkOjhgL(S)$D_DMgtzjPqqv|{r`TSjak&3PcwR_Uxxd-n%Ti#;8VisZebHSVpjus2 zUN9726|4~`{rrA^8~!*-;@U~B8>avHY985LA2D`|LH_$`awkAYyzwvat9-3+PZRW` z`;XZXz^c=ULVjIMNht5WK$ooj#m@=S6t6e9P`g3LqKq_= zj}>!EQxJ1z+>2bZjTob282c6+vB79xWSQl3=|13zp>e;yinJOqU@cC`ouWzr$K)2k zu%NttpbC$=W2$nr5aUlp-icb}yW+=uSk!jt5y%eAil@{QUVp&unEQVZI)i1o|?>_U-WVot*vpuqR1`;NT81@Hj-(+O<_NI`bV=WV??k4mxG$H_0 z|CElFliEXx2`KRX`gEF&D2V}SPO?WgV~<(vqY@8m>$Q#yPc;*r%Mynqx+5P7E*1`c zOh15ANm^ZXB-t1iZdQ$kGHzdrY?taUD{Q#fyPe6cmX;cgsgKsTmh58G11}PtkG>zC zIjp7eZ4cQusHwNtcI5Z?B)zYPm#`|Ce1t}=Mr=_o-V2hepP0p|$zgr#-qJ*Khj<(Cz zD@i`>nIR9q)1n(YFdib1=JDvtAJOX?LlH~($0y?5v5VE>T7J$>wxpl zdGAjMnqJV=dUX8;O4W4NV@G!o^?R^25M0cDne27m7I?oh&^gxANFpLE0+Oz;@zfP$ zoK>zpOw8T-aCBdTLiL&miwp{hSk05_UIT|wtxww_xnpSXrQiCk9El96?x~1it|iF4 zZ`n6LqQhg`%2Zyi-a8z}rn~7&Lsf$~gV%)R?OA6EZiUZL=dHz1f8y=?tJ|vUW)XO) zS8IiM!2Ro{?Xii7>-?L!_tw4bfomcTxS^f9!NmiJ~ z+k>&r`CAKrm-l3ET+s}hn_Mjht_@}ZVfjgqpW3dp#-EUAhocfBncGKllpb~!!7HRN zdWKcEUf80A?27>Z!|{PmIV^d@@@>x*zRbfJj3~%;;zXc%MS)?w)W4^BZ+6zyiD7fN z4a4}i9EvV$t*8+VbO?F}zP}B@7QzyC2e=+lZ-G$ZW^S!Kq({AXxCDPugO=G5j#qng zBqvx)!l=2xaO>cz*jbQ5iT9eI!i}PP~ok@}a literal 0 HcmV?d00001 diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index acf6528..9d6c618 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -1,9 +1,16 @@ -load_test_games <- function(){ - g <- nflseedR::load_sharpe_games() +test_dir <- getwd() - if (!nrow(g) > 0) return(tibble::tibble()) +load_test_games <- function(dir = test_dir){ + readRDS(file.path(dir, "games.rds")) +} - g %>% - dplyr::filter(season %in% 2014:2019) %>% - dplyr::select(sim = season, game_type, week, away_team, home_team, result) +load_reference <- function(type = c("div", "conf", "draft"), + dir = test_dir){ + type <- match.arg(type) + file_name <- switch (type, + "div" = "reference_div_ranks.rds", + "conf" = "reference_conf_seeds.rds", + "draft" = "reference_draft_order.rds", + ) + readRDS(file.path(dir, file_name)) } diff --git a/tests/testthat/test-ranks_seeds_draftorder.R b/tests/testthat/test-ranks_seeds_draftorder.R index 58d42b9..2e1947e 100644 --- a/tests/testthat/test-ranks_seeds_draftorder.R +++ b/tests/testthat/test-ranks_seeds_draftorder.R @@ -1,40 +1,35 @@ -source("helpers.R") - test_that("compute_division_ranks() works for multiple seasons", { g <- load_test_games() - skip_if_not(nrow(g) > 0, message = NULL) - ref <- readRDS("reference_div_ranks.rds") + div_ranks <- compute_division_ranks(g) + div_ranks <- div_ranks$standings %>% strip_nflverse_attributes() - div_ranks <- g %>% - compute_division_ranks() + exp <- load_reference("div") - expect_identical(div_ranks$standings, ref) + expect_identical(div_ranks, exp) }) test_that("compute_conference_seeds() works for multiple seasons", { g <- load_test_games() - skip_if_not(nrow(g) > 0, message = NULL) - - ref <- readRDS("reference_conf_seeds.rds") - conf_seeds <- g %>% compute_division_ranks() %>% - compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) + compute_conference_seeds(playoff_seeds = 6) + conf_seeds <- conf_seeds$standings %>% strip_nflverse_attributes() + + exp <- load_reference("conf") - expect_identical(conf_seeds$standings, ref) + expect_identical(conf_seeds, exp) }) test_that("compute_draft_order() works for multiple seasons", { g <- load_test_games() - skip_if_not(nrow(g) > 0, message = NULL) - - ref <- readRDS("reference_draft_order.rds") - draft_order <- g %>% compute_division_ranks() %>% - compute_conference_seeds(h2h = .$h2h, playoff_seeds = 6) %>% - compute_draft_order(games = g, h2h = .$h2h) + compute_conference_seeds(playoff_seeds = 6) %>% + compute_draft_order(games = g) + draft_order <- strip_nflverse_attributes(draft_order) + + exp <- load_reference("draft") - expect_identical(draft_order, ref) + expect_identical(draft_order, exp) }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 157d666..e48a6fe 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -26,7 +26,7 @@ test_that("double_games works", { result = 33 ) - d <- data.frame( + d <- data.table( sim = c(2020, 2020), game_type = c("REG", "REG"), week = c(16L, 16L), @@ -34,6 +34,30 @@ test_that("double_games works", { opp = c("XYZ", "ABC"), result = c(-33, 33), outcome = c(0, 1) - ) + ) %>% tibble::as_tibble() expect_identical(double_games(g), d) }) + +test_that("h2h works", { + g <- data.frame( + sim = 2020, + game_type = "REG", + week = 16L, + away_team = "ABC", + home_team = "XYZ", + result = 33 + ) + + h2h <- compute_h2h(double_games(g)) + + exp <- data.table( + sim = c(2020, 2020), + team = c("ABC", "XYZ"), + opp = c("XYZ", "ABC"), + h2h_games = 1L, + h2h_wins = c(0, 1), + h2h_played = 1, + key = c("sim", "team", "opp") + ) + expect_identical(h2h, exp) +}) From ab0226d4cd32d7f3b9c1f2cbceb0f2706e9a0106 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 16 Dec 2022 17:05:43 +0100 Subject: [PATCH 40/44] import lifecycle and redocument --- DESCRIPTION | 1 + man/compute_division_ranks.Rd | 17 +++-------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 802bed9..8bf1d53 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,6 +19,7 @@ Imports: furrr, future, gsubfn, + lifecycle, magrittr, nflreadr (>= 1.1.3), progressr, diff --git a/man/compute_division_ranks.Rd b/man/compute_division_ranks.Rd index 139e3ce..9c5ad68 100644 --- a/man/compute_division_ranks.Rd +++ b/man/compute_division_ranks.Rd @@ -6,7 +6,7 @@ \usage{ compute_division_ranks( games, - teams = NULL, + teams = lifecycle::deprecated(), tiebreaker_depth = 3, .debug = FALSE, h2h = NULL @@ -26,19 +26,8 @@ following variables are required: \item{result}{Equals home score - away score.} }} -\item{teams}{This parameter is optional. If it is \code{NULL} the function -will compute it internally, otherwise it has to be a data frame of all teams -contained in the \code{games} data frame repeated for each simulation ID -(\code{sim}). The following variables are required: -\describe{ -\item{sim}{A simulation ID. Normally 1 - n simulated seasons.} -\item{team}{Team abbreviation of the team (please see -\code{\link{divisions}} for valid team abbreviations).} -\item{conf}{Conference abbreviation of the team (please see -\code{\link{divisions}} for valid team abbreviations).} -\item{division}{Division of the team (please see -\code{\link{divisions}} for valid division names).} -}} +\item{teams}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} This argument is no longer +supported. Instead, the functions computes it internally.} \item{tiebreaker_depth}{A single value equal to 1, 2, or 3. The default is 3. The value controls the depth of tiebreakers that shall be applied. The deepest From 794ac25ae995fe109ca1fee4ab6bdc119ff0d973 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 16 Dec 2022 17:07:23 +0100 Subject: [PATCH 41/44] avoid special data.table character --- R/simulate_nfl.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/simulate_nfl.R b/R/simulate_nfl.R index 50f05ee..ad8820c 100644 --- a/R/simulate_nfl.R +++ b/R/simulate_nfl.R @@ -529,7 +529,7 @@ default_process_games_dt <- function(teams, games, week_num, ...) { if ("elo" %in% names(args)) { # pull from custom arguments ratings <- setDT(args$elo, key = "team") - teams <- merge(teams, ratings[,.(team, elo)]) + teams <- merge(teams, ratings[,list(team, elo)]) } else { # start everyone at a random default elo ratings <- data.table( @@ -542,11 +542,11 @@ default_process_games_dt <- function(teams, games, week_num, ...) { } # merge elo values to home and away teams - games <- merge(x = games, y = teams[,.(sim, team, away_elo = elo)], + games <- merge(x = games, y = teams[,list(sim, team, away_elo = elo)], by.x = c("sim", "away_team"), by.y = c("sim", "team"), sort = FALSE) - games <- merge(x = games, y = teams[,.(sim, team, home_elo = elo)], + games <- merge(x = games, y = teams[,list(sim, team, home_elo = elo)], by.x = c("sim", "home_team"), by.y = c("sim", "team"), sort = FALSE) @@ -587,16 +587,16 @@ default_process_games_dt <- function(teams, games, week_num, ...) { games[, (drop_cols) := NULL] # apply away team elo shift - away_teams <- games[.(week_num), - .(sim, team = away_team, elo_shift = -elo_shift), + away_teams <- games[list(week_num), + list(sim, team = away_team, elo_shift = -elo_shift), on = "week"] teams <- merge(teams, away_teams, by = c("sim", "team"), all = TRUE) teams[!is.na(elo_shift), elo := elo + elo_shift] teams[, elo_shift := NULL] # apply home team elo shift - home_teams <- games[.(week_num), - .(sim, team = home_team, elo_shift), + home_teams <- games[list(week_num), + list(sim, team = home_team, elo_shift), on = "week"] teams <- merge(teams, home_teams, by = c("sim", "team"), all = TRUE) teams[!is.na(elo_shift), elo := elo + elo_shift] From c4e2ccffbe3ac91ed3234664e1c80e3fd212bae0 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 16 Dec 2022 17:10:25 +0100 Subject: [PATCH 42/44] avoid namespace problems in summary method --- R/summary_nflseedR.R | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/R/summary_nflseedR.R b/R/summary_nflseedR.R index ad5dad1..5ec005f 100644 --- a/R/summary_nflseedR.R +++ b/R/summary_nflseedR.R @@ -43,9 +43,9 @@ summary.nflseedR_simulation <- function(object, ...){ ) data <- object$overall %>% - mutate( + dplyr::mutate( division = gsub("AFC |NFC ", "", division), - division = case_when( + division = dplyr::case_when( division == "East" ~ "E A S T", division == "North" ~ "N O R T H", division == "South" ~ "S O U T H", @@ -62,23 +62,23 @@ summary.nflseedR_simulation <- function(object, ...){ hide_me <- names(column_is_empty[column_is_empty == FALSE]) afc <- data %>% - filter(conf == "AFC") %>% - select(-conf) %>% - arrange(division, desc(wins), desc(playoff)) + dplyr::filter(conf == "AFC") %>% + dplyr::select(-conf) %>% + dplyr::arrange(division, dplyr::desc(wins), dplyr::desc(playoff)) names(afc) <- paste0("afc_", names(afc)) nfc <- data %>% - filter(conf == "NFC") %>% - select(-conf) %>% - arrange(division, desc(wins), desc(playoff)) + dplyr::filter(conf == "NFC") %>% + dplyr::select(-conf) %>% + dplyr::arrange(division, dplyr::desc(wins), dplyr::desc(playoff)) names(nfc) <- paste0("nfc_", names(nfc)) - tbl <- bind_cols(afc, nfc) + tbl <- dplyr::bind_cols(afc, nfc) tbl %>% - group_by(afc_division) %>% + dplyr::group_by(afc_division) %>% gt::gt() %>% # see below table_theme() %>% @@ -145,13 +145,13 @@ summary.nflseedR_simulation <- function(object, ...){ locations = gt::cells_body(gt::ends_with("team")), fn = function(x){ url <- data.frame(team_abbr = x) %>% - left_join( + dplyr::left_join( nflreadr::load_teams() %>% - filter(!team_abbr %in% c("LAR", "OAK", "SD", "STL")) %>% - select(team_abbr, team_logo_espn), + dplyr::filter(!team_abbr %in% c("LAR", "OAK", "SD", "STL")) %>% + dplyr::select(team_abbr, team_logo_espn), by = "team_abbr" ) %>% - pull(team_logo_espn) + dplyr::pull(team_logo_espn) gt::web_image(url = url, height = 30) }) %>% gt::tab_source_note("nflseedR") %>% @@ -284,5 +284,3 @@ table_colors_negative <- c("white", "#FFF2DFFF", "#FFDFB2FF", "#FFCC7FFF", "#FFB74CFF", "#FFA626FF", "#FF9800FF", "#FA8C00FF", "#F47B00FF", "#EE6C00FF", "#E55100FF" ) - - From a1379506dfa4dd466ba6bbb80638ca615d1ba895 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Fri, 16 Dec 2022 17:18:58 +0100 Subject: [PATCH 43/44] more NSE notes wtf --- R/silence_tidy_eval_notes.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/silence_tidy_eval_notes.R b/R/silence_tidy_eval_notes.R index 349e9e9..b16eb0c 100644 --- a/R/silence_tidy_eval_notes.R +++ b/R/silence_tidy_eval_notes.R @@ -73,4 +73,7 @@ conf_rank <- team_logo_espn <- nfc_team <- losses <- + sdiv <- + count <- + location <- NULL From f47d6729499d5a6ac07b77c8e358239497c18f3c Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Tue, 20 Dec 2022 14:09:34 +0100 Subject: [PATCH 44/44] code format --- R/compute_division_ranks.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/compute_division_ranks.R b/R/compute_division_ranks.R index 8a13f06..3175151 100644 --- a/R/compute_division_ranks.R +++ b/R/compute_division_ranks.R @@ -128,15 +128,17 @@ compute_division_ranks <- function(games, teams[, div_game := fifelse(division == division_opp, 1, 0)] teams[, conf_game := fifelse(conf == conf_opp, 1, 0)] teams <- teams[, list( - div_pct = fifelse(sum(div_game) == 0, 0.5, - sum(div_game * outcome) / sum(div_game) + div_pct = fifelse( + sum(div_game) == 0, 0.5, + sum(div_game * outcome) / sum(div_game) ), - conf_pct = fifelse(sum(conf_game) == 0, 0.5, - sum(conf_game * outcome) / sum(conf_game) + conf_pct = fifelse( + sum(conf_game) == 0, 0.5, + sum(conf_game * outcome) / sum(conf_game) ), - sov = fifelse(sum(outcome == 1) == 0, 0, - sum(wins_opp * (outcome == 1)) / - sum(games_opp * (outcome == 1)) + sov = fifelse( + sum(outcome == 1) == 0, 0, + sum(wins_opp * (outcome == 1)) / sum(games_opp * (outcome == 1)) ), sos = sum(wins_opp) / sum(games_opp) ), by = c("sim", "conf", "division", "team", "games", "wins",