Skip to content

Commit

Permalink
build stat ID df
Browse files Browse the repository at this point in the history
  • Loading branch information
mrcaseb committed Nov 23, 2023
1 parent 257dee6 commit 8613ffa
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 42 deletions.
60 changes: 60 additions & 0 deletions R/build_playstats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Build a data frame of play stats ("stat IDs") for every game of the
# requested seasons that already has a result in the schedule.
#
# Arguments:
#   seasons     Seasons to process; matched against the `season` column of
#               `nflreadr::load_schedules()`.
#   stat_ids    Stat IDs to keep; rows with any other `stat_id` are dropped.
#   dir         Forwarded to `load_raw_game()`. Defaults to the option
#               "nflfastR.raw_directory" (NULL if the option is unset).
#   skip_local  Forwarded to `load_raw_game()`.
#
# Returns a data frame with one row per unnested play stat entry and the
# columns game_id, season, week, play_id, stat_id, yards, team_abbr,
# player_name and gsis_player_id. Empty strings in character columns are
# replaced with NA.
build_playstats <- function(seasons = nflreadr::most_recent_season(),
                            stat_ids = 1:1000,
                            dir = getOption("nflfastR.raw_directory", default = NULL),
                            skip_local = FALSE){

  if (is_sequential()) {
    cli::cli_alert_info(
      "It is recommended to use parallel processing when using this function. \\
      Please consider running {.code future::plan(\"multisession\")}! \\
      Will go on sequentially...", wrap = TRUE)
  }

  # Only games with a non-missing result are processed.
  # `pull()` is a tidyselect context, so the column is given as a string;
  # `.data$col` is deprecated there (tidyselect >= 1.2.0).
  games <- nflreadr::load_schedules() %>%
    dplyr::filter(!is.na(.data$result), .data$season %in% seasons) %>%
    dplyr::pull("game_id")

  # Without this guard an empty game list only fails further down, when
  # `tidyr::unnest()` cannot find the "playStats" column in an empty table.
  if (length(games) == 0) {
    cli::cli_abort(
      "No games with a result found in the schedule for the requested {.arg seasons}."
    )
  }

  p <- progressr::progressor(along = games)

  # One data frame per game holding the play ID, the nested play stats and
  # the game ID. The progressor is signalled once per game.
  l <- furrr::future_map(
    games,
    function(id, p, dir, skip_local){
      raw_data <- load_raw_game(id, dir = dir, skip_local = skip_local)
      out <- raw_data$data$viewer$gameDetail$plays[, c("playId", "playStats")]
      out$game_id <- as.character(id)
      # out$desc <- raw_data$data$viewer$gameDetail$plays$playDescriptionWithJerseyNumbers
      p(sprintf("ID=%s", as.character(id)))
      out
    },
    p = p,
    dir = dir,
    skip_local = skip_local
  )

  out <- data.table::rbindlist(l) %>%
    tidyr::unnest(cols = c("playStats")) %>%
    janitor::clean_names() %>%
    dplyr::filter(.data$stat_id %in% stat_ids) %>%
    dplyr::mutate(
      # season and week are cut out of the game ID by position
      # (presumably formatted like "YYYY_WW_..." - verify against the schedule)
      season = as.integer(substr(.data$game_id, 1, 4)),
      week = as.integer(substr(.data$game_id, 6, 7))
    ) %>%
    decode_player_ids() %>%
    dplyr::select(
      "game_id",
      "season",
      "week",
      "play_id",
      "stat_id",
      "yards",
      "team_abbr" = "team_abbreviation",
      "player_name",
      "gsis_player_id",
      # "desc"
    ) %>%
    # turn empty strings into explicit missing values
    dplyr::mutate_if(
      .predicate = is.character,
      .funs = ~dplyr::na_if(.x, "")
    )
  out
}
42 changes: 0 additions & 42 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,48 +117,6 @@ load_raw_game <- function(game_id,
# Report whether the currently active future plan is a sequential one
is_sequential <- function() {
  active_plan <- future::plan()
  inherits(active_plan, "sequential")
}

# Collect the play stats of every game of the requested seasons that already
# has a result in the schedule, together with the play description.
#
# Arguments:
#   seasons     Seasons to process; matched against the `season` column of
#               `nflreadr::load_schedules()`.
#   stat_ids    Stat IDs to keep; rows with any other `stat_id` are dropped.
#   dir         Forwarded to `load_raw_game()`. Defaults to the option
#               "nflfastR.raw_directory" (NULL if the option is unset).
#   skip_local  Forwarded to `load_raw_game()`.
#
# Returns the row-bound per-game results with the columns game_id, play_id,
# stat_id, yards, team_abbr, player_name, gsis_player_id and desc.
check_stat_ids <- function(seasons,
                           stat_ids = 1:500,
                           dir = getOption("nflfastR.raw_directory", default = NULL),
                           skip_local = FALSE){

  if (is_sequential()) {
    cli::cli_alert_info(
      "It is recommended to use parallel processing when using this function. \\
      Please consider running {.code future::plan(\"multisession\")}! \\
      Will go on sequentially...", wrap = TRUE)
  }

  # Only games with a non-missing result are processed.
  # `pull()` is a tidyselect context, so the column is given as a string;
  # `.data$col` is deprecated there (tidyselect >= 1.2.0).
  games <- nflreadr::load_schedules() %>%
    dplyr::filter(!is.na(.data$result), .data$season %in% seasons) %>%
    dplyr::pull("game_id")

  p <- progressr::progressor(along = games)

  furrr::future_map_dfr(games, function(id, stats, p, dir, skip_local){
    raw_data <- load_raw_game(id, dir = dir, skip_local = skip_local)
    # `select()` is a tidyselect context as well, hence the string on the
    # right-hand side of the rename instead of `.data$...`
    plays <- janitor::clean_names(raw_data$data$viewer$gameDetail$plays) %>%
      dplyr::select(
        "play_id",
        "play_stats",
        "desc" = "play_description_with_jersey_numbers"
      )

    p(sprintf("ID=%s", as.character(id)))

    # the columns created by unnesting need their names cleaned, too
    tidyr::unnest(plays, cols = c("play_stats")) %>%
      janitor::clean_names() %>%
      dplyr::filter(.data$stat_id %in% stats) %>%
      dplyr::mutate(game_id = as.character(id)) %>%
      dplyr::select(
        "game_id",
        "play_id",
        "stat_id",
        "yards",
        "team_abbr" = "team_abbreviation",
        "player_name",
        "gsis_player_id",
        "desc"
      )
  }, stats = stat_ids, p = p, dir = dir, skip_local = skip_local)
}

# take a time string of the format "MM:SS" and convert it to seconds
time_to_seconds <- function(time){
as.numeric(strptime(time, format = "%M:%S")) -
Expand Down

0 comments on commit 8613ffa

Please sign in to comment.