diff --git a/DESCRIPTION b/DESCRIPTION index ff167b5f..84f28609 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: nflfastR Title: Functions to Efficiently Access NFL Play by Play Data -Version: 4.5.1.9002 +Version: 4.5.1.9003 Authors@R: c(person(given = "Sebastian", family = "Carl", diff --git a/NEWS.md b/NEWS.md index 8e9818c5..0000dfe8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ - internal function `get_pbp_nfl()` now uses `ifelse()` instead of `dplyr::if_else()` to handle some null-checking, fixes bug found in 2022_21_CIN_KC match. (v4.5.1.9001) - The function `calculate_player_stats()` now summarises target share and air yards share correctly when called with argument `weekly = FALSE` (#413) +- The function `calculate_player_stats_def()` no longer errors when small subsets of pbp data are missing stats. (#415) # nflfastR 4.5.1 diff --git a/R/aggregate_game_stats_def.R b/R/aggregate_game_stats_def.R index eef4bfbe..19731941 100644 --- a/R/aggregate_game_stats_def.R +++ b/R/aggregate_game_stats_def.R @@ -114,6 +114,10 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { values_fill = 0L, values_fn = sum ) %>% + add_column_if_missing( + "solo_tackle", "tackle_with_assist", "tackle_for_loss", "assist_tackle", + "forced_fumble_player" + ) %>% dplyr::mutate( tackles = .data$solo_tackle + .data$tackle_with_assist ) %>% @@ -196,8 +200,9 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { names_from = .data$desc, values_from = c(.data$n, .data$sack_yards), values_fn = sum, - values_fill = 0 + values_fill = 0L ) %>% + add_column_if_missing("n_sack", "n_qb_hit", "sack_yards_sack") %>% dplyr::select( "season", "week", @@ -243,7 +248,10 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { names_from = "desc", values_from = c("n","return_yards"), values_fn = sum, - values_fill = 0 + values_fill = 0L + ) %>% + add_column_if_missing( + "n_interception", "n_pass_defense", "return_yards_interception" ) 
%>% dplyr::select( "season", @@ -305,7 +313,7 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { names_from = .data$desc, values_from = .data$n, values_fn = sum, - values_fill = 0 + values_fill = 0L ) %>% # Renaming fails if the columns don't exist. So we row bind a dummy tibble # including the relevant columns. The row will be filtered after renaming @@ -334,10 +342,12 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { .data$defteam == .data$fumble_recovery_2_team ) %>% dplyr::mutate( + # use data.table fifelse because base ifelse changed data type to logical + # if there are 0 rows fumble_recovery_1_player_id = - ifelse(.data$defteam != .data$fumbled_1_team, .data$fumble_recovery_1_player_id, NA), + data.table::fifelse(.data$defteam != .data$fumbled_1_team, .data$fumble_recovery_1_player_id, NA_character_), fumble_recovery_2_player_id = - ifelse(.data$defteam != .data$fumbled_2_team, .data$fumble_recovery_2_player_id, NA) + data.table::fifelse(.data$defteam != .data$fumbled_2_team, .data$fumble_recovery_2_player_id, NA_character_) ) %>% dplyr::select( "season", "week", @@ -357,9 +367,10 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { names_from = .data$desc, values_from = .data$n, values_fn = sum, - values_fill = 0 + values_fill = 0L ) %>% dplyr::filter(!is.na(.data$player_id)) %>% + add_column_if_missing("fumble_recovery") %>% dplyr::rename("fumble_recovery_opp" = "fumble_recovery") %>% dplyr::group_by(.data$season, .data$week, .data$team, .data$player_id) %>% dplyr::summarise( @@ -445,8 +456,9 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { names_from = .data$desc, values_from = c(.data$n, .data$yards), values_fn = sum, - values_fill = 0 + values_fill = 0L ) %>% + add_column_if_missing("n_penalty", "yards_penalty") %>% dplyr::select( "season", "week", "team", "player_id", "penalty" = "n_penalty", @@ -587,3 +599,14 @@ calculate_player_stats_def <- function(pbp, weekly = FALSE) { player_df } + +# This 
# function `add_column_if_missing()` makes sure that every column named in
# `...` exists in `.data`.
#
# Arguments:
#   .data  A data frame or tibble.
#   ...    Column names given as strings. Character vectors are flattened,
#          so `"a", "b"` and `c("a", "b")` are treated the same way.
#   value  Value assigned to each column that has to be created
#          (default `0L`).
#
# Returns `.data` with the missing columns added and filled with `value`.
# Columns that already exist are left untouched.
add_column_if_missing <- function(.data, ..., value = 0L) {
  # Flatten first so that names are compared one by one; comparing list
  # elements directly would treat a character vector as a single name.
  requested <- as.character(unlist(rlang::list2(...), use.names = FALSE))
  missing_cols <- setdiff(requested, names(.data))

  # Nothing to add: return early instead of assigning to an empty selection.
  if (length(missing_cols) == 0L) {
    return(.data)
  }

  .data[, missing_cols] <- value
  .data
}