diff --git a/DESCRIPTION b/DESCRIPTION index 013d3cad..50d7e404 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: nflfastR Title: Functions to Efficiently Access NFL Play by Play Data -Version: 4.0.0.9015 +Version: 4.0.0.9016 Authors@R: c(person(given = "Sebastian", family = "Carl", diff --git a/NEWS.md b/NEWS.md index 4d68987d..1a7b9329 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,6 +27,7 @@ * `fixed_drive` now increments properly on onside kick recoveries (#215) * `fixed_drive` no longer counts a muffed kickoff as a one-play drive on its own (#217) * `fixed_drive` now properly increments after a safety (#219) +* Improved parser for `penalty_type` and updated the description of the variable to make it clearer that it's the first penalty that happened on a play. (#223) * Heavy performance boost for the function `clean_pbp()` # nflfastR 4.0.0 diff --git a/R/helper_add_nflscrapr_mutations.R b/R/helper_add_nflscrapr_mutations.R index 2b07f4fe..e2641649 100644 --- a/R/helper_add_nflscrapr_mutations.R +++ b/R/helper_add_nflscrapr_mutations.R @@ -80,10 +80,12 @@ add_nflscrapr_mutations <- function(pbp) { penalty_type = dplyr::if_else( .data$penalty == 1, .data$play_description %>% - stringr::str_extract("PENALTY on (.){2,35},.+, [0-9]{1,2} yard(s),") %>% - stringr::str_extract(", (([:alpha:])+([:space:])?)+,") %>% - stringr::str_remove_all(",") %>% - stringr::str_trim(), NA_character_ + stringr::str_extract("(?<=PENALTY on .{1,50}, ).{1,50}(?=, [0-9]{1,2} yard)") %>% + # Face Mask penalties include the yardage as string (either 5 Yards or 15 Yards) + # We remove the 15 Yards part and just keep the additional info if it's a + # 5 yard Face Mask penalty + stringr::str_remove("\\([0-9]{2}+ Yards\\)") %>% + stringr::str_squish(), NA_character_ ), # Make plays marked with down == 0 as NA: down = dplyr::if_else( diff --git a/R/top-level_scraper.R b/R/top-level_scraper.R index a4013d26..d46e2f89 100644 --- a/R/top-level_scraper.R +++ 
b/R/top-level_scraper.R @@ -319,7 +319,7 @@ #' \item{penalty_yards}{Yards gained (or lost) by the posteam from the penalty.} #' \item{replay_or_challenge}{Binary indicator for whether or not a replay or challenge.} #' \item{replay_or_challenge_result}{String indicating the result of the replay or challenge.} -#' \item{penalty_type}{String indicating the penalty type.} +#' \item{penalty_type}{String indicating the penalty type of the first penalty in the given play. Will be `NA` if `desc` is missing the type.} #' \item{defensive_two_point_attempt}{Binary indicator whether or not the defense was able to have an attempt on a two point conversion, this results following a turnover.} #' \item{defensive_two_point_conv}{Binary indicator whether or not the defense successfully scored on the two point conversion.} #' \item{defensive_extra_point_attempt}{Binary indicator whether or not the defense was able to have an attempt on an extra point attempt, this results following a blocked attempt that the defense recovers the ball.} diff --git a/data-raw/variable_list.txt b/data-raw/variable_list.txt index 769eea73..395f7fc6 100644 --- a/data-raw/variable_list.txt +++ b/data-raw/variable_list.txt @@ -276,7 +276,7 @@ #' \item{penalty_yards}{Yards gained (or lost) by the posteam from the penalty.} #' \item{replay_or_challenge}{Binary indicator for whether or not a replay or challenge.} #' \item{replay_or_challenge_result}{String indicating the result of the replay or challenge.} -#' \item{penalty_type}{String indicating the penalty type.} +#' \item{penalty_type}{String indicating the penalty type of the first penalty in the given play. 
Will be `NA` if `desc` is missing the type.} #' \item{defensive_two_point_attempt}{Binary indicator whether or not the defense was able to have an attempt on a two point conversion, this results following a turnover.} #' \item{defensive_two_point_conv}{Binary indicator whether or not the defense successfully scored on the two point conversion.} #' \item{defensive_extra_point_attempt}{Binary indicator whether or not the defense was able to have an attempt on an extra point attempt, this results following a blocked attempt that the defense recovers the ball.} diff --git a/data/field_descriptions.rda b/data/field_descriptions.rda index 157dc264..9129a577 100644 Binary files a/data/field_descriptions.rda and b/data/field_descriptions.rda differ diff --git a/man/fast_scraper.Rd b/man/fast_scraper.Rd index 40e2dcf0..7d4602f6 100644 --- a/man/fast_scraper.Rd +++ b/man/fast_scraper.Rd @@ -322,7 +322,7 @@ Please see the description of \code{lateral_rusher_player_name} for further info \item{penalty_yards}{Yards gained (or lost) by the posteam from the penalty.} \item{replay_or_challenge}{Binary indicator for whether or not a replay or challenge.} \item{replay_or_challenge_result}{String indicating the result of the replay or challenge.} -\item{penalty_type}{String indicating the penalty type.} +\item{penalty_type}{String indicating the penalty type of the first penalty in the given play. Will be \code{NA} if \code{desc} is missing the type.} \item{defensive_two_point_attempt}{Binary indicator whether or not the defense was able to have an attempt on a two point conversion, this results following a turnover.} \item{defensive_two_point_conv}{Binary indicator whether or not the defense successfully scored on the two point conversion.} \item{defensive_extra_point_attempt}{Binary indicator whether or not the defense was able to have an attempt on an extra point attempt, this results following a blocked attempt that the defense recovers the ball.}