Skip to content

Commit

Permalink
Merge pull request #4 from mrcaseb/ben
Browse files Browse the repository at this point in the history
Add series and series_success
  • Loading branch information
guga31bb authored Apr 29, 2020
2 parents 4e2c8ee + dafe5ed commit b6c8de9
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 2 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Type: Package
Package: nflfastR
Title: Functions to Efficiently Scrape NFL Play by Play and
Roster Data
Version: 1.0.3
Version: 1.1.0
Authors@R:
c(person(given = "Sebastian",
family = "Carl",
Expand Down
65 changes: 65 additions & 0 deletions R/helper_add_series_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
################################################################################
# Author: Sebastian Carl
# Purpose: Function to add series variables analogous to Lee Sharpe's version
# Code Style Guide: styler::tidyverse_style()
################################################################################

## series =
## starts at 1, each new first down increments, numbers shared across both teams
## NA: kickoffs, extra point/two point conversion attempts, non-plays, no posteam
## series_success =
## 1: scored touchdown, gained enough yards for first down
## 0: punt, interception, fumble lost, turnover on downs, 4th down FG attempt
## NA: series is NA, series contains QB spike/kneel, half ended with none of above

# Add the `series` and `series_success` columns to play-by-play data.
#
# Args:
#   pbp: play-by-play data frame. The code below reads the columns game_id,
#     down, drive, game_half, posteam, first_down_rush, first_down_pass,
#     first_down_penalty, extra_point_attempt, two_point_attempt,
#     kickoff_attempt, qb_kneel, qb_spike, touchdown, punt_attempt,
#     interception, fumble_lost, fourth_down_failed and field_goal_attempt.
#
# Returns:
#   `pbp`, ungrouped, with `down` converted to numeric and the columns
#   `series` and `series_success` added. The temporary helper columns
#   `first_down` and `trigger` are dropped again before returning (note that
#   a pre-existing `first_down` column in `pbp` is therefore removed as well).
#
# Side effects:
#   Emits the status message "added series variables".
add_series_data <- function(pbp) {
  out <-
    pbp %>%
    dplyr::group_by(game_id) %>%
    dplyr::mutate(
      # make down numeric
      down = as.numeric(down),
      # create a first down indicator which marks a first down for the offense
      # AND a first down after a change of possession (-> drive number
      # increases); we don't want a first down being indicated for XP, 2P, KO.
      # On the last play of a game lead() is NA, so the indicator can be NA
      # there; NAs are replaced with 0 before the cumsum below.
      first_down = dplyr::if_else(
        (first_down_rush == 1 | first_down_pass == 1 |
          first_down_penalty == 1 | drive < dplyr::lead(drive)) &
          (extra_point_attempt == 0 & two_point_attempt == 0 &
            kickoff_attempt == 0),
        1, 0
      ),
      # after setting the first down indicator we modify it for the end of a
      # half: the last play of a half always closes the current series
      first_down = dplyr::if_else(
        game_half != dplyr::lead(game_half), 1, first_down
      ),
      # the 'trigger' is used for calculating the cumsum because we don't want
      # the series number to increase on the play where the first down
      # occurred but on the next play
      trigger = dplyr::lag(first_down, n = 1, default = 0)
    ) %>%
    # now compute the series number with cumsum (for the calculation NAs are
    # replaced with 0); still grouped by game_id, so numbering restarts at 1
    # in every game
    dplyr::mutate(series = cumsum(tidyr::replace_na(trigger, 0)) + 1) %>%
    dplyr::mutate(
      # now modify the series number for special cases
      series = dplyr::if_else(
        kickoff_attempt == 1 | extra_point_attempt == 1 |
          two_point_attempt == 1 | is.na(down) |
          is.na(posteam),
        NA_real_,
        series
      ),
      # play-level success value; the series-level value is derived below.
      # NOTE(review): the file header lists "half ended with none of above"
      # as NA, but the `TRUE ~ 0` fallback yields 0 for such plays, and only
      # the LAST play of a series decides the series value (not "series
      # contains QB spike/kneel") -- confirm which behaviour is intended.
      series_success = dplyr::case_when(
        is.na(series) | qb_kneel == 1 | qb_spike == 1 ~ NA_real_,
        touchdown == 1 | first_down_rush == 1 | first_down_pass == 1 |
          first_down_penalty == 1 ~ 1,
        punt_attempt == 1 | interception == 1 | fumble_lost == 1 |
          fourth_down_failed == 1 | field_goal_attempt == 1 ~ 0,
        TRUE ~ 0
      )
    ) %>%
    dplyr::group_by(game_id, series) %>%
    # set the series_success value of the last play for the whole series.
    # `last` is namespace-qualified like every other dplyr call in this
    # function so it resolves even when dplyr is not attached.
    dplyr::mutate(series_success = dplyr::last(series_success)) %>%
    dplyr::ungroup() %>%
    dplyr::select(-first_down, -trigger)

  message("added series variables")
  out
}
2 changes: 1 addition & 1 deletion R/helper_variable_selector.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,6 @@ rs_cols <- c(
"drive_time_of_possession", "drive_inside20", "drive_first_downs",
"drive_possession_team_abbr", "scoring_team_abbr", "scoring_type",
"alert_play_type", "play_type_nfl", "time_of_day",
"yards", "end_yardline_side", "end_yardline_number"
"yards", "end_yardline_side", "end_yardline_number", "series", "series_success"
)

4 changes: 4 additions & 0 deletions R/top-level_scraper.R
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@
#' \item{yards} - Analogue yards_gained but with the kicking team being the possession team (which means that there are many yards gained through kickoffs and punts).
#' \item{end_yardline_side} - String indicating the side of the field at the end of the given play.
#' \item{end_yardline_number} - Yardline number within the above given side at the end of the given play.
#' \item{series} - Starts at 1, each new first down increments, numbers shared across both teams. Is NA for: kickoffs, extra point/two point conversion attempts, no posteam.
#' \item{series_success} - 1 when scored touchdown, gained enough yards for first down. 0 when punt, interception, fumble lost, turnover on downs, 4th down FG attempt. NA when series is NA, series contains QB spike/kneel.
#' }
#' @export
#' @examples
Expand Down Expand Up @@ -352,6 +354,7 @@ fast_scraper <- function(game_ids, source = "rs", pp = FALSE) {
add_wp() %>%
add_air_yac_wp() %>%
add_cp() %>%
add_series_data() %>%
select_variables()
}
})
Expand Down Expand Up @@ -379,6 +382,7 @@ fast_scraper <- function(game_ids, source = "rs", pp = FALSE) {
add_wp() %>%
add_air_yac_wp() %>%
add_cp() %>%
add_series_data() %>%
select_variables()
}
})
Expand Down
2 changes: 2 additions & 0 deletions man/fast_scraper.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b6c8de9

Please sign in to comment.