diff --git a/DESCRIPTION b/DESCRIPTION index 762eb79..ac4beac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ffanalytics Type: Package Title: Scrape Data For Fantasy Football -Version: 3.0.0.0011 +Version: 3.1.0.0000 Authors@R: c(person("Dennis", "Andersen", email = "andersen.dennis@outlook.com", role = c("aut")), @@ -29,10 +29,11 @@ Imports: dplyr, rrapply Depends: - R (>= 2.10) + R (>= 4.1.0) RoxygenNote: 7.2.1 Collate: 'adp_functions.R' + 'caching_helpers.R' 'calc_projections.R' 'custom_scoring.R' 'ffanalytics.R' diff --git a/NAMESPACE b/NAMESPACE index 4de7385..9402da7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,9 +6,11 @@ export(add_ecr) export(add_player_info) export(add_uncertainty) export(cbs_draft) +export(clear_ffanalytics_cache) export(custom_scoring) export(ffc_draft) export(get_adp) +export(list_ffanalytics_cache) export(mfl_draft) export(nfl_draft) export(projections_table) @@ -24,4 +26,5 @@ import(rrapply) import(rvest) import(tidyr) importFrom(data.table,fread) +importFrom(data.table,rbindlist) importFrom(readxl,read_xlsx) diff --git a/R/adp_functions.R b/R/adp_functions.R index 803d399..101963e 100644 --- a/R/adp_functions.R +++ b/R/adp_functions.R @@ -8,16 +8,26 @@ rts_draft <- function(metric = c("adp", "aav")){ metric = match.arg(tolower(metric), c("adp", "aav")) - is_aav = (metric == "aav") + obj_name = paste0("RTS ", toupper(metric)) + is_cached = obj_name %in% list_ffanalytics_cache(quiet = TRUE)$object - draft_url = "https://www.freedraftguide.com/football/adp-aav-provider.php?NUM=&STYLE=0&AAV=" - if(is_aav) { - draft_url = paste0(draft_url, "YES") - } + if(is_cached) { + rts_json = get_cached_object(sprintf("rts_%s.rds", metric)) + } else { + draft_url = "https://www.freedraftguide.com/football/adp-aav-provider.php?NUM=&STYLE=0&AAV=" + is_aav = (metric == "aav") - rts_json = httr2::request(draft_url) %>% - httr2::req_perform() %>% - httr2::resp_body_json() + if(is_aav) { + draft_url = paste0(draft_url, "YES") + } + + 
rts_json = httr2::request(draft_url) %>% + httr2::req_user_agent("ffanalytics R package (https://github.com/FantasyFootballAnalytics/ffanalytics)") %>% + httr2::req_perform() %>% + httr2::resp_body_json() + + cache_object(rts_json, sprintf("rts_%s.rds", metric)) + } dplyr::bind_rows(rts_json$player_list) %>% dplyr::rename(rts_id = player_id) %>% @@ -39,7 +49,15 @@ rts_draft <- function(metric = c("adp", "aav")){ #' This function scrapes ADP data from CBS Sports #' @return A \link{data.frame} with the results. #' @export -cbs_draft <- function(metric = "adp") { +cbs_draft = function(metric = "adp") { + + is_cached = "CBS ADP" %in% list_ffanalytics_cache(quiet = TRUE)$object + + if(is_cached) { + out_df = get_cached_object("cbs_adp.rds") + return(out_df) + } + draft_url <- "https://www.cbssports.com/fantasy/football/draft/averages/both/h2h/all" draft_page <- rvest::read_html(draft_url) @@ -48,13 +66,15 @@ cbs_draft <- function(metric = "adp") { rvest::html_elements("span.CellPlayerName--long > span > a") %>% rvest::html_attr("href") %>% dirname() %>% + dirname() %>% basename() - draft_page %>% + out_df = draft_page %>% rvest::html_element("#TableBase > div > div > table") %>% rvest::html_table() %>% - tidyr::extract(Player, c("player", "pos", "team"), - "\\n\\s+(.*?)\\n\\s+([A-Z]{1,3})\\s+([A-Z]{2,3})$") %>% + tidyr::extract( + Player, c("player", "pos", "team"), + "\\n\\s+(.*?)\\n\\s+([A-Z]{1,3})\\s+([A-Z]{2,3})") %>% dplyr::transmute( id = get_mfl_id(cbs_id, player_name = player, pos = pos, team = team), cbs_id = cbs_id, @@ -68,6 +88,9 @@ cbs_draft <- function(metric = "adp") { percent_drafted = Pct ) + cache_object(out_df, "cbs_adp.rds") + out_df + } #' Get ADP/AAV data from Yahoo @@ -80,65 +103,70 @@ cbs_draft <- function(metric = "adp") { yahoo_draft = function(metric = c("adp", "aav")) { metric = match.arg(tolower(metric), c("adp", "aav")) is_aav = (metric == "aav") + is_cached = "Yahoo ADP/AAV" %in% list_ffanalytics_cache(TRUE)$object - draft_url <- 
sprintf("https://football.fantasysports.yahoo.com/f1/draftanalysis?tab=%s&pos=ALL", - if(is_aav) "AD" else "SD") - html_session = rvest::session(draft_url) + if(is_aav) { + adp_aav_cols = c("aav", "projected_av", "percent_drafted") + } else { + adp_aav_cols = c("adp", "percent_drafted") + } - max_pages = 4 + (as.integer(is_aav) * 4) - l_yahoo = vector("list", max_pages) - i = 0 - while(i < max_pages) { - next_page = paste0(html_session$url, "&count=", i * 50) - i = i + 1 + if(is_cached) { + out_df = get_cached_object("yahoo_adp_aav.rds") %>% + dplyr::select(id, yahoo_id, player_name, team, pos, dplyr::all_of(adp_aav_cols)) + return(out_df) + } - html_page = html_session %>% - rvest::session_jump_to(next_page) %>% - rvest::read_html() + req_obj = request("https://pub-api-ro.fantasysports.yahoo.com/fantasy/v2/league/423.l.public;out=settings/players;position=ALL;start=0;count=200;sort=rank_season;search=;out=auction_values;out=expert_ranks;expert_ranks.rank_type=projected_season_remaining/draft_analysis;cut_types=diamond;slices=last7days?format=json_f") %>% + req_method("GET") %>% + req_headers( + Accept = "*/*", + Host = "pub-api-ro.fantasysports.yahoo.com", + Origin = "https://football.fantasysports.yahoo.com", + Connection = "keep-alive", + `Sec-Fetch-Dest` = "empty", + `Sec-Fetch-Mode` = "no-cors", + `Sec-Fetch-Site` = "none", + `Accept-Encoding` = "gzip, deflate, br", + `Accept-Language` = "en-US,en;q=0.9" + ) %>% + httr2::req_user_agent("ffanalytics R package (https://github.com/FantasyFootballAnalytics/ffanalytics)") %>% + httr2::req_perform() %>% + httr2::resp_body_json() - yahoo_id = html_page %>% - html_elements("table > tbody > tr > td > div > div > span > a") %>% - html_attr("data-ys-playerid") + player_data = req_obj$fantasy_content$league$players %>% + lapply(function(x) { + data.frame( + player_name = x$player$name$full, + yahoo_id = x$player$player_id, + team = x$player$editorial_team_abbr, + pos = x$player$eligible_positions[[1]]$position, + adp = 
x$player$draft_analysis$average_pick, + percent_drafted = x$player$draft_analysis$percent_drafted, + aav = x$player$draft_analysis$average_cost, + projected_av = x$player$projected_auction_value + ) + }) - yahoo_tbl = html_page %>% - rvest::html_element("table") %>% - rvest::html_table() %>% - dplyr::mutate(stats_id = yahoo_id) - l_yahoo[[i]] = yahoo_tbl - Sys.sleep(1) - } + out_df = data.table::rbindlist(player_data) %>% + dplyr::tibble() %>% + dplyr::mutate(dplyr::across(team:projected_av, ~ replace(.x, .x == "-", NA))) %>% + dplyr::mutate(dplyr::across(team:projected_av, ~ type.convert(.x, as.is = TRUE))) %>% + dplyr::mutate( + team = toupper(team), + team = ifelse(team %in% names(team_corrections), unlist(team_corrections)[team], team), + stats_id = yahoo_id, + id = get_mfl_id(stats_id, player_name = player_name, pos = pos, team = team), + percent_drafted = percent_drafted * 100 + ) - output_df = dplyr::bind_rows(l_yahoo) %>% - dplyr::mutate(Name = sapply(strsplit(Name, "\\s*\n\\s*"), `[`, 2)) %>% - tidyr::extract(Name, c("player", "team", "pos"), "(.*?)\\s+([A-Za-z]+)\\s+-\\s+([A-Z]+)$") - names(output_df) = gsub("[^[:alnum:]]$", "", names(output_df)) + cache_object(out_df, "yahoo_adp_aav.rds") + out_df %>% + dplyr::select(id, yahoo_id, player_name, team, pos, dplyr::all_of(adp_aav_cols)) - if(is_aav) { - output_df %>% - dplyr::transmute( - id = get_mfl_id(stats_id, player_name = player, pos = pos, team = team), - yahoo_id = stats_id, - player, - team, - pos, - aav = as.numeric(gsub("[^[:digit:].]*", "", `Avg Salary`)), - percent_drafted = as.numeric(sub("%", "", `Percent Drafted`)) - ) - } else { - output_df %>% - dplyr::transmute( - id = get_mfl_id(stats_id, player_name = player, pos = pos, team = team), - yahoo_id = stats_id, - player, - team, - pos, - adp = `Avg Pick`, - percent_drafted = as.numeric(sub("%", "", `Percent Drafted`, fixed = TRUE)) - ) - } } @@ -150,6 +178,13 @@ yahoo_draft = function(metric = c("adp", "aav")) { nfl_draft = function(metric = 
"adp") { year = get_scrape_year() + is_cached = "NFL ADP" %in% list_ffanalytics_cache(quiet = TRUE)$object + + if(is_cached) { + out_df = get_cached_object("nfl_adp.rds") + return(out_df) + } + nfl_url = paste0("https://fantasy.nfl.com/draftcenter/breakdown?leagueId=&offset=1&count=200&position=all&season=", year, "&sort=draftAveragePosition") @@ -162,22 +197,25 @@ nfl_draft = function(metric = "adp") { extract(X1, c("player", "pos", "team"), "(.*?)\\s+([A-Z]{2,3}).*?([A-Z]{2,3}).*") %>% rename(adp = X2, avg_round = X3, average_salary = X4) - nfl_ids = html_page %>% + nfl_id = html_page %>% html_elements("tbody > tr > td > div > a") %>% html_attr("href") %>% unique() %>% sub(".*playerId=", "", .) - nfl_table %>% - mutate(id = get_mfl_id(nfl_ids, player_name = player, pos = pos, team = team), - nfl_id = nfl_ids) + out_df = nfl_table %>% + mutate(id = get_mfl_id(nfl_id, player_name = player, pos = pos, team = team), + nfl_id = !!nfl_id) + + cache_object(out_df, "nfl_adp.rds") + out_df } #' Get ADP or AAV data from MyFantasyLeague #' #' This function scrapes ADP or AAV data from MyFantasyLeague. 
More details on -#' the API available at \link{https://api.myfantasyleague.com/2022/api_info?STATE=details} +#' the API available at \link{https://api.myfantasyleague.com/2023/api_info?STATE=details} #' @param metric Indicated whether to pull ADP (default) or AAV #' @param period Includes metric for drafts following this time-period #' @param format Scoring system for receptions @@ -194,9 +232,11 @@ mfl_draft = function(metric = c("adp", "aav"), is_keeper = c("No", "Keeper", "Rookie Only"), is_mock = c("No", "Mock", "All Leagues"), cutoff = 10) { + # Todo: clean up the way arguments are input - is_aav = (metric == "aav") metric = match.arg(tolower(metric), c("adp", "aav")) + is_aav = (metric == "aav") + period = match.arg(toupper(period), c("RECENT", "ALL", "DRAFT", "JUNE", "JULY", "AUG1", "AUG15", "START", "MID", "PLAYOFF")) fcount = match.arg(as.character(nteams), as.character(c(12, 8, 10, 14, 16))) format = match.arg(as.character(format), c("All Leagues", "PPR", "Std")) @@ -204,6 +244,24 @@ mfl_draft = function(metric = c("adp", "aav"), is_mock = match.arg(as.character(is_mock), c("No", "Mock", "All Leagues")) cutoff = as.integer(cutoff) + # Checking to see if default arguments are used / the result may be cached + is_cache_format = ( + period == "RECENT" + && fcount == "12" + && format == "All Leagues" + && is_keeper == "No" + && is_mock == "No" + && cutoff == "10" + ) + + obj_name = paste0("MFL ", toupper(metric)) + is_cached = obj_name %in% list_ffanalytics_cache(quiet = TRUE)$object + + if(is_cached && is_cache_format) { + out_df = get_cached_object(sprintf("mfl_%s.rds", metric)) + return(out_df) + } + format = switch(format, "All Leagues" = -1, "PPR" = 1, @@ -216,34 +274,37 @@ mfl_draft = function(metric = c("adp", "aav"), "All Leagues" = -1 ) - - if(is_aav) { url = sprintf("https://api.myfantasyleague.com/%d/export?TYPE=%s&PERIOD=%s&IS_PPR=%d&IS_KEEPER=%s&JSON=1", get_scrape_year(), metric, period, format, is_keeper) cols = setNames(c("id", "averageValue", 
"minValue", "maxValue", "auctionSelPct"), - c("id", "aav", "min_aav", "max_aav", "draft_percentage")) + c("id", "aav", "min_av", "max_av", "draft_percentage")) } else { url = sprintf("https://api.myfantasyleague.com/%d/export?TYPE=%s&PERIOD=%s&FCOUNT=%s&IS_PPR=%s&IS_KEEPER=%s&IS_MOCK=%s&CUTOFF=%d&DETAILS=&JSON=1", get_scrape_year(), metric, period, fcount, format, is_keeper, is_mock, cutoff) cols = setNames(c("id", "averagePick", "minPick", "maxPick", "draftSelPct"), - c("id", "adp", "min_adp", "max_adp", "draft_percentage")) + c("id", "adp", "min_dp", "max_dp", "draft_percentage")) } mfl_json = httr2::request(url) %>% + httr2::req_user_agent("ffanalytics R package (https://github.com/FantasyFootballAnalytics/ffanalytics)") %>% httr2::req_perform() %>% httr2::resp_body_json() - out = mfl_json[[metric]]$player %>% + out_df = mfl_json[[metric]]$player %>% dplyr::bind_rows() %>% dplyr::select(!!!cols) %>% type.convert(as.is = TRUE) %>% dplyr::mutate(id = as.character(id)) if(is_aav) { # $1000 split among N franchises (adjusted to ~$200 per team) - out$aav = out$aav * (200 / (1000 / as.integer(fcount))) + out_df$aav = out_df$aav * (200 / (1000 / as.integer(fcount))) } - out + + if(is_cache_format) { + cache_object(out_df, sprintf("mfl_%s.rds", metric)) + } + out_df } @@ -268,16 +329,31 @@ ffc_draft <- function(format= c("standard", "ppr", "half-ppr", "2qb", "dynasty", pos <- match.arg(pos, c("all", "qb", "rb", "wr", "te", "def", "pk")) n_teams <- match.arg(n_teams, c("12", "8", "10", "14")) + # Checking to see if default arguments are used / the result may be cached + is_cache_format = ( + n_teams == "12" + && format == "standard" + && pos == "all" + ) + + is_cached = "FFC ADP" %in% list_ffanalytics_cache(TRUE)$object + + if(is_cached && is_cache_format) { + out_df = get_cached_object("ffc_adp.rds") + return(out_df) + } + ffc_url <- paste0("https://fantasyfootballcalculator.com/api/v1/adp/", format, "?teams=", n_teams, "&year=", get_scrape_year(), "&position=", pos) 
ffc_json = ffc_url %>% httr2::request() %>% + httr2::req_user_agent("ffanalytics R package (https://github.com/FantasyFootballAnalytics/ffanalytics)") %>% httr2::req_perform() %>% httr2::resp_body_json(check_type = FALSE) - dplyr::bind_rows(ffc_json$players) %>% + out_df = dplyr::bind_rows(ffc_json$players) %>% dplyr::transmute( id = get_mfl_id(player_name = name, team = team, pos = position), ffc_id = player_id, @@ -286,6 +362,12 @@ ffc_draft <- function(format= c("standard", "ppr", "half-ppr", "2qb", "dynasty", team = team, adp ) + + if(is_cache_format) { + cache_object(out_df, "ffc_adp.rds") + } + out_df + } diff --git a/R/caching_helpers.R b/R/caching_helpers.R new file mode 100644 index 0000000..67947c1 --- /dev/null +++ b/R/caching_helpers.R @@ -0,0 +1,203 @@ + + + +# External ---- + +#' Manually clear cache associated with ffanalytics package +#' +#' Clears all scraped data cache associated with the ffanalytics package, or a +#' subset of objects from the cache +#' @param ffa_objects (\emph{optional}) a character vector of object names (i.e., +#' returned by \code{list_ffanalytics_cache()}) +#' @export +clear_ffanalytics_cache = function(ffa_objects = NULL) { + + ensure_cache_dir_exists() + cache_dir = tools::R_user_dir("ffanalytics", "cache") + current_file_names = list.files(cache_dir, full.names = TRUE) + + + if(!is.null(ffa_objects)) { + object_rds_names = setNames(names(cache_file_names), cache_file_names)[ffa_objects] + files_to_remove = basename(current_file_names) %in% object_rds_names + + if(any(files_to_remove, na.rm = TRUE)) { + file.remove(current_file_names[files_to_remove]) + if(sum(files_to_remove, na.rm = TRUE) != length(ffa_objects)) { + message("Note: Not all of the listed objects were removed\n\nUse `list_ffanalytics_cache()` to check object names") + } + } else { + message("Note: None of the listed objects were removed\n\nUse `list_ffanalytics_cache()` to see object names") + } + + } else { + file.remove(current_file_names) + } + 
invisible() +} + +#' Checks the scrapes that are currently cached +#' +#' Checks the cached data, removes cached objects >= 8 hours old, and returns a list +#' of objects that are currently cached +#' @param quiet whether the function should return a message if cache is empty +#' @return A \code{data.frame} with the object (scrape) name, and time since caching +#' @export +list_ffanalytics_cache = function(quiet = FALSE) { + cache_dir = tools::R_user_dir("ffanalytics", "cache") + clear_cache_by_time() + + current_file_names = list.files(cache_dir) + if(length(current_file_names) == 0L && !quiet) { + message("ffanalytics cache is empty") + } + file_mtimes = file.mtime(list.files(cache_dir, full.names = TRUE)) + file_order = order(file_mtimes, decreasing = TRUE) + secs_since_cache = as.numeric(difftime(Sys.time(), file_mtimes, units = "secs")) + + dplyr::tibble( + object = cache_file_names[current_file_names], + hr_min_since_cache = format(as.POSIXct(secs_since_cache, origin = '1970-01-01', tz = 'UTC'), '%H:%M') + )[file_order, ] +} + +# Internal ---- + +cache_object = function(object, file_name) { + ensure_cache_dir_exists() + clear_cache_by_time() + cache_dir = tools::R_user_dir("ffanalytics", "cache") + + current_file_names = list.files(cache_dir) + + if(!file_name %in% current_file_names) { + saveRDS( + object, + file.path(tools::R_user_dir("ffanalytics", "cache"), file_name) + ) + } +} + +get_cached_object = function(file_name) { + cache_dir = tools::R_user_dir("ffanalytics", "cache") + readRDS(file.path(cache_dir, file_name)) +} + +ensure_cache_dir_exists = function() { + cache_dir = tools::R_user_dir("ffanalytics", "cache") + + if(!file.exists(cache_dir)) { + dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE) + } +} + +clear_cache_by_time = function() { + cache_dir = tools::R_user_dir("ffanalytics", "cache") + file_names = list.files(cache_dir, full.names = TRUE) + + if(length(file_names) == 0) { + return(NULL) + } + + file_mtimes = 
file.mtime(file_names) + files_to_clear = difftime(Sys.time(), file_mtimes, units = "hours") > 8 + + if(any(files_to_clear, na.rm = TRUE)) { + file.remove(file_names[files_to_clear]) + } +} + +cache_file_names = c( + "yahoo_adp_aav.rds" = "Yahoo ADP/AAV", + "cbs_adp.rds" = "CBS ADP", + "rts_adp.rds" = "RTS ADP", + "rts_aav.rds" = "RTS AAV", + "nfl_adp.rds" = "NFL ADP", + "mfl_adp.rds" = "MFL ADP", + "mfl_aav.rds" = "MFL AAV", + "ffc_adp.rds" = "FFC ADP", + "ecr_draft_overall_std.rds" = "ECR Draft Overall Std", + "ecr_draft_overall_half.rds" = "ECR Draft Overall Half", + "ecr_draft_overall_ppr.rds" = "ECR Draft Overall PPR", + "ecr_weekly_overall_std.rds" = "ECR Weekly Overall Std", + "ecr_weekly_overall_half.rds" = "ECR Weekly Overall Half", + "ecr_weekly_overall_ppr.rds" = "ECR Weekly Overall PPR", + "ecr_draft_qb_std.rds" = "ECR Draft QB Std", + "ecr_draft_qb_half.rds" = "ECR Draft QB Half", + "ecr_draft_qb_ppr.rds" = "ECR Draft QB PPR", + "ecr_weekly_qb_std.rds" = "ECR Weekly QB Std", + "ecr_weekly_qb_half.rds" = "ECR Weekly QB Half", + "ecr_weekly_qb_ppr.rds" = "ECR Weekly QB PPR", + "ecr_draft_rb_std.rds" = "ECR Draft RB Std", + "ecr_draft_rb_half.rds" = "ECR Draft RB Half", + "ecr_draft_rb_ppr.rds" = "ECR Draft RB PPR", + "ecr_weekly_rb_std.rds" = "ECR Weekly RB Std", + "ecr_weekly_rb_half.rds" = "ECR Weekly RB Half", + "ecr_weekly_rb_ppr.rds" = "ECR Weekly RB PPR", + "ecr_draft_wr_std.rds" = "ECR Draft WR Std", + "ecr_draft_wr_half.rds" = "ECR Draft WR Half", + "ecr_draft_wr_ppr.rds" = "ECR Draft WR PPR", + "ecr_weekly_wr_std.rds" = "ECR Weekly WR Std", + "ecr_weekly_wr_half.rds" = "ECR Weekly WR Half", + "ecr_weekly_wr_ppr.rds" = "ECR Weekly WR PPR", + "ecr_draft_te_std.rds" = "ECR Draft TE Std", + "ecr_draft_te_half.rds" = "ECR Draft TE Half", + "ecr_draft_te_ppr.rds" = "ECR Draft TE PPR", + "ecr_weekly_te_std.rds" = "ECR Weekly TE Std", + "ecr_weekly_te_half.rds" = "ECR Weekly TE Half", + "ecr_weekly_te_ppr.rds" = "ECR Weekly TE PPR", + 
"ecr_draft_k_std.rds" = "ECR Draft K Std", + "ecr_draft_k_half.rds" = "ECR Draft K Half", + "ecr_draft_k_ppr.rds" = "ECR Draft K PPR", + "ecr_weekly_k_std.rds" = "ECR Weekly K Std", + "ecr_weekly_k_half.rds" = "ECR Weekly K Half", + "ecr_weekly_k_ppr.rds" = "ECR Weekly K PPR", + "ecr_draft_superflex_std.rds" = "ECR Draft SUPERFLEX Std", + "ecr_draft_superflex_half.rds" = "ECR Draft SUPERFLEX Half", + "ecr_draft_superflex_ppr.rds" = "ECR Draft SUPERFLEX PPR", + "ecr_weekly_superflex_std.rds" = "ECR Weekly SUPERFLEX Std", + "ecr_weekly_superflex_half.rds" = "ECR Weekly SUPERFLEX Half", + "ecr_weekly_superflex_ppr.rds" = "ECR Weekly SUPERFLEX PPR", + "ecr_draft_dst_std.rds" = "ECR Draft DST Std", + "ecr_draft_dst_half.rds" = "ECR Draft DST Half", + "ecr_draft_dst_ppr.rds" = "ECR Draft DST PPR", + "ecr_weekly_dst_std.rds" = "ECR Weekly DST Std", + "ecr_weekly_dst_half.rds" = "ECR Weekly DST Half", + "ecr_weekly_dst_ppr.rds" = "ECR Weekly DST PPR", + "ecr_draft_idp_std.rds" = "ECR Draft IDP Std", + "ecr_draft_idp_half.rds" = "ECR Draft IDP Half", + "ecr_draft_idp_ppr.rds" = "ECR Draft IDP PPR", + "ecr_weekly_idp_std.rds" = "ECR Weekly IDP Std", + "ecr_weekly_idp_half.rds" = "ECR Weekly IDP Half", + "ecr_weekly_idp_ppr.rds" = "ECR Weekly IDP PPR", + "ecr_draft_dl_std.rds" = "ECR Draft DL Std", + "ecr_draft_dl_half.rds" = "ECR Draft DL Half", + "ecr_draft_dl_ppr.rds" = "ECR Draft DL PPR", + "ecr_weekly_dl_std.rds" = "ECR Weekly DL Std", + "ecr_weekly_dl_half.rds" = "ECR Weekly DL Half", + "ecr_weekly_dl_ppr.rds" = "ECR Weekly DL PPR", + "ecr_draft_lb_std.rds" = "ECR Draft LB Std", + "ecr_draft_lb_half.rds" = "ECR Draft LB Half", + "ecr_draft_lb_ppr.rds" = "ECR Draft LB PPR", + "ecr_weekly_lb_std.rds" = "ECR Weekly LB Std", + "ecr_weekly_lb_half.rds" = "ECR Weekly LB Half", + "ecr_weekly_lb_ppr.rds" = "ECR Weekly LB PPR", + "ecr_draft_db_std.rds" = "ECR Draft DB Std", + "ecr_draft_db_half.rds" = "ECR Draft DB Half", + "ecr_draft_db_ppr.rds" = "ECR Draft DB PPR", + 
"ecr_weekly_db_std.rds" = "ECR Weekly DB Std", + "ecr_weekly_db_half.rds" = "ECR Weekly DB Half", + "ecr_weekly_db_ppr.rds" = "ECR Weekly DB PPR" +) + + + + + + + + + + + + diff --git a/R/calc_projections.R b/R/calc_projections.R index 30b11cb..6465386 100644 --- a/R/calc_projections.R +++ b/R/calc_projections.R @@ -439,7 +439,7 @@ add_ecr <- function(projection_table){ lg_type <- attr(projection_table, "lg_type") season <- attr(projection_table, "season") week <- attr(projection_table, "week") - message("Scraping ECR data") + message("Scraping ECR data (w/ 2 second delay between pages if not cached)") if(week == 0) { rank_per = "draft" @@ -463,10 +463,19 @@ add_ecr <- function(projection_table){ scraped_ecr = vector("list", length(lg_type)) for(i in seq_along(lg_type)) { + cached_objects = names(list_ffanalytics_cache(TRUE)$object) + req_obj = paste0( + "ecr_", rank_per, "_", + tolower(names(lg_type)[i]), "_", + tolower(lg_type[i]), ".rds" + ) + if(!req_obj %in% cached_objects) { + Sys.sleep(2) + } scraped_ecr[[i]] = scrape_ecr(rank_period = rank_per, position = names(lg_type)[i], rank_type = lg_type[i]) - Sys.sleep(1) + } pos_ecr = dplyr::bind_rows(scraped_ecr) %>% dplyr::select(id, pos_ecr = avg, sd_ecr = std_dev) diff --git a/R/ffanalytics.R b/R/ffanalytics.R index 884a8b2..5ddfd6f 100644 --- a/R/ffanalytics.R +++ b/R/ffanalytics.R @@ -1,17 +1,27 @@ #' @import dplyr tidyr purrr httr2 rvest rrapply -#' @importFrom data.table fread +#' @importFrom data.table fread rbindlist #' @importFrom readxl read_xlsx .onLoad <- function(libname, pkgname){ - player_table <- data.table::fread("https://s3.us-east-2.amazonaws.com/ffanalytics/packagedata/player_table.csv", - colClasses = c("character", "character", "character", "character", "character", - "integer", "integer", "character", "integer", "integer", - "Date", "integer", "integer"), - col.names = c("id", "last_name", "first_name", "position", "team", "weight", - "draft_year", "draft_team", "draft_round", "draft_pick", 
"birthdate", - "age", "exp"), + player_table = data.table::fread("https://s3.us-east-2.amazonaws.com/ffanalytics/packagedata/player_table.csv", + colClasses = c("character", "character", "character", "character", "character", + "integer", "integer", "character", "integer", "integer", + "Date", "integer", "integer"), + col.names = c("id", "last_name", "first_name", "position", "team", "weight", + "draft_year", "draft_team", "draft_round", "draft_pick", "birthdate", + "age", "exp"), sep = ",", skip = 0, data.table = FALSE, showProgress = FALSE) - player_table <- dplyr::as_tibble(player_table) - environment(player_table) <- asNamespace("ffanalytics") + player_table = dplyr::tibble(player_table) + environment(player_table) = asNamespace("ffanalytics") assignInNamespace("player_table", player_table, ns = "ffanalytics") + +} + +.onAttach <- function(libname, pkgname) { + packageStartupMessage( + "Note: the ffanalytics package locally caches ADP & ECR data scrapes. Cached scrapes", + "\nolder than 8 hours are dropped (upon checking)", + "\n - See ?clear_ffanalytics_cache() for how to manually clear the cache", + "\n - Use list_ffanalytics_cache() to see what is currently cached" + ) } diff --git a/R/helper_funcs.R b/R/helper_funcs.R index 293521d..619a309 100644 --- a/R/helper_funcs.R +++ b/R/helper_funcs.R @@ -120,12 +120,10 @@ rename_vec = function(x, new_names, old_names = NULL) { x } - omit_NA = function(x) { x[!is.na(x)] } - row_sd = function(x, na.rm = FALSE) { if(is.data.frame(x)) { x = do.call(cbind, x) @@ -146,4 +144,35 @@ row_sd = function(x, na.rm = FALSE) { r_sd } +# Returns new player_id table +update_player_id_table = function(player_id_table = NULL, id_column, value) { + +} + +get_pos_src_from_scrape = function(data_result) { + data_by_pos_src = lapply(data_result, function(x) { + split(x, x$data_src) + }) + src_pos = stack(lapply(data_by_pos_src, names)) + split(as.character(src_pos$ind), src_pos$values) +} + +# TODO: This may be supersceeded by caching at 
the scrape level +extract_src_scrapes_from_scrape = function(data_result) { + pos_src = get_pos_src_from_scrape(data_result) + + lapply(setNames(names(pos_src), names(pos_src)), function(x) { + positions = setNames(pos_src[[x]], pos_src[[x]]) + lapply(positions, function(pos) { + data_result[[pos]][data_result[[pos]]$data_src == x,] + }) + }) +} + + + + + + + diff --git a/R/scrape_ecr.R b/R/scrape_ecr.R index ac746db..7bf47a3 100644 --- a/R/scrape_ecr.R +++ b/R/scrape_ecr.R @@ -10,6 +10,18 @@ scrape_ecr <- function(rank_period = c("draft", "weekly", "ros", "dynasty", "roo rank_type = match.arg(rank_type, c("Std", "PPR", "Half")) + is_cache_format = rank_period %in% c("draft", "weekly") + + obj_name = paste("ECR", tools::toTitleCase(rank_period), position, rank_type) + is_cached = obj_name %in% list_ffanalytics_cache(TRUE)$object + file_name = sprintf("ecr_%s_%s_%s.rds", rank_period, tolower(position), tolower(rank_type)) + + if(is_cached && is_cache_format) { + out_df = get_cached_object(file_name) + return(out_df) + } + + if (rank_period == "weekly" & any(position == "Overall")) { stop("Overall weekly ranks are not provided", call. 
= FALSE) } @@ -73,7 +85,7 @@ scrape_ecr <- function(rank_period = c("draft", "weekly", "ros", "dynasty", "roo # rank_tab = lapply(rank_tab, `[`, c("player_id", "rank_ave", "rank_std")) - bind_rows(rank_tab) %>% + out_df = bind_rows(rank_tab) %>% mutate(fantasypro_num_id = player_id) %>% transmute(id = get_mfl_id(fantasypro_num_id, player_name = player_name, team = player_team_id, pos = player_position_id), @@ -83,6 +95,11 @@ scrape_ecr <- function(rank_period = c("draft", "weekly", "ros", "dynasty", "roo ecr_min = as.integer(rank_min), ecr_max = as.integer(rank_max)) + if(is_cache_format) { + cache_object(out_df, file_name) + } + out_df + } diff --git a/R/source_scrapes.R b/R/source_scrapes.R index 8cd4067..fc11fb8 100644 --- a/R/source_scrapes.R +++ b/R/source_scrapes.R @@ -3,7 +3,6 @@ # CBS ---- scrape_cbs = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NULL, week = NULL, draft = TRUE, weekly = TRUE) { - message("\nThe CBS scrape uses a 2 second delay between pages") if(is.null(season)) { season = get_scrape_year() @@ -18,6 +17,8 @@ scrape_cbs = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NULL scrape_week = week } + message("\nThe CBS scrape uses a 2 second delay between pages") + base_link = paste0("https://www.cbssports.com/fantasy/football/") site_session = rvest::session(base_link) @@ -171,6 +172,7 @@ scrape_nfl = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NULL "(.*?)\\s+\\b(QB|RB|WR|TE|K)\\b.*?([A-Z]{2,3})") } else { out_df$team = sub("\\s+DEF$", "", out_df$team) + out_df$pos = "DST" } if(pos %in% c("RB", "WR", "TE") && "pass_int" %in% names(out_df)) { @@ -192,12 +194,12 @@ scrape_nfl = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NULL # Adding IDs out_df$id = get_mfl_id( out_df$nfl_id, - player_name = out_df$player, + player_name = if(pos == "DST") NULL else out_df$player, pos = out_df$pos, team = out_df$team ) out_df = out_df %>% - dplyr::select(id, src_id = nfl_id, player, pos, 
team, dplyr::everything()) + dplyr::select(id, src_id = nfl_id, any_of("player"), pos, team, dplyr::everything()) Sys.sleep(2L) # temporary, until I get an argument for honoring the crawl delay @@ -1191,7 +1193,6 @@ scrape_espn = function(pos = c("QB", "RB", "WR", "TE", "K", "DST"), season = NUL l_players[[i]]$position = pos } - # TODO: Adding MFL ID, type.convert, reorder columns out_df = dplyr::bind_rows(l_players) out_df$data_src = "ESPN" diff --git a/R/sysdata.rda b/R/sysdata.rda index 2c0029b..f2a9385 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/README.md b/README.md index 6b15501..b3e5d37 100644 --- a/README.md +++ b/README.md @@ -56,8 +56,8 @@ season the user would run: ``` my_scrape <- scrape_data(src = c("CBS", "NFL", "NumberFire"), pos = c("QB", "RB", "WR", "TE", "DST"), - season = 2022, - week = NULL) # NULL brings in the current week + season = NULL, # NULL grabs the current season + week = NULL) # NULL grabs the current week ``` `my_scrape` will be a list of tibbles, one for each position scraped, which contains diff --git a/data-raw/player_directories.R b/data-raw/player_directories.R index 04f0d65..f57b39f 100644 --- a/data-raw/player_directories.R +++ b/data-raw/player_directories.R @@ -29,15 +29,15 @@ cbs_data = lapply(cbs_links, function(page) { cols = unique(c(cols12, cols3)) - data.frame(player_names = basename(cols), - player_id = basename(dirname(cols)), + data.frame(player_names = basename(dirname(cols)), + player_id = basename(dirname(dirname(cols))), position = basename(page)) }) final_cbs = dplyr::bind_rows(cbs_data) %>% dplyr::transmute(cbs_id = player_id, id = get_mfl_id(player_name = player_names, pos = position), - merge_id = paste0(sub("\\-", "", player_names), "_", tolower(position))) + merge_id = paste0(gsub("\\(-)", "", player_names), "_", tolower(position))) #### FFToday Players #### ---- @@ -75,7 +75,7 @@ final_fft = dplyr::bind_rows(fft_data) %>% # Getting Players from last years stats # Getting 
links fp_pos = c("QB", "RB", "WR", "TE", "K", "DST", "DL", "LB", "DB") -last_year = 2021 +last_year = 2022 fp_lastyr_links = paste0("https://www.fantasypros.com/nfl/stats/", tolower(fp_pos), ".php?year=", last_year) @@ -233,7 +233,7 @@ rm(list = ls(pattern = "^fp_")) # testing new NFL method because they have not updated player ids on player page -season = 2022 +season = 2023 week = ffanalytics:::get_scrape_week() pos = c("QB", "RB", "WR", "TE", "K") @@ -386,7 +386,7 @@ final_flfl = final_flfl %>% fleaflicker_id) #### Yahoo ---- - +# TODO: update w/ new API html_session = rvest::session("https://football.fantasysports.yahoo.com/f1/draftanalysis?tab=AD&pos=ALL&sort=DA_AP") l_yahoo = list() @@ -431,6 +431,10 @@ final_yahoo = final_yahoo %>% merge_id, stats_id) +# Getting ESPN ID's + + + # Cleaning up above scrapes @@ -443,7 +447,7 @@ gc() curr_ids = ffanalytics:::player_ids -my_fl_ids = httr::GET("https://api.myfantasyleague.com/2022/export?TYPE=players&L=&APIKEY=&DETAILS=1&SINCE=&PLAYERS=&JSON=1") %>% +my_fl_ids = httr::GET("https://api.myfantasyleague.com/2023/export?TYPE=players&L=&APIKEY=&DETAILS=1&SINCE=&PLAYERS=&JSON=1") %>% httr::content() %>% `[[`("players") %>% `[[`("player") %>% diff --git a/man/clear_ffanalytics_cache.Rd b/man/clear_ffanalytics_cache.Rd new file mode 100644 index 0000000..9c0e7bb --- /dev/null +++ b/man/clear_ffanalytics_cache.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/caching_helpers.R +\name{clear_ffanalytics_cache} +\alias{clear_ffanalytics_cache} +\title{Manually clear cache associated with ffanalytics package} +\usage{ +clear_ffanalytics_cache(ffa_objects = NULL) +} +\arguments{ +\item{ffa_objects}{(\emph{optional}) a character vector of object names (i.e., +returned by \code{list_ffanalytics_cache()})} +} +\description{ +Clears all scraped data cache associated with the ffanalytics package, or a +subset of objects from the cache +} diff --git a/man/list_ffanalytics_cache.Rd 
b/man/list_ffanalytics_cache.Rd new file mode 100644 index 0000000..e801e31 --- /dev/null +++ b/man/list_ffanalytics_cache.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/caching_helpers.R +\name{list_ffanalytics_cache} +\alias{list_ffanalytics_cache} +\title{Checks the scrapes that are currently cached} +\usage{ +list_ffanalytics_cache(quiet = FALSE) +} +\arguments{ +\item{quiet}{whether the function should return a message if cache is empty} +} +\value{ +A \code{data.frame} with the object (scrape) name, and time since caching +} +\description{ +Checks the cached data, removes cached objects >= 8 hours old, and returns a list +of objects that are currently cached +} diff --git a/man/mfl_draft.Rd b/man/mfl_draft.Rd index 5a83647..a95f1fe 100644 --- a/man/mfl_draft.Rd +++ b/man/mfl_draft.Rd @@ -35,5 +35,5 @@ A \link{data.frame} with the results. } \description{ This function scrapes ADP or AAV data from MyFantasyLeague. More details on -the API available at \link{https://api.myfantasyleague.com/2022/api_info?STATE=details} +the API available at \link{https://api.myfantasyleague.com/2023/api_info?STATE=details} }