From 9e3237a9bb1f3b5450238a7d38c6325ad556f085 Mon Sep 17 00:00:00 2001 From: Tan Ho <38083823+tanho63@users.noreply.github.com> Date: Fri, 6 May 2022 13:41:32 -0400 Subject: [PATCH] nflverse_save (#3) * nflverse_save * Update DESCRIPTION * add include_gz param --- DESCRIPTION | 9 +++-- NAMESPACE | 1 + NEWS.md | 4 ++ R/upload.R | 83 ++++++++++++++++++++++++++++++++---------- man/nflverse_save.Rd | 33 ++++++++++++++++ man/nflverse_upload.Rd | 4 +- nflverse-data.Rproj | 1 + 7 files changed, 112 insertions(+), 23 deletions(-) create mode 100644 man/nflverse_save.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 226c92a..482a595 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: nflversedata Title: nflverse data storage functions -Version: 0.0.2 +Version: 0.0.3 Authors@R: person("Tan", "Ho", , "tan@tanho.ca", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8388-5155")) @@ -12,12 +12,15 @@ BugReports: https://github.com/nflverse/nflverse-data/issues Depends: R (>= 4.1.0) Imports: - cli, + cli (>= 3.2.0), gh (>= 1.3.0), glue (>= 1.5.0), httr (>= 1.4.0), jsonlite (>= 1.7.0), - piggyback (>= 0.1.1) + piggyback (>= 0.1.2), + arrow (>= 5.0.0), + qs (>= 0.25.1), + data.table (>= 1.14.0) Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 7a6ed2b..e44a47f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,4 @@ # Generated by roxygen2: do not edit by hand +export(nflverse_save) export(nflverse_upload) diff --git a/NEWS.md b/NEWS.md index 37d8d1b..f9ba94a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# nflversedata 0.0.3 + +* Added `nflverse_save()` which automates saving to the four main supported file formats (csv, rds, parquet, qs) and uploading to a specified release tag. + # nflversedata 0.0.2 * Added a `NEWS.md` file to track changes to the package. 
diff --git a/R/upload.R b/R/upload.R index 8e3584e..09bb422 100644 --- a/R/upload.R +++ b/R/upload.R @@ -10,32 +10,77 @@ nflverse_upload <- function(files, tag, ...){ # upload files piggyback::pb_upload(files, repo = "nflverse/nflverse-data", tag = tag, ...) update_release_timestamp(tag) - cli::cli_alert("Uploaded {length(files)} to nflverse/nflverse-data @ {tag} on {Sys.time()}") } update_release_timestamp <- function(tag){ - x <- tempdir(check = TRUE) - on.exit(file.remove(file.path(x,"timestamp.txt")), add = TRUE) - - update_time <- format(Sys.time(),tz = "America/Toronto",usetz = TRUE) - writeLines(update_time, file.path(x,"timestamp.txt")) + temp_dir <- tempdir(check = TRUE) - list(last_updated = update_time) |> jsonlite::toJSON(auto_unbox = TRUE) |> writeLines(file.path(x,"timestamp.json")) + update_time <- format(Sys.time(), tz = "America/Toronto", usetz = TRUE) + writeLines(update_time, file.path(temp_dir, "timestamp.txt")) - piggyback::pb_upload(file.path(x,"timestamp.txt"), repo = "nflverse/nflverse-data", tag = tag, overwrite = TRUE) - piggyback::pb_upload(file.path(x,"timestamp.json"), repo = "nflverse/nflverse-data", tag = tag, overwrite = TRUE) + list(last_updated = update_time) |> + jsonlite::toJSON(auto_unbox = TRUE) |> + writeLines(file.path(temp_dir,"timestamp.json")) - # current_release <- httr::GET(glue::glue("https://api.github.com/repos/nflverse/nflverse-data/releases/tags/{tag}")) |> - # httr::content() - # - # current_body <- current_release$body - # - # new_body <- gsub("Last Updated: .*$", "",x = current_body) |> paste0("Last Updated: ",update_time) - # - # update_result <- httr::PATCH(glue::glue("https://api.github.com/repos/nflverse/nflverse-data/releases/{current_release$id}"), - # httr::add_headers(Authorization = paste("token",gh::gh_token())), - # body = jsonlite::toJSON(list(body = new_body),auto_unbox = TRUE)) + piggyback::pb_upload(file.path(temp_dir,"timestamp.txt"), repo = "nflverse/nflverse-data", tag = tag, overwrite = TRUE) + 
piggyback::pb_upload(file.path(temp_dir,"timestamp.json"), repo = "nflverse/nflverse-data", tag = tag, overwrite = TRUE) invisible(NULL) } + +#' Save files to nflverse release +#' +#' This function attaches nflverse attributes like type and timestamp, saves +#' data to a temporary directory in all four of csv, rds, parquet, and qs formats, +#' and then uploads to nflverse-data repository for a specified release tag. +#' +#' @param data_frame data_frame to save +#' @param file_name file_name to upload as, without the file extension +#' @param nflverse_type metadata: name/information to add to data +#' @param release_tag name of release to upload to +#' @param .token a GitHub token, defaults to gh::gh_token() +#' @param include_gz if TRUE, also save and upload a gzip-compressed csv (.csv.gz); defaults to FALSE +#' @export +nflverse_save <- function(data_frame, + file_name, + nflverse_type, + release_tag, + .token = gh::gh_token(), + include_gz = FALSE + ){ + + stopifnot( + is.data.frame(data_frame), + is.character(file_name), + is.character(nflverse_type), + is.character(release_tag), + is.character(.token), + length(file_name) == 1, + length(nflverse_type) == 1, + length(release_tag) == 1, + length(.token) == 1 + ) + + attr(data_frame,"nflverse_type") <- nflverse_type + attr(data_frame,"nflverse_data") <- Sys.time() # NOTE(review): holds a timestamp; confirm readers expect the name "nflverse_data" + + temp_dir <- tempdir(check = TRUE) + + saveRDS(data_frame,file.path(temp_dir,paste0(file_name,".rds"))) + data.table::fwrite(data_frame, file.path(temp_dir,paste0(file_name,".csv"))) + if(include_gz) data.table::fwrite(data_frame, file.path(temp_dir,paste0(file_name,".csv.gz"))) + arrow::write_parquet(data_frame, file.path(temp_dir, paste0(file_name,".parquet"))) + qs::qsave(data_frame, + file.path(temp_dir,paste0(file_name,".qs")), + preset = "custom", + algorithm = "zstd_stream", + compress_level = 22, + shuffle_control = 15) + + .filetypes <- if(include_gz) c(".rds",".csv",".csv.gz",".parquet",".qs") else c(".rds",".csv",".parquet",".qs") + + .file_names <- file.path(temp_dir, paste0(file_name,.filetypes)) + + 
nflverse_upload(.file_names,tag = release_tag, .token = .token) +} diff --git a/man/nflverse_save.Rd b/man/nflverse_save.Rd new file mode 100644 index 0000000..4bc260b --- /dev/null +++ b/man/nflverse_save.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/upload.R +\name{nflverse_save} +\alias{nflverse_save} +\title{Save files to nflverse release} +\usage{ +nflverse_save( + data_frame, + file_name, + nflverse_type, + release_tag, + .token = gh::gh_token(), + include_gz = FALSE +) +} +\arguments{ +\item{data_frame}{data_frame to save} + +\item{file_name}{file_name to upload as, without the file extension} + +\item{nflverse_type}{metadata: name/information to add to data} + +\item{release_tag}{name of release to upload to} + +\item{.token}{a GitHub token, defaults to gh::gh_token()} + +\item{include_gz}{if TRUE, also save and upload a gzip-compressed csv (.csv.gz); defaults to FALSE} +} +\description{ +This function attaches nflverse attributes like type and timestamp, saves +data to a temporary directory in all four of csv, rds, parquet, and qs formats, +and then uploads to nflverse-data repository for a specified release tag. +} diff --git a/man/nflverse_upload.Rd b/man/nflverse_upload.Rd index 1336eb1..75948c6 100644 --- a/man/nflverse_upload.Rd +++ b/man/nflverse_upload.Rd @@ -4,12 +4,14 @@ \alias{nflverse_upload} \title{Upload to nflverse release} \usage{ -nflverse_upload(files, tag) +nflverse_upload(files, tag, ...) } \arguments{ \item{files}{vector of filepaths to upload} \item{tag}{release name} + +\item{...}{other args passed to \code{piggyback::pb_upload()}} } \description{ Upload to nflverse release diff --git a/nflverse-data.Rproj b/nflverse-data.Rproj index 6a3ede2..69fafd4 100644 --- a/nflverse-data.Rproj +++ b/nflverse-data.Rproj @@ -19,3 +19,4 @@ LineEndingConversion: Posix BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace