From 55a5b339cc7374d1e2deb649ecee1b5746323089 Mon Sep 17 00:00:00 2001 From: Tan Ho Date: Sun, 29 May 2022 12:57:43 -0400 Subject: [PATCH] 0.0.5 - granular file type control in nflverse_save --- DESCRIPTION | 4 ++-- NAMESPACE | 1 + NEWS.md | 4 ++++ R/upload.R | 36 ++++++++++++++++++++++-------------- man/nflverse_archive.Rd | 14 ++++++++++++++ man/nflverse_save.Rd | 4 +++- 6 files changed, 46 insertions(+), 17 deletions(-) create mode 100644 man/nflverse_archive.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 5482ae0..00cfb83 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: nflversedata Title: nflverse data storage functions -Version: 0.0.4 +Version: 0.0.5 Authors@R: person("Tan", "Ho", , "tan@tanho.ca", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8388-5155")) @@ -28,4 +28,4 @@ Remotes: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 diff --git a/NAMESPACE b/NAMESPACE index e44a47f..439be1e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,4 +1,5 @@ # Generated by roxygen2: do not edit by hand +export(nflverse_archive) export(nflverse_save) export(nflverse_upload) diff --git a/NEWS.md b/NEWS.md index 0aed5db..0357f6a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# nflversedata 0.0.5 + +* Added file_types argument which takes a vector of file types and defaults to `c("csv","rds","qs","parquet")` + # nflversedata 0.0.4 * Added `nflverse_archive()` which automates archiving the rds version of every nflverse-data release asset and pushes it to https://github.com/nflverse/nflverse-data-archives diff --git a/R/upload.R b/R/upload.R index 8956cab..94404a7 100644 --- a/R/upload.R +++ b/R/upload.R @@ -40,6 +40,7 @@ update_release_timestamp <- function(tag){ #' @param nflverse_type metadata: name/information to add to data #' @param release_tag name of release to upload to #' @param .token a GitHub token, defaults to gh::gh_token() +#' @param file_types one or more of c("rds","csv","parquet","qs","csv.gz") #' #' @export nflverse_save <- function(data_frame, @@ -47,8 +48,8 @@ nflverse_save <- function(data_frame, nflverse_type, release_tag, .token = gh::gh_token(), - include_gz = FALSE - ){ + file_types = c("rds","csv","parquet","qs") +){ stopifnot( is.data.frame(data_frame), @@ -56,29 +57,36 @@ nflverse_save <- function(data_frame, is.character(nflverse_type), is.character(release_tag), is.character(.token), + is.character(file_types), length(file_name) == 1, length(nflverse_type) == 1, length(release_tag) == 1, - length(.token) == 1 + length(.token) == 1, + length(file_types) >= 1 ) attr(data_frame,"nflverse_type") <- nflverse_type attr(data_frame,"nflverse_timestamp") <- Sys.time() temp_dir <- tempdir(check = TRUE) + ft <- rlang::arg_match(file_types, + values = c("rds","csv","csv.gz","parquet","qs"), + multiple = TRUE) - saveRDS(data_frame,file.path(temp_dir,paste0(file_name,".rds"))) - data.table::fwrite(data_frame, file.path(temp_dir,paste0(file_name,".csv"))) - if(include_gz) data.table::fwrite(data_frame, file.path(temp_dir,paste0(file_name,".csv.gz"))) - arrow::write_parquet(data_frame, file.path(temp_dir, paste0(file_name,".parquet"))) - qs::qsave(data_frame, - file.path(temp_dir,paste0(file_name,".qs")), - preset = "custom", - algorithm = "zstd_stream", - compress_level = 22, - shuffle_control = 15) + if("rds" %in% ft) saveRDS(data_frame,file.path(temp_dir,paste0(file_name,".rds"))) + if("csv" %in% ft) data.table::fwrite(data_frame, file.path(temp_dir,paste0(file_name,".csv"))) + if("csv.gz" %in% ft) data.table::fwrite(data_frame, file.path(temp_dir,paste0(file_name,".csv.gz"))) + if("parquet" %in% ft) arrow::write_parquet(data_frame, file.path(temp_dir, paste0(file_name,".parquet"))) + if("qs" %in% ft){ + qs::qsave(data_frame, + file.path(temp_dir,paste0(file_name,".qs")), + preset = "custom", + algorithm = "zstd_stream", + compress_level = 22, + shuffle_control = 15) + } - .filetypes <- if(include_gz) c(".rds",".csv",".csv.gz",".parquet",".qs") else c(".rds",".csv",".parquet",".qs") + .filetypes <- paste0(".",ft) .file_names <- file.path(temp_dir, paste0(file_name,.filetypes)) diff --git a/man/nflverse_archive.Rd b/man/nflverse_archive.Rd new file mode 100644 index 0000000..1c31399 --- /dev/null +++ b/man/nflverse_archive.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/archive.R +\name{nflverse_archive} +\alias{nflverse_archive} +\title{Archive nflverse} +\usage{ +nflverse_archive(release_name) +} +\arguments{ +\item{release_name}{} +} +\description{ +Archive nflverse +} diff --git a/man/nflverse_save.Rd b/man/nflverse_save.Rd index 9d14117..2b82bfc 100644 --- a/man/nflverse_save.Rd +++ b/man/nflverse_save.Rd @@ -10,7 +10,7 @@ nflverse_save( nflverse_type, release_tag, .token = gh::gh_token(), - include_gz = FALSE + file_types = c("rds", "csv", "parquet", "qs") ) } \arguments{ @@ -23,6 +23,8 @@ nflverse_save( \item{release_tag}{name of release to upload to} \item{.token}{a GitHub token, defaults to gh::gh_token()} + +\item{file_types}{one or more of c("rds","csv","parquet","qs","csv.gz")} } \description{ This functions attaches nflverse attributes like type and timestamp, saves