From 4d6b88c217b31b7444242ff027d2c08801cc7bc1 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Fri, 19 May 2023 08:56:45 -0600 Subject: [PATCH 1/5] improved functionality of key and prefix variables so that aws paths are easier to set --- DESCRIPTION | 2 +- R/utils-aws-upload.R | 63 +++++++++++++++++++++++++++++++----------- man/aws_s3_download.Rd | 3 +- man/aws_s3_upload.Rd | 18 ++++++------ 4 files changed, 60 insertions(+), 26 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f09b106..d7231ea 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: containerTemplateUtils Title: Provides utility functions to the container-template repository -Version: 0.0.0.9003 +Version: 0.0.0.9004 Authors@R: c(person(given = "Collin", family = "Schwantes", diff --git a/R/utils-aws-upload.R b/R/utils-aws-upload.R index ff9d02f..083ef04 100644 --- a/R/utils-aws-upload.R +++ b/R/utils-aws-upload.R @@ -1,15 +1,20 @@ #' Upload files or folders to AWS #' -#' @param path The path to the file(s) or folder(s) to be uploaded -#' @param bucket The name of the bucket to be uploaded to -#' @param key The key or name for the file or folder to take in the bucket. -#' Should end with "/" for folders. Use "" to upload files in folder without +#' When uploading folders, the subdirectory structure will be preserved. To +#' upload files from a folder without preserving the directory structure, +#' pass a vector of file paths to the path arugment. +#' +#' +#' @param path String. The path to the file(s) or folder(s) to be uploaded +#' @param bucket String. The name of the bucket to be uploaded to +#' @param key String. The key or name for the file or folder to take in the bucket. +#' Should end with "/" for folders. Use "" (empty string) to upload files in folder without #' top-level folder. -#' @param prefix A prefix to prepend to the file or folder keys. Generally +#' @param prefix String. A prefix to prepend to the file or folder keys. 
Generally #' should end with "/" -#' @param check Whether to check if the exact file already exists in the bucket +#' @param check Logical. Whether to check if the exact file already exists in the bucket #' and skip uploading. Defaults to TRUE -#' @param error Whether error out if the file is missing, folder is empty, or +#' @param error Logical. Whether error out if the file is missing, folder is empty, or #' system environment variables are missing. Otherwise a message will print #' but an empty list will be returned. #' @param file_type String. Provide a file type from [mime::mimemap()] (e.g. "html","csv") @@ -53,22 +58,44 @@ aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "", } - if (!file.exists(path)) { + + # Single path (directory or file) workflow + + file_check <- utils::file_test(op = "-f",path) + dir_check <- utils::file_test(op = "-d",path) + + # if neither the file nor the directory exist, break or warn + if (!file_check & !dir_check) { if (error) { - stop("File not found.") + err_msg <- glue::glue("Neither File nor Directory not found. Argument supplied + to path does not appear to exist. {path}") + stop(err_msg) } else { - message("No file found. No upload, returning empty list") + msg <- glue::glue("Neither File nor Directory not found. Argument supplied + to path does not appear to exist. 
{path} + + Returning an empty list.") + message(msg) return(list()) } } + + # single path workflow svc <- paws::s3() - # if the file exists and the dir does not, - if (file.exists(path) && !dir.exists(path)) { - out <- list(aws_s3_upload_single(path, paste0(prefix, key), bucket, check, svc, file_type)) - } else if (file.exists(path) && dir.exists(path)) { + # if path is a single file + if (file_check) { + out <- list(aws_s3_upload_single(path = path, + key = paste0(prefix, key), + bucket = bucket, + check = check, + svc = svc, + file_type = file_type)) + } + # if path is a directory + if(dir_check){ files <- list.files(path, recursive = TRUE, full.names = TRUE, all.files = TRUE) if (!length(files)) { @@ -81,8 +108,12 @@ aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "", } # Create prefixed records (archive or branches) - keys <- paste0(prefix, gsub(paste0("^", basename(path)), key, files)) - keys <- gsub("/+", "/", keys) + #keys <- paste0(prefix, gsub(paste0("^", basename(path)), key, files)) + + file_paths <- gsub(paste0("^", basename(path)), "", files) + + keys <- sprintf("%s/%s/%s",prefix,key,file_paths) + keys <- gsub("/{2,}", "/", keys) ## correcting multiple slashes in key out <- mapply(aws_s3_upload_single, path = files, diff --git a/man/aws_s3_download.Rd b/man/aws_s3_download.Rd index 3d654d1..d96ab76 100644 --- a/man/aws_s3_download.Rd +++ b/man/aws_s3_download.Rd @@ -38,5 +38,6 @@ files \description{ Providing a key that is a folder and path that is a folder will result in the whole folder being copied to the path location. If you supply "" to key, -the whole bucket will be downloaded. +the whole bucket will be downloaded. Use \code{copy_s3_dir_structure} to copy the +bucket "directory" structure. 
} diff --git a/man/aws_s3_upload.Rd b/man/aws_s3_upload.Rd index dbfb8bd..3094c38 100644 --- a/man/aws_s3_upload.Rd +++ b/man/aws_s3_upload.Rd @@ -15,21 +15,21 @@ aws_s3_upload( ) } \arguments{ -\item{path}{The path to the file(s) or folder(s) to be uploaded} +\item{path}{String. The path to the file(s) or folder(s) to be uploaded} -\item{bucket}{The name of the bucket to be uploaded to} +\item{bucket}{String. The name of the bucket to be uploaded to} -\item{key}{The key or name for the file or folder to take in the bucket. -Should end with "/" for folders. Use "" to upload files in folder without +\item{key}{String. The key or name for the file or folder to take in the bucket. +Should end with "/" for folders. Use "" (empty string) to upload files in folder without top-level folder.} -\item{prefix}{A prefix to prepend to the file or folder keys. Generally +\item{prefix}{String. A prefix to prepend to the file or folder keys. Generally should end with "/"} -\item{check}{Whether to check if the exact file already exists in the bucket +\item{check}{Logical. Whether to check if the exact file already exists in the bucket and skip uploading. Defaults to TRUE} -\item{error}{Whether error out if the file is missing, folder is empty, or +\item{error}{Logical. Whether error out if the file is missing, folder is empty, or system environment variables are missing. Otherwise a message will print but an empty list will be returned.} @@ -41,5 +41,7 @@ A list, each element being having the key and etag (hash) of uploaded files } \description{ -Upload files or folders to AWS +When uploading folders, the subdirectory structure will be preserved. To +upload files from a folder without preserving the directory structure, +pass a vector of file paths to the path arugment. 
} From 91f6b61f8f999492770db3fa8f36dbe73ed3da11 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Fri, 19 May 2023 09:28:13 -0600 Subject: [PATCH 2/5] updated documentation --- R/utils-aws-upload.R | 5 ++++- man/aws_s3_upload.Rd | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/R/utils-aws-upload.R b/R/utils-aws-upload.R index 083ef04..60d9767 100644 --- a/R/utils-aws-upload.R +++ b/R/utils-aws-upload.R @@ -2,7 +2,10 @@ #' #' When uploading folders, the subdirectory structure will be preserved. To #' upload files from a folder without preserving the directory structure, -#' pass a vector of file paths to the path arugment. +#' pass a vector of file paths to the path argument. +#' +#' If you would like the change the directory structure, pass in a vector of +#' file paths and a corresponding vector of keys. #' #' #' @param path String. The path to the file(s) or folder(s) to be uploaded diff --git a/man/aws_s3_upload.Rd b/man/aws_s3_upload.Rd index 3094c38..2839904 100644 --- a/man/aws_s3_upload.Rd +++ b/man/aws_s3_upload.Rd @@ -43,5 +43,9 @@ files \description{ When uploading folders, the subdirectory structure will be preserved. To upload files from a folder without preserving the directory structure, -pass a vector of file paths to the path arugment. +pass a vector of file paths to the path argument. +} +\details{ +If you would like the change the directory structure, pass in a vector of +file paths and a corresponding vector of keys. 
} From d50d565bb38a79b5fe8f034f6d10ba14d9219cba Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Fri, 19 May 2023 11:44:48 -0600 Subject: [PATCH 3/5] change key precursor so that folder component is removed from file_paths --- R/utils-aws-upload.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/utils-aws-upload.R b/R/utils-aws-upload.R index 60d9767..5a725e5 100644 --- a/R/utils-aws-upload.R +++ b/R/utils-aws-upload.R @@ -113,7 +113,8 @@ aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "", # Create prefixed records (archive or branches) #keys <- paste0(prefix, gsub(paste0("^", basename(path)), key, files)) - file_paths <- gsub(paste0("^", basename(path)), "", files) + ## drop the path argument from the key precursor + file_paths <- gsub(paste0("^", path), "", files) keys <- sprintf("%s/%s/%s",prefix,key,file_paths) keys <- gsub("/{2,}", "/", keys) ## correcting multiple slashes in key From 2d1d52d91c7846d4e78b75be9e3007f682b71b94 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Tue, 23 May 2023 10:40:12 -0600 Subject: [PATCH 4/5] updated function to place single files in directories and added examples --- R/utils-aws-upload.R | 147 ++++++++++++++++++++++++++++++++++++++++++- man/aws_s3_upload.Rd | 128 ++++++++++++++++++++++++++++++++++++- 2 files changed, 271 insertions(+), 4 deletions(-) diff --git a/R/utils-aws-upload.R b/R/utils-aws-upload.R index 5a725e5..0ca3465 100644 --- a/R/utils-aws-upload.R +++ b/R/utils-aws-upload.R @@ -10,8 +10,8 @@ #' #' @param path String. The path to the file(s) or folder(s) to be uploaded #' @param bucket String. The name of the bucket to be uploaded to -#' @param key String. The key or name for the file or folder to take in the bucket. -#' Should end with "/" for folders. Use "" (empty string) to upload files in folder without +#' @param key String. The "path" of the file(s) or folder(s) in the AWS bucket. +#' Should end with "/" for folders. 
Use "" (an empty string) to upload files in folder without #' top-level folder. #' @param prefix String. A prefix to prepend to the file or folder keys. Generally #' should end with "/" @@ -28,6 +28,129 @@ #' #' @export aws_s3_upload #' +#' @examples +#' \dontrun{ +#' +#' # Upload a single file to a specific location in the bucket. +#' # this will take the readme.md file and place in the exact location +#' # specified by the key and prefix. Notice the key ends in a file +#' # extension. +#' +#' containerTemplateUtils::aws_s3_upload(path = "README.md", +#' key = "test/key/param/readme.md", +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # A vector of paths with a matching vector of keys +#' # will also result in exact placement. +#' +#' paths <- list.files("R",full.names = TRUE ) +#' +#' file_names <- basename(paths) +#' +#' keys <- sprintf("%s/%s","example_dir",file_names) +#' +#' containerTemplateUtils::aws_s3_upload(path = paths, +#' key = keys, +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws keys will be "example_dir/" +#' +#' # Supplying a single file path and key with no file extension will +#' # result in the key being treated as a directory and the file being placed +#' # in that directory. +#' +#' containerTemplateUtils::aws_s3_upload(path = "R/utils-aws-upload.R", +#' key = "test/key/param", +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws key will be "test/key/param/utils-aws-upload.R" +#' +#' # Supplying a single file path and no key argument will result in the file +#' # being uploaded to the top level directory of the bucket. +#' +#' containerTemplateUtils::aws_s3_upload(path = "R/utils-aws-upload.R", +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws key will be "./utils-aws-upload.R" +#' +#' # If the path argument is a folder, the key argument should also be a folder. +#' # Files from the folder will be uploaded into that directory. 
+#' +#' containerTemplateUtils::aws_s3_upload(path = "R/", +#' key = "test/upload_folder/", +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws keys will be "test/upload_folder/" +#' +#' # If the path argument is a folder with sub-directories, the structure of +#' # the sub-directories will be preserved. +#' +#' dir.create("example_with_sub_dirs") +#' dir.create("example_with_sub_dirs/sub_dir") +#' file.create("example_with_sub_dirs/sub_dir/test.txt") +#' +#' containerTemplateUtils::aws_s3_upload(path = "example_with_sub_dirs/", +#' key = "test/upload_nested_folder/", +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws key will be "test/upload_nested_folder/example_with_sub_dirs/sub_dir/test.txt" +#' +#' # If the path argument is a folder and no key argument is supplied, +#' # the local directory structure will be copied to the S3 bucket. +#' +#' containerTemplateUtils::aws_s3_upload(path = "example_with_sub_dirs/", +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws keys will be "example_with_sub_dirs/" +#' +#' # If the path argument is a folder and key is an empty string, then only +#' # the files from the folder will be uploaded. +#' +#' containerTemplateUtils::aws_s3_upload(path = "R/", +#' key = "", +#' error = TRUE, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws keys will be "./" +#' +#' # The prefix argument can be used to add a directory to the beginning of +#' # a path in the AWS bucket. This can be used with files or folders. 
+#' +#' containerTemplateUtils::aws_s3_upload(path = "R/", +#' key = "example_r_scripts", +#' error = TRUE, +#' prefix = "my_example_prefix", +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws keys will be "my_example_prefix/example_r_scripts/" +#' +#' # This can be useful if you're using version control +#' # systems like git and would like to organize files by branch +#' +#' library(gert) +#' git_prefix <- gert::git_branch() +#' +#' containerTemplateUtils::aws_s3_upload(path = "R/", +#' key = "", +#' error = TRUE, +#' prefix = git_prefix, +#' bucket =Sys.getenv("AWS_BUCKET")) +#' +#' # aws keys will be "/" +#' +#' +#' +#' } +#' +#' +#' aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "", check = TRUE, error = FALSE, file_type = "guess") { @@ -90,6 +213,26 @@ aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "", # if path is a single file if (file_check) { + + # check that the key has a file extension + + key_ext_check <- check_for_file_extension(key) + + if(!key_ext_check){ + + key_1 <- sprintf("%s/%s/%s",prefix,key, path) + key <- gsub("/{2,}", "/", key_1) ## correcting multiple slashes in key + + wrn_msg <- glue::glue( + "Path is a single file and key does not have a file extension. + Treating key as a subdirectory. + Path in AWS is: + {key}") + + warning(wrn_msg) + + } + out <- list(aws_s3_upload_single(path = path, key = paste0(prefix, key), bucket = bucket, diff --git a/man/aws_s3_upload.Rd b/man/aws_s3_upload.Rd index 2839904..e7aa8ed 100644 --- a/man/aws_s3_upload.Rd +++ b/man/aws_s3_upload.Rd @@ -19,8 +19,8 @@ aws_s3_upload( \item{bucket}{String. The name of the bucket to be uploaded to} -\item{key}{String. The key or name for the file or folder to take in the bucket. -Should end with "/" for folders. Use "" (empty string) to upload files in folder without +\item{key}{String. The "path" of the file(s) or folder(s) in the AWS bucket. +Should end with "/" for folders. 
Use "" (an empty string) to upload files in folder without top-level folder.} \item{prefix}{String. A prefix to prepend to the file or folder keys. Generally @@ -48,4 +48,128 @@ pass a vector of file paths to the path argument. \details{ If you would like the change the directory structure, pass in a vector of file paths and a corresponding vector of keys. +} +\examples{ +\dontrun{ + +# Upload a single file to a specific location in the bucket. +# this will take the readme.md file and place in the exact location +# specified by the key and prefix. Notice the key ends in a file +# extension. + +containerTemplateUtils::aws_s3_upload(path = "README.md", +key = "test/key/param/readme.md", +error = TRUE, +bucket =Sys.getenv("AWS_BUCKET")) + +# A vector of paths with a matching vector of keys +# will also result in exact placement. + +paths <- list.files("R",full.names = TRUE ) + +file_names <- basename(paths) + +keys <- sprintf("\%s/\%s","example_dir",file_names) + +containerTemplateUtils::aws_s3_upload(path = paths, +key = keys, +error = TRUE, +bucket =Sys.getenv("AWS_BUCKET")) + +# aws keys will be "example_dir/" + +# Supplying a single file path and key with no file extension will +# result in the key being treated as a directory and the file being placed +# in that directory. + +containerTemplateUtils::aws_s3_upload(path = "R/utils-aws-upload.R", + key = "test/key/param", + error = TRUE, + bucket =Sys.getenv("AWS_BUCKET")) + +# aws key will be "test/key/param/utils-aws-upload.R" + +# Supplying a single file path and no key argument will result in the file +# being uploaded to the top level directory of the bucket. + +containerTemplateUtils::aws_s3_upload(path = "R/utils-aws-upload.R", + error = TRUE, + bucket =Sys.getenv("AWS_BUCKET")) + +# aws key will be "./utils-aws-upload.R" + +# If the path argument is a folder, the key argument should also be a folder. +# Files from the folder will be uploaded into that directory. 
+ +containerTemplateUtils::aws_s3_upload(path = "R/", + key = "test/upload_folder/", + error = TRUE, + bucket =Sys.getenv("AWS_BUCKET")) + +# aws keys will be "test/upload_folder/" + +# If the path argument is a folder with sub-directories, the structure of +# the sub-directories will be preserved. + +dir.create("example_with_sub_dirs") +dir.create("example_with_sub_dirs/sub_dir") +file.create("example_with_sub_dirs/sub_dir/test.txt") + +containerTemplateUtils::aws_s3_upload(path = "example_with_sub_dirs/", + key = "test/upload_nested_folder/", + error = TRUE, + bucket =Sys.getenv("AWS_BUCKET")) + +# aws key will be "test/upload_nested_folder/example_with_sub_dirs/sub_dir/test.txt" + +# If the path argument is a folder and no key argument is supplied, +# the local directory structure will be copied to the S3 bucket. + +containerTemplateUtils::aws_s3_upload(path = "example_with_sub_dirs/", + error = TRUE, + bucket =Sys.getenv("AWS_BUCKET")) + +# aws keys will be "example_with_sub_dirs/" + +# If the path argument is a folder and key is an empty string, then only +# the files from the folder will be uploaded. + +containerTemplateUtils::aws_s3_upload(path = "R/", + key = "", + error = TRUE, + bucket =Sys.getenv("AWS_BUCKET")) + +# aws keys will be "./" + +# The prefix argument can be used to add a directory to the beginning of +# a path in the AWS bucket. This can be used with files or folders. 
+ +containerTemplateUtils::aws_s3_upload(path = "R/", + key = "example_r_scripts", + error = TRUE, + prefix = "my_example_prefix", + bucket =Sys.getenv("AWS_BUCKET")) + +# aws keys will be "my_example_prefix/example_r_scripts/" + +# This can be useful if you're using version control +# systems like git and would like to organize files by branch + +library(gert) +git_prefix <- gert::git_branch() + +containerTemplateUtils::aws_s3_upload(path = "R/", + key = "", + error = TRUE, + prefix = git_prefix, + bucket =Sys.getenv("AWS_BUCKET")) + +# aws keys will be "/" + + + +} + + + } From 187b3bd00bf6222f895513619ab1752a735ff5b9 Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Tue, 23 May 2023 11:32:59 -0600 Subject: [PATCH 5/5] fixed a typo in an error message --- R/utils-aws-upload.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils-aws-upload.R b/R/utils-aws-upload.R index 0ca3465..25af5ac 100644 --- a/R/utils-aws-upload.R +++ b/R/utils-aws-upload.R @@ -193,11 +193,11 @@ aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "", # if neither the file nor the directory exist, break or warn if (!file_check & !dir_check) { if (error) { - err_msg <- glue::glue("Neither File nor Directory not found. Argument supplied + err_msg <- glue::glue("Neither File nor Directory found. Argument supplied to path does not appear to exist. {path}") stop(err_msg) } else { - msg <- glue::glue("Neither File nor Directory not found. Argument supplied + msg <- glue::glue("Neither File nor Directory found. Argument supplied to path does not appear to exist. {path} Returning an empty list.")