Skip to content

Commit

Permalink
Merge pull request #23 from ecohealthalliance/enhancement/aws_upload
Browse files Browse the repository at this point in the history
improved functionality of key and prefix variables so that aws paths …
  • Loading branch information
collinschwantes authored May 23, 2023
2 parents c661a6a + 187b3bd commit 48c6ea4
Show file tree
Hide file tree
Showing 4 changed files with 335 additions and 26 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: containerTemplateUtils
Title: Provides utility functions to the container-template repository
Version: 0.0.0.9003
Version: 0.0.0.9004
Authors@R:
c(person(given = "Collin",
family = "Schwantes",
Expand Down
210 changes: 194 additions & 16 deletions R/utils-aws-upload.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
#' Upload files or folders to AWS
#'
#' @param path The path to the file(s) or folder(s) to be uploaded
#' @param bucket The name of the bucket to be uploaded to
#' @param key The key or name for the file or folder to take in the bucket.
#' Should end with "/" for folders. Use "" to upload files in folder without
#' When uploading folders, the subdirectory structure will be preserved. To
#' upload files from a folder without preserving the directory structure,
#' pass a vector of file paths to the path argument.
#'
#' If you would like to change the directory structure, pass in a vector of
#' file paths and a corresponding vector of keys.
#'
#'
#' @param path String. The path to the file(s) or folder(s) to be uploaded
#' @param bucket String. The name of the bucket to be uploaded to
#' @param key String. The "path" of the file(s) or folder(s) in the AWS bucket.
#' Should end with "/" for folders. Use "" (an empty string) to upload files in a folder without a
#' top-level folder.
#' @param prefix A prefix to prepend to the file or folder keys. Generally
#' @param prefix String. A prefix to prepend to the file or folder keys. Generally
#' should end with "/"
#' @param check Whether to check if the exact file already exists in the bucket
#' @param check Logical. Whether to check if the exact file already exists in the bucket
#' and skip uploading. Defaults to TRUE
#' @param error Whether error out if the file is missing, folder is empty, or
#' @param error Logical. Whether to error out if the file is missing, folder is empty, or
#' system environment variables are missing. Otherwise a message will print
#' but an empty list will be returned.
#' @param file_type String. Provide a file type from [mime::mimemap()] (e.g. "html","csv")
Expand All @@ -20,6 +28,129 @@
#'
#' @export aws_s3_upload
#'
#' @examples
#' \dontrun{
#'
#' # Upload a single file to a specific location in the bucket.
#' # this will take the readme.md file and place in the exact location
#' # specified by the key and prefix. Notice the key ends in a file
#' # extension.
#'
#' containerTemplateUtils::aws_s3_upload(path = "README.md",
#' key = "test/key/param/readme.md",
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # A vector of paths with a matching vector of keys
#' # will also result in exact placement.
#'
#' paths <- list.files("R",full.names = TRUE )
#'
#' file_names <- basename(paths)
#'
#' keys <- sprintf("%s/%s","example_dir",file_names)
#'
#' containerTemplateUtils::aws_s3_upload(path = paths,
#' key = keys,
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws keys will be "example_dir/<file_name>"
#'
#' # Supplying a single file path and key with no file extension will
#' # result in the key being treated as a directory and the file being placed
#' # in that directory.
#'
#' containerTemplateUtils::aws_s3_upload(path = "R/utils-aws-upload.R",
#' key = "test/key/param",
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws key will be "test/key/param/utils-aws-upload.R"
#'
#' # Supplying a single file path and no key argument will result in the file
#' # being uploaded to the top level directory of the bucket.
#'
#' containerTemplateUtils::aws_s3_upload(path = "R/utils-aws-upload.R",
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws key will be "./utils-aws-upload.R"
#'
#' # If the path argument is a folder, the key argument should also be a folder.
#' # Files from the folder will be uploaded into that directory.
#'
#' containerTemplateUtils::aws_s3_upload(path = "R/",
#' key = "test/upload_folder/",
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws keys will be "test/upload_folder/<files from R/>"
#'
#' # If the path argument is a folder with sub-directories, the structure of
#' # the sub-directories will be preserved.
#'
#' dir.create("example_with_sub_dirs")
#' dir.create("example_with_sub_dirs/sub_dir")
#' file.create("example_with_sub_dirs/sub_dir/test.txt")
#'
#' containerTemplateUtils::aws_s3_upload(path = "example_with_sub_dirs/",
#' key = "test/upload_nested_folder/",
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws key will be "test/upload_nested_folder/example_with_sub_dirs/sub_dir/test.txt"
#'
#' # If the path argument is a folder and no key argument is supplied,
#' # the local directory structure will be copied to the S3 bucket.
#'
#' containerTemplateUtils::aws_s3_upload(path = "example_with_sub_dirs/",
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws keys will be "example_with_sub_dirs/<files from example_with_sub_dirs/>"
#'
#' # If the path argument is a folder and key is an empty string, then only
#' # the files from the folder will be uploaded.
#'
#' containerTemplateUtils::aws_s3_upload(path = "R/",
#' key = "",
#' error = TRUE,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws keys will be "./<files from R/>"
#'
#' # The prefix argument can be used to add a directory to the beginning of
#' # a path in the AWS bucket. This can be used with files or folders.
#'
#' containerTemplateUtils::aws_s3_upload(path = "R/",
#' key = "example_r_scripts",
#' error = TRUE,
#' prefix = "my_example_prefix",
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws keys will be "my_example_prefix/example_r_scripts/<files from R/>"
#'
#' # This can be useful if you're using version control
#' # systems like git and would like to organize files by branch
#'
#' library(gert)
#' git_prefix <- gert::git_branch()
#'
#' containerTemplateUtils::aws_s3_upload(path = "R/",
#' key = "",
#' error = TRUE,
#' prefix = git_prefix,
#' bucket =Sys.getenv("AWS_BUCKET"))
#'
#' # aws keys will be "<current GIT branch>/<files from R/>"
#'
#'
#'
#' }
#'
#'
#'
aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "",
check = TRUE, error = FALSE, file_type = "guess") {

Expand Down Expand Up @@ -53,22 +184,64 @@ aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "",

}

if (!file.exists(path)) {

# Single path (directory or file) workflow

file_check <- utils::file_test(op = "-f",path)
dir_check <- utils::file_test(op = "-d",path)

# if neither the file nor the directory exist, break or warn
if (!file_check & !dir_check) {
if (error) {
stop("File not found.")
err_msg <- glue::glue("Neither File nor Directory found. Argument supplied
to path does not appear to exist. {path}")
stop(err_msg)
} else {
message("No file found. No upload, returning empty list")
msg <- glue::glue("Neither File nor Directory found. Argument supplied
to path does not appear to exist. {path}
Returning an empty list.")
message(msg)
return(list())
}
}



# single path workflow
svc <- paws::s3()

# if the file exists and the dir does not,
if (file.exists(path) && !dir.exists(path)) {
out <- list(aws_s3_upload_single(path, paste0(prefix, key), bucket, check, svc, file_type))
} else if (file.exists(path) && dir.exists(path)) {
# if path is a single file
if (file_check) {

# check that the key has a file extension

key_ext_check <- check_for_file_extension(key)

if(!key_ext_check){

key_1 <- sprintf("%s/%s/%s",prefix,key, path)
key <- gsub("/{2,}", "/", key_1) ## correcting multiple slashes in key

wrn_msg <- glue::glue(
"Path is a single file and key does not have a file extension.
Treating key as a subdirectory.
Path in AWS is:
{key}")

warning(wrn_msg)

}

out <- list(aws_s3_upload_single(path = path,
key = paste0(prefix, key),
bucket = bucket,
check = check,
svc = svc,
file_type = file_type))
}
# if path is a directory
if(dir_check){
files <- list.files(path, recursive = TRUE, full.names = TRUE, all.files = TRUE)

if (!length(files)) {
Expand All @@ -81,8 +254,13 @@ aws_s3_upload <- function(path, bucket, key = basename(path), prefix = "",
}

# Create prefixed records (archive or branches)
keys <- paste0(prefix, gsub(paste0("^", basename(path)), key, files))
keys <- gsub("/+", "/", keys)
#keys <- paste0(prefix, gsub(paste0("^", basename(path)), key, files))

## drop the path argument from the key precursor
file_paths <- gsub(paste0("^", path), "", files)

keys <- sprintf("%s/%s/%s",prefix,key,file_paths)
keys <- gsub("/{2,}", "/", keys) ## correcting multiple slashes in key

out <- mapply(aws_s3_upload_single,
path = files,
Expand Down
3 changes: 2 additions & 1 deletion man/aws_s3_download.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 48c6ea4

Please sign in to comment.