Skip to content

Commit

Permalink
separate scripts for aws transfer in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Oct 10, 2023
1 parent 4821e08 commit a68163c
Show file tree
Hide file tree
Showing 11 changed files with 512 additions and 159 deletions.
432 changes: 309 additions & 123 deletions renv/activate.R

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions scripts/download-aws-s3-ecmwf-forecasts-dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Download the files listed in the `ecmwf_forecasts_dataset` target from S3,
# in parallel. Each element of the target is used both as the local path and
# as the S3 key passed to aws_s3_download().
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

aws_bucket <- Sys.getenv("AWS_BUCKET_ID")

# Sys.getenv() returns "" for an unset variable. Stop here with a clear
# message rather than letting every worker fail on an empty bucket name.
if (!nzchar(aws_bucket)) {
  stop("Environment variable AWS_BUCKET_ID is not set", call. = FALSE)
}

message("downloading ecmwf forecasts dataset")

ecmwf_forecasts_dataset <- tar_read(ecmwf_forecasts_dataset)

# PARALLEL set number of workers
# One worker per file, clamped to the range 1..40:
#   - at least 1, because an empty target would otherwise request 0 workers;
#   - at most 40 (the cap the sibling download scripts use), so a large
#     target does not try to launch one R process per file.
max_workers <- 40L
n_workers <- max(1L, min(length(ecmwf_forecasts_dataset), max_workers))

plan(multisession, workers = n_workers)
future_map(ecmwf_forecasts_dataset, function(x) {
  aws_s3_download(
    path = x,
    bucket = aws_bucket,
    key = x,
    check = TRUE
  )
})
19 changes: 19 additions & 0 deletions scripts/download-aws-s3-ecmwf-forecasts-raw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Download the files listed in the `ecmwf_forecasts_downloaded` target from
# S3, in parallel. Each element of the target is used both as the local path
# and as the S3 key passed to aws_s3_download().
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

aws_bucket <- Sys.getenv("AWS_BUCKET_ID")

# Sys.getenv() returns "" for an unset variable. Stop here with a clear
# message rather than letting every worker fail on an empty bucket name.
if (!nzchar(aws_bucket)) {
  stop("Environment variable AWS_BUCKET_ID is not set", call. = FALSE)
}

message("downloading ecmwf forecasts raw")

ecmwf_forecasts_downloaded <- tar_read(ecmwf_forecasts_downloaded)

# PARALLEL set number of workers
# One worker per file, clamped to the range 1..40:
#   - at least 1, because an empty target would otherwise request 0 workers;
#   - at most 40 (the cap the sibling download scripts use), so a large
#     target does not try to launch one R process per file.
max_workers <- 40L
n_workers <- max(1L, min(length(ecmwf_forecasts_downloaded), max_workers))

plan(multisession, workers = n_workers)
future_map(ecmwf_forecasts_downloaded, function(x) {
  aws_s3_download(
    path = x,
    bucket = aws_bucket,
    key = x,
    check = TRUE
  )
})
19 changes: 19 additions & 0 deletions scripts/download-aws-s3-modis-ndvi-dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Fetch every file listed in the `modis_ndvi_dataset` target from S3.
# Downloads run in parallel over a fixed pool of multisession workers.
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

# Bucket name comes from the environment.
s3_bucket <- Sys.getenv("AWS_BUCKET_ID")

message("downloading modis ndvi dataset")

file_paths <- tar_read(modis_ndvi_dataset)

# Download one file; the same string is used as local path and as S3 key.
fetch_from_s3 <- function(file_path) {
  aws_s3_download(
    path = file_path,
    bucket = s3_bucket,
    key = file_path,
    check = TRUE
  )
}

# PARALLEL: size of the worker pool
worker_count <- 40

plan(multisession, workers = worker_count)
future_map(file_paths, fetch_from_s3)
19 changes: 19 additions & 0 deletions scripts/download-aws-s3-modis-ndvi-raw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Fetch every file listed in the `modis_ndvi_downloaded` target from S3.
# Downloads run in parallel over a fixed pool of multisession workers.
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

# Bucket name comes from the environment.
s3_bucket <- Sys.getenv("AWS_BUCKET_ID")

message("downloading modis ndvi raw")

file_paths <- tar_read(modis_ndvi_downloaded)

# Download one file; the same string is used as local path and as S3 key.
fetch_from_s3 <- function(file_path) {
  aws_s3_download(
    path = file_path,
    bucket = s3_bucket,
    key = file_path,
    check = TRUE
  )
}

# PARALLEL: size of the worker pool
worker_count <- 40

plan(multisession, workers = worker_count)
future_map(file_paths, fetch_from_s3)
19 changes: 19 additions & 0 deletions scripts/download-aws-s3-nasa-weather-dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Download the files listed in the `nasa_weather_dataset` target from S3, in
# parallel. Each element of the target is used both as the local path and as
# the S3 key passed to aws_s3_download().
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

aws_bucket <- Sys.getenv("AWS_BUCKET_ID")

# Sys.getenv() returns "" for an unset variable. Stop here with a clear
# message rather than letting every worker fail on an empty bucket name.
if (!nzchar(aws_bucket)) {
  stop("Environment variable AWS_BUCKET_ID is not set", call. = FALSE)
}

message("downloading nasa weather dataset")

nasa_weather_dataset <- tar_read(nasa_weather_dataset)

# PARALLEL set number of workers
# One worker per file, clamped to the range 1..40:
#   - at least 1, because an empty target would otherwise request 0 workers;
#   - at most 40 (the cap the sibling download scripts use), so a large
#     target does not try to launch one R process per file.
max_workers <- 40L
n_workers <- max(1L, min(length(nasa_weather_dataset), max_workers))

plan(multisession, workers = n_workers)
future_map(nasa_weather_dataset, function(x) {
  aws_s3_download(
    path = x,
    bucket = aws_bucket,
    key = x,
    check = TRUE
  )
})
19 changes: 19 additions & 0 deletions scripts/download-aws-s3-nasa-weather-raw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Fetch every file listed in the `nasa_weather_downloaded` target from S3.
# Downloads run in parallel over a fixed pool of multisession workers.
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

# Bucket name comes from the environment.
s3_bucket <- Sys.getenv("AWS_BUCKET_ID")

message("downloading nasa weather raw")

file_paths <- tar_read(nasa_weather_downloaded)

# Download one file; the same string is used as local path and as S3 key.
fetch_from_s3 <- function(file_path) {
  aws_s3_download(
    path = file_path,
    bucket = s3_bucket,
    key = file_path,
    check = TRUE
  )
}

# PARALLEL: size of the worker pool
worker_count <- 40

plan(multisession, workers = worker_count)
future_map(file_paths, fetch_from_s3)
19 changes: 19 additions & 0 deletions scripts/download-aws-s3-sentinel-ndvi-dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Fetch every file listed in the `sentinel_ndvi_dataset` target from S3.
# Downloads run in parallel over a fixed pool of multisession workers.
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

# Bucket name comes from the environment.
s3_bucket <- Sys.getenv("AWS_BUCKET_ID")

message("downloading sentinel ndvi dataset")

file_paths <- tar_read(sentinel_ndvi_dataset)

# Download one file; the same string is used as local path and as S3 key.
fetch_from_s3 <- function(file_path) {
  aws_s3_download(
    path = file_path,
    bucket = s3_bucket,
    key = file_path,
    check = TRUE
  )
}

# PARALLEL: size of the worker pool
worker_count <- 40

plan(multisession, workers = worker_count)
future_map(file_paths, fetch_from_s3)
19 changes: 19 additions & 0 deletions scripts/download-aws-s3-sentinel-ndvi-raw.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Fetch every file listed in the `sentinel_ndvi_downloaded` target from S3.
# Downloads run in parallel over a fixed pool of multisession workers.
suppressPackageStartupMessages(source("packages.R"))
library(furrr)

# Bucket name comes from the environment.
s3_bucket <- Sys.getenv("AWS_BUCKET_ID")

message("downloading sentinel ndvi raw")

file_paths <- tar_read(sentinel_ndvi_downloaded)

# Download one file; the same string is used as local path and as S3 key.
fetch_from_s3 <- function(file_path) {
  aws_s3_download(
    path = file_path,
    bucket = s3_bucket,
    key = file_path,
    check = TRUE
  )
}

# PARALLEL: size of the worker pool
worker_count <- 40

plan(multisession, workers = worker_count)
future_map(file_paths, fetch_from_s3)
36 changes: 0 additions & 36 deletions scripts/download-aws-s3.R

This file was deleted.

51 changes: 51 additions & 0 deletions scripts/scrap/download-aws-s3.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Scratch script: earlier directory-level S3 downloads are kept below as
# commented-out drafts; only the NASA weather dataset download is live.
suppressPackageStartupMessages(source("packages.R"))
library(furrr)
s3_bucket <- Sys.getenv("AWS_BUCKET_ID")

# Sentinel NDVI -----------------------------------------------------------
# sentinel_ndvi_directory_raw <- tar_read(sentinel_ndvi_directory_raw)
# aws_s3_download(path = sentinel_ndvi_directory_raw,
#                 bucket = aws_bucket,
#                 key = sentinel_ndvi_directory_raw,
#                 check = TRUE)

# sentinel_ndvi_directory_dataset <- tar_read(sentinel_ndvi_directory_dataset)
# aws_s3_download(path = sentinel_ndvi_directory_dataset,
#                 bucket = aws_bucket,
#                 key = sentinel_ndvi_directory_dataset,
#                 check = TRUE)

# Modis NDVI --------------------------------------------------------------
# modis_ndvi_directory <- tar_read(modis_ndvi_directory)
# aws_s3_download(path = modis_ndvi_directory,
#                 bucket = aws_bucket,
#                 key = paste0("open-rvfcast/", modis_ndvi_directory),
#                 check = TRUE)

# NASA Weather ------------------------------------------------------------
# nasa_weather_directory_raw <- tar_read(nasa_weather_directory_raw)
# aws_s3_download(path = nasa_weather_directory_raw,
#                 bucket = aws_bucket,
#                 key = nasa_weather_directory_raw,
#                 check = TRUE)

message("downloading nasa_weather_dataset")

file_paths <- tar_read(nasa_weather_dataset)

# Download one file; the same string is used as local path and as S3 key.
fetch_from_s3 <- function(file_path) {
  aws_s3_download(
    path = file_path,
    bucket = s3_bucket,
    key = file_path,
    check = TRUE
  )
}

# PARALLEL: size of the worker pool
worker_count <- 19

plan(multisession, workers = worker_count)
future_map(file_paths, fetch_from_s3)



# Open question: could a single sync call replace the parallel map?
# aws.s3::s3sync(path = "data/nasa_weather_dataset",
#                bucket = "open-rvfcast-data",
#                prefix = "data/nasa_weather_dataset",
#                direction = "download")

0 comments on commit a68163c

Please sign in to comment.