
Commit 813a319

Continue working on storing and retrieving data on AWS. Finally got frustrated enough to make my own S3 and IAM
n8layman committed Oct 1, 2024
1 parent 8abc396 commit 813a319
Showing 16 changed files with 1,525 additions and 1,017 deletions.
Binary file modified .env
Binary file not shown.
9 changes: 5 additions & 4 deletions R/AWS_get_folder.R
@@ -7,6 +7,7 @@
#' @author Nathan Layman
#'
#' @param local_folder Character. The path to the local folder where files should be downloaded and the AWS prefix
#' @param ...
#'
#' @return A list of files downloaded from AWS
#'
@@ -162,7 +163,7 @@ AWS_put_files <- function(transformed_file_list,
}

# Get files in local folder
local_folder_files <- list.files(path = local_folder, recursive = TRUE, full.names = TRUE)
local_folder_files <- list.files(path = local_folder, recursive = TRUE)

# Collect outcomes
outcome <- c()
@@ -183,10 +184,9 @@ AWS_put_files <- function(transformed_file_list,
outcome <- c(outcome, glue::glue("Uploading {file} to AWS"))

# Put the file on S3
s3_download <- s3$put_object(
s3_upload <- s3$put_object(
Bucket = Sys.getenv("AWS_BUCKET_ID"),
Key = file)

Key = file.path(local_folder, file))
}
} else {

@@ -200,6 +200,7 @@ AWS_put_files <- function(transformed_file_list,
# Remove the file from AWS
s3_delete <- s3$delete_object(
Bucket = Sys.getenv("AWS_BUCKET_ID"),
Key = file.path(local_folder, file))
}
}
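
For reference, a minimal standalone sketch of the upload/delete pattern these hunks converge on, assuming the `s3` client comes from the paws package and `AWS_BUCKET_ID` is set (the helper names here are hypothetical). Note that paws' `put_object` also takes a `Body` of raw file contents; without one, S3 stores a zero-byte object:

library(paws)

s3 <- paws::s3()

# Hypothetical helper: upload a file, keyed by its path relative to the
# project root so the S3 prefix mirrors the local folder layout.
put_local_file <- function(local_folder, file) {
  key <- file.path(local_folder, file)
  s3$put_object(
    Bucket = Sys.getenv("AWS_BUCKET_ID"),
    Key = key,
    Body = readBin(key, "raw", n = file.size(key)))
}

# Hypothetical helper: remove the matching object. delete_object identifies
# an object by its full Key; it has no Prefix argument.
delete_remote_file <- function(local_folder, file) {
  s3$delete_object(
    Bucket = Sys.getenv("AWS_BUCKET_ID"),
    Key = file.path(local_folder, file))
}
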
23 changes: 13 additions & 10 deletions R/calculate_forecasts_anomalies.R
@@ -3,33 +3,36 @@
#' .. content for \details{} ..
#'
#' @title
#' @param ecmwf_forecasts_transformed
#'
#' @param ecmwf_forecasts_transformed_directory
#' @param weather_historical_means
#' @param forecast_anomalies_directory
#' @param model_dates
#' @param model_dates_selected
#' @param forecasts_anomalies_directory
#' @param lead_intervals
#' @param ...
#' @param overwrite
#'
#' @return
#' @author Emma Mendelsohn
#' @export
calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
ecmwf_forecasts_transformed_directory,
calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed_directory,
weather_historical_means,
forecasts_anomalies_directory,
model_dates_selected,
lead_intervals,
overwrite = FALSE) {
overwrite = FALSE,
...) {

# Set filename
date_selected <- model_dates_selected
save_filename <- glue::glue("forecast_anomaly_{date_selected}.gz.parquet")
message(paste0("Calculating forecast anomalies for ", date_selected))

# Check if file already exists
existing_files <- list.files(forecasts_anomalies_directory)
if(save_filename %in% existing_files & !overwrite) {
message("file already exists, skipping download")
# Check if file already exists and can be read
error_safe_read_parquet <- possibly(arrow::read_parquet, NULL)

if(!is.null(error_safe_read_parquet(file.path(forecasts_anomalies_directory, save_filename))) & !overwrite) {
message("file already exists and can be loaded, skipping download")
return(file.path(forecasts_anomalies_directory, save_filename))
}

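
The new guard above swaps a filename lookup for an actual read test, so corrupt or truncated parquet files trigger regeneration rather than being trusted. A minimal sketch of that pattern, assuming purrr and arrow:

library(purrr)
library(arrow)

# possibly() wraps read_parquet so any failure returns NULL instead of erroring.
error_safe_read_parquet <- possibly(arrow::read_parquet, otherwise = NULL)

# TRUE only when the cached file exists, parses cleanly, and we aren't
# deliberately regenerating it.
use_cached_file <- function(path, overwrite = FALSE) {
  !is.null(error_safe_read_parquet(path)) && !overwrite
}
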
16 changes: 11 additions & 5 deletions R/calculate_weather_anomalies.R
@@ -2,19 +2,25 @@
#'
#' .. content for \details{} ..
#'
#' @param nasa_weather_transformed_directory
#' @param weather_historical_means
#' @param weather_anomalies_directory
#' @param model_dates_selected
#' @param lag_intervals
#' @param overwrite
#' @param ...
#'
#' @title
#' @param sentinel_ndvi_transformed
#' @param nasa_weather_transformed
#' @return
#' @author Emma Mendelsohn
#' @export
calculate_weather_anomalies <- function(nasa_weather_transformed,
nasa_weather_transformed_directory,
calculate_weather_anomalies <- function(nasa_weather_transformed_directory,
weather_historical_means,
weather_anomalies_directory,
model_dates_selected,
lag_intervals,
overwrite = FALSE) {
overwrite = FALSE,
...) {

# Set filename
date_selected <- model_dates_selected
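
A recurring signature change in this commit drops the in-memory data argument in favor of a directory argument plus `...`. One plausible reading — an assumption, not something the diff states — is that in a {targets} pipeline upstream targets can be passed as extra arguments purely to force execution order, and `...` lets each function absorb them unnamed. A hypothetical sketch:

# Hypothetical illustration, not project code: `...` absorbs arguments passed
# only so {targets} records an upstream dependency.
summarize_transformed <- function(transformed_directory, overwrite = FALSE, ...) {
  invisible(list(...))  # dependency-only arguments, intentionally unused
  list.files(transformed_directory, pattern = "\\.parquet$", full.names = TRUE)
}
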
10 changes: 8 additions & 2 deletions R/calculate_weather_historical_means.R
@@ -3,9 +3,15 @@
#' .. content for \details{} ..
#'
#' @title
#' @param nasa_weather_transformed
#'
#' @param nasa_weather_transformed_directory
#' @param days_of_year
#' @param lag_intervals
#' @param lead_intervals
#' @param overwrite
#' @param ...
#' @param weather_historical_means_directory
#'
#' @return
#' @author Emma Mendelsohn
#' @export
@@ -15,7 +21,7 @@ calculate_weather_historical_means <- function(nasa_weather_transformed_directory,
lag_intervals,
lead_intervals,
overwrite = FALSE,
nasa_weather_transformed) {
...) {

# Check that we're only working with one interval length.
interval_length <- unique(c(diff(lag_intervals), diff(lead_intervals)))
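
The interval-length check above is easy to exercise in isolation; a small sketch with illustrative values (the real lag/lead vectors come from the pipeline configuration):

lag_intervals  <- c(30, 60, 90)   # illustrative values only
lead_intervals <- c(30, 60, 90)

# Every step between consecutive lag/lead cutoffs must be the same size.
interval_length <- unique(c(diff(lag_intervals), diff(lead_intervals)))
stopifnot(length(interval_length) == 1)
interval_length
#> [1] 30
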
2 changes: 1 addition & 1 deletion R/create_africa_polygon.R
@@ -9,7 +9,7 @@
#' @export
create_africa_polygon <- function() {

ne_countries(continent = "Africa", returnclass = "sf") |>
rnaturalearth::ne_countries(continent = "Africa", returnclass = "sf") |>
select(featurecla, country = name, country_iso3c = sov_a3)

}
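
Namespacing `ne_countries()` means the target no longer depends on rnaturalearth being attached when the function runs. Fully qualified — with `select` qualified too, purely as an illustration — the body is self-contained:

create_africa_polygon <- function() {
  rnaturalearth::ne_countries(continent = "Africa", returnclass = "sf") |>
    dplyr::select(featurecla, country = name, country_iso3c = sov_a3)
}
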
120 changes: 63 additions & 57 deletions R/preprocess_soil.R
@@ -10,36 +10,50 @@
library(DBI)
library(RSQLite)
preprocess_soil <- function(soil_directory_dataset,
soil_directory_raw,
continent_raster_template,
soil_downloaded) {
overwrite = FALSE,
...) {

#read in the raster file

#crop the raster to the continent
#hwsd_bounded <- terra::crop(unzipped_soil_raster, terra::unwrap(continent_raster_template))

#reproject the raster
#print(paste("UTM zone:", utm.zone <-
# floor(((sf::st_bbox(hwsd_bounded)$xmin +
# sf::st_bbox(hwsd_bounded)$xmax)/2 + 180)/6)
# + 1))

#(epsg <- 32600 + utm.zone)
# Unwrap continent_raster template
continent_raster_template <- terra::unwrap(continent_raster_template)

# Set up safe way to read parquet files
error_safe_read_parquet <- possibly(arrow::read_parquet, NULL)

# Parquet filenames
soil_texture_file <- file.path(soil_directory_dataset, "soil_texture.parquet")
soil_drainage_file <- file.path(soil_directory_dataset, "soil_drainage.parquet")

# Check if soil files exist, can be read, and that we don't want to overwrite them.
if(!is.null(error_safe_read_parquet(soil_texture_file)) &
!is.null(error_safe_read_parquet(soil_drainage_file)) &
!overwrite) {
message("preprocessed soil files already exist and can be loaded, skipping download and processing")
return(c(basename(soil_texture_file),
basename(soil_drainage_file)))
}

#hwsd_bounded.utm <- project(hwsd_bounded, paste0("EPSG:", epsg), method = "near")
# Download soil texture data and unzip
soil_texture_raw_file <- file.path(soil_directory_dataset, "soil_raster.zip")
download.file(url="https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/HWSD/HWSD2_RASTER.zip",
destfile = soil_texture_raw_file)
unzip(soil_texture_raw_file, exdir = soil_directory_dataset)

#terra::resample(hwsd_bounded.utm, method = "near")
# Download soil drainage data
soil_drainage_raw_file <- file.path(soil_directory_dataset, "soil_drainage.sqlite")
download.file(url="https://www.isric.org/sites/default/files/HWSD2.sqlite",
destfile = soil_drainage_raw_file)

transformed_raster <- transform_raster(raw_raster = rast(paste0(soil_downloaded, "/HWSD2.bil")),
###### SOIL TEXTURE ######
transformed_raster <- transform_raster(raw_raster = rast(file.path(soil_directory_dataset, "HWSD2.bil")),
template = rast(continent_raster_template))

#connect to database and extract values
# connect to database and extract values
m <- dbDriver("SQLite")
con <- dbConnect(m, dbname = soil_drainage_raw_file)
dbListTables(con)

####extract map unit codes in bounded area (WINDOW_ZHNJ) to join with SQL databases###
#### extract map unit codes in bounded area (WINDOW_ZHNJ) to join with SQL databases###
dbWriteTable(con, name="WINDOW_ZHNJ",
value=data.frame(hwsd2_smu = sort(unique(values(transformed_raster)))),
overwrite=TRUE)
@@ -58,27 +72,11 @@ preprocess_soil <- function(soil_directory_dataset,
#creates a dataframe "records" in R from SQL temp table created above
records <- dbGetQuery(con, "select * from ZHNJ_SMU")

#create sand and clay tables in R
#sand.d1 <- dbGetQuery(con,
# "select U.HWSD2_SMU_ID, U.SAND from ZHNJ_SMU as T
# join HWSD2_LAYERS as U on T.HWSD2_SMU_ID=U.HWSD2_SMU_ID
# where U.LAYER='D1'
# order by U.HWSD2_SMU_ID")
#
#clay.d1 <- dbGetQuery(con,
# "select U.HWSD2_SMU_ID, U.CLAY from ZHNJ_SMU as T
# join HWSD2_LAYERS as U on T.HWSD2_SMU_ID=U.HWSD2_SMU_ID
# where U.LAYER='D1'
# order by U.HWSD2_SMU_ID")

#remove the temp tables and database connection
dbRemoveTable(con, "WINDOW_ZHNJ")
dbRemoveTable(con, "ZHNJ_SMU")
dbDisconnect(con)

#join sand and clay data frames in r to create a ratio variable
#full_join (sand.d1, clay.d1)

#changes from character to factor for the raster
for (i in names(records)[c(2:5,7:13,16:17,19:23)]) {
eval(parse(text=paste0("records$",i," <- as.factor(records$",i,")")))
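
As an aside, the `eval(parse())` loop above can be written without building code as text; a drop-in alternative over the same column indices:

# Convert the selected HWSD2 lookup columns to factors by direct indexing.
factor_cols <- names(records)[c(2:5, 7:13, 16:17, 19:23)]
records[factor_cols] <- lapply(records[factor_cols], as.factor)
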
@@ -95,7 +93,7 @@
levels(hwsd.zhnj.texture) <- levels(records$TEXTURE_USDA)

# Convert to dataframe
dat_out <- as.data.frame(hwsd.zhnj.texture, xy = TRUE) |>
soil_texture <- as.data.frame(hwsd.zhnj.texture, xy = TRUE) |>
as_tibble()

# At this point:
@@ -115,15 +113,17 @@

# Re-code factor levels to collapse simplex.
# Figure out where key is for the units are in HWSD2
dat_out$HWSD2 <- if_else(dat_out$HWSD2=="5", "1", # clay (heavy) + clay loam
if_else(dat_out$HWSD2=="7", "2", # silty clay + silty loam aka
if_else(dat_out$HWSD2=="8", "3", # clay + sandy clay
if_else(dat_out$HWSD2=="9", "4", # silty clay loam
if_else(dat_out$HWSD2=="10", "5", # clay loam + sandy clay loam BUT SEE RULE 1!!!
if_else(dat_out$HWSD2=="11", "6", # silt sandy + loam
if_else(dat_out$HWSD2=="12", "7","0"))))))) # loamy sand + silt loam
soil_texture$HWSD2 <- if_else(soil_texture$HWSD2=="5", "1", # clay (heavy) + clay loam
if_else(soil_texture$HWSD2=="7", "2", # silty clay + silty loam aka
if_else(soil_texture$HWSD2=="8", "3", # clay + sandy clay
if_else(soil_texture$HWSD2=="9", "4", # silty clay loam
if_else(soil_texture$HWSD2=="10", "5", # clay loam + sandy clay loam BUT SEE RULE 1!!!
if_else(soil_texture$HWSD2=="11", "6", # silt sandy + loam
if_else(soil_texture$HWSD2=="12", "7", "0"))))))) # loamy sand + silt loam


###### SOIL DRAINAGE ######

#create matrix of map unit ids and the variable of interest - DRAINAGE
rcl.matrix.drainage <- cbind(id = as.numeric(as.character(records$HWSD2_SMU_ID)),
drainage = as.numeric(records$DRAINAGE))
@@ -134,25 +134,31 @@
levels(hwsd.zhnj.drainage) <- levels(records$DRAINAGE)

# Convert to dataframe
dat_out2 <- as.data.frame(hwsd.zhnj.drainage, xy = TRUE) |>
soil_drainage <- as.data.frame(hwsd.zhnj.drainage, xy = TRUE) |>
as_tibble()

dat_out2$HWSD2 <- if_else(dat_out2$HWSD2=="MW", "4",
if_else(dat_out2$HWSD2=="P", "6",
if_else(dat_out2$HWSD2=="SE", "2",
if_else(dat_out2$HWSD2=="VP", "7","0"))))
soil_drainage$HWSD2 <- if_else(soil_drainage$HWSD2=="MW", "4",
if_else(soil_drainage$HWSD2=="P", "6",
if_else(soil_drainage$HWSD2=="SE", "2",
if_else(soil_drainage$HWSD2=="VP", "7","0"))))

dat_out2$HWSD2 <- as.numeric(as.character(dat_out2$HWSD2))
soil_drainage$HWSD2 <- as.numeric(as.character(soil_drainage$HWSD2))

# Save as parquet
write_parquet(dat_out, "data/soil_dataset/soil_texture", compression = "gzip", compression_level = 5)
write_parquet(dat_out2, "data/soil_dataset/soil_drainage", compression = "gzip", compression_level = 5)

#writeRaster(hwsd.zhnj.drainage, "data/soil/drainage_raster.tif", overwrite=TRUE)
#writeRaster(hwsd.zhnj.texture, "data/soil/texture_class_raster.tif", overwrite=TRUE)
#writeRaster(x, sand_clay_raster, overwrite=TRUE)
# Save soil data as parquet files
arrow::write_parquet(soil_texture, soil_texture_file, compression = "gzip", compression_level = 5)
arrow::write_parquet(soil_drainage, soil_drainage_file, compression = "gzip", compression_level = 5)

# Test if soil parquet files can be loaded. If not clean up directory and return NULL
if(is.null(error_safe_read_parquet(soil_texture_file)) ||
is.null(error_safe_read_parquet(soil_drainage_file))) {
message("Preprocessed soil parquet files couldn't be read after processing. Cleaning up")
file.remove(list.files(soil_directory_dataset, full.names = TRUE))
return(NULL)
}

return(soil_directory_dataset)
# Clean up all non-parquet files
file.remove(grep("\\.parquet$", list.files(soil_directory_dataset, full.names = TRUE), value = TRUE, invert = TRUE))

return(c(basename(soil_texture_file),
basename(soil_drainage_file)))
}
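
Both the texture and drainage branches follow the same classify-then-label pattern: reclassify map-unit IDs to integer class codes, attach factor labels, then flatten to a dataframe. A self-contained toy version with made-up IDs and classes, assuming terra:

library(terra)

# Toy stand-in for the HWSD2 lookup table pulled from the SQLite database.
records <- data.frame(
  HWSD2_SMU_ID = c(1001, 1002, 1003),
  TEXTURE_USDA = factor(c("clay", "loam", "sand")))

# Toy raster of map-unit IDs standing in for the transformed HWSD2 raster.
r <- rast(nrows = 2, ncols = 2, vals = c(1001, 1002, 1003, 1001))

# Reclassify IDs to integer class codes, then attach the factor labels.
rcl <- cbind(id = records$HWSD2_SMU_ID,
             class = as.numeric(records$TEXTURE_USDA))
r_class <- classify(r, rcl)
levels(r_class) <- data.frame(value = seq_along(levels(records$TEXTURE_USDA)),
                              label = levels(records$TEXTURE_USDA))

# Flatten to x/y/label, as the function does before writing parquet.
as.data.frame(r_class, xy = TRUE)
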
14 changes: 6 additions & 8 deletions R/soil_download.R
@@ -7,31 +7,29 @@
#' @return
#' @author Whitney Bagge
#' @export
soil_download <- function(soil_directory_raw) {
soil_download <- function(soil_directory_dataset) {

options(timeout=200)

location <- c("soil_database", "soil_raster")

for(loc in location){

url_out<- switch(loc, "soil_raster" = "https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/HWSD/HWSD2_RASTER.zip",
url_out <- switch(loc, "soil_raster" = "https://s3.eu-west-1.amazonaws.com/data.gaezdev.aws.fao.org/HWSD/HWSD2_RASTER.zip",
"soil_database" = "https://www.isric.org/sites/default/files/HWSD2.sqlite")

file_ext<- switch(loc,"soil_raster" = ".zip", "soil_database" = ".sqlite")
file_ext <- switch(loc, "soil_raster" = ".zip", "soil_database" = ".sqlite")

filename <- paste("data/soil/", loc, file_ext, sep="")
filename <- file.path(soil_directory_dataset, paste0(loc, file_ext))

download.file(url=url_out, destfile = filename)

if (loc == "soil_raster" ){
unzip(filename, exdir = "data/soil/")
unzip(filename, exdir = soil_directory_dataset)
}

}

return(soil_directory_raw)


return(soil_directory_dataset)
}
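
A quick usage sketch of the revised function — illustrative only, since the real pipeline supplies the dataset directory and the downloads are large:

# Download the HWSD2 raster and SQLite database into a scratch directory.
soil_dir <- file.path(tempdir(), "soil")
dir.create(soil_dir, recursive = TRUE, showWarnings = FALSE)

soil_download(soil_dir)
list.files(soil_dir)  # expect soil_raster.zip, soil_database.sqlite, unzipped raster files
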

3 changes: 2 additions & 1 deletion R/transform_ecmwf_forecasts.R
@@ -18,7 +18,8 @@
#' @export
transform_ecmwf_forecasts <- function(ecmwf_forecasts_api_parameters,
local_folder = ecmwf_forecasts_transformed_directory,
continent_raster_template) {
continent_raster_template,
...) {

# Check that ecmwf_forecasts_api_parameters is only one row
stopifnot(nrow(ecmwf_forecasts_api_parameters) == 1)
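
The `nrow(...) == 1` assertion implies the function is invoked once per parameter row, presumably via branching; a hypothetical sketch of that row-wise pattern (variable values assumed, not taken from the pipeline):

# Hypothetical: map the transform over one-row slices of the API parameter table.
rows <- split(ecmwf_forecasts_api_parameters,
              seq_len(nrow(ecmwf_forecasts_api_parameters)))
lapply(rows, function(row) {
  transform_ecmwf_forecasts(row,
                            local_folder = "data/ecmwf_transformed",  # assumed path
                            continent_raster_template = continent_raster_template)
})
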
4 changes: 3 additions & 1 deletion R/transform_modis_ndvi.R
@@ -11,14 +11,16 @@
#' @param modis_ndvi_bundle_request List. Contains the `file_name`, `task_id`, and `file_id` from the AppEEARS bundle request for MODIS NDVI data.
#' @param continent_raster_template Character. The file path to the template raster used for resampling the MODIS NDVI data.
#' @param local_folder Character. The path to the local directory where both raw and transformed files are saved.
#' @param ...
#'
#' @return A list of successfully transformed files
#'
#' @export
transform_modis_ndvi <- function(modis_ndvi_token,
modis_ndvi_bundle_request,
continent_raster_template,
local_folder) {
local_folder,
...) {

# Figure out raw file name and path
raw_file <- file.path(local_folder, basename(modis_ndvi_bundle_request$file_name))
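
Taking `basename()` of the bundle's `file_name` guards against AppEEARS returning nested paths, keeping every raw file directly under `local_folder`. In brief:

local_folder <- "data/modis_ndvi"                  # illustrative
file_name <- "ndvi/MOD13A2.061_16_days_NDVI.tif"   # illustrative: bundle paths can nest
file.path(local_folder, basename(file_name))
#> [1] "data/modis_ndvi/MOD13A2.061_16_days_NDVI.tif"
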