Skip to content

Commit

Permalink
Merge pull request #59 from ecohealthalliance/modis
Browse files Browse the repository at this point in the history
Modis
  • Loading branch information
emmamendelsohn authored Sep 14, 2023
2 parents c381161 + f1ce840 commit eff4ca4
Show file tree
Hide file tree
Showing 14 changed files with 3,279 additions and 314 deletions.
Binary file modified .env
Binary file not shown.
50 changes: 50 additions & 0 deletions R/create_modis_ndvi_dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' .. content for \description{} (no empty lines) ..
#'
#' .. content for \details{} ..
#'
#' @title
#' @param modis_ndvi_downloaded
#' @param continent_raster_template
#' @param modis_ndvi_directory_dataset
#' @param overwrite
#' @return
#' @author Emma Mendelsohn
#' @export
create_modis_ndvi_dataset <- function(modis_ndvi_downloaded,
continent_raster_template,
modis_ndvi_directory_dataset, overwrite =
FALSE) {

filename <- basename(modis_ndvi_downloaded)

year_doy <- sub(".*doy(\\d+).*", "\\1", filename)
start_date <- as.Date(year_doy, format = "%Y%j") # confirmed this is start date through manual download tests
end_date <- start_date + 16
save_filename <- glue::glue("transformed_modis_NDVI_{start_date}_to_{end_date}.gz.parquet")

existing_files <- list.files(modis_ndvi_directory_dataset)

message(paste0("Transforming ", save_filename))

if(save_filename %in% existing_files & !overwrite){
message("file already exists, skipping transform")
return(file.path(modis_ndvi_directory_dataset, save_filename))
}

transformed_raster <- transform_raster(raw_raster = rast(modis_ndvi_downloaded),
template = rast(continent_raster_template))

# Convert to dataframe
dat_out <- as.data.frame(transformed_raster, xy = TRUE) |>
as_tibble() |>
rename(ndvi = 3) |>
mutate(start_date = start_date,
end_date = end_date)

# Save as parquet
write_parquet(dat_out, here::here(modis_ndvi_directory_dataset, save_filename), compression = "gzip", compression_level = 5)

return(file.path(modis_ndvi_directory_dataset, save_filename))


}
7 changes: 4 additions & 3 deletions R/create_sentinel_ndvi_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ create_sentinel_ndvi_dataset <- function(sentinel_ndvi_downloaded,
overwrite = FALSE) {

filename <- basename(sentinel_ndvi_downloaded)
start_date <- as.Date(str_extract(filename, "(\\d{8}T\\d{6})"), format = "%Y%m%dT%H%M%S")
end_date <- as.Date(str_extract(filename, "(?<=_)(\\d{8}T\\d{6})(?=_\\w{6}_)"), format = "%Y%m%dT%H%M%S")
save_filename <- glue::glue("transformed_NDVI_{start_date}_to_{end_date}.gz.parquet")
assertthat::are_equal(nchar(filename), 97)
start_date <- as.Date(str_sub(filename, 17, 24), format = "%Y%m%d")
end_date <- as.Date(str_sub(filename, 33, 40), format = "%Y%m%d")
save_filename <- glue::glue("transformed_sentinel_NDVI_{start_date}_to_{end_date}.gz.parquet")

existing_files <- list.files(sentinel_ndvi_directory_dataset)

Expand Down
130 changes: 25 additions & 105 deletions R/download_modis_ndvi.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,116 +3,36 @@
#' .. content for \details{} ..
#'
#' @title
#' @param modis_ndvi_api_parameters
#' @param modis_ndvi_bundle_request
#' @param download_directory
#' @param overwrite
#' @return
#' @author Emma Mendelsohn
#' @export
download_modis_ndvi <- function(continent_bounding_box,
modis_ndvi_years,
download_directory = "data/modis_ndvi_rasters") {
download_modis_ndvi <- function(modis_ndvi_token,
modis_ndvi_bundle_request,
download_directory,
overwrite = FALSE) {

suppressWarnings(dir.create(download_directory, recursive = TRUE))

existing_files <- list.files(download_directory)

bbox_coords <- continent_bounding_box
year <- modis_ndvi_years

# Connect to API
planetary_query <- stac("https://planetarycomputer.microsoft.com/api/stac/v1/")

# This is the NDVI collection
collection <- "modis-13A1-061" # this is 500m (modis-13Q1-061 is 250m)

# Run query for year/country
date_ranges <- c(paste0(year, "-01-01T00:00:00Z/", year, "-03-31T23:59:59Z"),
paste0(year, "-04-01T00:00:00Z/", year, "-06-30T23:59:59Z"),
paste0(year, "-07-01T00:00:00Z/", year, "-09-30T23:59:59Z"),
paste0(year, "-10-01T00:00:00Z/", year, "-12-31T23:59:59Z"))

ndvi_query <- map(date_ranges, function(date_range){
q <- planetary_query |>
stac_search(collections = collection,
limit = 1000,
datetime = date_range,
bbox = bbox_coords) |>
get_request() |>
items_sign(sign_fn = sign_planetary_computer())
assertthat::assert_that(length(q$features) < 1000)
return(q$features)
}) |> reduce(c)

ndvi_parameters <- tibble(
url = map_vec(ndvi_query, ~.$assets$`500m_16_days_NDVI`$href),
id = map_vec(ndvi_query, ~.$id),
start_date = map_vec(ndvi_query, ~.$properties$start_datetime),
end_date = map_vec(ndvi_query, ~.$properties$end_datetime),
platform = map_vec(ndvi_query, ~.$properties$platform),
bbox = list(map(ndvi_query, ~.$bbox))
) |>
filter(platform == "terra")

# Plan to download by start date - each file will be a mosaic of the tiles for each NDVI day
ndvi_params_split <- ndvi_parameters |>
mutate(id_lab = str_extract(id, "[^\\.]*\\.[^\\.]*")) |>
mutate(start_lab = str_remove_all(start_date, "\\:|\\-")) |>
mutate(end_lab = str_remove_all(end_date, "\\:|\\-")) |>
mutate(filename = paste0(paste(id_lab, "africa", start_lab, end_lab, sep = "_"), ".tif")) |>
group_split(start_date)

assertthat::assert_that(unique(map_vec(ndvi_params_split, ~n_distinct(.$filename)))==1)

# Debugging check
### Some of these parameter tibble are double the number of rows
### seems to be the same three dates each year (manually checked 2005, 2006, 2018)
# nrows <- map_vec(ndvi_params_split, nrow)
# assertthat::are_equal(n_distinct(nrows), 2)
# which_dupes <- which(nrows > median(nrows))
# ndvi_params_split_with_dupes <- ndvi_params_split[which_dupes]
### They have dupe IDs
# map_vec(ndvi_params_split_with_dupes, ~n_distinct(.$id))
### But they do have different URL endpoints
# map_vec(ndvi_params_split_with_dupes, ~n_distinct(.$url))
### Other than URLs, everything we've extracted is the same
# map_vec(ndvi_params_split_with_dupes, ~nrow(distinct(select(., -url))))
### Let's look at these dupes
# message("spot checking that dupes are identical")
# dupe_tiles_to_test <- map(ndvi_params_split_with_dupes, function(x){
# x |>
# mutate(url = paste0("/vsicurl/", url)) |>
# group_by(id) |>
# group_split() |>
# sample(size = 3)
# }) |> reduce(c)
# assertthat::assert_that(all(imap_lgl(dupe_tiles_to_test, function(dup, i){
# print(i)
# assertthat::are_equal(nrow(dup), 2)
# tile1 <- as.data.frame(rast(dup$url[1]))
# tile2 <- as.data.frame(rast(dup$url[2]))
# identical(tile1, tile2)
# })))
# ### Since they are identical, let's just select the first
# ndvi_params_split[which_dupes] <- map(ndvi_params_split[which_dupes], function(x){
# x |> group_by(id) |> slice(1) |> ungroup()
# })
# nrows_new <- map_vec(ndvi_params_split, nrow)
# assertthat::are_equal(n_distinct(nrows_new), 1)

# read in and mosaic the tiles
filenames <- map_vec(ndvi_params_split, function(params){
filename <- unique(params$filename)
message(paste("downloading", filename))
if(filename %in% existing_files){
message("file already exists, skipping download")
return(filename)
}
urls <- paste0("/vsicurl/", params$url)
tiles <- map(urls, rast)
rast_downloaded <- do.call(terra::merge, tiles)
terra::writeRaster(rast_downloaded, here::here(download_directory, unique(params$filename)), overwrite = T)
return(filename)
})

return(file.path(download_directory, filenames))

task_id <- unique(modis_ndvi_bundle_request$task_id)
file_id <- modis_ndvi_bundle_request$file_id
filename <- basename(modis_ndvi_bundle_request$file_name)

message(paste0("Downloading ", filename))

if(filename %in% existing_files & !overwrite) {
message("file already exists, skipping download")
return(file.path(download_directory, filename))
}

# Write the file to disk
response <- GET(paste("https://appeears.earthdatacloud.nasa.gov/api/bundle/", task_id, '/', file_id, sep = ""),
write_disk(file.path(download_directory, filename), overwrite = TRUE), progress(), add_headers(Authorization = modis_ndvi_token))


return(file.path(download_directory, filename))

}
19 changes: 19 additions & 0 deletions R/download_modis_ndvi_delete.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#' .. content for \description{} (no empty lines) ..
#'
#' .. content for \details{} ..
#'
#' @title
#' @param modis_ndvi_token
#' @param modis_ndvi_bundle_request
#' @param download_directory
#' @param overwrite
#' @return
#' @author Emma Mendelsohn
#' @export
download_modis_ndvi_delete <- function(modis_ndvi_token, modis_ndvi_bundle_request,
download_directory = modis_ndvi_directory_raw,
overwrite = FALSE) {

return(modis_ndvi_token)

}
22 changes: 22 additions & 0 deletions R/get_modis_ndvi_token.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' .. content for \description{} (no empty lines) ..
#'
#' .. content for \details{} ..
#'
#' @title

#' @return
#' @author Emma Mendelsohn
#' @export
get_modis_ndvi_token <- function() {

secret <- base64_enc(paste(Sys.getenv("APPEEARS_USERNAME"), Sys.getenv("APPEEARS_PASSWORD"), sep = ":")) #TODO make project auth
token_response <- POST("https://appeears.earthdatacloud.nasa.gov/api/login",
add_headers("Authorization" = paste("Basic", gsub("\n", "", secret)),
"Content-Type" = "application/x-www-form-urlencoded;charset=UTF-8"),
body = "grant_type=client_credentials")
token_response <- prettify(toJSON(content(token_response), auto_unbox = TRUE))
token <- paste("Bearer", fromJSON(token_response)$token)

return(token)

}
60 changes: 60 additions & 0 deletions R/submit_modis_ndvi_bundle_request.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' .. content for \description{} (no empty lines) ..
#'
#' .. content for \details{} ..
#'
#' @title
#' @param modis_ndvi_token
#' @param modis_ndvi_task_id_list
#' @return
#' @author Emma Mendelsohn
#' @export
submit_modis_ndvi_bundle_request <- function(modis_ndvi_token, modis_ndvi_task_id_continent, timeout = 1000) {

task_id <- modis_ndvi_task_id_continent$task_id

# Get sys time for the loop timeout
sys_start_time <- Sys.time()

# Function to check task status
check_task_status <- function() {
task_response <- GET("https://appeears.earthdatacloud.nasa.gov/api/task", add_headers(Authorization = modis_ndvi_token))
task_response <- fromJSON(toJSON(content(task_response))) |> filter(task_id == !!task_id)
task_status <- task_response |> pull(status) |> unlist()
assertthat::assert_that(task_status %in% c("queued", "pending", "processing", "done"))
return(task_status)
}

# Check task status in a loop
task_status <- ""
while (task_status != "done") {
task_status <- check_task_status()

# Print current task status
message(paste("task status:", task_status))

# Check timeout
elapsed_time <- difftime(Sys.time(), sys_start_time, units = "secs")
if (task_status != "done" & elapsed_time >= timeout) {
message("timeout reached")
break
}

# Sleep for a few seconds before checking again
if(task_status != "done"){
message("pausing 60 seconds before checking again")
Sys.sleep(60)
}
}

bundle_response <- GET(paste("https://appeears.earthdatacloud.nasa.gov/api/bundle/", task_id, sep = ""), add_headers(Authorization = modis_ndvi_token))
bundle_response <- fromJSON(toJSON(content(bundle_response)))

bundle_response_files <- bundle_response$files |>
mutate(file_type = unlist(file_type)) |>
mutate(file_name = unlist(file_name)) |>
mutate(file_id = unlist(file_id)) |>
filter(file_type == "tif") |>
mutate(task_id = task_id)

return(bundle_response_files)
}
35 changes: 35 additions & 0 deletions R/submit_modis_ndvi_task_request.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' .. content for \description{} (no empty lines) ..
#'
#' .. content for \details{} ..
#'
#' @title

#' @return
#' @author Emma Mendelsohn
#' @export
submit_modis_ndvi_task_request <- function(modis_ndvi_start_year,
modis_ndvi_end_year,
modis_ndvi_token,
country_bounding_boxes) {

task_name <- country_bounding_boxes$country_iso3c
bbox_coords <- unlist(country_bounding_boxes$bounding_box)

# create the task list
task <- list(task_type = "area",
task_name = task_name,
startDate = paste0("01-01-", modis_ndvi_start_year),
endDate = paste0("12-31-", modis_ndvi_end_year),
layer = "MOD13A2.061,_1_km_16_days_NDVI",
file_type = "geotiff",
projection_name = "native",
bbox = paste(bbox_coords, collapse = ","))

# post the task request
task_response <- POST("https://appeears.earthdatacloud.nasa.gov/api/task", query = task, add_headers(Authorization = modis_ndvi_token))
task_response <- content(task_response)
task_response$country_iso3c <- task_name

return(list(task_response))

}
38 changes: 38 additions & 0 deletions R/submit_modis_ndvi_task_request_continent.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' .. content for \description{} (no empty lines) ..
#'
#' .. content for \details{} ..
#'
#' @title
#' @param modis_ndvi_start_year
#' @param modis_ndvi_end_year
#' @param modis_ndvi_token
#' @param bbox_coords
#' @return
#' @author Emma Mendelsohn
#' @export
submit_modis_ndvi_task_request_continent <- function(modis_ndvi_start_year,
modis_ndvi_end_year,
modis_ndvi_token,
bbox_coords) {

task_name <- "africa"

# create the task list
task <- list(task_type = "area",
task_name = task_name,
startDate = paste0("01-01-", modis_ndvi_start_year),
endDate = paste0("12-31-", modis_ndvi_end_year),
layer = "MOD13A2.061,_1_km_16_days_NDVI",
file_type = "geotiff",
projection_name = "native",
bbox = paste(bbox_coords, collapse = ","))

# post the task request
task_response <- POST("https://appeears.earthdatacloud.nasa.gov/api/task", query = task, add_headers(Authorization = modis_ndvi_token))
task_response <- content(task_response)
task_response$country_iso3c <- task_name

return(task_response)


}
Loading

0 comments on commit eff4ca4

Please sign in to comment.