Skip to content

Commit

Permalink
continue with calculations for lagged means and anomalies
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Sep 15, 2023
1 parent 15df181 commit a73e420
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 119 deletions.
114 changes: 33 additions & 81 deletions R/process_weather_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,94 +8,46 @@
#' @return
#' @author Emma Mendelsohn
#' @export
process_weather_data <- function(nasa_weather_directory_dataset, nasa_weather_dataset) {
process_weather_data <- function(nasa_weather_directory_dataset, nasa_weather_dataset, model_dates) {

library(dbplyr)
weather_dataset <- open_dataset(nasa_weather_directory_dataset) |> to_duckdb(table_name = "weather")

weather_dataset <- weather_dataset |>
mutate(across(c("month", "day", "day_of_year"), ~as.integer(.))) |>
mutate(year_doy = paste(year, day_of_year, sep = "_"))
mutate(across(c(year, month, day, day_of_year), as.integer)) |>
mutate(year_day_of_year = paste(year, day_of_year, sep = "_"))

# calculate monthly averages by pixel
weather_means <- weather_dataset |>
group_by(x, y, month) |>
summarize(mean_relative_humidity = mean(relative_humidity),
mean_temperature = mean(temperature),
mean_precipitation = mean(precipitation)) |>
ungroup()

# weather_means_check <- collect(weather_means)

# get 30 day lags for selected dates
for(i in 1:nrow(model_dates_random_select)){
date_select <- model_dates_random_select[i,]$date
previous_30_days <- seq(date_select - 30, date_select - 1, by = "day")
previous_30_days_tib <- tibble(year = year(previous_30_days), day_of_year = yday(previous_30_days))
# generate the weather dataset - get the lagged anomolies for selected dates
# TODO: do this for each lag internal
outt <- map(model_dates$date[model_dates$select_date], function(date_selected){

#TODO filter for year_doy for these 30 days
row_select <- which(model_dates$date == date_selected)
lag_dates <- model_dates |> slice((row_select - 30):(row_select - 1))

lag <- weather_dataset |>
inner_join(previous_30_days_tib)
# lag: calculate mean by pixel for the preceeding 30 days
lagged_means <- weather_dataset |>
filter(year_day_of_year %in% !!lag_dates$year_day_of_year) |>
group_by(x, y) |>
window_frame(-30, -1) |>
window_order(day_of_year) |>
mutate(anomaly_relative_humidity_roll30 = mean(anomaly_relative_humidity)) |>
ungroup()
}



# calculate anomalies between lags and weighted monthly averages







# # calculate monthly averages by pixel
# # calculate anomalies for each day relative to monthly average
# weather_anomalies <- weather_dataset |>
# group_by(x, y, month) |>
# mutate(mean_relative_humidity = mean(relative_humidity),
# mean_temperature = mean(temperature),
# mean_precipitation = mean(precipitation)) |>
# ungroup() |>
# mutate(anomaly_relative_humidity = relative_humidity - mean_relative_humidity,
# anomaly_temperature = temperature - mean_temperature,
# anomaly_precipitation = precipitation - mean_precipitation)

# get rolling avg for each x,y
# throws memory error - PRAGMA temp_directory='/path/to/tmp.tmp'
weather_lags <- weather_anomalies |>
group_by(x, y) |>
window_frame(-30, -1) |>
window_order(day_of_year) |>
mutate(anomaly_relative_humidity_roll30 = mean(anomaly_relative_humidity)) |>
ungroup()

# check this worked
test = weather_lags |> filter(x == min(x), y == max(y), day_of_year < 50) |> collect()

# ^ get rolling avg for 1 month, 2 month, 3 month ahead for each var

# filter out top 90 days of days (these are used to inform the lags only)

# filter for model_dates_random_select


### testing notes
# SQL rolling avg
# "AVG(anomaly_precipitation) OVER (
# ORDER BY day_of_year
# ROWS BETWEEN 30 PRECEDING AND 1 PRECEDING)"

# sanity checks
# mf <- memdb_frame(x = 1:100, y = 1:100)
# mf2 <- mf |>
# window_frame(-30, -1) |>
# window_order(y) |>
# mutate(x2 = mean(x))
summarize(lag_relative_humidity = mean(relative_humidity),
lag_temperature = mean(temperature),
lag_precipitation = mean(precipitation)) |>
ungroup() |>
mutate(year_day_of_year = date_selected)

# overall: calculate mean across the full dataset for the days of the year covered by the lag period
overall_means <- weather_dataset |>
filter(day_of_year %in% !!lag_dates$day_of_year ) |>
group_by(x, y) |>
summarize(overall_relative_humidity = mean(relative_humidity),
overall_temperature = mean(temperature),
overall_precipitation = mean(precipitation)) |>
ungroup() |>
mutate(year_day_of_year = date_selected)


#TODO
# join the lagged mean with the historical mean and calculate anomoly
# join with the data from the actual day, one row for each x, y, selected date

})

}
30 changes: 0 additions & 30 deletions R/random_select_model_dates.R

This file was deleted.

35 changes: 35 additions & 0 deletions R/set_model_dates.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' .. content for \description{} (no empty lines) ..
#'
#' .. content for \details{} ..
#'
#' @title
#' @param start_year
#' @param end_year
#' @param n_per_month
#' @return
#' @author Emma Mendelsohn
#' @export
set_model_dates <- function(start_year, end_year, n_per_month, lag_intervals, seed = 1) {

set.seed(seed)

# Create a vector of dates from January 2005 to December 2022
dates <- seq(as.Date(paste0(start_year,"-01-01")), as.Date(paste0(end_year, "-12-31")), by = "day")

model_dates <-tibble(date = dates) |>
mutate(year = format(dates, "%Y"),
month = format(dates, "%m"),
day = format(dates, "%d"),
day_of_year = format(dates, "%j")) |>
mutate(across(c(year, month, day, day_of_year), as.integer)) |>
mutate(year_day_of_year = paste(year, day_of_year, sep = "_")) |>
group_by(month, year) |>
mutate(select_date = row_number() %in% sample(n(), n_per_month)) |>
ungroup() |>
arrange(year, month, day)

# make sure we always have enough days to calculate lags
model_dates$select_date[1:max(lag_intervals)] <- FALSE

return(model_dates)
}
5 changes: 3 additions & 2 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,13 @@ dynamic_targets <- tar_plan(
# Data Processing -----------------------------------------------------------
data_targets <- tar_plan(

tar_target(model_dates_random_select, random_select_model_dates(start_year = 2006, end_year = 2022, n_per_month = 2, seed = 212)),
tar_target(lag_intervals, c(30, 60, 90)),
tar_target(model_dates, set_model_dates(start_year = 2005, end_year = 2022, n_per_month = 2, lag_intervals, seed = 212)),

# TODO take nasa_weather_directory_dataset and do full lag calcs in this function using duckdb, then collect into memory
tar_target(weather_data, process_weather_data(nasa_weather_directory_dataset,
nasa_weather_dataset, # enforce dependency
model_dates_random_select)),
model_dates)),
# tar_target(ndvi_data, process_ndvi_data(sentinel_ndvi_directory_dataset, sentinel_ndvi_dataset, model_dates_random_select))

)
Expand Down
15 changes: 9 additions & 6 deletions _targets/meta/meta
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error
.Random.seed|object|03fa9cad30a33312|||||||||||||||
.Random.seed|object|adfbe9c01236ca30|||||||||||||||
all_targets|function|2dda5afbd1f92385|||||||||||||||
aws_bucket|object|d9cf2c5ff7cc1be4|||||||||||||||
cache_aws_branched_target|function|6e2abfa4969de1bf|||||||||||||||
Expand All @@ -18,7 +18,7 @@ create_modis_ndvi_dataset|function|bb9bcd506ae906bd|||||||||||||||
create_nasa_weather_dataset|function|c12b134b7be25c25|||||||||||||||
create_raster_template_plot|function|db738156a3247831|||||||||||||||
create_sentinel_ndvi_dataset|function|201d4eaf8c87d0c3|||||||||||||||
data_targets|object|77e0b70e7dba2755|||||||||||||||
data_targets|object|20a3ace986c6d250|||||||||||||||
define_bounding_boxes|function|e614caacc0592e73|||||||||||||||
define_country_regions|function|54808365a1bb460e|||||||||||||||
deploy_targets|object|1eb1bc8d77111ded|||||||||||||||
Expand Down Expand Up @@ -179,8 +179,10 @@ get_sentinel_ndvi_api_parameters|function|ec6ebe60c7637311|||||||||||||||
get_wahis_rvf_outbreaks_raw|function|a8564ce9127c9c1d|||||||||||||||
get_weather_anomalies|function|1956aa290dc4fc5d|||||||||||||||
get_weather_data|function|1956aa290dc4fc5d|||||||||||||||
lag_intervals|stem|f4c9e8a4d588925c|6b4f81cd41a7b83e|a3dad144c40657ed|1055089432|bucket=open-rvfcast-data*region=NULL*key=_targets/lag_intervals*endpoint=TlVMTA*version=|t19615.7794897048s||55|qs|aws|vector|||0||
make_model_data|function|df0e5631ac6d7b53|||||||||||||||
model_dates_random_select|stem|42d120b8aa048c60|5719b67ece4afca5|4ea98346634ca2b4|-368262049|bucket=open-rvfcast-data*region=NULL*key=_targets/model_dates_random_select*endpoint=TlVMTA*version=|t19614.8223630127s||6256|qs|aws|vector|||0.037||
model_dates|stem|27f5b73d53fcb5e3|7646f39f9ba1810e|a8bb283b4980c84f|-204838086|bucket=open-rvfcast-data*region=NULL*key=_targets/model_dates*endpoint=TlVMTA*version=|t19615.7807563168s||80537|qs|aws|vector|||0.052||
model_dates_random_select|stem|ae6d12f9d280efdc|5719b67ece4afca5|41d39307354de6b6|-368262049|bucket=open-rvfcast-data*region=NULL*key=_targets/model_dates_random_select*endpoint=TlVMTA*version=|t19615.75211742s||76313|qs|aws|vector|||0.04||
model_targets|object|1eb1bc8d77111ded|||||||||||||||
modis_directory|stem|0404b408f5e5efef|c985137dd9b95cd4|ef46db3751d8e999|-671711443|bucket=project-dtra-ml-main*region=NULL*key=open-rvfcast/_targets/modis_directory*endpoint=TlVMTA*version=qdLdze87LwJZuMPusz2ovPXxe2rabGWb|t19493.6071375984s||55|qs|aws|vector|||0.001||
modis_ndvi_bundle|stem|error|a6c770fab6751fac|9d56e94e8363274f|-1456098296|bucket=open-rvfcast-data*region=NULL*key=_targets/modis_ndvi_bundle*endpoint=TlVMTA*version=|t19605.5645307177s||30|qs|aws|vector|||0.33||object task_id not found
Expand Down Expand Up @@ -3940,8 +3942,8 @@ preprocess_ecmwf_forecasts|function|033bd8a3c45b4d46|||||||||||||||
preprocess_nasa_weather|function|f5c92fafb420500d|||||||||||||||
preprocess_wahis_rvf_outbreaks|function|1739270cf02b72d6|||||||||||||||
process_ndvi_data|function|8a56ce9bd504bbec|||||||||||||||
process_weather_data|function|cfab097511ff525b|||||||||||||||
random_select_model_dates|function|9ff4c8b11c4c6a0f|||||||||||||||
process_weather_data|function|6dbc7677081a102e|||||||||||||||
random_select_model_dates|function|75d79de28b5c2e87|||||||||||||||
read_transform_raster|function|f7518264efa394ed|||||||||||||||
report_targets|object|1eb1bc8d77111ded|||||||||||||||
rolling_box|function|1195d491d98c697b|||||||||||||||
Expand Down Expand Up @@ -4964,6 +4966,7 @@ sentinel_ndvi_transformed_rasters_fefd5c9f|branch|891b7ee44128fe79|7490e5717bc38
sentinel_ndvi_transformed_upload_aws_s3|stem|138c6bf81e1f5512|14599b5def8fdb12|a880a8df380b19ae|1391835690|bucket=open-rvfcast-data*region=NULL*key=_targets/sentinel_ndvi_transformed_upload_aws_s3*endpoint=TlVMTA*version=|t19520.9852950252s||19579|qs|aws|vector|||159.826||
sentinel_ndvi_upload_aws_s3|stem|1a358ba66cfb3375|b9c6a884566d5476|826f421e76bc51b5|-1245746504|bucket=open-rvfcast-data*region=NULL*key=_targets/sentinel_ndvi_upload_aws_s3*endpoint=TlVMTA*version=|t19517.6998651133s||20237|qs|aws|vector|||397.502||
set_ecmwf_api_parameter|function|e3e4962883690ed5|||||||||||||||
set_model_dates|function|b0923ca118fec651|||||||||||||||
set_nasa_api_parameter|function|ac1cc420c9c9242c|||||||||||||||
static_targets|object|cbd54d1aeedd375e|||||||||||||||
submit_modis_ndvi_bundle_request|function|70d5dcdcf3510fa0|||||||||||||||
Expand All @@ -4974,7 +4977,7 @@ test_targets|object|1eb1bc8d77111ded|||||||||||||||
transform_nasa_weather|function|e80c244fb32ef2bd|||||||||||||||
transform_raster|function|47f20ba2b9ef9722|||||||||||||||
transform_sentinel_ndvi|function|92a19330c7f2bff2|||||||||||||||
user_rprof|object|a66bd50c4c539996|||||||||||||||
user_rprof|object|2a027f145a5a1891|||||||||||||||
wahis_rvf_outbreaks_preprocessed|stem|30ccd988b415d773|3ea98184b5887c93|275a59d310ff2a63|2127878318|bucket=open-rvfcast-data*region=NULL*key=_targets/wahis_rvf_outbreaks_preprocessed*endpoint=TlVMTA*version=|t19517.6952212142s||172965|qs|aws|vector|||0.043||
wahis_rvf_outbreaks_raw|stem|6fc7e6c7238977b3|b988ec4215d4213c|5ed4661ae3efb1aa|1933416983|bucket=open-rvfcast-data*region=NULL*key=_targets/wahis_rvf_outbreaks_raw*endpoint=TlVMTA*version=|t19517.6952047733s||173410|qs|aws|vector|||29.629||
wahis_rvf_query|function|9836433f6f1061fb|||||||||||||||
Expand Down

0 comments on commit a73e420

Please sign in to comment.