Skip to content

Commit

Permalink
additional NA handling to get NASA transform fully working
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Oct 12, 2023
1 parent 6565aa1 commit dd33714
Show file tree
Hide file tree
Showing 10 changed files with 456 additions and 550 deletions.
12 changes: 6 additions & 6 deletions R/calculate_weather_anomalies.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ calculate_weather_anomalies <- function(nasa_weather_dataset, # enforce dependen

weather_dataset <- open_dataset(nasa_weather_directory_dataset) #|> to_duckdb(table_name = "weather")

# TODO this could go into create_nasa_weather_dataset() to avoid repeating it on each branch
weather_dataset <- weather_dataset |>
mutate(across(c(year, month, day, day_of_year), as.integer)) |>
mutate(date = lubridate::make_date(year, month, day)) |>
select(x, y, date, day_of_year, relative_humidity, temperature, precipitation)

# # TODO this could go into create_nasa_weather_dataset() to avoid repeating it on each branch
# weather_dataset <- weather_dataset |>
# mutate(across(c(year, month, day, day_of_year), as.integer)) |>
# mutate(date = lubridate::make_date(year, month, day)) |>
# select(x, y, date, day_of_year, relative_humidity, temperature, precipitation)
#
# generate the weather dataset - get the lagged anomalies for selected dates
# map over the lag intervals
row_select <- which(model_dates$date == date_selected)
Expand Down
30 changes: 21 additions & 9 deletions R/calculate_weather_historical_means.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,28 @@
#' .. content for \details{} ..
#'
#' @title Calculate historical means of weather variables for each x, y and day of year
#' @param nasa_weather_dataset
#' @param nasa_weather_directory_dataset
#' @param weather_historical_means_directory_dataset
#' @param nasa_weather_transformed
#' @param nasa_weather_directory_transformed
#' @param weather_historical_means_directory
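#' @return Character vector of full paths to all files found (recursively) under `weather_historical_means_directory` after the dataset is written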
#' @author emmamendelsohn
#' @author Emma Mendelsohn
#' @export
calculate_weather_historical_means <- function(nasa_weather_dataset,
nasa_weather_directory_dataset,
weather_historical_means_directory_dataset) {

NULL
calculate_weather_historical_means <- function(nasa_weather_transformed,
                                               nasa_weather_directory_transformed,
                                               weather_historical_means_directory) {

  # `nasa_weather_transformed` is never read in this body; it is accepted so
  # that callers can pass the upstream target and enforce the dependency.

  # Open the transformed NASA weather files as a single dataset.
  weather_dataset <- open_dataset(nasa_weather_directory_transformed) #|> to_duckdb(table_name = "weather")

  # Average each weather variable within every (x, y, day_of_year) group.
  # The result is then re-grouped by day_of_year only before being handed to
  # write_dataset(); the write is done purely for its side effect, so the
  # pipeline result is not kept in a local variable.
  # NOTE(review): the means are computed without na.rm, so any NA in a group
  # yields an NA mean -- confirm upstream NA removal is sufficient.
  weather_dataset |>
    group_by(x, y, day_of_year) |>
    summarize(historical_relative_humidity = mean(relative_humidity),
              historical_temperature = mean(temperature),
              historical_precipitation = mean(precipitation)) |>
    ungroup() |>
    group_by(day_of_year) |>
    write_dataset(weather_historical_means_directory)

  # Report every file now present under the output directory.
  list.files(weather_historical_means_directory, full.names = TRUE, recursive = TRUE)
}
4 changes: 4 additions & 0 deletions R/transform_nasa_weather.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ transform_nasa_weather <- function(nasa_weather_pre_transformed,
check_rows <- raw_flat |> group_by(day_of_year) |> count() |> ungroup() |> distinct(n)
assertthat::are_equal(1, nrow(check_rows))

# For 2023, there are NAs for the last day of the year
# TODO make this a less risky step: drop_na() with no columns specified removes every row that has an NA in any column
raw_flat <- drop_na(raw_flat)

# Split by day of year and transform with template raster. Return as a row-bound data frame
dat_out <- raw_flat |>
group_split(month, day, year, day_of_year, date) |>
Expand Down
14 changes: 6 additions & 8 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,6 @@ dynamic_targets <- tar_plan(
nasa_weather_directory_pre_transformed)),

# project to the template and save as arrow dataset
# TODO failing for 2023 because data coverage is not even for last few days
tar_target(nasa_weather_transformed,
transform_nasa_weather(nasa_weather_pre_transformed,
nasa_weather_directory_transformed,
Expand All @@ -242,7 +241,6 @@ dynamic_targets <- tar_plan(
cue = tar_cue("never")),

# ECMWF Weather Forecast data -----------------------------------------------------------

tar_target(ecmwf_forecasts_directory_raw,
create_data_directory(directory_path = "data/ecmwf_forecasts_raw")),
tar_target(ecmwf_forecasts_directory_transformed,
Expand Down Expand Up @@ -285,7 +283,7 @@ dynamic_targets <- tar_plan(
repository = "local",
cue = tar_cue("thorough")),

# save dataset to AWS bucket
# save transformed to AWS bucket
tar_target(ecmwf_forecasts_transformed_upload_aws_s3, {ecmwf_forecasts_transformed; # enforce dependency
aws_s3_upload_single_type(directory_path = ecmwf_forecasts_directory_transformed,
bucket = aws_bucket,
Expand All @@ -304,15 +302,15 @@ data_targets <- tar_plan(

# weather data

tar_target(weather_historical_means_directory_transformed,
create_data_directory(directory_path = "data/weather_historical_means_transformed")),
tar_target(weather_historical_means_directory,
create_data_directory(directory_path = "data/weather_historical_means")),

tar_target(weather_historical_means, calculate_weather_historical_means(nasa_weather_transformed, # enforce dependency
nasa_weather_directory_transformed,
weather_historical_means_directory_transformed)),
weather_historical_means_directory)),

tar_target(weather_anomalies_directory_transformed,
create_data_directory(directory_path = "data/weather_anomalies_transformed")),
tar_target(weather_anomalies_directory,
create_data_directory(directory_path = "data/weather_anomalies")),

tar_target(weather_anomalies, calculate_weather_anomalies(nasa_weather_transformed, # enforce dependency
nasa_weather_directory_transformed,
Expand Down
946 changes: 419 additions & 527 deletions _targets/meta/meta

Large diffs are not rendered by default.

Empty file.
File renamed without changes.
Empty file.
File renamed without changes.
Empty file.

0 comments on commit dd33714

Please sign in to comment.