Skip to content

Commit

Permalink
complete function for calculating anomalies
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Oct 31, 2023
1 parent a3a6884 commit 46fe1c7
Showing 1 changed file with 58 additions and 14 deletions.
72 changes: 58 additions & 14 deletions R/calculate_forecasts_anomalies.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
forecasts_anomalies_directory,
model_dates, lead_intervals,
overwrite = FALSE) {

# Set filename
date_selected <- model_dates_selected
save_filename <- glue::glue("forecast_anomaly_{date_selected}.gz.parquet")
Expand All @@ -40,38 +40,82 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
lead_intervals_end <- lead_intervals

anomalies <- map(1:length(lead_intervals_start), function(i){


# subset to start and end day of interval
start <- lead_intervals_start[i]
end <- lead_intervals_end[i]

start_date <- date_selected + start
end_date <- date_selected + end
lead_start_date <- date_selected + start
lead_end_date <- date_selected + end

# lead months for subsetting
lead_months <- as.character(c(i, i+1))
baseline_date <- floor_date(start_date, unit = "month")

# this is the date from which the forecasts are made
baseline_date <- floor_date(date_selected, unit = "month")

# calculate weights
weight_a <- as.integer(days_in_month(start_date) - day(start_date)) + 1 # include current date
weight_b <- day(end_date) - 1
weight_a <- as.integer(days_in_month(lead_start_date) - day(lead_start_date)) + 1 # include current date
weight_b <- day(lead_end_date) - 1

# get weighted mean of forecast
forecasts_transformed_dataset |>
# get weighted mean of forecasts means
lead_means <- forecasts_transformed_dataset |>
filter(data_date == baseline_date) |>
filter(lead_month %in% lead_months) |>
mutate(weight = case_when(lead_month == lead_months[1] ~ weight_a,
lead_month == lead_months[2] ~ weight_b)) |>
group_by(x, y, short_name) |>
summarize(lead_mean = sum(mean * weight)/ sum(weight)) |>
ungroup()

# bring in historical means for the relevant days of the year
hist_doy <- yday(seq(lead_start_date, lead_end_date-1, by = "day"))
hist_doy_frmt <- str_pad(hist_doy, width = 3, side = "left", pad = "0")
hist_means <- open_dataset(weather_historical_means[str_detect(weather_historical_means, paste(hist_doy_frmt, collapse = "|"))]) |>
select(-day_of_year) |>
group_by(x, y) |>
summarize(across(everything(), mean)) |>
ungroup() |>
head(5) |> collect()
collect()

# calculate anomalies - a bit inefficient because arrow doesn't allow reshaping (should have done so in the transform function)
# NAs are expected because forecasts are for the whole continent, weather is just for areas of interest

temp_anomalies <- lead_means |>
filter(short_name == "2t") |>
left_join(hist_means |> select(x, y, contains("temperature")), by = c("x", "y")) |>
mutate(!!paste0("anomaly_temperature_forecast_", end) := lead_mean - historical_temperature_mean,
!!paste0("anomaly_temperature_scaled_forecast_", end) := (lead_mean - historical_temperature_mean)/historical_temperature_sd) |>
select(-short_name, -lead_mean, -starts_with("historical")) |>
filter(!is.na(!!sym(paste0("anomaly_temperature_forecast_", end))))

# bring in historical means
rh_anomalies <- lead_means |>
filter(short_name == "rh") |>
left_join(hist_means |> select(x, y, contains("humidity")), by = c("x", "y")) |>
mutate(!!paste0("anomaly_relative_humidity_forecast_", end) := lead_mean - historical_relative_humidity_mean,
!!paste0("anomaly_relative_humidity_scaled_forecast_", end) := (lead_mean - historical_relative_humidity_mean)/historical_relative_humidity_sd) |>
select(-short_name, -lead_mean, -starts_with("historical")) |>
filter(!is.na(!!sym(paste0("anomaly_relative_humidity_forecast_", end))))

precip_anomalies <- lead_means |>
filter(short_name == "tprate") |>
left_join(hist_means |> select(x, y, contains("precipitation")), by = c("x", "y")) |>
mutate(!!paste0("anomaly_precipitation_forecast_", end) := lead_mean - historical_precipitation_mean,
!!paste0("anomaly_precipitation_scaled_forecast_", end) := (lead_mean - historical_precipitation_mean)/historical_precipitation_sd) |>
select(-short_name, -lead_mean, -starts_with("historical")) |>
filter(!is.na(!!sym(paste0("anomaly_precipitation_forecast_", end))))

left_join(temp_anomalies, rh_anomalies, by = c("x", "y")) |>
left_join(precip_anomalies, by = c("x", "y"))

# reshape and label with interval

}) |>
reduce(left_join, by = c("x", "y")) |>
mutate(date = date_selected) |>
relocate(date)


# Save as parquet
write_parquet(anomalies, here::here(forecasts_anomalies_directory, save_filename), compression = "gzip", compression_level = 5)

return(file.path(forecasts_anomalies_directory, save_filename))

}

0 comments on commit 46fe1c7

Please sign in to comment.