diff --git a/R/calculate_forecasts_anomalies.R b/R/calculate_forecasts_anomalies.R index 3921dec..76008b3 100644 --- a/R/calculate_forecasts_anomalies.R +++ b/R/calculate_forecasts_anomalies.R @@ -19,7 +19,7 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed, forecasts_anomalies_directory, model_dates, lead_intervals, overwrite = FALSE) { - + # Set filename date_selected <- model_dates_selected save_filename <- glue::glue("forecast_anomaly_{date_selected}.gz.parquet") @@ -40,38 +40,82 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed, lead_intervals_end <- lead_intervals anomalies <- map(1:length(lead_intervals_start), function(i){ - + + # subset to start and end day of interval start <- lead_intervals_start[i] end <- lead_intervals_end[i] - start_date <- date_selected + start - end_date <- date_selected + end + lead_start_date <- date_selected + start + lead_end_date <- date_selected + end + # lead months for subsetting lead_months <- as.character(c(i, i+1)) - baseline_date <- floor_date(start_date, unit = "month") + + # this is the date from which the forecasts are made + baseline_date <- floor_date(date_selected, unit = "month") # calculate weights - weight_a <- as.integer(days_in_month(start_date) - day(start_date)) + 1 # include current date - weight_b <- day(end_date) - 1 + weight_a <- as.integer(days_in_month(lead_start_date) - day(lead_start_date)) + 1 # include current date + weight_b <- day(lead_end_date) - 1 - # get weighted mean of forecast - forecasts_transformed_dataset |> + # get weighted mean of forecasts means + lead_means <- forecasts_transformed_dataset |> filter(data_date == baseline_date) |> filter(lead_month %in% lead_months) |> mutate(weight = case_when(lead_month == lead_months[1] ~ weight_a, lead_month == lead_months[2] ~ weight_b)) |> group_by(x, y, short_name) |> summarize(lead_mean = sum(mean * weight)/ sum(weight)) |> + ungroup() + + # bring in historical means for the relevant days of the year + hist_doy <- yday(seq(lead_start_date, lead_end_date-1, by = "day")) + hist_doy_frmt <- str_pad(hist_doy, width = 3, side = "left", pad = "0") + hist_means <- open_dataset(weather_historical_means[str_detect(weather_historical_means, paste(hist_doy_frmt, collapse = "|"))]) |> + select(-day_of_year) |> + group_by(x, y) |> + summarize(across(everything(), mean)) |> ungroup() |> - head(5) |> collect() + collect() + + # calculate anomalies - a bit inefficient because arrow doesn't allow reshaping (should have done so in the transform function) + # NAs are expected because forecasts are for the whole continent, weather is just for areas of interest + + temp_anomalies <- lead_means |> + filter(short_name == "2t") |> + left_join(hist_means |> select(x, y, contains("temperature")), by = c("x", "y")) |> + mutate(!!paste0("anomaly_temperature_forecast_", end) := lead_mean - historical_temperature_mean, + !!paste0("anomaly_temperature_scaled_forecast_", end) := (lead_mean - historical_temperature_mean)/historical_temperature_sd) |> + select(-short_name, -lead_mean, -starts_with("historical")) |> + filter(!is.na(!!sym(paste0("anomaly_temperature_forecast_", end)))) - # bring in historical means + rh_anomalies <- lead_means |> + filter(short_name == "rh") |> + left_join(hist_means |> select(x, y, contains("humidity")), by = c("x", "y")) |> + mutate(!!paste0("anomaly_relative_humidity_forecast_", end) := lead_mean - historical_relative_humidity_mean, + !!paste0("anomaly_relative_humidity_scaled_forecast_", end) := (lead_mean - historical_relative_humidity_mean)/historical_relative_humidity_sd) |> + select(-short_name, -lead_mean, -starts_with("historical")) |> + filter(!is.na(!!sym(paste0("anomaly_relative_humidity_forecast_", end)))) + + precip_anomalies <- lead_means |> + filter(short_name == "tprate") |> + left_join(hist_means |> select(x, y, contains("precipitation")), by = c("x", "y")) |> + mutate(!!paste0("anomaly_precipitation_forecast_", end) := lead_mean - historical_precipitation_mean, + !!paste0("anomaly_precipitation_scaled_forecast_", end) := (lead_mean - historical_precipitation_mean)/historical_precipitation_sd) |> + select(-short_name, -lead_mean, -starts_with("historical")) |> + filter(!is.na(!!sym(paste0("anomaly_precipitation_forecast_", end)))) + + left_join(temp_anomalies, rh_anomalies, by = c("x", "y")) |> + left_join(precip_anomalies, by = c("x", "y")) - # reshape and label with interval - }) |> reduce(left_join, by = c("x", "y")) |> mutate(date = date_selected) |> relocate(date) - + + # Save as parquet + write_parquet(anomalies, here::here(forecasts_anomalies_directory, save_filename), compression = "gzip", compression_level = 5) + + return(file.path(forecasts_anomalies_directory, save_filename)) + } \ No newline at end of file