diff --git a/R/calculate_forecasts_anomalies.R b/R/calculate_forecasts_anomalies.R
index 3921dec..76008b3 100644
--- a/R/calculate_forecasts_anomalies.R
+++ b/R/calculate_forecasts_anomalies.R
@@ -19,7 +19,7 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
                                           forecasts_anomalies_directory,
                                           model_dates, lead_intervals,
                                           overwrite = FALSE) {
-
+  
   # Set filename
   date_selected <- model_dates_selected
   save_filename <- glue::glue("forecast_anomaly_{date_selected}.gz.parquet")
@@ -40,38 +40,82 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
   lead_intervals_end <- lead_intervals
   
   anomalies <- map(1:length(lead_intervals_start), function(i){
-   
+    
+    # subset to start and end day of interval
     start <- lead_intervals_start[i]
     end <- lead_intervals_end[i]
     
-    start_date <- date_selected + start
-    end_date <- date_selected + end
+    lead_start_date <- date_selected + start
+    lead_end_date <- date_selected + end
     
+    # lead months for subsetting
     lead_months <- as.character(c(i, i+1))
-    baseline_date <- floor_date(start_date, unit = "month")
+    
+    # this is the date from which the forecasts are made
+    baseline_date <- floor_date(date_selected, unit = "month")
     
     # calculate weights
-    weight_a <- as.integer(days_in_month(start_date) - day(start_date)) + 1 # include current date
-    weight_b <- day(end_date) - 1
+    weight_a <- as.integer(days_in_month(lead_start_date) - day(lead_start_date)) + 1 # include current date
+    weight_b <- day(lead_end_date) - 1
     
-    # get weighted mean of forecast
-    forecasts_transformed_dataset |> 
+    # get weighted mean of forecasts means
+    lead_means <- forecasts_transformed_dataset |> 
       filter(data_date == baseline_date) |> 
       filter(lead_month %in% lead_months) |> 
       mutate(weight = case_when(lead_month == lead_months[1] ~ weight_a, 
                                 lead_month == lead_months[2] ~ weight_b)) |> 
       group_by(x, y, short_name) |>
       summarize(lead_mean = sum(mean * weight)/ sum(weight)) |>
+      ungroup() 
+    
+    # bring in historical means for the relevant days of the year
+    hist_doy <- yday(seq(lead_start_date, lead_end_date-1, by = "day"))
+    hist_doy_frmt <- str_pad(hist_doy, width = 3, side = "left", pad = "0")
+    hist_means <- open_dataset(weather_historical_means[str_detect(weather_historical_means, paste(hist_doy_frmt, collapse = "|"))]) |> 
+      select(-day_of_year) |> 
+      group_by(x, y) |> 
+      summarize(across(everything(), mean)) |> 
       ungroup() |> 
-      head(5) |> collect()
+      collect()
+    
+    # calculate anomalies - a bit inefficient because arrow doesn't allow reshaping (should have done so in the transform function)
+    # NAs are expected because forecasts are for the whole continent, weather is just for areas of interest
+    
+    temp_anomalies <- lead_means |> 
+      filter(short_name == "2t") |> 
+      left_join(hist_means |> select(x, y, contains("temperature")), by = c("x", "y")) |> 
+      mutate(!!paste0("anomaly_temperature_forecast_", end) := lead_mean  -  historical_temperature_mean,
+             !!paste0("anomaly_temperature_scaled_forecast_", end) := (lead_mean  -  historical_temperature_mean)/historical_temperature_sd) |> 
+      select(-short_name, -lead_mean, -starts_with("historical")) |> 
+      filter(!is.na(!!sym(paste0("anomaly_temperature_forecast_", end))))
     
-    # bring in historical means
+    rh_anomalies <- lead_means |> 
+      filter(short_name == "rh") |> 
+      left_join(hist_means |> select(x, y, contains("humidity")), by = c("x", "y")) |> 
+      mutate(!!paste0("anomaly_relative_humidity_forecast_", end) := lead_mean - historical_relative_humidity_mean,
+             !!paste0("anomaly_relative_humidity_scaled_forecast_", end) := (lead_mean  -  historical_relative_humidity_mean)/historical_relative_humidity_sd) |> 
+      select(-short_name, -lead_mean, -starts_with("historical")) |> 
+      filter(!is.na(!!sym(paste0("anomaly_relative_humidity_forecast_", end))))
+    
+    precip_anomalies <- lead_means |> 
+      filter(short_name == "tprate") |> 
+      left_join(hist_means |> select(x, y, contains("precipitation")), by = c("x", "y")) |> 
+      mutate(!!paste0("anomaly_precipitation_forecast_", end) := lead_mean  -  historical_precipitation_mean,
+             !!paste0("anomaly_precipitation_scaled_forecast_", end) := (lead_mean  -  historical_precipitation_mean)/historical_precipitation_sd) |> 
+      select(-short_name, -lead_mean, -starts_with("historical")) |> 
+      filter(!is.na(!!sym(paste0("anomaly_precipitation_forecast_", end))))
+    
+    left_join(temp_anomalies, rh_anomalies, by = c("x", "y")) |> 
+      left_join(precip_anomalies, by = c("x", "y"))
     
-    # reshape and label with interval
-
   }) |> 
     reduce(left_join, by = c("x", "y")) |> 
     mutate(date = date_selected) |> 
     relocate(date)
-    
+  
+  # Save as parquet 
+  write_parquet(anomalies, here::here(forecasts_anomalies_directory, save_filename), compression = "gzip", compression_level = 5)
+  
+  return(file.path(forecasts_anomalies_directory, save_filename))
+  
 }
\ No newline at end of file