Skip to content

Commit

Permalink
Merge pull request #181 from UrbanSystemsLab/solarsally
Browse files Browse the repository at this point in the history
Solarsally
  • Loading branch information
SallyElHajjar authored Oct 9, 2024
2 parents 54f2374 + 23ff6f0 commit 02edb3e
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 89 deletions.
112 changes: 39 additions & 73 deletions usl_pipeline/cloud_functions/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import time
import traceback
from typing import BinaryIO, Callable, IO, TextIO, Tuple

from google.api_core import exceptions
from google.cloud import error_reporting
from google.cloud import firestore
Expand Down Expand Up @@ -40,6 +39,7 @@
# of time between when an event is triggered and when the cloud
# function actually runs, which can be a bit of time if there are a
# lot of triggers.
# Added
_MAX_RETRY_SECONDS = 60 * 60

# The vector is long enough to express rainfall every five minutes for up to three days.
Expand Down Expand Up @@ -952,8 +952,14 @@ def _build_wps_feature_matrix(fd: IO[bytes]) -> Tuple[NDArray, FeatureMetadata]:
# Ignore type checker error - BytesIO inherits from expected type BufferedIOBase
# https://shorturl.at/lk4om
with xarray.open_dataset(fd, engine="h5netcdf") as ds: # type: ignore
# Assign Time coordinates so datetime is associated with each data array
ds = ds.assign_coords(Time=ds.Times)
# Drop the existing 'Time' coordinate if present, to avoid conflict
if "Time" in ds.coords:
ds = ds.drop_vars("Time")

# Assign the 'Time' coordinate
if "Times" in ds:
ds = ds.assign_coords(Time=ds.Times)

# Derive non-native variables and assign to dataset
ds = _compute_custom_wps_variables(ds)

Expand All @@ -967,7 +973,9 @@ def _build_wps_feature_matrix(fd: IO[bytes]) -> Tuple[NDArray, FeatureMetadata]:
features_components.append(feature)

features_matrix = numpy.dstack(features_components)
snapshot_time = ds.Times.values[0].decode("utf-8")

# Get snapshot time
snapshot_time = ds.Times.values[0].astype(str)

return features_matrix, FeatureMetadata(
time=datetime.datetime.fromisoformat(snapshot_time)
Expand Down Expand Up @@ -995,24 +1003,15 @@ def _compute_wind_components(dataset: xarray.Dataset) -> xarray.Dataset:
# WDIR_SIN (wind direction_sine component) at FNL level 0 (~10m)
# WDIR_COS (wind direction_cosine component) at FNL level 0 (~10m)
if all(var in dataset.keys() for var in ["UU", "VV"]):
# Derive wind components from UU and VV: WSPD, WDIR
uu = dataset.data_vars["UU"]
vv = dataset.data_vars["VV"]

# Interpolate UU and VV to the common 200x200 grid
uu_centered = (uu[:, :, :, :-1] + uu[:, :, :, 1:]) / 2
vv_centered = (vv[:, :, :-1, :] + vv[:, :, 1:, :]) / 2

# Compute wind speed
wind_speed = numpy.sqrt(uu_centered.values**2 + vv_centered.values**2)

# compute wind direction 0 deg is x-axis
# 270 is subtracted to align direction to true North
wind_direction = (
270 - numpy.degrees(numpy.arctan2(vv_centered.values, uu_centered.values))
) % 360

# Define functions to compute sine and cosine components of wind direction
def _direction_to_sine(degrees):
radians = (degrees / 360.0) * 2 * numpy.pi
return numpy.sin(radians)
Expand All @@ -1021,13 +1020,11 @@ def _direction_to_cosine(degrees):
radians = (degrees / 360.0) * 2 * numpy.pi
return numpy.cos(radians)

# Compute sine and cosine components of wind direction
wind_direction_sin = _direction_to_sine(wind_direction)
wind_direction_cos = _direction_to_cosine(wind_direction)

new_dims = ["Time", "num_metgrid_levels", "south_north", "west_east"]

# Assign new variables to dataset
dataset = dataset.assign(
WSPD=xarray.DataArray(wind_speed, coords=vv_centered.coords, dims=new_dims)
)
Expand All @@ -1046,58 +1043,44 @@ def _direction_to_cosine(degrees):


def _compute_solar_time_components(dataset: xarray.Dataset) -> xarray.Dataset:
"""Compute solar time components (sine and cosine) for the dataset's longitudes.
Args:
- dataset: The xarray dataset containing longitude, latitude, and time information.
- use_gcp_time: A boolean flag indicating whether the Times are in Unix time format.
Returns:
- The dataset with added solar time sine and cosine components.
"""
# Solar Time Computation for every Longitude of the City
# Solar Time Sine and Cosine Derivation from there
if all(var in dataset.keys() for var in ["XLONG_M", "XLAT_M", "Times"]):
# Extract longitude, latitude, and time variables from dataset
longitudes = dataset["XLONG_M"][0, :, :]
latitudes = dataset["XLAT_M"][0, :, :]
times = dataset["Times"]
longitudes = dataset["XLONG_M"][0, :, :] # Extract the longitudes
times = dataset["Times"] # Extract the time variable

# Convert time variable to datetime objects
# Convert preformatted string-based times to nanosecond precision datetime
times = xarray.DataArray(
[
# Check if the value is a Unix timestamp (numeric type)
(
numpy.datetime64(
datetime.datetime.utcfromtimestamp(t).strftime(
"%Y-%m-%dT%H:%M:%S"
)
)
if isinstance(t, (int, float))
else numpy.datetime64("".join(t.astype(str)).replace("_", "T"))
)
numpy.datetime64("".join(t.astype(str)).replace("_", "T"), "ns")
for t in times.values
]
],
dims=["Time"],
)

print("Debug - Converted Times:", times)

# Extract hours and minutes from the datetime objects separately
utc_hours = times.dt.hour # Extract the hour component
utc_minutes = times.dt.minute # Extract the minute component

# Convert to fractional hours (hours + minutes/60)
# Extract hours and minutes in UTC
utc_hours = times.dt.hour
utc_minutes = times.dt.minute
utc_hours_minutes = utc_hours + utc_minutes / 60.0

print("Debug - UTC Hours and Minutes (in fraction):", utc_hours_minutes)

# Define the function to calculate solar time
# Function to calculate solar time from UTC and longitude
def calculate_solar_time(utc_time, longitude):
solar_time = (utc_time + longitude / 15) % 24
return solar_time
return (utc_time + longitude / 15) % 24

# Vectorized solar time calculation using xarray apply_ufunc
# Calculate solar times for all longitudes
solar_times = xarray.apply_ufunc(
calculate_solar_time,
xarray.DataArray(utc_hours_minutes, dims=["Time"]),
longitudes,
vectorize=True,
calculate_solar_time, utc_hours_minutes, longitudes, vectorize=True
)

print("Debug - Calculated Solar Times:", solar_times)

# Convert time in hours to sine and cosine components
# Functions to convert solar time to sine and cosine values
def time_to_sine(hours):
radians = (hours / 24.0) * 2 * numpy.pi
return numpy.sin(radians)
Expand All @@ -1106,25 +1089,20 @@ def time_to_cosine(hours):
radians = (hours / 24.0) * 2 * numpy.pi
return numpy.cos(radians)

# Compute sine and cosine components of solar time
# Apply sine and cosine transformations
solar_time_sin = xarray.apply_ufunc(time_to_sine, solar_times, vectorize=True)
solar_time_cos = xarray.apply_ufunc(time_to_cosine, solar_times, vectorize=True)

print("Debug - Solar Time Sine:", solar_time_sin)
print("Debug - Solar Time Cosine:", solar_time_cos)

# Match the dimensionality and coordinate structure
# Assign new variables for sine and cosine of solar time
new_dims = ["Time", "south_north", "west_east"]

# Assign new variables to dataset
dataset = dataset.assign(
SOLAR_TIME_SIN=xarray.DataArray(
solar_time_sin, coords=latitudes.coords, dims=new_dims
solar_time_sin, coords=dataset["XLAT_M"].coords, dims=new_dims
)
)
dataset = dataset.assign(
SOLAR_TIME_COS=xarray.DataArray(
solar_time_cos, coords=latitudes.coords, dims=new_dims
solar_time_cos, coords=dataset["XLAT_M"].coords, dims=new_dims
)
)

Expand All @@ -1146,33 +1124,21 @@ def _process_wps_feature(feature: xarray.DataArray, var_config: dict) -> NDArray
for each variable defined in: https://shorturl.at/W6nzY
"""
snapshot_time = feature.coords.get("Time")
# Ensure order of dimension axes are: (west_east, south_north, <other spatial dims>)
# to stay consistent with rest of data pipeline
feature = feature.transpose("west_east", "south_north", ...)

# FNL-derived var - extract to only first level
if "num_metgrid_levels" in feature.dims:
feature = feature.isel(num_metgrid_levels=0)

# Monthly climatology var - extract to only the month of the file's datestamp
if "z-dimension0012" in feature.dims and snapshot_time:
snapshot_datetime = datetime.datetime.strptime(
snapshot_time.values[0].decode(), "%Y-%m-%d_%H:%M:%S"
)
feature = feature.isel({"z-dimension0012": snapshot_datetime.month - 1})

# At this point, we can remove the Time axis from current feature DataArray
# since we no longer need it for processing or in the feature matrix for ML
# model. xarray.DataArry.isel() will return new DataArray with axis dropped.
feature = feature.isel(Time=0)

feature_values = feature.values

# Convert percentage-based units to decimal
if var_config.get("unit") == wps_data.Unit.PERCENTAGE:
feature_values = _convert_to_decimal(feature_values)

# If var config requires global scaling, normalize feature values
scaling_config = var_config.get("scaling")
if (
scaling_config is not None
Expand Down
46 changes: 30 additions & 16 deletions usl_pipeline/cloud_functions/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,15 +652,16 @@ def test_compute_wind_components():


def test_compute_solar_time_components():
# Create an in-memory NetCDF file with dimensions and variables

# Create an in-memory NetCDF file with fake data
ncfile = netCDF4.Dataset(
"met_em.d03.2010-02-02_18:00:00.nc", mode="w", format="NETCDF4", memory=1
)
ncfile.createDimension("Time", 1)
ncfile.createDimension("west_east", 2)
ncfile.createDimension("south_north", 2)

# In WPS/WRF files, 'Times' is often defined as a time dimension
# Set fake time, longitude, and latitude data
times = ncfile.createVariable("Times", "f8", ("Time",))
longitudes = ncfile.createVariable(
"XLONG_M", "float32", ("Time", "south_north", "west_east")
Expand All @@ -669,10 +670,11 @@ def test_compute_solar_time_components():
"XLAT_M", "float32", ("Time", "south_north", "west_east")
)

# Set the time using numpy.datetime64 for the expected format
times[0] = numpy.datetime64("2010-02-02T18:00:00")

# Set longitude and latitude values
# Use a Unix timestamp for "2010-02-02T18:00:00"
times[0] = (
numpy.datetime64("2010-02-02T18:00:00")
- numpy.datetime64("1970-01-01T00:00:00")
) / numpy.timedelta64(1, "s")
longitudes[:] = numpy.array(
[[-74.00, -73.90], [-74.10, -73.80]], dtype=numpy.float32
)
Expand All @@ -685,26 +687,38 @@ def test_compute_solar_time_components():
# Open the in-memory NetCDF file with xarray
ds = xarray.open_dataset(io.BytesIO(ncfile_bytes))

# Convert Unix timestamps to datetime for the fake dataset
times = xarray.DataArray(
[
numpy.datetime64(int(t), "s") # Convert Unix timestamp to seconds
for t in ds["Times"].values
],
dims=["Time"],
)
ds["Times"] = times # Overwrite the time reading with fake data conversion

# Process the dataset with the _compute_solar_time_components function
processed_ds = main._compute_solar_time_components(ds)

# Check the computed sine values of solar time
solartime_sin = processed_ds.data_vars["SOLAR_TIME_SIN"]
solartime_sin_expected = [[[-0.2756, -0.2773], [-0.274, -0.279]]]
numpy.testing.assert_array_almost_equal(
solartime_sin_expected, solartime_sin.values, decimal=4
)
print("Solar Time Sin Shape:", solartime_sin.shape)

# Check the computed cosine values of solar time
solartime_cos = processed_ds.data_vars["SOLAR_TIME_COS"]
solartime_cos_expected = [[[-0.9613, -0.9608], [-0.9617, -0.9603]]]
numpy.testing.assert_array_almost_equal(
solartime_cos_expected, solartime_cos.values, decimal=4
)
print("Solar Time Cos Shape:", solartime_cos.shape)

# Verify the shape of the computed solar time variables
assert solartime_sin.shape == (1, 2, 2)
assert solartime_cos.shape == (1, 2, 2)
assert solartime_sin.shape == (
ds.dims["Time"],
ds.dims["south_north"],
ds.dims["west_east"],
)
assert solartime_cos.shape == (
ds.dims["Time"],
ds.dims["south_north"],
ds.dims["west_east"],
)


@mock.patch.object(main.firestore, "Client", autospec=True)
Expand Down

0 comments on commit 02edb3e

Please sign in to comment.