Skip to content

Commit

Permalink
DAS-2300 - provides spatial subsetting support for 3d variables in SP…
Browse files Browse the repository at this point in the history
…L3SMP (#30)

* DAS-2300 - provides spatial subsetting support for 3d variables in SPL3SMP

* DAS-2300 - PR feedback updates

* DAS-2300 - updates to PR feedback

* DAS-2300 - removed comments and minor updates

* DAS-2300 - updates to comments based on PR feedback
  • Loading branch information
sudha-murthy authored Feb 18, 2025
1 parent 0437f41 commit c327bf6
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 114 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## v1.1.5
### 2025-02-14

This version of HOSS adds support for 3D variables whose dimensions
are not in the nominal order. This provides support for the 3D
variables in the SMAP SPL3SMP collection, with the dimension order
information provided in the configuration file.

## v1.1.4
### 2025-02-12

Expand Down
2 changes: 1 addition & 1 deletion docker/service_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.4
1.1.5
92 changes: 58 additions & 34 deletions hoss/coordinate_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def get_coordinate_variables(
) -> tuple[list[str], list[str]]:
"""This function returns latitude and longitude variable names from
latitude and longitude variables listed in the CF-Convention coordinates
metadata attribute. It returns them in a specific
order [latitude_name, longitude_name]"
metadata attribute. It checks that the variables exist in the file and then
returns the lists in a specific order: [latitude_names], [longitude_names]
"""

coordinate_variables = varinfo.get_references_for_attribute(
Expand Down Expand Up @@ -86,16 +86,18 @@ def any_absent_dimension_variables(varinfo: VarInfoFromDmr, variable: str) -> bo
def get_dimension_array_names(
varinfo: VarInfoFromDmr,
variable_name: str,
) -> list[str]:
) -> dict[str:str]:
"""
Returns the dimensions names from coordinate variables or from
configuration
Returns the dimension names from coordinate variables or from configuration.
VarInfo implements pulling dimension names from configuration, which is
used for some collections with anonymous dimensions.
"""
variable = varinfo.get_variable(variable_name)
if variable is None:
return []
return {}

dimension_names = variable.dimensions
configured_dimensions = variable.dimensions
dimension_names = get_configured_dimension_order(varinfo, configured_dimensions)

if len(dimension_names) >= 2:
return dimension_names
Expand All @@ -107,26 +109,25 @@ def get_dimension_array_names(
# Given variable has coordinates: use latitude coordinate
# to define variable spatial dimensions.
if len(latitude_coordinates) == 1 and len(longitude_coordinates) == 1:
dimension_array_names = create_spatial_dimension_names_from_coordinates(
dimension_names = create_spatial_dimension_names_from_coordinates(
varinfo, latitude_coordinates[0]
)

# Given variable has no coordinate attribute itself,
# but is itself a coordinate (latitude or longitude):
# use as a coordinate to define spatial dimensions
elif variable.is_latitude() or variable.is_longitude():
dimension_array_names = create_spatial_dimension_names_from_coordinates(
dimension_names = create_spatial_dimension_names_from_coordinates(
varinfo, variable_name
)
else:
dimension_array_names = []

return dimension_array_names
dimension_names = {}
return dimension_names


def create_spatial_dimension_names_from_coordinates(
varinfo: VarInfoFromDmr, variable_name: str
) -> str:
) -> dict[str:str]:
"""returns the x-y variable names that would
match the group of the input variable. The 'y_dim' and 'x_dim'
dimension names are returned with the group pathname
Expand All @@ -135,22 +136,21 @@ def create_spatial_dimension_names_from_coordinates(
variable = varinfo.get_variable(variable_name)

if variable is not None:
dimension_array_names = [
f'{variable.group_path}/dim_y',
f'{variable.group_path}/dim_x',
]
dimension_names = {
'projection_y_coordinate': f'{variable.group_path}/y_dim',
'projection_x_coordinate': f'{variable.group_path}/x_dim',
}
else:
raise MissingVariable(variable_name)

return dimension_array_names
return dimension_names


def create_dimension_arrays_from_coordinates(
prefetch_dataset: Dataset,
latitude_coordinate: VariableFromDmr,
longitude_coordinate: VariableFromDmr,
crs: CRS,
projected_dimension_names: list[str],
dimension_names: dict[str, str],
) -> dict[str, np.ndarray]:
"""Generate artificial 1D dimensions scales for each
2D dimension or coordinate variable.
Expand All @@ -159,9 +159,11 @@ def create_dimension_arrays_from_coordinates(
3) Generate the x-y dimscale array and return to the calling method
"""
if len(projected_dimension_names) < 2:
raise InvalidDimensionNames(projected_dimension_names)
# dimension_names = get_dimension_array_names(varinfo, variable_name)
if len(dimension_names) < 2:
raise InvalidDimensionNames(dimension_names)

# check if the dimension names are configured in hoss_config
lat_arr = get_2d_coordinate_array(
prefetch_dataset,
latitude_coordinate.full_name_path,
Expand All @@ -171,10 +173,12 @@ def create_dimension_arrays_from_coordinates(
longitude_coordinate.full_name_path,
)

# get the max spread x and y indices
row_indices, col_indices = get_valid_sample_pts(
lat_arr, lon_arr, latitude_coordinate, longitude_coordinate
)

# get the dimension order from the coordinate data
dim_order_is_y_x, row_dim_values = get_dimension_order_and_dim_values(
lat_arr, lon_arr, row_indices, crs, is_row=True
)
Expand All @@ -188,18 +192,16 @@ def create_dimension_arrays_from_coordinates(
lat_arr, lon_arr, dim_order_is_y_x
)

# calculate the dimension values
y_dim = interpolate_dim_values_from_sample_pts(
row_dim_values, np.transpose(row_indices)[0], row_size
)

x_dim = interpolate_dim_values_from_sample_pts(
col_dim_values, np.transpose(col_indices)[1], col_size
)

projected_y, projected_x = (
projected_dimension_names[-2],
projected_dimension_names[-1],
)
projected_y = dimension_names['projection_y_coordinate']
projected_x = dimension_names['projection_x_coordinate']

if dim_order_is_y_x:
return {projected_y: y_dim, projected_x: x_dim}
Expand All @@ -208,6 +210,23 @@ def create_dimension_arrays_from_coordinates(
# return {projected_x: x_dim, projected_y: y_dim}


def get_configured_dimension_order(
    varinfo: VarInfoFromDmr, dimension_names: list[str]
) -> dict[str, str]:
    """Map configured `standard_name` values to their dimension names.

    For each name in `dimension_names`, the attributes returned by
    `varinfo.get_missing_variable_attributes` are checked for a
    `standard_name` entry, e.g. 'projection_x_coordinate' or
    'projection_y_coordinate' when these are configured in
    hoss_config.json. Each dimension that has one is added to the
    returned dictionary, keyed by that `standard_name`.

    Dimensions without a `standard_name` attribute are omitted, so the
    result may be empty or contain fewer entries than `dimension_names`.
    If two dimensions share the same `standard_name`, the one appearing
    later in `dimension_names` replaces the earlier one.
    """
    dimension_name_order = {}
    for dimension_name in dimension_names:
        attrs = varinfo.get_missing_variable_attributes(dimension_name)
        # Membership is tested on the mapping itself; `.keys()` is redundant.
        if 'standard_name' in attrs:
            dimension_name_order[attrs['standard_name']] = dimension_name
    return dimension_name_order


def get_2d_coordinate_array(
prefetch_dataset: Dataset,
coordinate_name: str,
Expand Down Expand Up @@ -310,7 +329,9 @@ def get_max_spread_pts(
valid_indices = np.ma.array(arr_indices, mask=valid_geospatial_mask)
elif valid_geospatial_mask.ndim == 3:
# use just 2 of the dimensions
# mask arr_ind to hide the invalid data points
# This assumes that the first dimension is the "extra" non-spatial dimension.
# Currently we define the dimensions and their order in the configuration file.
# TODO: When the configuration entry is dropped, this needs to be reconsidered.
valid_indices = np.ma.array(arr_indices, mask=valid_geospatial_mask[0, :, :])
else:
raise NotImplementedError
Expand Down Expand Up @@ -347,7 +368,7 @@ def get_dimension_order_and_dim_values(
projected y or projected_x values are varying across row or column.
Also returns a 1-D array of dimension values for the requested
projected spatial dimension. The input lat lon arrays and dimension
indices are assumed to be 2D in this implementation of the function.
indices are assumed to be 1D or 2D in this implementation of the function.
"""
if lat_array_points.ndim == 1 and lon_array_points.ndim == 1:
lat_arr_values = lat_array_points
Expand Down Expand Up @@ -445,27 +466,30 @@ def interpolate_dim_values_from_sample_pts(
def create_dimension_arrays_from_geotransform(
prefetch_dataset: Dataset,
latitude_coordinate: VariableFromDmr,
projected_dimension_names: list[str],
geotranform,
projected_dimension_names: dict[str, str],
geotransform,
) -> dict[str, np.ndarray]:
"""Generate artificial 1D dimensions scales from geotransform"""

lat_arr = get_2d_coordinate_array(
prefetch_dataset,
latitude_coordinate.full_name_path,
)

# compute the x,y locations along a column and row
column_dimensions = [
col_row_to_xy(geotranform, col, 0) for col in range(lat_arr.shape[-1])
col_row_to_xy(geotransform, col, 0) for col in range(lat_arr.shape[-1])
]
row_dimensions = [
col_row_to_xy(geotranform, 0, row) for row in range(lat_arr.shape[-2])
col_row_to_xy(geotransform, 0, row) for row in range(lat_arr.shape[-2])
]

# pull out dimension values
x_values = np.array([x for x, y in column_dimensions], dtype=np.float64)
y_values = np.array([y for x, y in row_dimensions], dtype=np.float64)
projected_y, projected_x = projected_dimension_names[-2:]

projected_y = projected_dimension_names['projection_y_coordinate']
projected_x = projected_dimension_names['projection_x_coordinate']

return {projected_y: y_values, projected_x: x_values}

Expand Down
6 changes: 5 additions & 1 deletion hoss/dimension_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,11 @@ def add_index_range(
else:
# Anonymous dimensions, so check for dimension derived from coordinates
# or from configuration
variable_dimensions = get_dimension_array_names(varinfo, variable_name)
variable_dimensions_dict = get_dimension_array_names(varinfo, variable_name)
if variable_dimensions_dict:
variable_dimensions = list(variable_dimensions_dict.values())
else:
variable_dimensions = []

range_strings = get_range_strings(variable_dimensions, index_ranges)

Expand Down
64 changes: 63 additions & 1 deletion hoss/hoss_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"Identification": "hoss_config",
"Version": 21,
"Version": 22,
"CollectionShortNamePath": [
"/HDF5_GLOBAL/short_name",
"/NC_GLOBAL/short_name",
Expand Down Expand Up @@ -442,6 +442,68 @@
],
"_Description": "SMAP L3 data are HDF5 and without dimension settings. Overrides here define the dimensions, a useful reference name, and critically, the dimension order."
},
{
"Applicability": {
"Mission": "SMAP",
"ShortNamePath": "SPL3SMP",
"VariablePattern": ".*/x_dim"
},
"Attributes": [
{
"Name": "dimensions",
"Value": "x_dim"
},
{
"Name": "Units",
"Value": "m"
},
{
"Name": "standard_name",
"Value": "projection_x_coordinate"
}
],
"_Description": "The pseudo-dimension variable is here supplemented with variable attributes (as if it were a dimension variable) to fully specify the X dimension."
},
{
"Applicability": {
"Mission": "SMAP",
"ShortNamePath": "SPL3SMP",
"VariablePattern": ".*/y_dim"
},
"Attributes": [
{
"Name": "dimensions",
"Value": "y_dim"
},
{
"Name": "Units",
"Value": "m"
},
{
"Name": "standard_name",
"Value": "projection_y_coordinate"
}
],
"_Description": "The pseudo-dimension variable is here supplemented with variable attributes (as if it were a dimension variable) to fully specify the Y dimension."
},
{
"Applicability": {
"Mission": "SMAP",
"ShortNamePath": "SPL3SMP",
"VariablePattern": ".*/am_pm"
},
"Attributes": [
{
"Name": "dimensions",
"Value": "am_pm"
},
{
"Name": "long_name",
"Value": "AM-PM dimension of size 2, 0 => AM, 1=> PM"
}
],
"_Description": "The pseudo-dimension variable is here supplemented with variable attributes (as if it were a dimension variable) to clarify the dimension name."
},
{
"Applicability": {
"Mission": "ICESat2",
Expand Down
3 changes: 1 addition & 2 deletions hoss/spatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,9 @@ def get_x_y_index_ranges_from_coordinates(
points.
"""
projected_dimension_names = get_dimension_array_names(varinfo, non_spatial_variable)

crs = get_variable_crs(non_spatial_variable, varinfo)

projected_dimension_names = get_dimension_array_names(varinfo, non_spatial_variable)
master_geotransform = get_master_geotransform(non_spatial_variable, varinfo)
if master_geotransform:
dimension_arrays = create_dimension_arrays_from_geotransform(
Expand Down
Loading

0 comments on commit c327bf6

Please sign in to comment.