Skip to content

Commit 2244c71

Browse files
committed
DAS-1177: Add functions and CF overrides to create artificial bounds for collections with edge-alignment.
1 parent 3ba073f commit 2244c71

File tree

4 files changed

+181
-4
lines changed

4 files changed

+181
-4
lines changed

CHANGELOG.md

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
## v1.0.2
2+
### 2024-02-26
3+
4+
This version of HOSS correctly handles edge-aligned geographic collections by
5+
adding the attribute `cell_alignment` with the value `edge` to `hoss_config.json`
6+
for edge-aligned collections (namely, ATL16), and by adding functions that
7+
create pseudo bounds for edge-aligned collections to make HOSS use the
8+
`dimension_utilities.py` function, `get_dimension_indices_from_bounds`.
9+
10+
This change also includes an addition of a CF override that addresses an
11+
issue with the ATL16 metadata for the variables `/spolar_asr_obs_grid` and
12+
`/spolar_lorate_blowing_snow_freq` where their `grid_mapping` attribute points
13+
to north polar variables instead of south polar variables. This CF Override
14+
will have to be removed if/when the metadata is corrected.
15+
116
## v1.0.1
217
### 2023-12-19
318

docker/service_version.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.0.1
1+
1.0.2

hoss/dimension_utilities.py

+138-3
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from harmony.message import Message
2020
from harmony.message_utility import rgetattr
2121
from harmony.util import Config
22-
from varinfo import VarInfoFromDmr
22+
from varinfo import VarInfoFromDmr, VariableFromDmr
2323

2424
from hoss.bbox_utilities import flatten_list
2525
from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange
@@ -75,8 +75,143 @@ def prefetch_dimension_variables(opendap_url: str, varinfo: VarInfoFromDmr,
7575

7676
logger.info('Variables being retrieved in prefetch request: '
7777
f'{format_variable_set_string(required_dimensions)}')
78-
return get_opendap_nc4(opendap_url, required_dimensions, output_dir,
79-
logger, access_token, config)
78+
79+
required_dimensions_nc4 = get_opendap_nc4(opendap_url,
80+
required_dimensions, output_dir,logger, access_token, config)
81+
82+
# Create bounds variables if necessary.
83+
add_bounds_variables(required_dimensions_nc4, required_dimensions,
84+
varinfo, logger)
85+
86+
return required_dimensions_nc4
87+
88+
89+
def add_bounds_variables(dimensions_nc4: str,
                         required_dimensions: Set[str],
                         varinfo: VarInfoFromDmr,
                         logger: Logger) -> None:
    """ Augment a NetCDF-4 file with artificial bounds variables for each
        dimension variable that is edge-aligned and does not already have
        a bounds variable.

        For each dimension variable:
        (1) Check if the variable needs a bounds variable.
        (2) If so, create a bounds array of minimum and maximum values.
        (3) Then write the bounds variable to the NetCDF-4 file.

        The file at `dimensions_nc4` is opened with mode 'r+' and is
        modified in place. Nothing is returned.

    """
    with Dataset(dimensions_nc4, 'r+') as prefetch_dataset:
        for dimension_name in required_dimensions:
            dimension_variable = varinfo.get_variable(dimension_name)
            if needs_bounds(dimension_variable):
                min_and_max_bounds = create_bounds(prefetch_dataset,
                                                   dimension_name)
                write_bounds(prefetch_dataset, dimension_variable,
                             min_and_max_bounds)

                logger.info('Artificial bounds added for dimension variable: '
                            f'{dimension_name}')
112+
113+
114+
def needs_bounds(dimension: 'VariableFromDmr') -> bool:
    """ Check if a dimension variable needs a bounds variable.
        This will be the case when dimension cells are edge-aligned
        and bounds for that dimension do not already exist.

        A dimension without a `cell_alignment` attribute is treated as
        not edge-aligned, so it does not need artificial bounds.

    """
    # Use `.get` so variables lacking the `cell_alignment` attribute return
    # False instead of raising a KeyError.
    return (dimension.attributes.get('cell_alignment') == 'edge'
            and dimension.references.get('bounds') is None)
122+
123+
124+
def create_bounds(dimension_dataset: 'Dataset',
                  dimension_path: str) -> np.ndarray:
    """ Create an array containing the minimum and maximum bounds
        for a given dimension.

        The minimum and maximum values are determined under the assumption
        that the dimension data is monotonically increasing and contiguous.
        So for every bounds but the last, the bounds are simply extracted
        from the dimension dataset.

        The final bounds must be calculated with the assumption that
        the last data cell is edge-aligned and thus has a value that does
        not account for the cell length. So, the final bound is determined
        by taking the median of all the resolutions in the dataset to obtain
        a resolution that can be added to the final data value.

        Ex: Input dataset with resolution of 3 degrees:  [ ... , 81, 84, 87]

        Minimum | Maximum
         <...>     <...>
          81        84
          84        87
          87        ? ->  87 + median resolution -> 87 + 3 -> 90

        The returned array has one row per dimension value and two
        columns: (minimum, maximum). The dimension is assumed to be
        one-dimensional. A dimension with a single value has no spacing to
        take a median of, so its maximum bound is NaN.

    """
    # Access the dimension variable's data using the variable's full path.
    dimension_array = np.array(dimension_dataset[dimension_path][:])

    # Determine the dimension's resolution by taking the median value
    # of the differences between each ascending data value.
    dim_resolution = np.median(np.diff(dimension_array))

    # Each cell's maximum is the next cell's minimum. The last cell has no
    # successor, so its maximum is extrapolated using the median resolution.
    maximum_bounds = np.append(dimension_array[1:],
                               dimension_array[-1] + dim_resolution)

    return np.column_stack((dimension_array, maximum_bounds))
168+
169+
170+
def write_bounds(dimension_dataset: Dataset,
                 dimension_variable: VariableFromDmr,
                 min_and_max_bounds: np.ndarray) -> None:
    """ Write the input bounds array to a given dimension dataset.

        First a new dimension is created for the new bounds variable
        to allow the variable to be two-dimensional.

        Then the new bounds variable is created using two dimensions:
        (1) the existing dimension of the dimension dataset, and
        (2) the new bounds variable dimension.

        Finally, the `bounds` attribute is set on the dimension variable in
        the dataset, and the same bounds name is recorded in the
        `references` and `attributes` of `dimension_variable`.

    """
    # Split the full path into the containing group and the variable name.
    # NOTE(review): assumes `full_name_path` begins with '/' - confirm.
    dimension_full_name_path = dimension_variable.full_name_path
    group_path, _, dimension_name = dimension_full_name_path.rpartition('/')
    bounds_name = dimension_name + '_bnds'

    # Consider the special case when the dimension group is the root group.
    # The dimension can't refer to the full path in the name itself, so we
    # have to create it with respect to the group we want to place it in.
    if group_path == '':
        parent_group = dimension_dataset
    else:
        parent_group = dimension_dataset[group_path]

    # Create the second (length 2) dimension for the bounds variable.
    bounds_dim = parent_group.createDimension(dimension_name + '_bnds_dim', 2)

    # Dimension variables only have one dimension - themselves.
    variable_dimension = (
        dimension_dataset[dimension_full_name_path].dimensions[0]
    )

    bounds = dimension_dataset.createVariable(
        dimension_full_name_path + '_bnds',
        str(dimension_variable.data_type),
        (variable_dimension, bounds_dim)
    )

    # Write all bounds to the dataset file in a single assignment.
    bounds[:] = min_and_max_bounds

    # Point the dimension variable at its bounds, both in the file and in
    # the varinfo attributes and references.
    dimension_dataset[dimension_full_name_path].setncatts(
        {'bounds': bounds_name}
    )
    dimension_variable.references['bounds'] = {bounds_name}
    dimension_variable.attributes['bounds'] = bounds_name
80215

81216

82217
def is_dimension_ascending(dimension: MaskedArray) -> bool:

hoss/hoss_config.json

+27
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,20 @@
245245
],
246246
"_Description": "Ensure variables in /Soil_Moisture_Retrieval_Data_Polar_PM group point to correct coordinate variables."
247247
},
248+
{
249+
"Applicability": {
250+
"Mission": "ICESat2",
251+
"ShortNamePath": "ATL16",
252+
"Variable_Pattern": ".*_grid_(lat|lon)"
253+
},
254+
"Attributes": [
255+
{
256+
"Name": "cell_alignment",
257+
"Value": "edge"
258+
}
259+
],
260+
"_Description": "ATL16 has edge-aligned grid cells."
261+
},
248262
{
249263
"Applicability": {
250264
"Mission": "ICESat2",
@@ -357,6 +371,19 @@
357371
}
358372
],
359373
"_Description": "Ensure the latitude and longitude dimension variables know their associated grid_mapping variable."
374+
},
375+
{
376+
"Applicability": {
377+
"Mission": "ICESat2",
378+
"ShortNamePath": "ATL16",
379+
"Variable_Pattern": "/spolar_(asr_obs_grid|lorate_blowing_snow_freq)"
380+
},
381+
"Attributes": [
382+
{
383+
"Name": "grid_mapping",
384+
"Value": "crs_latlon: spolar_grid_lat crs_latlon: spolar_grid_lon"
385+
}
386+
]
360387
}
361388
],
362389
"CF_Supplements": [

0 commit comments

Comments
 (0)