|
19 | 19 | from harmony.message import Message
|
20 | 20 | from harmony.message_utility import rgetattr
|
21 | 21 | from harmony.util import Config
|
22 |
| -from varinfo import VarInfoFromDmr |
| 22 | +from varinfo import VarInfoFromDmr, VariableFromDmr |
23 | 23 |
|
24 | 24 | from hoss.bbox_utilities import flatten_list
|
25 | 25 | from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange
|
@@ -75,8 +75,143 @@ def prefetch_dimension_variables(opendap_url: str, varinfo: VarInfoFromDmr,
|
75 | 75 |
|
76 | 76 | logger.info('Variables being retrieved in prefetch request: '
|
77 | 77 | f'{format_variable_set_string(required_dimensions)}')
|
78 |
| - return get_opendap_nc4(opendap_url, required_dimensions, output_dir, |
79 |
| - logger, access_token, config) |
| 78 | + |
| 79 | + required_dimensions_nc4 = get_opendap_nc4(opendap_url, |
| 80 | + required_dimensions, output_dir,logger, access_token, config) |
| 81 | + |
| 82 | + # Create bounds variables if necessary. |
| 83 | + add_bounds_variables(required_dimensions_nc4, required_dimensions, |
| 84 | + varinfo, logger) |
| 85 | + |
| 86 | + return required_dimensions_nc4 |
| 87 | + |
| 88 | + |
def add_bounds_variables(dimensions_nc4: str,
                         required_dimensions: Set[str],
                         varinfo: VarInfoFromDmr,
                         logger: Logger) -> None:
    """ Augment a NetCDF4 file with artificial bounds variables for each
        dimension variable that is edge-aligned and does not already
        have bounds variables.

        For each dimension variable:
        (1) Check if the variable needs a bounds variable.
        (2) If so, create a bounds array of minimum and maximum values.
        (3) Then write the bounds variable to the NetCDF4 file.

        The file at `dimensions_nc4` is opened in 'r+' mode and updated
        in place; nothing is returned. A message is logged for every
        dimension that receives artificial bounds.

    """
    with Dataset(dimensions_nc4, 'r+') as prefetch_dataset:
        for dimension_name in required_dimensions:
            dimension_variable = varinfo.get_variable(dimension_name)

            # `needs_bounds` is annotated as returning a bool, so rely on
            # its truthiness rather than an identity check against `True`.
            if not needs_bounds(dimension_variable):
                continue

            min_and_max_bounds = create_bounds(prefetch_dataset,
                                               dimension_name)
            write_bounds(prefetch_dataset, dimension_variable,
                         min_and_max_bounds)

            logger.info('Artificial bounds added for dimension variable: '
                        f'{dimension_name}')
| 112 | + |
| 113 | + |
def needs_bounds(dimension: VariableFromDmr) -> bool:
    """ Check if a dimension variable needs a bounds variable.
        This will be the case when dimension cells are edge-aligned
        and bounds for that dimension do not already exist.

        A dimension without a `cell_alignment` attribute is treated as
        not needing bounds, rather than raising a `KeyError`.
        (Assumes `dimension.attributes` and `dimension.references` are
        both dictionary-like - confirm against `VariableFromDmr`.)

    """
    return (
        dimension.attributes.get('cell_alignment') == 'edge'
        and dimension.references.get('bounds') is None
    )
| 122 | + |
| 123 | + |
def create_bounds(dimension_dataset: Dataset,
                  dimension_path: str) -> np.ndarray:
    """ Create an array containing the minimum and maximum bounds
        for a given dimension.

        The minimum and maximum values are determined under the assumption
        that the dimension data is monotonic and contiguous. So for every
        bounds but the last, the bounds are simply extracted from the
        dimension dataset: the minimum of a cell is its own value and the
        maximum is the value of the next cell.

        The final bounds must be calculated with the assumption that
        the last data cell is edge-aligned and thus has a value that does
        not account for the cell length. So, the final bound is determined
        by taking the median of all the resolutions in the dataset to obtain
        a resolution that can be added to the final data value.

        Ex: Input dataset with resolution of 3 degrees: [ ... , 81, 84, 87]

        Minimum | Maximum
         <...>     <...>
          81        84
          84        87
          87        ? -> 87 + median resolution -> 87 + 3 -> 90

        Returns an array of shape (number of dimension values, 2).

        Raises a `ValueError` if the dimension has fewer than two values,
        as no resolution can then be derived to extrapolate the final
        bound.

    """
    # Access the dimension variable's data using the variable's full path.
    dimension_array = np.asarray(dimension_dataset[dimension_path][:])

    if dimension_array.size < 2:
        raise ValueError(
            f'Cannot create bounds for "{dimension_path}": at least two '
            'dimension values are required to determine the resolution.'
        )

    # Determine the dimension's resolution by taking the median value
    # of the differences between each consecutive pair of data values.
    dim_resolution = np.median(np.diff(dimension_array))

    # The maximum of each cell is the minimum of the next cell. The last
    # cell has no successor, so its maximum is extrapolated.
    maximum_bounds = np.append(dimension_array[1:],
                               dimension_array[-1] + dim_resolution)

    return np.column_stack((dimension_array, maximum_bounds))
| 168 | + |
| 169 | + |
def write_bounds(dimension_dataset: Dataset,
                 dimension_variable: VariableFromDmr,
                 min_and_max_bounds: np.ndarray) -> None:
    """ Write the input bounds array to a given dimension dataset.

        First a new dimension is created for the new bounds variable
        to allow the variable to be two-dimensional.

        Then the new bounds variable is created using two dimensions:
        (1) the existing dimension of the dimension dataset, and
        (2) the new bounds variable dimension.

        Finally the `bounds` attribute of the dimension variable is set in
        the dataset, and the `references` and `attributes` of
        `dimension_variable` are updated to point at the new bounds
        variable. Nothing is returned; both inputs are updated in place.

    """
    dimension_full_name_path = dimension_variable.full_name_path

    # Split the full path into the containing group and the variable name,
    # e.g. '/group/lat' -> ('/group', 'lat') and '/lat' -> ('', 'lat').
    group_path, _, dimension_name = dimension_full_name_path.rpartition('/')
    bounds_name = dimension_name + '_bnds'
    bounds_full_name_path = dimension_full_name_path + '_bnds'

    # Consider the special case when the dimension group is the root group.
    # The dimension can't refer to the full path in the name itself, so it
    # is created with respect to the group it should be placed in.
    if group_path:
        parent_group = dimension_dataset[group_path]
    else:
        parent_group = dimension_dataset

    # Create the second bounds dimension.
    bounds_dim = parent_group.createDimension(dimension_name + '_bnds_dim', 2)

    # Dimension variables only have one dimension - themselves.
    dataset_variable = dimension_dataset[dimension_full_name_path]
    variable_dimension = dataset_variable.dimensions[0]

    bounds = dimension_dataset.createVariable(
        bounds_full_name_path,
        str(dimension_variable.data_type),
        (variable_dimension, bounds_dim),
    )

    # Write all bounds in a single assignment rather than one element
    # at a time.
    bounds[:] = min_and_max_bounds

    # Update the dataset attribute and the varinfo attributes/references.
    # The attribute holds the bare variable name, while the reference
    # records the full path at which the bounds variable was created, so it
    # also resolves for dimensions inside a group.
    # NOTE(review): assumes consumers of `references['bounds']` expect a
    # full path - confirm against the code that reads it.
    dataset_variable.setncatts({'bounds': bounds_name})
    dimension_variable.references['bounds'] = {bounds_full_name_path}
    dimension_variable.attributes['bounds'] = bounds_name
80 | 215 |
|
81 | 216 |
|
82 | 217 | def is_dimension_ascending(dimension: MaskedArray) -> bool:
|
|
0 commit comments