diff --git a/src/eva/data/soca_restart.py b/src/eva/data/soca_restart.py
index 818775fb..e8f6ba77 100644
--- a/src/eva/data/soca_restart.py
+++ b/src/eva/data/soca_restart.py
@@ -74,8 +74,9 @@ def execute(self, dataset_config, data_collections, timing):
         # -------------------------
         soca_vars = get(dataset_config, self.logger, 'variables', default=[])
         coord_vars = get(dataset_config, self.logger, 'coordinate variables', default=None)
+        opt_vars = get(dataset_config, self.logger, 'optional variables', default=None)

-        # Read orographic fields first
+        # Read grid fields first
         # -------------------------
         var_dict = {}
         group_name = 'SOCAgrid'
@@ -90,6 +91,17 @@ def execute(self, dataset_config, data_collections, timing):
             dims, data = read_soca(soca_filenames, var, self.logger)
             var_dict[group_name + '::' + var] = (dims, data)

+        # Create opt_vars if requested
+        # -------------------------
+        if opt_vars is not None:
+            for var in opt_vars:
+                if var == 'depth':
+                    dims, data = create_depth_variable(soca_filenames, self.logger)
+                    var_dict[group_name + '::' + var] = (dims, data)
+                else:
+                    self.logger.abort(f"{var} is not a valid optional variable. Valid optional "
+                                      f"variables are: depth")
+
         # Create dataset_config from data dictionary
         # -------------------------
         ds = xr.Dataset(var_dict)
@@ -149,9 +161,9 @@ def read_soca(file, variable, logger):

     with Dataset(file, mode='r') as f:
         try:
-            dims = ["lon", "lat"]
+            dims = ["lat", "lon"]
             if len(f.variables[variable].dimensions) > 3:
-                dims = ["lev", "lon", "lat"]
+                dims = ["lev", "lat", "lon"]
             var = np.squeeze(f.variables[variable][:])
         except KeyError:
             logger.abort(f"{variable} is not a valid variable. \nExiting ...")
@@ -159,3 +171,43 @@ def read_soca(file, variable, logger):
     return dims, var

 # --------------------------------------------------------------------------------------------------
+
+def create_depth_variable(file, logger):
+    """
+    Create the time dependent depth variable by cumulatively summing the `h` variable over the `zl`
+    dimension.
+
+    Args:
+        file (str): Path to the SOCA file.
+        logger (Logger): Logger for logging messages.
+
+    Returns:
+        tuple: A tuple containing dimensions (list) and data (numpy.ndarray) for the depth variable.
+    """
+
+    with Dataset(file, mode='r') as f:
+        h_dims = f.variables['h'].dimensions
+        h_data = f.variables['h'][:]
+
+    # Ensure `h` has the expected dimensions, which should be (time, zl, yh, xh)
+    if len(h_dims) != 4 or list(h_dims) != ['time', 'zl', 'yh', 'xh']:
+        logger.abort(f"The `h` variable does not have the expected dimensions or "
+                     f"shape: {h_dims}. Expected (time, zl, yh, xh).")
+
+    # Handle the `time` dimension
+    if h_data.shape[0] > 1:
+        logger.info(f"The `h` variable has a time dimension of size {h_data.shape[0]}. "
+                    f"Using the first time step for depth calculation.")
+        h_data = h_data[0, ...]
+    else:
+        h_data = np.squeeze(h_data, axis=0)
+
+    # Compute the cumulative sum over the `zl` dimension
+    depth_data = np.cumsum(h_data, axis=0)
+
+    # Define the dimensions for the depth variable
+    depth_dims = ['lev', 'lat', 'lon']
+
+    return depth_dims, depth_data
+
+# --------------------------------------------------------------------------------------------------
diff --git a/src/eva/transforms/select_dimension.py b/src/eva/transforms/select_dimension.py
new file mode 100644
index 00000000..6323f6d3
--- /dev/null
+++ b/src/eva/transforms/select_dimension.py
@@ -0,0 +1,118 @@
+# (C) Copyright 2025- NOAA/NWS/EMC
+#
+# (C) Copyright 2025- United States Government as represented by the Administrator of the
+# National Aeronautics and Space Administration. All Rights Reserved.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+
+from eva.transforms.transform_utils import parse_for_dict, split_collectiongroupvariable
+from eva.transforms.transform_utils import replace_cgv
+from eva.utilities.config import get
+from eva.utilities.logger import Logger
+
+
+def select_dimension(config, data_collections):
+    """
+    Selects and processes data variables for a specified dimension (e.g., latitude, longitude, depth).
+
+    Args:
+        config (dict): A configuration dictionary containing transformation parameters.
+        data_collections (DataCollections): An instance of the DataCollections class containing
+        input data.
+
+    Returns:
+        None
+
+    This function selects and processes data variables within the provided data collections based
+    on a specified dimension (e.g., latitude, longitude, depth) and either a single value or a range
+    of values for that dimension. The resulting processed variables are added to the data collections.
+
+    Example:
+        ::
+
+                config = {
+                    'collections': [...],
+                    'groups': [...],
+                    'variables': [...],
+                    'new name': 'dimension_selected_variable',
+                    'starting field': 'original_variable',
+                    'dimension': 'lat',  # Dimension to select (e.g., lat, lon, depth)
+                    'value': 5,  # Single value
+                    # OR
+                    'start value': -5,
+                    'end value': 5  # Range of values
+                }
+                select_dimension(config, data_collections)
+    """
+
+    # Create a logger
+    logger = Logger('SelectDimensionTransform')
+
+    # Parse config for dictionary
+    [collections, groups, variables] = parse_for_dict(config, logger)
+
+    # Parse config for the expression and new collection/group/variable naming
+    new_name_template = get(config, logger, 'new name')
+    starting_field_template = get(config, logger, 'starting field')
+
+    # Get the dimension to select
+    dimension = get(config, logger, 'dimension', abort_on_failure=True)
+
+    # Get value or start value and end value from yaml file
+    values = [None]
+    value = get(config, logger, 'value', abort_on_failure=False)
+    if value is not None:
+        values = [value]
+    else:
+        start_value = get(config, logger, 'start value', abort_on_failure=False)
+        end_value = get(config, logger, 'end value', abort_on_failure=False)
+        if start_value is not None and end_value is not None:
+            values = [start_value, end_value]
+
+    if None in values:
+        logger.abort(f'{dimension} value(s) for transformation not specified in yaml file. '
+                     f'This should be either value: value or '
+                     f'start value: value and end value: value')
+    if len(values) > 2:
+        logger.info(f'WARNING: More than 2 {dimension} values specified in yaml file. '
+                    f'Only the first 2 values will be used.')
+
+    # Loop over the templates
+    for collection in collections:
+        for group in groups:
+            for variable in variables:
+
+                if variable is not None:
+                    # Replace collection, group, variable in template
+                    [new_name, starting_field] = replace_cgv(logger, collection, group, variable,
+                                                             new_name_template,
+                                                             starting_field_template)
+                else:
+                    new_name = new_name_template
+                    starting_field = starting_field_template
+
+                # Get a dataset for the requested collection::group::variable
+                cgv = split_collectiongroupvariable(logger, starting_field)
+                select_dim = data_collections.get_variable_data_array(cgv[0], cgv[1], cgv[2])
+
+                # Get the collection, group, var for new dataset
+                cgv_new = split_collectiongroupvariable(logger, new_name)
+
+                # If a range of values is provided, select the slice; otherwise, select a single value
+                if len(values) == 2:
+                    if dimension in ['lat', 'lon'] and select_dim[dimension].ndim == 2:
+                        # Create a mask for the range
+                        mask = (select_dim[dimension] >= values[0]) & \
+                               (select_dim[dimension] <= values[1])
+                        select_dim = select_dim.where(mask, drop=True)
+                    else:
+                        select_dim = select_dim.sel({dimension: slice(values[0], values[1])})
+                else:
+                    if dimension in ['lat', 'lon'] and select_dim[dimension].ndim == 2:
+                        # Create a mask for the single value
+                        mask = select_dim[dimension] == values[0]
+                        select_dim = select_dim.where(mask, drop=True)
+                    else:
+                        select_dim = select_dim.sel({dimension: values[0]}, drop=True)
+
+                data_collections.add_variable_to_collection(cgv_new[0], cgv_new[1],
+                                                            cgv_new[2], select_dim)
\ No newline at end of file
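
Two techniques in this change are worth sketching outside the diff. First, `create_depth_variable` derives depth by cumulatively summing the layer thickness `h` over the vertical dimension. A minimal numpy sketch of that idea, using a hypothetical 2-layer, 2x2 grid (the values are illustrative only, not taken from any SOCA restart):

```python
import numpy as np

# Layer thicknesses on (zl, yh, xh), after the time axis has been removed.
h = np.array([[[10.0, 10.0],
               [10.0, 10.0]],
              [[20.0, 20.0],
               [20.0, 20.0]]])

# Cumulative sum over the vertical (zl) axis: depth at the bottom of each layer.
depth = np.cumsum(h, axis=0)
print(depth[-1])  # 30.0 everywhere, i.e. the total water-column thickness
```

Second, `select_dimension` handles 2-D `lat`/`lon` coordinates by building a mask and calling `where(..., drop=True)` rather than a positional `sel` slice. A small xarray sketch of that path, with a toy DataArray and coordinate values assumed purely for illustration:

```python
import numpy as np
import xarray as xr

# A 2-D latitude coordinate, as on a curvilinear ocean grid.
lat = xr.DataArray([[-10.0, -10.0],
                    [10.0, 20.0]], dims=('yh', 'xh'))
da = xr.DataArray(np.arange(4.0).reshape(2, 2), dims=('yh', 'xh'), coords={'lat': lat})

# Keep points with -5 <= lat <= 15; rows/columns that are entirely masked are dropped.
mask = (da['lat'] >= -5) & (da['lat'] <= 15)
subset = da.where(mask, drop=True)
```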