From 21ec6c22aba22b0a077d777c137def71488cd99f Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Tue, 5 Dec 2023 16:32:39 +0100 Subject: [PATCH] Faster coordinate checks and longitude fix (#2264) --- esmvalcore/cmor/_fixes/fix.py | 16 +++++------ esmvalcore/cmor/check.py | 53 +++++++++++++++++++++++------------ 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index a6156a231c..f0e794f24d 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Optional +import dask import numpy as np from cf_units import Unit from iris.coords import Coord, CoordExtent @@ -675,18 +676,17 @@ def _fix_longitude_0_360( if not cube_coord.standard_name == 'longitude': return (cube, cube_coord) - # Only apply fixes when values are outside of valid range [0, 360] - inside_0_360 = all([ - cube_coord.core_points().min() >= 0.0, - cube_coord.core_points().max() <= 360.0, - ]) - if inside_0_360: + points = cube_coord.core_points() + min_, max_ = dask.compute(points.min(), points.max()) + + # Do not apply fixes when values are inside of valid range [0, 360] + if min_ >= 0.0 and max_ <= 360.0: return (cube, cube_coord) # Cannot fix longitudes outside [-360, 720] - if np.any(cube_coord.core_points() < -360.0): + if min_ < -360.0: return (cube, cube_coord) - if np.any(cube_coord.core_points() > 720.0): + if max_ > 720.0: return (cube, cube_coord) # cube.intersection only works for cells with 0 or 2 bounds diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 43214168b8..cba7e347ae 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -3,17 +3,20 @@ import logging import warnings +from collections import namedtuple from collections.abc import Callable from enum import IntEnum from functools import cached_property from typing import Optional import cf_units +import dask import 
iris.coord_categorisation import iris.coords import iris.exceptions import iris.util import numpy as np +from iris.coords import Coord from iris.cube import Cube from esmvalcore.cmor._fixes.fix import GenericFix @@ -24,7 +27,7 @@ _get_simplified_calendar, _is_unstructured_grid, ) -from esmvalcore.cmor.table import get_var_info +from esmvalcore.cmor.table import CoordinateInfo, get_var_info from esmvalcore.exceptions import ESMValCoreDeprecationWarning @@ -500,6 +503,7 @@ def _check_alternative_dim_names(self, key): def _check_coords(self): """Check coordinates.""" + coords = [] for coordinate in self._cmor_var.coordinates.values(): # Cannot check generic_level coords with no CMOR information if coordinate.generic_level and not coordinate.out_name: @@ -513,6 +517,36 @@ def _check_coords(self): continue self._check_coord(coordinate, coord, var_name) + coords.append((coordinate, coord)) + + self._check_coord_ranges(coords) + + def _check_coord_ranges(self, coords: list[tuple[CoordinateInfo, Coord]]): + """Check coordinate values are inside valid ranges.""" + Limit = namedtuple('Limit', ['name', 'type', 'limit', 'value']) + + limits = [] + for coord_info, coord in coords: + points = coord.core_points() + for limit_type in 'min', 'max': + valid = getattr(coord_info, f'valid_{limit_type}') + if valid != "": + limit = Limit( + name=coord_info.out_name, + type=limit_type, + limit=float(valid), + value=getattr(points, limit_type)(), + ) + limits.append(limit) + + limits = dask.compute(*limits) + for limit in limits: + if limit.type == 'min' and limit.value < limit.limit: + self.report_critical(self._vals_msg, limit.name, + '< valid_min =', limit.limit) + if limit.type == 'max' and limit.value > limit.limit: + self.report_critical(self._vals_msg, limit.name, + '> valid_max =', limit.limit) def _check_coords_data(self): """Check coordinate data.""" @@ -593,24 +627,7 @@ def _check_coord_monotonicity_and_direction(self, cmor, coord, var_name): def _check_coord_points(self, 
coord_info, coord, var_name): """Check coordinate points: values, bounds and monotonicity.""" - # Check requested coordinate values exist in coord.points self._check_requested_values(coord, coord_info, var_name) - - # Check coordinate value ranges - if coord_info.valid_min: - valid_min = float(coord_info.valid_min) - if np.any(coord.core_points() < valid_min): - self.report_critical(self._vals_msg, var_name, - '< {} ='.format('valid_min'), - valid_min) - - if coord_info.valid_max: - valid_max = float(coord_info.valid_max) - if np.any(coord.core_points() > valid_max): - self.report_critical(self._vals_msg, var_name, - '> {} ='.format('valid_max'), - valid_max) - self._check_coord_bounds(coord_info, coord, var_name) self._check_coord_monotonicity_and_direction(coord_info, coord, var_name)