From ffaeff1ecc9ba203d7d9cec6a9af20925707aa94 Mon Sep 17 00:00:00 2001
From: David Hohn
Date: Wed, 18 Sep 2024 17:09:57 +0200
Subject: [PATCH] cat experiments first, then do the time based cat :black_cat:
 (#2343)

Co-authored-by: David Hohn
Co-authored-by: Bouwe Andela
---
 esmvalcore/preprocessor/_io.py                | 33 +++++++++++++++++++
 .../preprocessor/_io/test_concatenate.py      | 28 ++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py
index 900d026943..649bb577a6 100644
--- a/esmvalcore/preprocessor/_io.py
+++ b/esmvalcore/preprocessor/_io.py
@@ -20,6 +20,7 @@
 
 from esmvalcore.cmor.check import CheckLevels
 from esmvalcore.iris_helpers import merge_cube_attributes
+from esmvalcore.esgf.facets import FACETS
 
 from .._task import write_ncl_settings
 
@@ -306,6 +307,36 @@ def _sort_cubes_by_time(cubes):
     return cubes
 
 
+def _concatenate_cubes_by_experiment(
+    cubes: list[iris.cube.Cube],
+) -> list[iris.cube.Cube]:
+    """Concatenate cubes by experiment.
+
+    This ensures overlapping (branching) experiments are handled correctly.
+    """
+    # get the possible facet names in CMIP3, 5, 6 for exp
+    # currently these are 'experiment', 'experiment_id'
+    exp_facet_names = {
+        project["exp"] for project in FACETS.values() if "exp" in project
+    }
+
+    def get_exp(cube: iris.cube.Cube) -> str:
+        for key in exp_facet_names:
+            if key in cube.attributes:
+                return cube.attributes[key]
+        return ""
+
+    experiments = {get_exp(cube) for cube in cubes}
+    if len(experiments) > 1:
+        # first do experiment-wise concatenation, then time-based
+        cubes = [
+            concatenate([cube for cube in cubes if get_exp(cube) == exp])
+            for exp in experiments
+        ]
+
+    return cubes
+
+
 def concatenate(cubes, check_level=CheckLevels.DEFAULT):
     """Concatenate all cubes after fixing metadata.
 
@@ -331,6 +362,8 @@ def concatenate(cubes, check_level=CheckLevels.DEFAULT):
     if len(cubes) == 1:
         return cubes[0]
 
+    cubes = _concatenate_cubes_by_experiment(cubes)
+
     merge_cube_attributes(cubes)
     cubes = _sort_cubes_by_time(cubes)
     _fix_calendars(cubes)
diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py
index d8d5d680c1..7076f3a603 100644
--- a/tests/integration/preprocessor/_io/test_concatenate.py
+++ b/tests/integration/preprocessor/_io/test_concatenate.py
@@ -11,6 +11,7 @@
 
 from esmvalcore.cmor.check import CheckLevels
 from esmvalcore.preprocessor import _io
+from tests import assert_array_equal
 
 
 def get_hybrid_pressure_cube():
@@ -253,6 +254,33 @@ def test_concatenate_with_order(self):
             concatenated.coord('time').points,
             np.array([1., 2., 5., 7., 100.]))
 
+    def test_concatenate_by_experiment_first(self):
+        """Test that data from experiments does not get mixed."""
+        historical_1 = Cube(
+            np.zeros(2),
+            dim_coords_and_dims=([
+                DimCoord(np.arange(2),
+                         var_name='time',
+                         standard_name='time',
+                         units='days since 1950-01-01'), 0
+            ], ),
+            attributes={'experiment_id': 'historical'},
+        )
+        historical_2 = historical_1.copy()
+        historical_2.coord('time').points = np.arange(2, 4)
+        historical_3 = historical_1.copy()
+        historical_3.coord('time').points = np.arange(4, 6)
+        ssp585_1 = historical_1.copy(np.ones(2))
+        ssp585_1.coord('time').points = np.arange(3, 5)
+        ssp585_1.attributes['experiment_id'] = 'ssp585'
+        ssp585_2 = ssp585_1.copy()
+        ssp585_2.coord('time').points = np.arange(5, 7)
+        result = _io.concatenate(
+            [historical_1, historical_2, historical_3, ssp585_1, ssp585_2]
+        )
+        assert_array_equal(result.coord('time').points, np.arange(7))
+        assert_array_equal(result.data, np.array([0, 0, 0, 1, 1, 1, 1]))
+
     def test_concatenate_differing_attributes(self):
         """Test concatenation of cubes with different attributes."""
         cubes = CubeList(self.raw_cubes)