diff --git a/model/testing/src/icon4py/model/testing/data_handling.py b/model/testing/src/icon4py/model/testing/data_handling.py index 9624c64839..95bc8b8369 100644 --- a/model/testing/src/icon4py/model/testing/data_handling.py +++ b/model/testing/src/icon4py/model/testing/data_handling.py @@ -6,6 +6,7 @@ # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause +import os import pathlib import tarfile @@ -36,16 +37,24 @@ def download_and_extract(uri: str, dst: pathlib.Path, data_file: str = "download pathlib.Path(data_file).unlink(missing_ok=True) -def download_test_data(dst: pathlib.Path, uri: str) -> None: +# TODO(msimberg): Remove dst_subdir once archives don't contain a subdir with +# special name. +def download_test_data(dst_root: pathlib.Path, dst_subdir: pathlib.Path, uri: str) -> None: + dst = dst_root.joinpath(dst_subdir) if config.ENABLE_TESTDATA_DOWNLOAD: - # We create and lock the *parent* directory as we later check for existence of `dst`. - dst.parent.mkdir(parents=True, exist_ok=True) - with locking.lock(dst.parent): - if not dst.exists(): - download_and_extract(uri, dst) + dst.mkdir(parents=True, exist_ok=True) + # Explicitly specify the lockfile name to make sure that os.listdir sees + # it if it's created in dst. + lockfile = "filelock.lock" + with locking.lock(dst, lockfile=lockfile): + files = os.listdir(dst) + if len(files) == 0 or (len(files) == 1 and files[0] == lockfile): + download_and_extract(uri, dst_root) else: # If test data download is disabled, we check if the directory exists - # without locking. We assume the location is managed by the user + # and isn't empty without locking. We assume the location is managed by the user # and avoid locking shared directories (e.g. on CI). if not dst.exists(): raise RuntimeError(f"Test data {dst} does not exist, and downloading is disabled.") + elif not any(os.scandir(dst)): + raise RuntimeError(f"Test data {dst} exists but is empty, and downloading is disabled.") diff --git a/model/testing/src/icon4py/model/testing/datatest_utils.py b/model/testing/src/icon4py/model/testing/datatest_utils.py index e9cf03b23c..d0b42a35b9 100644 --- a/model/testing/src/icon4py/model/testing/datatest_utils.py +++ b/model/testing/src/icon4py/model/testing/datatest_utils.py @@ -52,11 +52,17 @@ def get_ranked_data_path(base_path: pathlib.Path, comm_size: int) -> pathlib.Pat return base_path.absolute().joinpath(f"mpitask{comm_size}") +def get_datapath_subdir_for_experiment( + experiment: definitions.Experiment = definitions.Experiments.MCH_CH_R04B09, +) -> pathlib.Path: + return pathlib.Path(f"{experiment.name}/ser_data") + + def get_datapath_for_experiment( ranked_base_path: pathlib.Path, experiment: definitions.Experiment = definitions.Experiments.MCH_CH_R04B09, ) -> pathlib.Path: - return ranked_base_path.joinpath(f"{experiment.name}/ser_data") + return ranked_base_path.joinpath(get_datapath_subdir_for_experiment(experiment)) def create_icon_serial_data_provider( diff --git a/model/testing/src/icon4py/model/testing/fixtures/datatest.py b/model/testing/src/icon4py/model/testing/fixtures/datatest.py index c1d17332e9..0727c962ed 100644 --- a/model/testing/src/icon4py/model/testing/fixtures/datatest.py +++ b/model/testing/src/icon4py/model/testing/fixtures/datatest.py @@ -117,9 +117,9 @@ def _download_ser_data( ) -> None: # not a fixture to be able to use this function outside of pytest try: - destination_path = dt_utils.get_datapath_for_experiment(_ranked_data_path, _experiment) + subdir = dt_utils.get_datapath_subdir_for_experiment(_experiment) uri = _experiment.partitioned_data[comm_size] - data.download_test_data(destination_path, uri) + data.download_test_data(_ranked_data_path, subdir, uri) except KeyError as err: raise RuntimeError( f"No data for communicator of size {comm_size} exists, use 1, 2 or 4" diff --git a/model/testing/src/icon4py/model/testing/locking.py b/model/testing/src/icon4py/model/testing/locking.py index 89cdf842fb..ef96d25072 100644 --- a/model/testing/src/icon4py/model/testing/locking.py +++ b/model/testing/src/icon4py/model/testing/locking.py @@ -17,11 +17,13 @@ # Consider moving to common if this is needed outside of testing -def lock(directory: pathlib.Path | str, suffix: str = ".lock") -> contextlib.AbstractContextManager: +def lock( + directory: pathlib.Path | str, lockfile: str = "filelock.lock" +) -> contextlib.AbstractContextManager: """Create a lock for the given path.""" directory = pathlib.Path(directory) if not directory.is_dir(): raise ValueError(f"Expected a directory, got: {directory}") - path = directory / f"filelock{suffix}" + path = directory / lockfile return filelock.FileLock(str(path))