From c956403cde35e9655d9edc35603866a80bd52d79 Mon Sep 17 00:00:00 2001 From: Martin Yeo <40734014+trexfeathers@users.noreply.github.com> Date: Sat, 18 May 2024 19:21:21 +0100 Subject: [PATCH] Automatically install iris-test-data for benchmark data generation (#5958) * Automatically install iris-test-data for benchmark data generation. * What's New entry. * Defer to any existing environment variable. --- benchmarks/README.md | 10 ++++++++-- benchmarks/bm_runner.py | 27 +++++++++++++++++---------- docs/src/whatsnew/latest.rst | 3 +++ 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 2f6b8d5904..ed0b6497c6 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -43,11 +43,17 @@ if it is not already. You can achieve this by either: * `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use `iris-test-data` content, and your local `site.cfg` is not available for -benchmark scripts. +benchmark scripts. The benchmark runner defers to any value already set in +the shell, but will otherwise download `iris-test-data` and set the variable +accordingly. * `DATA_GEN_PYTHON` - required - path to a Python executable that can be used to generate benchmark test objects/files; see [Data generation](#data-generation). The benchmark runner sets this -automatically, but will defer to any value already set in the shell. +automatically, but will defer to any value already set in the shell. Note that +[Mule](https://github.com/metomi/mule) will be automatically installed into +this environment, and sometimes +[iris-test-data](https://github.com/SciTools/iris-test-data) (see +`OVERRIDE_TEST_DATA_REPOSITORY`). * `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic test data, which the benchmark scripts will create if it doesn't already exist. Defaults to `/benchmarks/.data/` if not set. 
Note that some of diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py index 2c18de4a41..ced97f450c 100644 --- a/benchmarks/bm_runner.py +++ b/benchmarks/bm_runner.py @@ -91,17 +91,16 @@ def _prep_data_gen_env() -> None: ).resolve() environ[data_gen_var] = str(data_gen_python) + def clone_resource(name: str, clone_source: str) -> Path: + resource_dir = data_gen_python.parents[1] / "resources" + resource_dir.mkdir(exist_ok=True) + clone_dir = resource_dir / name + if not clone_dir.is_dir(): + _subprocess_runner(["git", "clone", clone_source, str(clone_dir)]) + return clone_dir + echo("Installing Mule into data generation environment ...") - mule_dir = data_gen_python.parents[1] / "resources" / "mule" - if not mule_dir.is_dir(): - _subprocess_runner( - [ - "git", - "clone", - "https://github.com/metomi/mule.git", - str(mule_dir), - ] - ) + mule_dir = clone_resource("mule", "https://github.com/metomi/mule.git") _subprocess_runner( [ str(data_gen_python), @@ -112,6 +111,14 @@ def _prep_data_gen_env() -> None: ] ) + test_data_var = "OVERRIDE_TEST_DATA_REPOSITORY" + if test_data_var not in environ: + echo("Installing iris-test-data into data generation environment ...") + test_data_dir = clone_resource( + "iris-test-data", "https://github.com/SciTools/iris-test-data.git" + ) + environ[test_data_var] = str(test_data_dir / "test_data") + echo("Data generation environment ready.") diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index ce66bf3937..b6e2ed42c9 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -108,6 +108,9 @@ This document explains the changes made to Iris for this release #. `@ESadek-MO`_ added further `benchmarks` for aggregation and collapse. (:pull:`5954`) +#. `@trexfeathers`_ set the benchmark data generation environment to + automatically install iris-test-data during setup. (:pull:`5958`) + .. comment Whatsnew author names (@github name) in alphabetical order. Note that,