From c956403cde35e9655d9edc35603866a80bd52d79 Mon Sep 17 00:00:00 2001
From: Martin Yeo <40734014+trexfeathers@users.noreply.github.com>
Date: Sat, 18 May 2024 19:21:21 +0100
Subject: [PATCH] Automatically install iris-test-data for benchmark data
 generation (#5958)

* Automatically install iris-test-data for benchmark data generation.

* What's New entry.

* Defer to any existing environment variable.
---
 benchmarks/README.md         | 10 ++++++++--
 benchmarks/bm_runner.py      | 27 +++++++++++++++++----------
 docs/src/whatsnew/latest.rst |  3 +++
 3 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 2f6b8d5904..ed0b6497c6 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -43,11 +43,17 @@ if it is not already. You can achieve this by either:
 
 * `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use
 `iris-test-data` content, and your local `site.cfg` is not available for
-benchmark scripts.
+benchmark scripts. The benchmark runner defers to any value already set in
+the shell, but will otherwise download `iris-test-data` and set the variable
+accordingly.
 * `DATA_GEN_PYTHON` - required - path to a Python executable that can be
 used to generate benchmark test objects/files; see
 [Data generation](#data-generation). The benchmark runner sets this 
-automatically, but will defer to any value already set in the shell.
+automatically, but will defer to any value already set in the shell. Note that
+[Mule](https://github.com/metomi/mule) will be automatically installed into
+this environment, and sometimes
+[iris-test-data](https://github.com/SciTools/iris-test-data) (see
+`OVERRIDE_TEST_DATA_REPOSITORY`).
 * `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
 test data, which the benchmark scripts will create if it doesn't already
 exist. Defaults to `<root>/benchmarks/.data/` if not set. Note that some of
diff --git a/benchmarks/bm_runner.py b/benchmarks/bm_runner.py
index 2c18de4a41..ced97f450c 100644
--- a/benchmarks/bm_runner.py
+++ b/benchmarks/bm_runner.py
@@ -91,17 +91,16 @@ def _prep_data_gen_env() -> None:
         ).resolve()
         environ[data_gen_var] = str(data_gen_python)
 
+        def clone_resource(name: str, clone_source: str) -> Path:
+            resource_dir = data_gen_python.parents[1] / "resources"
+            resource_dir.mkdir(exist_ok=True)
+            clone_dir = resource_dir / name
+            if not clone_dir.is_dir():
+                _subprocess_runner(["git", "clone", clone_source, str(clone_dir)])
+            return clone_dir
+
         echo("Installing Mule into data generation environment ...")
-        mule_dir = data_gen_python.parents[1] / "resources" / "mule"
-        if not mule_dir.is_dir():
-            _subprocess_runner(
-                [
-                    "git",
-                    "clone",
-                    "https://github.com/metomi/mule.git",
-                    str(mule_dir),
-                ]
-            )
+        mule_dir = clone_resource("mule", "https://github.com/metomi/mule.git")
         _subprocess_runner(
             [
                 str(data_gen_python),
@@ -112,6 +111,14 @@ def _prep_data_gen_env() -> None:
             ]
         )
 
+        test_data_var = "OVERRIDE_TEST_DATA_REPOSITORY"
+        if test_data_var not in environ:
+            echo("Installing iris-test-data into data generation environment ...")
+            test_data_dir = clone_resource(
+                "iris-test-data", "https://github.com/SciTools/iris-test-data.git"
+            )
+            environ[test_data_var] = str(test_data_dir / "test_data")
+
         echo("Data generation environment ready.")
 
 
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index ce66bf3937..b6e2ed42c9 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -108,6 +108,9 @@ This document explains the changes made to Iris for this release
 #. `@ESadek-MO`_ added further `benchmarks` for aggregation and collapse.
    (:pull:`5954`)
 
+#. `@trexfeathers`_ set the benchmark data generation environment to
+   automatically install iris-test-data during setup. (:pull:`5958`)
+
 
 .. comment
     Whatsnew author names (@github name) in alphabetical order. Note that,