Add acceptance tests for SAMOS CLIs where nothing should be returned (#2219)

brhooper · bayliffe · web-flow · commit c802190d56a4 · 2025-11-12T08:50:35.000Z
* Add null return to CLIs if no GAMs have been found.

* Add handling for rolling window calculations returning NaN for all values due to having insufficient input data.

* Improve test doc-string.

* Add acceptance tests for SAMOS estimation CLIs for occasions where nothing should be returned. Improve doc-string.

* Update acceptance test doc-strings following review.

* Fix acceptance tests for EMOS and SAMOS which were incorrectly checking for no output.

---------

Co-authored-by: bayliffe <benjamin.ayliffe@metoffice.gov.uk>
diff --git a/improver/calibration/__init__.py b/improver/calibration/__init__.py
@@ -391,6 +391,9 @@ def split_cubes_for_samos(
             cubes. If False, an error will be raised if any such cubes are found.
 
     Raises:
+        IOError:
+            If no forecast cube is found and/or no truth cube is found when a
+            truth_attribute has been provided.
         IOError:
             If EMOS coefficients cubes are found when they are not expected.
         IOError:
diff --git a/improver_tests/acceptance/test_estimate_emos_coefficients_from_table.py b/improver_tests/acceptance/test_estimate_emos_coefficients_from_table.py
@@ -176,4 +176,6 @@ def test_return_none(tmp_path):
         "--output",
         output_path,
     ]
-    assert run_cli(args) is None
+    run_cli(args)
+    # Check no file has been written to disk.
+    assert not output_path.exists()
diff --git a/improver_tests/acceptance/test_estimate_samos_coefficients.py b/improver_tests/acceptance/test_estimate_samos_coefficients.py
@@ -138,3 +138,31 @@ def test_estimate_samos_coefficients_sites(tmp_path):
     acc.compare(
         output_path, kgo_path, atol=COMPARE_EMOS_TOLERANCE, rtol=COMPARE_EMOS_TOLERANCE
     )
+
+
+def test_estimate_samos_coefficients_no_gam(tmp_path):
+    """
+    Test estimate-samos-coefficients when no GAM is provided. The CLI should return
+    None in this instance.
+    """
+    # The source data is from the estimate-emos-coefficients acceptance tests
+    source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal/sites"
+    history_path = source_emos_dir / "history/*.nc"
+    truth_path = source_emos_dir / "truth/*.nc"
+
+    output_path = tmp_path / "output.nc"
+    gam_features = "latitude,longitude,height"
+
+    args = [
+        history_path,
+        truth_path,
+        "--truth-attribute",
+        "mosg__model_configuration=uk_det",
+        "--gam-features",
+        gam_features,
+        "--output",
+        output_path,
+    ]
+    run_cli(args)
+    # Check no file has been written to disk.
+    assert not output_path.exists()
diff --git a/improver_tests/acceptance/test_estimate_samos_coefficients_from_table.py b/improver_tests/acceptance/test_estimate_samos_coefficients_from_table.py
@@ -114,6 +114,40 @@ def test_additional_gam_features_cube(tmp_path):
     )
 
 
+@pytest.mark.slow
+def test_no_gam(tmp_path):
+    """
+    Test estimate-samos-coefficients-from-table when no GAM is provided. The CLI should
+    return None in this instance.
+    """
+    source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/"
+    history_path = source_dir / "forecast_table"
+    truth_path = source_dir / "truth_table"
+
+    output_path = tmp_path / "output.nc"
+
+    compulsory_args = [history_path, truth_path]
+    named_args = [
+        "--gam-features",
+        "latitude,longitude,altitude",
+        "--percentiles",
+        "10,20,30,40,50,60,70,80,90",
+        "--forecast-period",
+        "86400",
+        "--training-length",
+        "5",
+        "--diagnostic",
+        "temperature_at_screen_level",
+        "--cycletime",
+        "20210805T2100Z",
+        "--output",
+        output_path,
+    ]
+    run_cli(compulsory_args + named_args)
+    # Check no file has been written to disk.
+    assert not output_path.exists()
+
+
 @pytest.mark.slow
 def test_return_none(tmp_path):
     """
@@ -144,4 +178,6 @@ def test_return_none(tmp_path):
         "--output",
         output_path,
     ]
-    assert run_cli(compulsory_args + named_args) is None
+    run_cli(compulsory_args + named_args)
+    # Check no file has been written to disk.
+    assert not output_path.exists()
diff --git a/improver_tests/acceptance/test_estimate_samos_gams.py b/improver_tests/acceptance/test_estimate_samos_gams.py
@@ -146,3 +146,45 @@ def test_gam_at_sites(tmp_path):
     # pickled objects are the same, not the actual objects as
     # there is no function to compare the GAM class objects.
     acc.compare(output_path, kgo_path, file_type="generic_pickle")
+
+
+def test_insufficient_data(tmp_path):
+    """
+    Test estimate-samos-gams returns None when insufficient data is available at all
+    sites.
+
+    This test provides 3 days of input data but uses a window length of 11 days. This
+    will cause the training data at all sites to be considered insufficient to fit the
+    GAMs (at least 50% of the possible data points are required). Hence, None should be
+    returned.
+    """
+    source_emos_dir = acc.kgo_root() / "estimate-emos-coefficients/normal/sites"
+    history_path = source_emos_dir / "history/*.nc"
+    truth_path = source_emos_dir / "truth/*.nc"
+
+    kgo_dir = acc.kgo_root() / "estimate-samos-gam"
+    model_specification_path = kgo_dir / "samos_model_spec_simple.json"
+    output_path = tmp_path / "output.pkl"
+
+    gam_features = "latitude,longitude,height"
+    args = [
+        history_path,
+        truth_path,
+        "--distribution",
+        "normal",
+        "--truth-attribute",
+        "mosg__model_configuration=uk_det",
+        "--tolerance",
+        TOLERANCE,
+        "--gam-features",
+        gam_features,
+        "--model-specification",
+        model_specification_path,
+        "--window-length",
+        "11",
+        "--output",
+        output_path,
+    ]
+    run_cli(args)
+    # Check no file has been written to disk.
+    assert not output_path.exists()
diff --git a/improver_tests/acceptance/test_estimate_samos_gams_from_table.py b/improver_tests/acceptance/test_estimate_samos_gams_from_table.py
@@ -173,3 +173,98 @@ def test_additional_features_cubes(
     # pickled objects are the same, not the actual objects as
     # there is no function to compare the GAM class objects.
     acc.compare(output_path, kgo_path, file_type="generic_pickle")
+
+
+@pytest.mark.slow
+def test_no_forecast(
+    tmp_path,
+):
+    """
+    Test estimate-samos-gams-from-table returns None when no forecast data is available
+    for the given leadtime in the given table.
+    """
+    source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/"
+    history_path = source_dir / "forecast_table"
+    truth_path = source_dir / "truth_table"
+
+    kgo_dir = acc.kgo_root() / "estimate-samos-gams-from-table/"
+
+    output_path = tmp_path / "output.pkl"
+    compulsory_args = [history_path, truth_path]
+    named_args = [
+        "--diagnostic",
+        "temperature_at_screen_level",
+        "--cycletime",
+        "20210805T2100Z",
+        "--forecast-period",
+        "3600000",
+        "--training-length",
+        "5",
+        "--distribution",
+        "normal",
+        "--tolerance",
+        "1e-4",
+        "--gam-features",
+        "latitude,longitude,altitude",
+        "--model-specification",
+        kgo_dir / "samos_model_spec_simple.json",
+        "--percentiles",
+        "10,20,30,40,50,60,70,80,90",
+        "--window-length",
+        "3",
+        "--output",
+        output_path,
+    ]
+    run_cli(compulsory_args + named_args)
+    # Check no file has been written to disk.
+    assert not output_path.exists()
+
+
+@pytest.mark.slow
+def test_insufficient_data(
+    tmp_path,
+):
+    """
+    Test estimate-samos-gams-from-table returns None when insufficient data is
+    available at all sites.
+
+    This test provides 3 days of input data but uses a window length of 11 days. This
+    will cause the training data at all sites to be considered insufficient to fit the
+    GAMs (at least 50% of the possible data points are required). Hence, None should be
+    returned.
+    """
+    source_dir = acc.kgo_root() / "estimate-emos-coefficients-from-table/"
+    history_path = source_dir / "forecast_table"
+    truth_path = source_dir / "truth_table"
+
+    kgo_dir = acc.kgo_root() / "estimate-samos-gams-from-table/"
+
+    output_path = tmp_path / "output.pkl"
+    compulsory_args = [history_path, truth_path]
+    named_args = [
+        "--diagnostic",
+        "temperature_at_screen_level",
+        "--cycletime",
+        "20210805T2100Z",
+        "--forecast-period",
+        "86400",
+        "--training-length",
+        "5",
+        "--distribution",
+        "normal",
+        "--tolerance",
+        "1e-4",
+        "--gam-features",
+        "latitude,longitude,altitude",
+        "--model-specification",
+        kgo_dir / "samos_model_spec_simple.json",
+        "--percentiles",
+        "10,20,30,40,50,60,70,80,90",
+        "--window-length",
+        "11",
+        "--output",
+        output_path,
+    ]
+    run_cli(compulsory_args + named_args)
+    # Check no file has been written to disk.
+    assert not output_path.exists()

Original file line number	Diff line number	Diff line change
`@@ -176,4 +176,6 @@ def test_return_none(tmp_path):`
`176`	`176`	`"--output",`
`177`	`177`	`output_path,`
`178`	`178`	`]`
`179`		`- assert run_cli(args) is None`
	`179`	`+ run_cli(args)`
	`180`	`+ # Check no file has been written to disk.`
	`181`	`+ assert not output_path.exists()`