From 388f7f45d97d680f6198057f4f0d43e362a84d25 Mon Sep 17 00:00:00 2001
From: Rodrigo Neto
Date: Tue, 16 Jul 2024 18:47:04 -0300
Subject: [PATCH] Add historic curves data to the History Matching results
 metadata

ASIM-5713
---
 CHANGELOG.rst                               |   2 +-
 src/alfasim_sdk/result_reader/aggregator.py | 124 +++++++++++++-----
 .../result_reader/aggregator_constants.py   |   1 +
 tests/conftest.py                           |  78 +++++++++--
 tests/results/test_aggregator.py            |  59 ++++++++-
 5 files changed, 218 insertions(+), 46 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2a6345c0..18b221cb 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,7 +5,7 @@ CHANGELOG
 2024.2 (unreleased)
 ===================
 
-*
+* Added support for reading historic data curves directly from the results of History Matching analyses.
 
 
 2024.1 (2024-05-27)
diff --git a/src/alfasim_sdk/result_reader/aggregator.py b/src/alfasim_sdk/result_reader/aggregator.py
index 4452de01..6250bc68 100644
--- a/src/alfasim_sdk/result_reader/aggregator.py
+++ b/src/alfasim_sdk/result_reader/aggregator.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import json
 import os
@@ -28,6 +30,9 @@
     HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
 )
 from alfasim_sdk.result_reader.aggregator_constants import HISTORY_MATCHING_GROUP_NAME
+from alfasim_sdk.result_reader.aggregator_constants import (
+    HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
+)
 from alfasim_sdk.result_reader.aggregator_constants import (
     HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
 )
@@ -219,44 +224,71 @@ def map_data(
         )
 
 
+@attr.define(slots=True, hash=True)
+class HistoricDataCurveMetadata:
+    """
+    Metadata of the historic data curves used in the History Matching analysis.
+    """
+
+    curve_id: str
+    curve_name: str
+    domain_unit: str
+    image_unit: str
+    image_category: str
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> Self:
+        return cls(
+            curve_id=data["curve_id"],
+            curve_name=data["curve_name"],
+            domain_unit=data["domain_unit"],
+            image_unit=data["image_unit"],
+            image_category=data["image_category"],
+        )
+
+
 @attr.s(slots=True, hash=False)
 class HistoryMatchingMetadata:
     """
     Holder for the History Matching results metadata.
-
-    :ivar hm_items:
-        Map of the data id and its associated metadata.
-    :ivar objective_functions:
-        Map of observed curve id to a dict of Quantity of Interest data, populated with keys
-        'trend_id' and 'property_id'. This represents the setup for this HM analysis.
-    :ivar parametric_vars:
-        Map of parametric vars to the values that represents the analysis, with all existent vars.
-        Values are either the optimal values (deterministic) or the base values (probabilistic).
-    :ivar result_directory:
-        The directory in which the result is saved.
     """
 
+    #: Map of the data id and its associated metadata.
+    hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
+    #: Map of observed curve id to a dict of Quantity of Interest data, populated with keys
+    #: 'trend_id' and 'property_id'. This represents the setup for this HM analysis.
+    objective_functions: Dict[str, Dict[str, str]] = attr.ib(
+        validator=attr.validators.instance_of(Dict)
+    )
+    #: Map of parametric vars to the values that represent the analysis, with all existing vars.
+    #: Values are either the optimal values (deterministic) or the base values (probabilistic).
+    parametric_vars: Dict[str, float] = attr.ib(
+        validator=attr.validators.instance_of(Dict)
+    )
+    #: The directory in which the result is saved.
+    result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))
+    #: Metadata of the historic curves present in the results. Optional as this was introduced
+    #: later (ASIM-5713).
+    historic_data_curve_infos: Optional[List[HistoricDataCurveMetadata]] = attr.ib(
+        validator=attr.validators.optional(attr.validators.instance_of(list)),
+        default=None,
+    )
+
     @attr.s(slots=True, hash=False)
     class HMItem:
         """
         Metadata associated with each item of the HM results.
-
-        :ivar parametric_var_id:
-            The id of the associated parametric var.
-        :ivar parametric_var_name:
-            The name of the associated parametric var.
-        :ivar min_value:
-            Lower limit of the specified range for the parametric var.
-        :ivar max_value:
-            Upper limit of the specified range for the parametric var.
-        :ivar data_index:
-            The index of the data in the result datasets.
         """
 
+        #: The id of the associated parametric var.
         parametric_var_id: str = attr.ib(validator=attr.validators.instance_of(str))
+        #: The name of the associated parametric var.
         parametric_var_name: str = attr.ib(validator=attr.validators.instance_of(str))
+        #: Lower limit of the specified range for the parametric var.
        min_value: float = attr.ib(validator=attr.validators.instance_of(float))
+        #: Upper limit of the specified range for the parametric var.
        max_value: float = attr.ib(validator=attr.validators.instance_of(float))
+        #: The index of the data in the result datasets.
        data_index: int = attr.ib(validator=attr.validators.instance_of(int))
 
        @classmethod
@@ -274,15 +306,6 @@ def from_dict(cls, data: Dict[str, Any]) -> Self:
                 data_index=data["data_index"],
             )
 
-    hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
-    objective_functions: Dict[str, Dict[str, str]] = attr.ib(
-        validator=attr.validators.instance_of(Dict)
-    )
-    parametric_vars: Dict[str, float] = attr.ib(
-        validator=attr.validators.instance_of(Dict)
-    )
-    result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))
-
     @classmethod
     def empty(cls, result_directory: Path) -> Self:
         return cls(
@@ -300,12 +323,19 @@ def from_result_directory(cls, result_directory: Path) -> Self:
 
         If result file is not ready or doesn't exist, return an empty metadata.
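+        Results written before historic data curves were stored (ASIM-5713) are still
+        readable; for those, ``historic_data_curve_infos`` is left as ``None``.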
""" - def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]: + def map_meta_items( + hm_metadata: Dict, + ) -> Dict[str, HistoryMatchingMetadata.HMItem]: return { key: HistoryMatchingMetadata.HMItem.from_dict(data) for key, data in hm_metadata.items() } + def map_historic_data_infos( + infos: List[Dict[str, Any]] + ) -> List[HistoricDataCurveMetadata]: + return [HistoricDataCurveMetadata.from_dict(info) for info in infos] + with open_result_file(result_directory) as result_file: if not result_file: return cls.empty(result_directory=result_directory) @@ -321,10 +351,14 @@ def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]: objective_functions = some_item_metadata["objective_functions"] parametric_vars = some_item_metadata["parametric_vars"] + historic_curve_infos = some_item_metadata.get("historic_data_curves_info") + if historic_curve_infos is not None: + historic_curve_infos = map_historic_data_infos(historic_curve_infos) return cls( - hm_items=map_data(loaded_metadata), + hm_items=map_meta_items(loaded_metadata), objective_functions=objective_functions, + historic_data_curve_infos=historic_curve_infos, parametric_vars=parametric_vars, result_directory=result_directory, ) @@ -1776,6 +1810,30 @@ def read_history_matching_result( return result_map +def read_history_matching_historic_data_curves( + metadata: HistoryMatchingMetadata, +) -> Dict[str, np.ndarray]: + """ + :return: + Map of historic data curve id to the actual curve, represented as an array of points in the + form [[y1, y2, ..., yn], [x1, x1, ..., xn]]. + """ + with open_result_file(metadata.result_directory) as result_file: + if not result_file: + return {} + + result = result_file.get(HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME) + + if result is None: + # Old result files may not have this data group. 
+            return {}
+
+        return {
+            info.curve_id: result[info.curve_id][:]
+            for info in metadata.historic_data_curve_infos
+        }
+
+
 @contextmanager
 def open_result_file(
     result_directory: Path, result_filename: str = "result"
diff --git a/src/alfasim_sdk/result_reader/aggregator_constants.py b/src/alfasim_sdk/result_reader/aggregator_constants.py
index 70e21088..3a0ceda2 100644
--- a/src/alfasim_sdk/result_reader/aggregator_constants.py
+++ b/src/alfasim_sdk/result_reader/aggregator_constants.py
@@ -4,6 +4,7 @@
 GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME = "global_sensitivity_analysis"
 
 HISTORY_MATCHING_GROUP_NAME = "history_matching"
+HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME = "history_matching_historic_data"
 HISTORY_MATCHING_DETERMINISTIC_DSET_NAME = "history_matching_deterministic"
 HISTORY_MATCHING_PROBABILISTIC_DSET_NAME = "history_matching_probabilistic"
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 61ea911b..6715cb12 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,6 +3,7 @@
 import shutil
 import textwrap
 from pathlib import Path
+from typing import Dict
 from typing import List
 
 import h5py
@@ -11,6 +12,9 @@
 from _pytest.fixtures import FixtureRequest
 from _pytest.monkeypatch import MonkeyPatch
 
+from alfasim_sdk.result_reader.aggregator import (
+    HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
+)
 from alfasim_sdk.result_reader.aggregator_constants import (
     GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME,
 )
@@ -272,21 +276,17 @@ def global_sa_results_dir(datadir: Path) -> Path:
 def _create_and_populate_hm_result_file(
     result_dir: Path,
     result: np.ndarray,
-    dataset_key: str,
+    result_dataset_key: str,
+    historic_data_curves: Dict[str, np.ndarray],
 ) -> None:
     result_dir.mkdir(parents=True, exist_ok=True)
     result_filepath = result_dir / "result"
 
     with h5py.File(result_filepath, "x", libver="latest", locking=False) as file:
         meta_group = file.create_group(META_GROUP_NAME, track_order=True)
-        data_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)
+        result_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)
 
-        dataset = data_group.create_dataset(
-            dataset_key,
-            shape=result.shape,
-            dtype=np.float64,
-            maxshape=tuple(None for _ in result.shape),
-        )
+        result_group.create_dataset(result_dataset_key, data=result)
 
         objective_functions = {
             "observed_curve_1": {"trend_id": "trend_1", "property_id": "holdup"},
@@ -314,9 +314,34 @@ def _create_and_populate_hm_result_file(
                 "data_index": 1,
             },
         }
+        if historic_data_curves:
+            historic_curves_group = file.create_group(
+                HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME
+            )
+            for curve_id, curve in historic_data_curves.items():
+                historic_curves_group.create_dataset(curve_id, data=curve)
+
+            historic_curves_meta = [
+                {
+                    "curve_id": "observed_curve_1",
+                    "curve_name": "curve 1",
+                    "domain_unit": "s",
+                    "image_unit": "m3/m3",
+                    "image_category": "volume fraction",
+                },
+                {
+                    "curve_id": "observed_curve_2",
+                    "curve_name": "curve 2",
+                    "domain_unit": "s",
+                    "image_unit": "Pa",
+                    "image_category": "pressure",
+                },
+            ]
+            meta_entries = list(fake_meta.values())
+            for entry in meta_entries:
+                entry["historic_data_curves_info"] = historic_curves_meta
 
         meta_group.attrs[HISTORY_MATCHING_GROUP_NAME] = json.dumps(fake_meta)
 
-        dataset[:] = result
         file.swmr_mode = True
 
@@ -332,11 +357,16 @@ def hm_probabilistic_results_dir(datadir: Path) -> Path:
     probabilistic_result = np.array(
         [[0.1, 0.22, 1.0, 0.8, 0.55], [3.0, 6.0, 5.1, 4.7, 6.3]]
     )
+    historic_data_curves = {
+        "observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]),
"observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]), + } _create_and_populate_hm_result_file( result_dir=result_dir, result=probabilistic_result, - dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME, + result_dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME, + historic_data_curves=historic_data_curves, ) return result_dir @@ -349,13 +379,39 @@ def hm_deterministic_results_dir(datadir: Path) -> Path: """ import numpy as np + result_dir = datadir / "main-HM-deterministic" + deterministic_result = np.array([0.1, 3.2]) + historic_data_curves = { + "observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]), + "observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]), + } + + _create_and_populate_hm_result_file( + result_dir=result_dir, + result=deterministic_result, + result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME, + historic_data_curves=historic_data_curves, + ) + + return result_dir + + +@pytest.fixture() +def hm_results_dir_without_historic_data(datadir: Path) -> Path: + """ + Create a History Matching Deterministic result folder with a populated HDF5 file in the old + format, i.e. without historic data curves. + """ + import numpy as np + result_dir = datadir / "main-HM-deterministic" deterministic_result = np.array([0.1, 3.2]) _create_and_populate_hm_result_file( result_dir=result_dir, result=deterministic_result, - dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME, + result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME, + historic_data_curves={}, ) return result_dir diff --git a/tests/results/test_aggregator.py b/tests/results/test_aggregator.py index af516d96..12a49a57 100644 --- a/tests/results/test_aggregator.py +++ b/tests/results/test_aggregator.py @@ -13,6 +13,7 @@ from pytest_regressions.num_regression import NumericRegressionFixture from alfasim_sdk.result_reader.aggregator import concatenate_metadata +from alfasim_sdk.result_reader.aggregator import HistoricDataCurveMetadata from alfasim_sdk.result_reader.aggregator import HistoryMatchingMetadata from alfasim_sdk.result_reader.aggregator import open_result_files from alfasim_sdk.result_reader.aggregator import ( @@ -22,6 +23,9 @@ read_global_sensitivity_analysis_time_set, ) from alfasim_sdk.result_reader.aggregator import read_global_sensitivity_coefficients +from alfasim_sdk.result_reader.aggregator import ( + read_history_matching_historic_data_curves, +) from alfasim_sdk.result_reader.aggregator import read_history_matching_metadata from alfasim_sdk.result_reader.aggregator import read_history_matching_result from alfasim_sdk.result_reader.aggregator import read_metadata @@ -399,6 +403,22 @@ def test_read_history_matching_result_metadata( "observed_curve_2": {"trend_id": "trend_2", "property_id": "pressure"}, } assert metadata.parametric_vars == {"mg": 0.5, "mo": 4.0} + assert metadata.historic_data_curve_infos == [ + HistoricDataCurveMetadata( + curve_id="observed_curve_1", + curve_name="curve 1", + domain_unit="s", + image_unit="m3/m3", + image_category="volume fraction", + ), + HistoricDataCurveMetadata( + curve_id="observed_curve_2", + curve_name="curve 2", + domain_unit="s", + image_unit="Pa", + image_category="pressure", + ), + ] expected_meta1 = HistoryMatchingMetadata.HMItem( parametric_var_id="parametric_var_1", @@ -455,7 +475,7 @@ def test_read_history_matching_result_metadata( def test_read_history_matching_result_data( hm_probabilistic_results_dir: Path, hm_deterministic_results_dir: Path, - hm_type: Literal["probabilistic", 
"deterministic"], + hm_type: Literal["HM-probabilistic", "HM-deterministic"], ) -> None: """ Check reading the result of both HM type analysis. Both results are available simultaneously by @@ -503,3 +523,40 @@ def test_read_history_matching_result_data( # Receiving an invalid History Matching type should raise. with pytest.raises(ValueError, match="type `foobar` not supported"): read_history_matching_result(metadata, "foobar") # type: ignore + + +def test_read_history_matching_historic_data_curves( + hm_probabilistic_results_dir: Path, + hm_deterministic_results_dir: Path, +) -> None: + """ + Check reading the historic data curves from the result file of both HM type analysis. + """ + result_directories = (hm_probabilistic_results_dir, hm_deterministic_results_dir) + for result_dir in result_directories: + metadata = read_history_matching_metadata(result_dir) + curves = read_history_matching_historic_data_curves(metadata) + assert len(curves) == 2 + assert curves["observed_curve_1"] == pytest.approx( + numpy.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]) + ) + assert curves["observed_curve_2"] == pytest.approx( + numpy.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]) + ) + + # For completeness, check result when passing some invalid directory. + meta = HistoryMatchingMetadata.empty(result_directory=Path("foo")) + assert read_history_matching_historic_data_curves(meta) == {} + + +def test_read_history_matching_historic_data_curves_backward_compatibility( + hm_results_dir_without_historic_data: Path, +) -> None: + """ + Check reading the historic data curves from an old result file which doesn't have historic data + curves data in it (pre ASIM-5713). + """ + result_dir = hm_results_dir_without_historic_data + metadata = read_history_matching_metadata(result_dir) + curves = read_history_matching_historic_data_curves(metadata) + assert curves == {}