From 388f7f45d97d680f6198057f4f0d43e362a84d25 Mon Sep 17 00:00:00 2001
From: Rodrigo Neto
Date: Tue, 16 Jul 2024 18:47:04 -0300
Subject: [PATCH] Add historic curves data to the History Matching results
 metadata

ASIM-5713
---
 CHANGELOG.rst                               |   2 +-
 src/alfasim_sdk/result_reader/aggregator.py | 124 +++++++++++++-----
 .../result_reader/aggregator_constants.py   |   1 +
 tests/conftest.py                           |  78 +++++++++--
 tests/results/test_aggregator.py            |  59 ++++++++-
 5 files changed, 218 insertions(+), 46 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2a6345c0..18b221cb 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,7 +5,7 @@ CHANGELOG
 2024.2 (unreleased)
 ===================
 
-*
+* Added support for reading historic data curves directly from the results of History Matching analyses.
 
 
 2024.1 (2024-05-27)
diff --git a/src/alfasim_sdk/result_reader/aggregator.py b/src/alfasim_sdk/result_reader/aggregator.py
index 4452de01..6250bc68 100644
--- a/src/alfasim_sdk/result_reader/aggregator.py
+++ b/src/alfasim_sdk/result_reader/aggregator.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import json
 import os
@@ -28,6 +30,9 @@
     HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
 )
 from alfasim_sdk.result_reader.aggregator_constants import HISTORY_MATCHING_GROUP_NAME
+from alfasim_sdk.result_reader.aggregator_constants import (
+    HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
+)
 from alfasim_sdk.result_reader.aggregator_constants import (
     HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
 )
@@ -219,44 +224,71 @@ def map_data(
         )
 
 
+@attr.define(slots=True, hash=True)
+class HistoricDataCurveMetadata:
+    """
+    Metadata of the historic data curves used in the History Matching analysis.
+    """
+
+    curve_id: str
+    curve_name: str
+    domain_unit: str
+    image_unit: str
+    image_category: str
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> Self:
+        return cls(
+            curve_id=data["curve_id"],
+            curve_name=data["curve_name"],
+            domain_unit=data["domain_unit"],
+            image_unit=data["image_unit"],
+            image_category=data["image_category"],
+        )
+
+
 @attr.s(slots=True, hash=False)
 class HistoryMatchingMetadata:
     """
     Holder for the History Matching results metadata.
-
-    :ivar hm_items:
-        Map of the data id and its associated metadata.
-    :ivar objective_functions:
-        Map of observed curve id to a dict of Quantity of Interest data, populated with keys
-        'trend_id' and 'property_id'. This represents the setup for this HM analysis.
-    :ivar parametric_vars:
-        Map of parametric vars to the values that represents the analysis, with all existent vars.
-        Values are either the optimal values (deterministic) or the base values (probabilistic).
-    :ivar result_directory:
-        The directory in which the result is saved.
     """
 
+    #: Map of the data id and its associated metadata.
+    hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
+    #: Map of observed curve id to a dict of Quantity of Interest data, populated with keys
+    #: 'trend_id' and 'property_id'. This represents the setup for this HM analysis.
+    objective_functions: Dict[str, Dict[str, str]] = attr.ib(
+        validator=attr.validators.instance_of(Dict)
+    )
+    #: Map of parametric vars to the values that represent the analysis, with all existing vars.
+    #: Values are either the optimal values (deterministic) or the base values (probabilistic).
+    parametric_vars: Dict[str, float] = attr.ib(
+        validator=attr.validators.instance_of(Dict)
+    )
+    #: The directory in which the result is saved.
+    result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))
+    #: Metadata of the historic curves present in the results. Optional as this was introduced
+    #: later (ASIM-5713).
+    historic_data_curve_infos: Optional[List[HistoricDataCurveMetadata]] = attr.ib(
+        validator=attr.validators.optional(attr.validators.instance_of(list)),
+        default=None,
+    )
+
     @attr.s(slots=True, hash=False)
     class HMItem:
         """
         Metadata associated with each item of the HM results.
-
-        :ivar parametric_var_id:
-            The id of the associated parametric var.
-        :ivar parametric_var_name:
-            The name of the associated parametric var.
-        :ivar min_value:
-            Lower limit of the specified range for the parametric var.
-        :ivar max_value:
-            Upper limit of the specified range for the parametric var.
-        :ivar data_index:
-            The index of the data in the result datasets.
         """
 
+        #: The id of the associated parametric var.
         parametric_var_id: str = attr.ib(validator=attr.validators.instance_of(str))
+        #: The name of the associated parametric var.
         parametric_var_name: str = attr.ib(validator=attr.validators.instance_of(str))
+        #: Lower limit of the specified range for the parametric var.
        min_value: float = attr.ib(validator=attr.validators.instance_of(float))
+        #: Upper limit of the specified range for the parametric var.
        max_value: float = attr.ib(validator=attr.validators.instance_of(float))
+        #: The index of the data in the result datasets.
        data_index: int = attr.ib(validator=attr.validators.instance_of(int))
 
        @classmethod
@@ -274,15 +306,6 @@ def from_dict(cls, data: Dict[str, Any]) -> Self:
                 data_index=data["data_index"],
             )
 
-    hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
-    objective_functions: Dict[str, Dict[str, str]] = attr.ib(
-        validator=attr.validators.instance_of(Dict)
-    )
-    parametric_vars: Dict[str, float] = attr.ib(
-        validator=attr.validators.instance_of(Dict)
-    )
-    result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))
-
     @classmethod
     def empty(cls, result_directory: Path) -> Self:
         return cls(
@@ -300,12 +323,19 @@ def from_result_directory(cls, result_directory: Path) -> Self:
 
         If result file is not ready or doesn't exist, return an empty metadata.
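+        Results written before historic data curves were stored (ASIM-5713) are still
+        readable; for those, ``historic_data_curve_infos`` is left as ``None``.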
""" - def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]: + def map_meta_items( + hm_metadata: Dict, + ) -> Dict[str, HistoryMatchingMetadata.HMItem]: return { key: HistoryMatchingMetadata.HMItem.from_dict(data) for key, data in hm_metadata.items() } + def map_historic_data_infos( + infos: List[Dict[str, Any]] + ) -> List[HistoricDataCurveMetadata]: + return [HistoricDataCurveMetadata.from_dict(info) for info in infos] + with open_result_file(result_directory) as result_file: if not result_file: return cls.empty(result_directory=result_directory) @@ -321,10 +351,14 @@ def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]: objective_functions = some_item_metadata["objective_functions"] parametric_vars = some_item_metadata["parametric_vars"] + historic_curve_infos = some_item_metadata.get("historic_data_curves_info") + if historic_curve_infos is not None: + historic_curve_infos = map_historic_data_infos(historic_curve_infos) return cls( - hm_items=map_data(loaded_metadata), + hm_items=map_meta_items(loaded_metadata), objective_functions=objective_functions, + historic_data_curve_infos=historic_curve_infos, parametric_vars=parametric_vars, result_directory=result_directory, ) @@ -1776,6 +1810,30 @@ def read_history_matching_result( return result_map +def read_history_matching_historic_data_curves( + metadata: HistoryMatchingMetadata, +) -> Dict[str, np.ndarray]: + """ + :return: + Map of historic data curve id to the actual curve, represented as an array of points in the + form [[y1, y2, ..., yn], [x1, x1, ..., xn]]. + """ + with open_result_file(metadata.result_directory) as result_file: + if not result_file: + return {} + + result = result_file.get(HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME) + + if result is None: + # Old result files may not have this data group. 
+            return {}
+
+        return {
+            info.curve_id: result[info.curve_id][:]
+            for info in metadata.historic_data_curve_infos
+        }
+
+
 @contextmanager
 def open_result_file(
     result_directory: Path, result_filename: str = "result"
diff --git a/src/alfasim_sdk/result_reader/aggregator_constants.py b/src/alfasim_sdk/result_reader/aggregator_constants.py
index 70e21088..3a0ceda2 100644
--- a/src/alfasim_sdk/result_reader/aggregator_constants.py
+++ b/src/alfasim_sdk/result_reader/aggregator_constants.py
@@ -4,6 +4,7 @@
 GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME = "global_sensitivity_analysis"
 
 HISTORY_MATCHING_GROUP_NAME = "history_matching"
+HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME = "history_matching_historic_data"
 HISTORY_MATCHING_DETERMINISTIC_DSET_NAME = "history_matching_deterministic"
 HISTORY_MATCHING_PROBABILISTIC_DSET_NAME = "history_matching_probabilistic"
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 61ea911b..6715cb12 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,6 +3,7 @@
 import shutil
 import textwrap
 from pathlib import Path
+from typing import Dict
 from typing import List
 
 import h5py
@@ -11,6 +12,9 @@
 from _pytest.fixtures import FixtureRequest
 from _pytest.monkeypatch import MonkeyPatch
 
+from alfasim_sdk.result_reader.aggregator import (
+    HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
+)
 from alfasim_sdk.result_reader.aggregator_constants import (
     GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME,
 )
@@ -272,21 +276,17 @@ def global_sa_results_dir(datadir: Path) -> Path:
 def _create_and_populate_hm_result_file(
     result_dir: Path,
     result: np.ndarray,
-    dataset_key: str,
+    result_dataset_key: str,
+    historic_data_curves: Dict[str, np.ndarray],
 ) -> None:
     result_dir.mkdir(parents=True, exist_ok=True)
     result_filepath = result_dir / "result"
 
     with h5py.File(result_filepath, "x", libver="latest", locking=False) as file:
         meta_group = file.create_group(META_GROUP_NAME, track_order=True)
-        data_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)
+        result_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)
 
-        dataset = data_group.create_dataset(
-            dataset_key,
-            shape=result.shape,
-            dtype=np.float64,
-            maxshape=tuple(None for _ in result.shape),
-        )
+        result_group.create_dataset(result_dataset_key, data=result)
 
         objective_functions = {
             "observed_curve_1": {"trend_id": "trend_1", "property_id": "holdup"},
@@ -314,9 +314,34 @@ def _create_and_populate_hm_result_file(
                 "data_index": 1,
             },
         }
+        if historic_data_curves:
+            historic_curves_group = file.create_group(
+                HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME
+            )
+            for curve_id, curve in historic_data_curves.items():
+                historic_curves_group.create_dataset(curve_id, data=curve)
+
+            historic_curves_meta = [
+                {
+                    "curve_id": "observed_curve_1",
+                    "curve_name": "curve 1",
+                    "domain_unit": "s",
+                    "image_unit": "m3/m3",
+                    "image_category": "volume fraction",
+                },
+                {
+                    "curve_id": "observed_curve_2",
+                    "curve_name": "curve 2",
+                    "domain_unit": "s",
+                    "image_unit": "Pa",
+                    "image_category": "pressure",
+                },
+            ]
+            meta_entries = list(fake_meta.values())
+            for entry in meta_entries:
+                entry["historic_data_curves_info"] = historic_curves_meta
 
         meta_group.attrs[HISTORY_MATCHING_GROUP_NAME] = json.dumps(fake_meta)
 
-        dataset[:] = result
         file.swmr_mode = True
 
@@ -332,11 +357,16 @@ def hm_probabilistic_results_dir(datadir: Path) -> Path:
     probabilistic_result = np.array(
         [[0.1, 0.22, 1.0, 0.8, 0.55], [3.0, 6.0, 5.1, 4.7, 6.3]]
     )
+    historic_data_curves = {
+        "observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]),
"observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]), + } _create_and_populate_hm_result_file( result_dir=result_dir, result=probabilistic_result, - dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME, + result_dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME, + historic_data_curves=historic_data_curves, ) return result_dir @@ -349,13 +379,39 @@ def hm_deterministic_results_dir(datadir: Path) -> Path: """ import numpy as np + result_dir = datadir / "main-HM-deterministic" + deterministic_result = np.array([0.1, 3.2]) + historic_data_curves = { + "observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]), + "observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]), + } + + _create_and_populate_hm_result_file( + result_dir=result_dir, + result=deterministic_result, + result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME, + historic_data_curves=historic_data_curves, + ) + + return result_dir + + +@pytest.fixture() +def hm_results_dir_without_historic_data(datadir: Path) -> Path: + """ + Create a History Matching Deterministic result folder with a populated HDF5 file in the old + format, i.e. without historic data curves. + """ + import numpy as np + result_dir = datadir / "main-HM-deterministic" deterministic_result = np.array([0.1, 3.2]) _create_and_populate_hm_result_file( result_dir=result_dir, result=deterministic_result, - dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME, + result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME, + historic_data_curves={}, ) return result_dir diff --git a/tests/results/test_aggregator.py b/tests/results/test_aggregator.py index af516d96..12a49a57 100644 --- a/tests/results/test_aggregator.py +++ b/tests/results/test_aggregator.py @@ -13,6 +13,7 @@ from pytest_regressions.num_regression import NumericRegressionFixture from alfasim_sdk.result_reader.aggregator import concatenate_metadata +from alfasim_sdk.result_reader.aggregator import HistoricDataCurveMetadata from alfasim_sdk.result_reader.aggregator import HistoryMatchingMetadata from alfasim_sdk.result_reader.aggregator import open_result_files from alfasim_sdk.result_reader.aggregator import ( @@ -22,6 +23,9 @@ read_global_sensitivity_analysis_time_set, ) from alfasim_sdk.result_reader.aggregator import read_global_sensitivity_coefficients +from alfasim_sdk.result_reader.aggregator import ( + read_history_matching_historic_data_curves, +) from alfasim_sdk.result_reader.aggregator import read_history_matching_metadata from alfasim_sdk.result_reader.aggregator import read_history_matching_result from alfasim_sdk.result_reader.aggregator import read_metadata @@ -399,6 +403,22 @@ def test_read_history_matching_result_metadata( "observed_curve_2": {"trend_id": "trend_2", "property_id": "pressure"}, } assert metadata.parametric_vars == {"mg": 0.5, "mo": 4.0} + assert metadata.historic_data_curve_infos == [ + HistoricDataCurveMetadata( + curve_id="observed_curve_1", + curve_name="curve 1", + domain_unit="s", + image_unit="m3/m3", + image_category="volume fraction", + ), + HistoricDataCurveMetadata( + curve_id="observed_curve_2", + curve_name="curve 2", + domain_unit="s", + image_unit="Pa", + image_category="pressure", + ), + ] expected_meta1 = HistoryMatchingMetadata.HMItem( parametric_var_id="parametric_var_1", @@ -455,7 +475,7 @@ def test_read_history_matching_result_metadata( def test_read_history_matching_result_data( hm_probabilistic_results_dir: Path, hm_deterministic_results_dir: Path, - hm_type: Literal["probabilistic", 
"deterministic"], + hm_type: Literal["HM-probabilistic", "HM-deterministic"], ) -> None: """ Check reading the result of both HM type analysis. Both results are available simultaneously by @@ -503,3 +523,40 @@ def test_read_history_matching_result_data( # Receiving an invalid History Matching type should raise. with pytest.raises(ValueError, match="type `foobar` not supported"): read_history_matching_result(metadata, "foobar") # type: ignore + + +def test_read_history_matching_historic_data_curves( + hm_probabilistic_results_dir: Path, + hm_deterministic_results_dir: Path, +) -> None: + """ + Check reading the historic data curves from the result file of both HM type analysis. + """ + result_directories = (hm_probabilistic_results_dir, hm_deterministic_results_dir) + for result_dir in result_directories: + metadata = read_history_matching_metadata(result_dir) + curves = read_history_matching_historic_data_curves(metadata) + assert len(curves) == 2 + assert curves["observed_curve_1"] == pytest.approx( + numpy.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]) + ) + assert curves["observed_curve_2"] == pytest.approx( + numpy.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]) + ) + + # For completeness, check result when passing some invalid directory. + meta = HistoryMatchingMetadata.empty(result_directory=Path("foo")) + assert read_history_matching_historic_data_curves(meta) == {} + + +def test_read_history_matching_historic_data_curves_backward_compatibility( + hm_results_dir_without_historic_data: Path, +) -> None: + """ + Check reading the historic data curves from an old result file which doesn't have historic data + curves data in it (pre ASIM-5713). + """ + result_dir = hm_results_dir_without_historic_data + metadata = read_history_matching_metadata(result_dir) + curves = read_history_matching_historic_data_curves(metadata) + assert curves == {}