Skip to content

Commit

Permalink
Merge pull request #394 from ESSS/fb-ASIM-5713-error-deleting-historic-data-curves
Browse files Browse the repository at this point in the history

Add historic curves to the History Matching results metadata
  • Loading branch information
ro-oliveira95 authored Jul 23, 2024
2 parents 8fa83bd + 388f7f4 commit c6a88dc
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 46 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ CHANGELOG
2024.2 (unreleased)
===================

*
* Added support to read historic data curves directly from the results of History Matching analyses.


2024.1 (2024-05-27)
Expand Down
124 changes: 91 additions & 33 deletions src/alfasim_sdk/result_reader/aggregator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import functools
import json
import os
Expand Down Expand Up @@ -28,6 +30,9 @@
HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
)
from alfasim_sdk.result_reader.aggregator_constants import HISTORY_MATCHING_GROUP_NAME
from alfasim_sdk.result_reader.aggregator_constants import (
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
)
from alfasim_sdk.result_reader.aggregator_constants import (
HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
)
Expand Down Expand Up @@ -219,44 +224,71 @@ def map_data(
)


@attr.define(slots=True, hash=True)
class HistoricDataCurveMetadata:
    """
    Metadata describing one historic data curve used in a History Matching analysis.

    Holds the curve's identifier and display name plus the units/category of its
    domain and image, as stored in the result-file metadata.
    """

    curve_id: str
    curve_name: str
    domain_unit: str
    image_unit: str
    image_category: str

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> Self:
        """Build an instance from a metadata dict with one entry per field."""
        field_names = (
            "curve_id",
            "curve_name",
            "domain_unit",
            "image_unit",
            "image_category",
        )
        return cls(**{name: data[name] for name in field_names})


@attr.s(slots=True, hash=False)
class HistoryMatchingMetadata:
"""
Holder for the History Matching results metadata.
:ivar hm_items:
Map of the data id and its associated metadata.
:ivar objective_functions:
Map of observed curve id to a dict of Quantity of Interest data, populated with keys
'trend_id' and 'property_id'. This represents the setup for this HM analysis.
:ivar parametric_vars:
Map of parametric vars to the values that represents the analysis, with all existent vars.
Values are either the optimal values (deterministic) or the base values (probabilistic).
:ivar result_directory:
The directory in which the result is saved.
"""

#: Map of the data id and its associated metadata.
hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
#: Map of observed curve id to a dict of Quantity of Interest data, populated with keys
#: 'trend_id' and 'property_id'. This represents the setup for this HM analysis.
objective_functions: Dict[str, Dict[str, str]] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
#: Map of parametric vars to the values that represents the analysis, with all existent vars.
#: Values are either the optimal values (deterministic) or the base values (probabilistic).
parametric_vars: Dict[str, float] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
#: The directory in which the result is saved.
result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))
#: Metadata of the historic curves present in the results. Optional as this was introduced
#: later (ASIM-5713).
historic_data_curve_infos: Optional[List[HistoricDataCurveMetadata]] = attr.ib(
validator=attr.validators.optional(attr.validators.instance_of(list)),
default=None,
)

@attr.s(slots=True, hash=False)
class HMItem:
"""
Metadata associated with each item of the HM results.
:ivar parametric_var_id:
The id of the associated parametric var.
:ivar parametric_var_name:
The name of the associated parametric var.
:ivar min_value:
Lower limit of the specified range for the parametric var.
:ivar max_value:
Upper limit of the specified range for the parametric var.
:ivar data_index:
The index of the data in the result datasets.
"""

#: The id of the associated parametric var.
parametric_var_id: str = attr.ib(validator=attr.validators.instance_of(str))
#: The name of the associated parametric var.
parametric_var_name: str = attr.ib(validator=attr.validators.instance_of(str))
#: Lower limit of the specified range for the parametric var.
min_value: float = attr.ib(validator=attr.validators.instance_of(float))
#: Upper limit of the specified range for the parametric var.
max_value: float = attr.ib(validator=attr.validators.instance_of(float))
#: The index of the data in the result datasets.
data_index: int = attr.ib(validator=attr.validators.instance_of(int))

@classmethod
Expand All @@ -274,15 +306,6 @@ def from_dict(cls, data: Dict[str, Any]) -> Self:
data_index=data["data_index"],
)

hm_items: Dict[str, HMItem] = attr.ib(validator=attr.validators.instance_of(Dict))
objective_functions: Dict[str, Dict[str, str]] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
parametric_vars: Dict[str, float] = attr.ib(
validator=attr.validators.instance_of(Dict)
)
result_directory: Path = attr.ib(validator=attr.validators.instance_of(Path))

@classmethod
def empty(cls, result_directory: Path) -> Self:
return cls(
Expand All @@ -300,12 +323,19 @@ def from_result_directory(cls, result_directory: Path) -> Self:
If result file is not ready or doesn't exist, return an empty metadata.
"""

def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]:
def map_meta_items(
hm_metadata: Dict,
) -> Dict[str, HistoryMatchingMetadata.HMItem]:
return {
key: HistoryMatchingMetadata.HMItem.from_dict(data)
for key, data in hm_metadata.items()
}

def map_historic_data_infos(
infos: List[Dict[str, Any]]
) -> List[HistoricDataCurveMetadata]:
return [HistoricDataCurveMetadata.from_dict(info) for info in infos]

with open_result_file(result_directory) as result_file:
if not result_file:
return cls.empty(result_directory=result_directory)
Expand All @@ -321,10 +351,14 @@ def map_data(hm_metadata: Dict) -> Dict[str, HistoryMatchingMetadata.HMItem]:

objective_functions = some_item_metadata["objective_functions"]
parametric_vars = some_item_metadata["parametric_vars"]
historic_curve_infos = some_item_metadata.get("historic_data_curves_info")
if historic_curve_infos is not None:
historic_curve_infos = map_historic_data_infos(historic_curve_infos)

return cls(
hm_items=map_data(loaded_metadata),
hm_items=map_meta_items(loaded_metadata),
objective_functions=objective_functions,
historic_data_curve_infos=historic_curve_infos,
parametric_vars=parametric_vars,
result_directory=result_directory,
)
Expand Down Expand Up @@ -1776,6 +1810,30 @@ def read_history_matching_result(
return result_map


def read_history_matching_historic_data_curves(
    metadata: HistoryMatchingMetadata,
) -> Dict[str, np.ndarray]:
    """
    Read the historic data curves saved alongside a History Matching result.

    :param metadata:
        Metadata of the History Matching result, used to locate the result file
        and enumerate the expected curves.

    :return:
        Map of historic data curve id to the actual curve, represented as an array of points in
        the form [[y1, y2, ..., yn], [x1, x2, ..., xn]]. Empty if the result file is missing or
        predates historic data curves support.
    """
    with open_result_file(metadata.result_directory) as result_file:
        if not result_file:
            return {}

        result = result_file.get(HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME)

        if result is None:
            # Old result files may not have this data group.
            return {}

        # `historic_data_curve_infos` is Optional (older metadata may not carry
        # it) -- treat None the same as "no curves" instead of raising TypeError.
        curve_infos = metadata.historic_data_curve_infos or []
        return {info.curve_id: result[info.curve_id][:] for info in curve_infos}


@contextmanager
def open_result_file(
result_directory: Path, result_filename: str = "result"
Expand Down
1 change: 1 addition & 0 deletions src/alfasim_sdk/result_reader/aggregator_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# HDF5 group name holding Global Sensitivity Analysis results.
GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME = "global_sensitivity_analysis"

# HDF5 group names for History Matching results and for the historic data
# curves saved alongside them (the latter may be absent in older result files).
HISTORY_MATCHING_GROUP_NAME = "history_matching"
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME = "history_matching_historic_data"
# Dataset names (inside the History Matching group) for each analysis type.
HISTORY_MATCHING_DETERMINISTIC_DSET_NAME = "history_matching_deterministic"
HISTORY_MATCHING_PROBABILISTIC_DSET_NAME = "history_matching_probabilistic"

Expand Down
78 changes: 67 additions & 11 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import shutil
import textwrap
from pathlib import Path
from typing import Dict
from typing import List

import h5py
Expand All @@ -11,6 +12,9 @@
from _pytest.fixtures import FixtureRequest
from _pytest.monkeypatch import MonkeyPatch

from alfasim_sdk.result_reader.aggregator import (
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME,
)
from alfasim_sdk.result_reader.aggregator_constants import (
GLOBAL_SENSITIVITY_ANALYSIS_GROUP_NAME,
)
Expand Down Expand Up @@ -272,21 +276,17 @@ def global_sa_results_dir(datadir: Path) -> Path:
def _create_and_populate_hm_result_file(
result_dir: Path,
result: np.ndarray,
dataset_key: str,
result_dataset_key: str,
historic_data_curves: Dict[str, np.ndarray],
) -> None:
result_dir.mkdir(parents=True, exist_ok=True)
result_filepath = result_dir / "result"

with h5py.File(result_filepath, "x", libver="latest", locking=False) as file:
meta_group = file.create_group(META_GROUP_NAME, track_order=True)
data_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)
result_group = file.create_group(HISTORY_MATCHING_GROUP_NAME, track_order=True)

dataset = data_group.create_dataset(
dataset_key,
shape=result.shape,
dtype=np.float64,
maxshape=tuple(None for _ in result.shape),
)
result_group.create_dataset(result_dataset_key, data=result)

objective_functions = {
"observed_curve_1": {"trend_id": "trend_1", "property_id": "holdup"},
Expand Down Expand Up @@ -314,9 +314,34 @@ def _create_and_populate_hm_result_file(
"data_index": 1,
},
}
if historic_data_curves:
historic_curves_group = file.create_group(
HISTORY_MATCHING_HISTORIC_DATA_GROUP_NAME
)
for curve_id, curve in historic_data_curves.items():
historic_curves_group.create_dataset(curve_id, data=curve)

historic_curves_meta = [
{
"curve_id": "observed_curve_1",
"curve_name": "curve 1",
"domain_unit": "s",
"image_unit": "m3/m3",
"image_category": "volume fraction",
},
{
"curve_id": "observed_curve_2",
"curve_name": "curve 2",
"domain_unit": "s",
"image_unit": "Pa",
"image_category": "pressure",
},
]
meta_entries = list(fake_meta.values())
for entry in meta_entries:
entry["historic_data_curves_info"] = historic_curves_meta

meta_group.attrs[HISTORY_MATCHING_GROUP_NAME] = json.dumps(fake_meta)
dataset[:] = result

file.swmr_mode = True

Expand All @@ -332,11 +357,16 @@ def hm_probabilistic_results_dir(datadir: Path) -> Path:
probabilistic_result = np.array(
[[0.1, 0.22, 1.0, 0.8, 0.55], [3.0, 6.0, 5.1, 4.7, 6.3]]
)
historic_data_curves = {
"observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]),
"observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]),
}

_create_and_populate_hm_result_file(
result_dir=result_dir,
result=probabilistic_result,
dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
result_dataset_key=HISTORY_MATCHING_PROBABILISTIC_DSET_NAME,
historic_data_curves=historic_data_curves,
)

return result_dir
Expand All @@ -349,13 +379,39 @@ def hm_deterministic_results_dir(datadir: Path) -> Path:
"""
import numpy as np

result_dir = datadir / "main-HM-deterministic"
deterministic_result = np.array([0.1, 3.2])
historic_data_curves = {
"observed_curve_1": np.array([[0.1, 0.5, 0.9], [1.1, 2.2, 3.3]]),
"observed_curve_2": np.array([[1.0, 5.0, 9.0, 3.1], [1.2, 2.3, 3.4, 4.5]]),
}

_create_and_populate_hm_result_file(
result_dir=result_dir,
result=deterministic_result,
result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
historic_data_curves=historic_data_curves,
)

return result_dir


@pytest.fixture()
def hm_results_dir_without_historic_data(datadir: Path) -> Path:
    """
    Create a History Matching Deterministic result folder with a populated HDF5 file in the old
    format, i.e. without historic data curves.

    :return:
        The directory containing the generated ``result`` file.
    """
    import numpy as np

    result_dir = datadir / "main-HM-deterministic"
    deterministic_result = np.array([0.1, 3.2])

    # The helper accepts only `result_dataset_key` (the stale `dataset_key`
    # keyword no longer exists in its signature). Passing an empty mapping for
    # `historic_data_curves` simulates results created before historic data
    # curves were added to the file format.
    _create_and_populate_hm_result_file(
        result_dir=result_dir,
        result=deterministic_result,
        result_dataset_key=HISTORY_MATCHING_DETERMINISTIC_DSET_NAME,
        historic_data_curves={},
    )

    return result_dir
Loading

0 comments on commit c6a88dc

Please sign in to comment.