From 534a4542ce831886f1c318660899e72faa511d8b Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Mon, 15 Jan 2024 15:04:48 +0000 Subject: [PATCH] Add summary method (e.g. 'mean', 'sum') to database --- damnit/backend/db.py | 5 +++-- damnit/backend/extract_data.py | 7 +++++-- damnit/ctxsupport/ctxrunner.py | 4 ++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/damnit/backend/db.py b/damnit/backend/db.py index 015508ae..f302882f 100644 --- a/damnit/backend/db.py +++ b/damnit/backend/db.py @@ -19,7 +19,7 @@ CREATE TABLE IF NOT EXISTS run_info(proposal, run, start_time, added_at); CREATE UNIQUE INDEX IF NOT EXISTS proposal_run ON run_info (proposal, run); -CREATE TABLE IF NOT EXISTS run_variables(proposal, run, name, version, value, timestamp, max_diff, provenance, summary_type); +CREATE TABLE IF NOT EXISTS run_variables(proposal, run, name, version, value, timestamp, max_diff, provenance, summary_type, summary_method); CREATE UNIQUE INDEX IF NOT EXISTS variable_version ON run_variables (proposal, run, name, version); -- These are dummy views that will be overwritten later, but they should at least @@ -48,6 +48,7 @@ class ReducedData: """ value: Any max_diff: float = None + summary_method: str = '' class BlobTypes(Enum): @@ -226,7 +227,7 @@ def set_variable(self, proposal: int, run: int, name: str, reduced): variable["version"] = 1 # if latest_version is None else latest_version + 1 # These columns should match those in the run_variables table - cols = ["proposal", "run", "name", "version", "value", "timestamp", "max_diff", "provenance"] + cols = ["proposal", "run", "name", "version", "value", "timestamp", "max_diff", "provenance", "summary_method"] col_list = ", ".join(cols) col_values = ", ".join([f":{col}" for col in cols]) col_updates = ", ".join([f"{col} = :{col}" for col in cols]) diff --git a/damnit/backend/extract_data.py b/damnit/backend/extract_data.py index 5b75c0d8..45a1e452 100644 --- a/damnit/backend/extract_data.py +++ b/damnit/backend/extract_data.py @@ -166,8 +166,11 @@ def get_dset_value(ds): with h5py.File(h5_path, 'r') as f: return { - name: ReducedData(get_dset_value(dset), - dset.attrs.get("max_diff", np.array(None)).item()) + name: ReducedData( + get_dset_value(dset), + max_diff=dset.attrs.get("max_diff", np.array(None)).item(), + summary_method=dset.attrs.get("summary_method", "") + ) for name, dset in f['.reduced'].items() } diff --git a/damnit/ctxsupport/ctxrunner.py b/damnit/ctxsupport/ctxrunner.py index 5b8af46d..8aa0b9d3 100644 --- a/damnit/ctxsupport/ctxrunner.py +++ b/damnit/ctxsupport/ctxrunner.py @@ -462,6 +462,10 @@ def save_hdf5(self, hdf5_path, reduced_only=False): and data.ndim == 1 and data.shape[0] > 1: reduced_ds.attrs["max_diff"] = abs(np.nanmax(data) - np.nanmin(data)) + var_obj = ctx_vars.get(name) + if var_obj is not None: + reduced_ds.attrs['summary_method'] = var_obj.summary or '' + for name, obj in xarray_dsets: # HDF5 doesn't allow slashes in names :( if isinstance(obj, xr.DataArray) and obj.name is not None and "/" in obj.name: