From 534a4542ce831886f1c318660899e72faa511d8b Mon Sep 17 00:00:00 2001
From: Thomas Kluyver <thomas@kluyver.me.uk>
Date: Mon, 15 Jan 2024 15:04:48 +0000
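Subject: [PATCH] Add summary method (e.g. 'mean', 'sum') to database

When saving reduced data, ctxrunner now records each variable's
`summary` setting as a `summary_method` attribute on its dataset in
the HDF5 file (an empty string if no summary is set). extract_data
reads that attribute into the new ReducedData.summary_method field
(default ''), and set_variable() writes it to a new `summary_method`
column of the run_variables table.

Note: CREATE TABLE IF NOT EXISTS leaves an existing run_variables
table unchanged, so databases created before this change need the
`summary_method` column added separately.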
---
 damnit/backend/db.py           | 5 +++--
 damnit/backend/extract_data.py | 7 +++++--
 damnit/ctxsupport/ctxrunner.py | 4 ++++
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/damnit/backend/db.py b/damnit/backend/db.py
index 015508ae..f302882f 100644
--- a/damnit/backend/db.py
+++ b/damnit/backend/db.py
@@ -19,7 +19,7 @@
 CREATE TABLE IF NOT EXISTS run_info(proposal, run, start_time, added_at);
 CREATE UNIQUE INDEX IF NOT EXISTS proposal_run ON run_info (proposal, run);
 
-CREATE TABLE IF NOT EXISTS run_variables(proposal, run, name, version, value, timestamp, max_diff, provenance, summary_type);
+CREATE TABLE IF NOT EXISTS run_variables(proposal, run, name, version, value, timestamp, max_diff, provenance, summary_type, summary_method);
 CREATE UNIQUE INDEX IF NOT EXISTS variable_version ON run_variables (proposal, run, name, version);
 
 -- These are dummy views that will be overwritten later, but they should at least
@@ -48,6 +48,7 @@ class ReducedData:
     """
     value: Any
     max_diff: float = None
+    summary_method: str = ''
 
 
 class BlobTypes(Enum):
@@ -226,7 +227,7 @@ def set_variable(self, proposal: int, run: int, name: str, reduced):
         variable["version"] = 1 # if latest_version is None else latest_version + 1
 
         # These columns should match those in the run_variables table
-        cols = ["proposal", "run", "name", "version", "value", "timestamp", "max_diff", "provenance"]
+        cols = ["proposal", "run", "name", "version", "value", "timestamp", "max_diff", "provenance", "summary_method"]
         col_list = ", ".join(cols)
         col_values = ", ".join([f":{col}" for col in cols])
         col_updates = ", ".join([f"{col} = :{col}" for col in cols])
diff --git a/damnit/backend/extract_data.py b/damnit/backend/extract_data.py
index 5b75c0d8..45a1e452 100644
--- a/damnit/backend/extract_data.py
+++ b/damnit/backend/extract_data.py
@@ -166,8 +166,11 @@ def get_dset_value(ds):
 
     with h5py.File(h5_path, 'r') as f:
         return {
-            name: ReducedData(get_dset_value(dset),
-                              dset.attrs.get("max_diff", np.array(None)).item())
+            name: ReducedData(
+                get_dset_value(dset),
+                max_diff=dset.attrs.get("max_diff", np.array(None)).item(),
+                summary_method=dset.attrs.get("summary_method", "")
+            )
             for name, dset in f['.reduced'].items()
         }
 
diff --git a/damnit/ctxsupport/ctxrunner.py b/damnit/ctxsupport/ctxrunner.py
index 5b8af46d..8aa0b9d3 100644
--- a/damnit/ctxsupport/ctxrunner.py
+++ b/damnit/ctxsupport/ctxrunner.py
@@ -462,6 +462,10 @@ def save_hdf5(self, hdf5_path, reduced_only=False):
                    and data.ndim == 1 and data.shape[0] > 1:
                     reduced_ds.attrs["max_diff"] = abs(np.nanmax(data) - np.nanmin(data))
 
+                var_obj = ctx_vars.get(name)
+                if var_obj is not None:
+                    reduced_ds.attrs['summary_method'] = var_obj.summary or ''
+
         for name, obj in xarray_dsets:
             # HDF5 doesn't allow slashes in names :(
             if isinstance(obj, xr.DataArray) and obj.name is not None and "/" in obj.name: