Skip to content

Commit

Permalink
Add summary method (e.g. 'mean', 'sum') to database
Browse files — browse the repository at this point in the history
  • Loading branch information
takluyver committed Jan 15, 2024
1 parent 2a93333 commit cff3a3a
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 4 deletions.
5 changes: 3 additions & 2 deletions damnit/backend/db.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,7 +19,7 @@
CREATE TABLE IF NOT EXISTS run_info(proposal, run, start_time, added_at);
CREATE UNIQUE INDEX IF NOT EXISTS proposal_run ON run_info (proposal, run);
CREATE TABLE IF NOT EXISTS run_variables(proposal, run, name, version, value, timestamp, max_diff, provenance, summary_type);
CREATE TABLE IF NOT EXISTS run_variables(proposal, run, name, version, value, timestamp, max_diff, provenance, summary_type, summary_method);
CREATE UNIQUE INDEX IF NOT EXISTS variable_version ON run_variables (proposal, run, name, version);
-- These are dummy views that will be overwritten later, but they should at least
Expand Down Expand Up @@ -48,6 +48,7 @@ class ReducedData:
"""
value: Any
max_diff: float = None
summary_method: str = ''


class BlobTypes(Enum):
Expand Down Expand Up @@ -226,7 +227,7 @@ def set_variable(self, proposal: int, run: int, name: str, reduced):
variable["version"] = 1 # if latest_version is None else latest_version + 1

# These columns should match those in the run_variables table
cols = ["proposal", "run", "name", "version", "value", "timestamp", "max_diff", "provenance"]
cols = ["proposal", "run", "name", "version", "value", "timestamp", "max_diff", "provenance", "summary_method"]
col_list = ", ".join(cols)
col_values = ", ".join([f":{col}" for col in cols])
col_updates = ", ".join([f"{col} = :{col}" for col in cols])
Expand Down
7 changes: 5 additions & 2 deletions damnit/backend/extract_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -166,8 +166,11 @@ def get_dset_value(ds):

with h5py.File(h5_path, 'r') as f:
return {
name: ReducedData(get_dset_value(dset),
dset.attrs.get("max_diff", np.array(None)).item())
name: ReducedData(
get_dset_value(dset),
max_diff=dset.attrs.get("max_diff", np.array(None)).item(),
summary_method=dset.attrs.get("summary_method", "")
)
for name, dset in f['.reduced'].items()
}

Expand Down
4 changes: 4 additions & 0 deletions damnit/ctxsupport/ctxrunner.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -462,6 +462,10 @@ def save_hdf5(self, hdf5_path, reduced_only=False):
and data.ndim == 1 and data.shape[0] > 1:
reduced_ds.attrs["max_diff"] = abs(np.nanmax(data) - np.nanmin(data))

var_obj = ctx_vars.get(name)
if var_obj is not None:
reduced_ds.attrs['summary_method'] = var_obj.summary or ''

for name, obj in xarray_dsets:
# HDF5 doesn't allow slashes in names :(
if isinstance(obj, xr.DataArray) and obj.name is not None and "/" in obj.name:
Expand Down

0 comments on commit cff3a3a

Please sign in to comment.