From ee035273338cabdba62c1532dbfcd541b60907f4 Mon Sep 17 00:00:00 2001 From: Christian Poli Date: Thu, 14 Nov 2024 11:42:46 +0100 Subject: [PATCH] add quantiles, fixes, improvements --- ipyprogressivis/widgets/chaining/__init__.py | 4 +- ipyprogressivis/widgets/chaining/heatmap.py | 60 +++++++--- ipyprogressivis/widgets/chaining/quantiles.py | 88 ++++++++++++++ .../widgets/chaining/range_query_2d.py | 107 +++++++++++++----- ipyprogressivis/widgets/chaining/utils.py | 32 +++++- 5 files changed, 239 insertions(+), 52 deletions(-) create mode 100644 ipyprogressivis/widgets/chaining/quantiles.py diff --git a/ipyprogressivis/widgets/chaining/__init__.py b/ipyprogressivis/widgets/chaining/__init__.py index 82fedab..a544174 100644 --- a/ipyprogressivis/widgets/chaining/__init__.py +++ b/ipyprogressivis/widgets/chaining/__init__.py @@ -16,6 +16,7 @@ from .any_vega import AnyVegaW from .code_cell import CodeCellW from .range_query_2d import RangeQuery2DW +from .quantiles import QuantilesW __all__ = [ "Constructor", "DescStatsW", @@ -32,5 +33,6 @@ "HeatmapW", "AnyVegaW", "CodeCellW", - "RangeQuery2DW" + "RangeQuery2DW", + "QuantilesW" ] diff --git a/ipyprogressivis/widgets/chaining/heatmap.py b/ipyprogressivis/widgets/chaining/heatmap.py index 515d200..d32d78d 100644 --- a/ipyprogressivis/widgets/chaining/heatmap.py +++ b/ipyprogressivis/widgets/chaining/heatmap.py @@ -5,8 +5,6 @@ VBoxTyped, TypedBase, needs_dtypes, - amend_last_record, - get_recording_state, replay_next, runner, ) @@ -14,6 +12,7 @@ from progressivis.core.api import Module from progressivis.vis.heatmap import Heatmap from progressivis.stats.api import Histogram2D, Min, Max +from progressivis import Quantiles from typing import Any as AnyType WidgetType = AnyType @@ -22,11 +21,27 @@ DIM = 512 +def make_float( + description: str = "", disabled: bool = False, value=0.0 +) -> ipw.BoundedFloatText: + return ipw.BoundedFloatText( + value=value, + min=0.0, + max=1.0, + step=0.001, + description=description, + disabled=disabled, + layout={"width": "initial"}, + ) + + class HeatmapW(VBoxTyped): class Typed(TypedBase): choice_x: ipw.Dropdown choice_y: ipw.Dropdown - freeze_ck: ipw.Checkbox + # freeze_ck: ipw.Checkbox + min_q: ipw.BoundedFloatText | ipw.Label + max_q: ipw.BoundedFloatText | ipw.Label start_btn: ipw.Button image: ipw.Image | ipw.Label @@ -34,6 +49,7 @@ def __init__(self) -> None: super().__init__() self.column_x: str = "" self.column_y: str = "" + self.has_quantiles: bool = False def obs_columns(self, change: dict[str, AnyType]) -> None: if self.child.choice_x.value and self.child.choice_y.value: @@ -69,11 +85,14 @@ def initialize(self) -> None: # layout={"width": "initial"}, ) self.child.choice_y.observe(self.obs_columns, "value") - self.child.image = ipw.Label() - is_rec = get_recording_state() - self.child.freeze_ck = ipw.Checkbox( - description="Freeze", value=is_rec, disabled=(not is_rec) + self.has_quantiles = isinstance(self.input_module, Quantiles) + self.child.min_q = ( + make_float("Min:", value=0.03) if self.has_quantiles else ipw.Label() ) + self.child.max_q = ( + make_float("Max:", value=0.97) if self.has_quantiles else ipw.Label() + ) + self.child.image = ipw.Label() self.child.start_btn = make_button( "Start", cb=self._start_btn_cb, disabled=True ) @@ -82,8 +101,8 @@ def initialize(self) -> None: def _start_btn_cb(self, btn: ipw.Button) -> None: assert self.column_x and self.column_y xy = dict(X=self.column_x, Y=self.column_y) - if self.child.freeze_ck.value: - amend_last_record({"frozen": xy}) + # if self.child.freeze_ck.value: + # amend_last_record({"frozen": xy}) self.init_heatmap(xy) btn.disabled = True @@ -93,17 +112,24 @@ def init_heatmap(self, ctx) -> None: print("XY", ctx) self.child.image = ipw.Image(value=b"\x00", width=DIM, height=DIM) s = self.input_module.scheduler() - query = self.input_module + query = quantiles = self.input_module with s: histogram2d = Histogram2D(col_x, col_y, xbins=DIM, ybins=DIM, scheduler=s) # Connect the module to the csv results and the min,max bounds to rescale - histogram2d.input.table = query.output.result - min_ = Min(scheduler=s) - min_.input.table = query.output.result[col_x, col_y] - max_ = Max(scheduler=s) - max_.input.table = query.output.result[col_x, col_y] - histogram2d.input.min = min_.output.result - histogram2d.input.max = max_.output.result + if self.has_quantiles: + histogram2d.input.table = quantiles.output.table + histogram2d.input.min = quantiles.output.result[self.child.min_q.value] + histogram2d.input.max = quantiles.output.result[self.child.max_q.value] + else: + histogram2d.input.table = query.output.result + min_ = Min(scheduler=s) + min_.input.table = query.output.result[col_x, col_y] + max_ = Max(scheduler=s) + max_.input.table = query.output.result[col_x, col_y] + histogram2d.input.min = min_.output.result + histogram2d.input.max = max_.output.result + # histogram2d.input.min = query.output.min + # histogram2d.input.max = query.output.max # Create a module to create an heatmap image from the histogram2d heatmap = Heatmap(scheduler=s) # Connect it to the histogram2d diff --git a/ipyprogressivis/widgets/chaining/quantiles.py b/ipyprogressivis/widgets/chaining/quantiles.py new file mode 100644 index 0000000..7090e97 --- /dev/null +++ b/ipyprogressivis/widgets/chaining/quantiles.py @@ -0,0 +1,88 @@ +# type: ignore +from .utils import ( + make_button, + stage_register, + VBoxTyped, + TypedBase, + amend_last_record, + is_recording, + runner, + needs_dtypes, +) +import ipywidgets as ipw +from progressivis import ( + Quantiles +) +from progressivis.core.api import Sink +from typing import Any as AnyType + +WidgetType = AnyType + + +class QuantilesW(VBoxTyped): + class Typed(TypedBase): + selection: ipw.SelectMultiple + freeze_ck: ipw.Checkbox + start_btn: ipw.Button + + @needs_dtypes + def initialize(self) -> None: + self.output_dtypes = self.dtypes + self.col_types = {k: str(t) for (k, t) in self.dtypes.items()} + self.col_typed_names = {f"{n}:{t}": (n, t) for (n, t) in self.col_types.items()} + num_cols = [ + (col, c) + for (col, (c, t)) in self.col_typed_names.items() + if (t.startswith("float") or t.startswith("int")) + ] + self.child.selection = ipw.SelectMultiple( + options=num_cols, + value=[], + rows=10, + description="Columns:", + disabled=False, + ) + self.child.selection.observe(self._selection_cb, "value") + is_rec = is_recording() + self.child.freeze_ck = ipw.Checkbox(description="Freeze", + value=is_rec, + disabled=(not is_rec)) + self.child.start_btn = make_button( + "Start", cb=self._start_btn_cb, disabled=True + ) + + @runner + def run(self) -> AnyType: + content = self.frozen_kw + self.output_module = self.init_quantiles(content) + self.output_slot = "result" + + def init_quantiles(self, content: list[str]) -> None: + s = self.input_module.scheduler() + with s: + quantiles = Quantiles(scheduler=s) + quantiles.input.table = self.input_module.output.result[*content] + sink = Sink(scheduler=s) + sink.input.inp = quantiles.output.result + sink2 = Sink(scheduler=s) + sink2.input.inp = quantiles.output.table + self.output_module = quantiles + self.output_slot = "result" + self.output_dtypes = self.dtypes + return quantiles + + def _selection_cb(self, change: AnyType) -> None: + self.child.start_btn.disabled = not change["new"] + + def _start_btn_cb(self, btn: ipw.Button) -> None: + content = self.child.selection.value + amend_last_record({"frozen": content}) + self.output_module = self.init_quantiles(content) + btn.disabled = True + self.child.selection.disabled = True + self.dag_running() + self.make_chaining_box() + self.manage_replay() + + +stage_register["Quantiles"] = QuantilesW diff --git a/ipyprogressivis/widgets/chaining/range_query_2d.py b/ipyprogressivis/widgets/chaining/range_query_2d.py index 572b8a7..ea56e70 100644 --- a/ipyprogressivis/widgets/chaining/range_query_2d.py +++ b/ipyprogressivis/widgets/chaining/range_query_2d.py @@ -4,8 +4,9 @@ stage_register, VBoxTyped, TypedBase, - amend_last_record, - get_recording_state, + amend_nth_record, + get_last_record_index, + is_recording, runner, needs_dtypes, ) @@ -27,15 +28,27 @@ class RangeQuery2DW(VBoxTyped): class Typed(TypedBase): grid: DataFrameGrid - freeze_ck: ipw.Checkbox - start_btn: ipw.Button - unfilter_btn: ipw.Button + buttons: ipw.HBox def __init__(self) -> None: super().__init__() self.column_x: str = "" self.column_y: str = "" self.index: BinningIndexND | None = None + self._saved_settings: dict[str, float] = {} + self._record_index: int | None = None + self._freeze_btn = make_button( + "Freeze", cb=self._freeze_btn_cb, disabled=True + ) + self._unfreeze_btn = make_button( + "Unfreeze", cb=self._unfreeze_btn_cb, disabled=True + ) + self._start_btn = make_button( + "Start", cb=self._start_btn_cb, disabled=True + ) + self._unfilter_btn = make_button( + "Unfilter", cb=self._unfilter_btn_cb, disabled=True + ) @needs_dtypes def initialize(self) -> None: @@ -88,28 +101,28 @@ def initialize(self) -> None: repeat="100px", sizes={"Column": "200px", "Filter": "200px"}, ) - is_rec = get_recording_state() - self.child.freeze_ck = ipw.Checkbox( - description="Freeze", value=is_rec, disabled=(not is_rec) - ) - self.child.start_btn = make_button( - "Start", cb=self._start_btn_cb, disabled=True - ) - self.child.unfilter_btn = make_button( - "Unfilter", cb=self._unfilter_btn_cb, disabled=True - ) - # self.input_module.on_after_run(self.refresh) - # self.init_min_max() + self._freeze_btn.disabled = not is_recording() self.child.grid.observe_col("Column", self.obs_columns) + self.child.buttons = ipw.HBox([self._freeze_btn, self._unfreeze_btn, + self._start_btn, self._unfilter_btn]) + self.reset_buttons() + if is_recording(): + self._record_index = get_last_record_index() + 1 + + def reset_buttons(self) -> None: + self._freeze_btn.disabled = True + self._unfreeze_btn.disabled = True + self._saved_settings = {} def obs_columns(self, change: dict[str, AnyType]) -> None: df = self.child.grid.df if df.loc["X", "Column"].value and df.loc["Y", "Column"].value: - self.child.start_btn.disabled = False + self._start_btn.disabled = False + self._freeze_btn.disabled = not is_recording() self.column_x: str = df.loc["X", "Column"].value.split(":")[0] self.column_y: str = df.loc["Y", "Column"].value.split(":")[0] else: - self.child.start_btn.disabled = True + self._start_btn.disabled = True def grid_update(self, m: Module, run_number: int) -> None: df = self.child.grid.df @@ -140,12 +153,12 @@ def grid_update(self, m: Module, run_number: int) -> None: else: slider_y.min = min_y slider_y.max = max_y - if self.child.unfilter_btn.disabled: + if self._unfilter_btn.disabled: slider_x.value = [min_x, max_x] slider_x.step = (max_x - min_x) / 10 slider_y.value = [min_y, max_y] slider_y.step = (max_y - min_y) / 10 - self.child.unfilter_btn.disabled = False + self._unfilter_btn.disabled = False def observer(_): async def _coro(): @@ -173,8 +186,12 @@ def init_min_max(self, ctx) -> None: # Create a querying module query = RangeQuery2d(column_x=col_x, column_y=col_y, scheduler=s) # Variable modules allow to dynamically modify their values; here, the query ranges - var_min = Variable(name="var_min", scheduler=s) - var_max = Variable(name="var_max", scheduler=s) + init_val_min = ({col_x: ctx.get("x_min"), col_y: ctx.get("y_min")} + if "x_min" in ctx else None) + var_min = Variable(init_val_min, name="var_min", scheduler=s) + init_val_max = ({col_x: ctx.get("x_max"), col_y: ctx.get("y_max")} + if "x_max" in ctx else None) + var_max = Variable(init_val_max, name="var_max", scheduler=s) self.var_min = var_min self.var_max = var_max query.input.lower = var_min.output.result @@ -185,9 +202,9 @@ def init_min_max(self, ctx) -> None: self.index = index sink = Sink(scheduler=s) sink.input.inp = query.output.result - # self.output_module = query - # self.output_slot = "result" - # self.output_dtypes = self.dtypes + self.output_module = query + self.output_slot = "result" + self.output_dtypes = self.dtypes if self.column_x: self.index.on_after_run(self.grid_update) return query @@ -198,12 +215,42 @@ def run(self) -> AnyType: self.output_module = self.init_min_max(content) self.output_slot = "result" + def _save_settings(self) -> None: + df = self.child.grid.df + assert self.column_x and self.column_y + slider_x = df.loc["X", "Filter"] + slider_y = df.loc["Y", "Filter"] + x_min, x_max = slider_x.value + y_min, y_max = slider_y.value + self._saved_settings = dict( + x_min=x_min, x_max=x_max, + y_min=y_min, y_max=y_max + ) + + def _freeze_btn_cb(self, btn: ipw.Button) -> None: + self.child.buttons.children[1].disabled = False + assert self.column_x and self.column_y + self._save_settings() + content = dict(X=self.column_x, Y=self.column_y) + if self._saved_settings: + content = dict(**content, **self._saved_settings) + i = self._record_index + assert i is not None + amend_nth_record(i, {"frozen": content}) + + def _unfreeze_btn_cb(self, btn: ipw.Button) -> None: + self._saved_settings = {} + self.child.buttons.children[1].disabled = True + def _start_btn_cb(self, btn: ipw.Button) -> None: assert self.column_x and self.column_y - xy = dict(X=self.column_x, Y=self.column_y) - if self.child.freeze_ck.value: - amend_last_record({"frozen": xy}) - self.init_min_max(xy) + content = dict(X=self.column_x, Y=self.column_y) + if self._saved_settings: + content = dict(**content, **self._saved_settings) + i = get_last_record_index() + assert i is not None + amend_nth_record(i, {"frozen": content}) + self.init_min_max(content) btn.disabled = True self.dag_running() self.make_chaining_box() diff --git a/ipyprogressivis/widgets/chaining/utils.py b/ipyprogressivis/widgets/chaining/utils.py index e61cf35..d88d9a3 100644 --- a/ipyprogressivis/widgets/chaining/utils.py +++ b/ipyprogressivis/widgets/chaining/utils.py @@ -321,14 +321,20 @@ def add_to_record(self, content: Dict[str, AnyType]) -> None: ) labcommand("progressivis:set_backup", backup=self.tape) - def amend_last_record(self, content: Dict[str, AnyType]) -> None: + def amend_nth_record(self, nth: int, content: Dict[str, AnyType]) -> None: unpacked = bunpack(self.tape) - current = b642json(unpacked[-1]) + current = b642json(unpacked[nth]) current.update(content) - unpacked[-1] = json2b64(current) + unpacked[nth] = json2b64(current) self.tape = ";".join(unpacked) labcommand("progressivis:set_backup", backup=self.tape) + def amend_last_record(self, content: Dict[str, AnyType]) -> None: + self.amend_nth_record(-1, content) + + def get_last_record_index(self) -> int: + return len(bunpack(self.tape)) - 1 + def get_recorder() -> Recorder: return cast(Recorder, PARAMS.get("recorder")) @@ -348,6 +354,20 @@ def amend_last_record(content: Dict[str, AnyType]) -> None: rec.amend_last_record(content) +def amend_nth_record(i: int, content: Dict[str, AnyType]) -> None: + rec = get_recorder() + if rec is None: + return + rec.amend_nth_record(i, content) + + +def get_last_record_index() -> int | None: + rec = get_recorder() + if rec is None: + return None + return rec.get_last_record_index() + + def reset_recorder(previous: str = "", init_val: str = "") -> None: if previous: PARAMS["previous_recorder"] = Recorder(previous) @@ -683,6 +703,10 @@ def get_recording_state() -> bool: return recording_state +def is_recording() -> bool: + return recording_state + + def set_recording_state(val: bool) -> None: global recording_state recording_state = val @@ -808,7 +832,7 @@ def _make_progress_bar(self) -> ipw.VBox: def _make_chaining_box(self: ChainingProtocol) -> ipw.Box: sel = ipw.Dropdown( - options=[""] + list(stage_register.keys()), + options=[""] + list(sorted(stage_register.keys())), value="", description="Next stage", disabled=False,