Skip to content

Commit

Permalink
add quantiles, fixes, improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
xtianpoli committed Nov 14, 2024
1 parent 13ea783 commit ee03527
Show file tree
Hide file tree
Showing 5 changed files with 239 additions and 52 deletions.
4 changes: 3 additions & 1 deletion ipyprogressivis/widgets/chaining/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .any_vega import AnyVegaW
from .code_cell import CodeCellW
from .range_query_2d import RangeQuery2DW
from .quantiles import QuantilesW
__all__ = [
"Constructor",
"DescStatsW",
Expand All @@ -32,5 +33,6 @@
"HeatmapW",
"AnyVegaW",
"CodeCellW",
"RangeQuery2DW"
"RangeQuery2DW",
"QuantilesW"
]
60 changes: 43 additions & 17 deletions ipyprogressivis/widgets/chaining/heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
VBoxTyped,
TypedBase,
needs_dtypes,
amend_last_record,
get_recording_state,
replay_next,
runner,
)
import ipywidgets as ipw
from progressivis.core.api import Module
from progressivis.vis.heatmap import Heatmap
from progressivis.stats.api import Histogram2D, Min, Max
from progressivis import Quantiles
from typing import Any as AnyType

WidgetType = AnyType
Expand All @@ -22,18 +21,35 @@
DIM = 512


def make_float(
    description: str = "",
    disabled: bool = False,
    value: float = 0.0,
    *,
    min_value: float = 0.0,
    max_value: float = 1.0,
    step: float = 0.001,
) -> ipw.BoundedFloatText:
    """Create a bounded float text field.

    With the default bounds the field accepts values in ``[0.0, 1.0]`` with a
    step of ``0.001``, which is how the heatmap widget uses it for its
    "Min:" / "Max:" quantile inputs.

    Args:
        description: Label displayed next to the field.
        disabled: Whether the field is created read-only.
        value: Initial value of the field.
        min_value: Lower bound passed to the widget as ``min``.
        max_value: Upper bound passed to the widget as ``max``.
        step: Increment passed to the widget as ``step``.

    Returns:
        The configured ``ipywidgets.BoundedFloatText`` instance.
    """
    return ipw.BoundedFloatText(
        value=value,
        min=min_value,
        max=max_value,
        step=step,
        description=description,
        disabled=disabled,
        layout={"width": "initial"},
    )


class HeatmapW(VBoxTyped):
class Typed(TypedBase):
choice_x: ipw.Dropdown
choice_y: ipw.Dropdown
freeze_ck: ipw.Checkbox
# freeze_ck: ipw.Checkbox
min_q: ipw.BoundedFloatText | ipw.Label
max_q: ipw.BoundedFloatText | ipw.Label
start_btn: ipw.Button
image: ipw.Image | ipw.Label

def __init__(self) -> None:
super().__init__()
self.column_x: str = ""
self.column_y: str = ""
self.has_quantiles: bool = False

def obs_columns(self, change: dict[str, AnyType]) -> None:
if self.child.choice_x.value and self.child.choice_y.value:
Expand Down Expand Up @@ -69,11 +85,14 @@ def initialize(self) -> None:
# layout={"width": "initial"},
)
self.child.choice_y.observe(self.obs_columns, "value")
self.child.image = ipw.Label()
is_rec = get_recording_state()
self.child.freeze_ck = ipw.Checkbox(
description="Freeze", value=is_rec, disabled=(not is_rec)
self.has_quantiles = isinstance(self.input_module, Quantiles)
self.child.min_q = (
make_float("Min:", value=0.03) if self.has_quantiles else ipw.Label()
)
self.child.max_q = (
make_float("Max:", value=0.97) if self.has_quantiles else ipw.Label()
)
self.child.image = ipw.Label()
self.child.start_btn = make_button(
"Start", cb=self._start_btn_cb, disabled=True
)
Expand All @@ -82,8 +101,8 @@ def initialize(self) -> None:
def _start_btn_cb(self, btn: ipw.Button) -> None:
assert self.column_x and self.column_y
xy = dict(X=self.column_x, Y=self.column_y)
if self.child.freeze_ck.value:
amend_last_record({"frozen": xy})
# if self.child.freeze_ck.value:
# amend_last_record({"frozen": xy})
self.init_heatmap(xy)
btn.disabled = True

Expand All @@ -93,17 +112,24 @@ def init_heatmap(self, ctx) -> None:
print("XY", ctx)
self.child.image = ipw.Image(value=b"\x00", width=DIM, height=DIM)
s = self.input_module.scheduler()
query = self.input_module
query = quantiles = self.input_module
with s:
histogram2d = Histogram2D(col_x, col_y, xbins=DIM, ybins=DIM, scheduler=s)
# Connect the module to the csv results and the min,max bounds to rescale
histogram2d.input.table = query.output.result
min_ = Min(scheduler=s)
min_.input.table = query.output.result[col_x, col_y]
max_ = Max(scheduler=s)
max_.input.table = query.output.result[col_x, col_y]
histogram2d.input.min = min_.output.result
histogram2d.input.max = max_.output.result
if self.has_quantiles:
histogram2d.input.table = quantiles.output.table
histogram2d.input.min = quantiles.output.result[self.child.min_q.value]
histogram2d.input.max = quantiles.output.result[self.child.max_q.value]
else:
histogram2d.input.table = query.output.result
min_ = Min(scheduler=s)
min_.input.table = query.output.result[col_x, col_y]
max_ = Max(scheduler=s)
max_.input.table = query.output.result[col_x, col_y]
histogram2d.input.min = min_.output.result
histogram2d.input.max = max_.output.result
# histogram2d.input.min = query.output.min
# histogram2d.input.max = query.output.max
# Create a module that builds a heatmap image from the histogram2d
heatmap = Heatmap(scheduler=s)
# Connect it to the histogram2d
Expand Down
88 changes: 88 additions & 0 deletions ipyprogressivis/widgets/chaining/quantiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# type: ignore
from .utils import (
make_button,
stage_register,
VBoxTyped,
TypedBase,
amend_last_record,
is_recording,
runner,
needs_dtypes,
)
import ipywidgets as ipw
from progressivis import (
Quantiles
)
from progressivis.core.api import Sink
from typing import Any as AnyType

WidgetType = AnyType


class QuantilesW(VBoxTyped):
    """Chaining widget that plugs a ``Quantiles`` module after the input module.

    The user selects one or more numeric columns and presses "Start". A
    ``Quantiles`` module is then created on the input module's scheduler, fed
    with the selected columns, and published as this stage's output module
    (slot ``"result"``).
    """

    class Typed(TypedBase):
        selection: ipw.SelectMultiple
        freeze_ck: ipw.Checkbox
        start_btn: ipw.Button

    @needs_dtypes
    def initialize(self) -> None:
        """Build the column selector, the "Freeze" checkbox and the "Start" button."""
        self.output_dtypes = self.dtypes
        self.col_types = {k: str(t) for (k, t) in self.dtypes.items()}
        # Maps the displayed "name:type" label to its (name, type) pair.
        self.col_typed_names = {f"{n}:{t}": (n, t) for (n, t) in self.col_types.items()}
        # Only columns whose dtype name starts with "float" or "int" are offered.
        # Each option is a (displayed label, column name) pair.
        num_cols = [
            (label, name)
            for (label, (name, type_name)) in self.col_typed_names.items()
            if type_name.startswith(("float", "int"))
        ]
        self.child.selection = ipw.SelectMultiple(
            options=num_cols,
            value=[],
            rows=10,
            description="Columns:",
            disabled=False,
        )
        self.child.selection.observe(self._selection_cb, "value")
        # The checkbox is ticked and editable only while recording.
        is_rec = is_recording()
        self.child.freeze_ck = ipw.Checkbox(
            description="Freeze", value=is_rec, disabled=(not is_rec)
        )
        self.child.start_btn = make_button(
            "Start", cb=self._start_btn_cb, disabled=True
        )

    @runner
    def run(self) -> AnyType:
        """Rebuild the quantiles module from the recorded ("frozen") column list."""
        content = self.frozen_kw
        self.output_module = self.init_quantiles(content)
        self.output_slot = "result"

    def init_quantiles(self, content: list[str]) -> Quantiles:
        """Create the ``Quantiles`` module and publish it as this stage's output.

        Args:
            content: Names of the columns to feed into the module. Any
                sequence works (the selection widget supplies a tuple).

        Returns:
            The newly created ``Quantiles`` module.
        """
        s = self.input_module.scheduler()
        with s:
            quantiles = Quantiles(scheduler=s)
            # ``x[tuple(content)]`` is what ``x[*content]`` evaluates to, but it
            # also parses on interpreters older than Python 3.11.
            quantiles.input.table = self.input_module.output.result[tuple(content)]
            # Both output slots get a Sink consumer.
            # NOTE(review): presumably so each slot has a consumer before a
            # downstream stage is chained -- confirm.
            sink = Sink(scheduler=s)
            sink.input.inp = quantiles.output.result
            sink2 = Sink(scheduler=s)
            sink2.input.inp = quantiles.output.table
            self.output_module = quantiles
            self.output_slot = "result"
            self.output_dtypes = self.dtypes
            return quantiles

    def _selection_cb(self, change: AnyType) -> None:
        """Enable the "Start" button only while at least one column is selected."""
        self.child.start_btn.disabled = not change["new"]

    def _start_btn_cb(self, btn: ipw.Button) -> None:
        """Start the quantiles computation for the currently selected columns.

        The selection is written to the last record only when the "Freeze"
        checkbox is ticked; the checkbox was previously created but ignored.
        """
        content = self.child.selection.value
        if self.child.freeze_ck.value:
            amend_last_record({"frozen": content})
        # init_quantiles() already stores the module in self.output_module.
        self.init_quantiles(content)
        btn.disabled = True
        self.child.selection.disabled = True
        self.dag_running()
        self.make_chaining_box()
        self.manage_replay()


# Register QuantilesW in the stage registry under the "Quantiles" key.
stage_register["Quantiles"] = QuantilesW
107 changes: 77 additions & 30 deletions ipyprogressivis/widgets/chaining/range_query_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
stage_register,
VBoxTyped,
TypedBase,
amend_last_record,
get_recording_state,
amend_nth_record,
get_last_record_index,
is_recording,
runner,
needs_dtypes,
)
Expand All @@ -27,15 +28,27 @@
class RangeQuery2DW(VBoxTyped):
class Typed(TypedBase):
grid: DataFrameGrid
freeze_ck: ipw.Checkbox
start_btn: ipw.Button
unfilter_btn: ipw.Button
buttons: ipw.HBox

def __init__(self) -> None:
super().__init__()
self.column_x: str = ""
self.column_y: str = ""
self.index: BinningIndexND | None = None
self._saved_settings: dict[str, float] = {}
self._record_index: int | None = None
self._freeze_btn = make_button(
"Freeze", cb=self._freeze_btn_cb, disabled=True
)
self._unfreeze_btn = make_button(
"Unfreeze", cb=self._unfreeze_btn_cb, disabled=True
)
self._start_btn = make_button(
"Start", cb=self._start_btn_cb, disabled=True
)
self._unfilter_btn = make_button(
"Unfilter", cb=self._unfilter_btn_cb, disabled=True
)

@needs_dtypes
def initialize(self) -> None:
Expand Down Expand Up @@ -88,28 +101,28 @@ def initialize(self) -> None:
repeat="100px",
sizes={"Column": "200px", "Filter": "200px"},
)
is_rec = get_recording_state()
self.child.freeze_ck = ipw.Checkbox(
description="Freeze", value=is_rec, disabled=(not is_rec)
)
self.child.start_btn = make_button(
"Start", cb=self._start_btn_cb, disabled=True
)
self.child.unfilter_btn = make_button(
"Unfilter", cb=self._unfilter_btn_cb, disabled=True
)
# self.input_module.on_after_run(self.refresh)
# self.init_min_max()
self._freeze_btn.disabled = not is_recording()
self.child.grid.observe_col("Column", self.obs_columns)
self.child.buttons = ipw.HBox([self._freeze_btn, self._unfreeze_btn,
self._start_btn, self._unfilter_btn])
self.reset_buttons()
if is_recording():
self._record_index = get_last_record_index() + 1

def reset_buttons(self) -> None:
self._freeze_btn.disabled = True
self._unfreeze_btn.disabled = True
self._saved_settings = {}

def obs_columns(self, change: dict[str, AnyType]) -> None:
df = self.child.grid.df
if df.loc["X", "Column"].value and df.loc["Y", "Column"].value:
self.child.start_btn.disabled = False
self._start_btn.disabled = False
self._freeze_btn.disabled = not is_recording()
self.column_x: str = df.loc["X", "Column"].value.split(":")[0]
self.column_y: str = df.loc["Y", "Column"].value.split(":")[0]
else:
self.child.start_btn.disabled = True
self._start_btn.disabled = True

def grid_update(self, m: Module, run_number: int) -> None:
df = self.child.grid.df
Expand Down Expand Up @@ -140,12 +153,12 @@ def grid_update(self, m: Module, run_number: int) -> None:
else:
slider_y.min = min_y
slider_y.max = max_y
if self.child.unfilter_btn.disabled:
if self._unfilter_btn.disabled:
slider_x.value = [min_x, max_x]
slider_x.step = (max_x - min_x) / 10
slider_y.value = [min_y, max_y]
slider_y.step = (max_y - min_y) / 10
self.child.unfilter_btn.disabled = False
self._unfilter_btn.disabled = False

def observer(_):
async def _coro():
Expand Down Expand Up @@ -173,8 +186,12 @@ def init_min_max(self, ctx) -> None:
# Create a querying module
query = RangeQuery2d(column_x=col_x, column_y=col_y, scheduler=s)
# Variable modules allow their values to be modified dynamically; here, they hold the query ranges
var_min = Variable(name="var_min", scheduler=s)
var_max = Variable(name="var_max", scheduler=s)
init_val_min = ({col_x: ctx.get("x_min"), col_y: ctx.get("y_min")}
if "x_min" in ctx else None)
var_min = Variable(init_val_min, name="var_min", scheduler=s)
init_val_max = ({col_x: ctx.get("x_max"), col_y: ctx.get("y_max")}
if "x_max" in ctx else None)
var_max = Variable(init_val_max, name="var_max", scheduler=s)
self.var_min = var_min
self.var_max = var_max
query.input.lower = var_min.output.result
Expand All @@ -185,9 +202,9 @@ def init_min_max(self, ctx) -> None:
self.index = index
sink = Sink(scheduler=s)
sink.input.inp = query.output.result
# self.output_module = query
# self.output_slot = "result"
# self.output_dtypes = self.dtypes
self.output_module = query
self.output_slot = "result"
self.output_dtypes = self.dtypes
if self.column_x:
self.index.on_after_run(self.grid_update)
return query
Expand All @@ -198,12 +215,42 @@ def run(self) -> AnyType:
self.output_module = self.init_min_max(content)
self.output_slot = "result"

def _save_settings(self) -> None:
df = self.child.grid.df
assert self.column_x and self.column_y
slider_x = df.loc["X", "Filter"]
slider_y = df.loc["Y", "Filter"]
x_min, x_max = slider_x.value
y_min, y_max = slider_y.value
self._saved_settings = dict(
x_min=x_min, x_max=x_max,
y_min=y_min, y_max=y_max
)

def _freeze_btn_cb(self, btn: ipw.Button) -> None:
self.child.buttons.children[1].disabled = False
assert self.column_x and self.column_y
self._save_settings()
content = dict(X=self.column_x, Y=self.column_y)
if self._saved_settings:
content = dict(**content, **self._saved_settings)
i = self._record_index
assert i is not None
amend_nth_record(i, {"frozen": content})

def _unfreeze_btn_cb(self, btn: ipw.Button) -> None:
self._saved_settings = {}
self.child.buttons.children[1].disabled = True

def _start_btn_cb(self, btn: ipw.Button) -> None:
assert self.column_x and self.column_y
xy = dict(X=self.column_x, Y=self.column_y)
if self.child.freeze_ck.value:
amend_last_record({"frozen": xy})
self.init_min_max(xy)
content = dict(X=self.column_x, Y=self.column_y)
if self._saved_settings:
content = dict(**content, **self._saved_settings)
i = get_last_record_index()
assert i is not None
amend_nth_record(i, {"frozen": content})
self.init_min_max(content)
btn.disabled = True
self.dag_running()
self.make_chaining_box()
Expand Down
Loading

0 comments on commit ee03527

Please sign in to comment.