Skip to content

Commit ac631fa

Browse files
committed
feat: add some kanban
1 parent a7c0ca6 commit ac631fa

4 files changed

Lines changed: 601 additions & 46 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,7 @@ __marimo__/
213213
# Bub
214214
references/
215215
.agents/
216+
.bub/
216217
# Runtime-generated marimo notebooks
217218
insights/dashboard.py
218219
insights/index.py

insights/minimum_dataset_analysis.py

Lines changed: 202 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -3,76 +3,232 @@
33
# dependencies = ["marimo"]
44
# ///
55

6-
"""A compact Iris notebook used as a sample insight for Bubseek."""
6+
"""Iris Dataset Analysis — Full embedded data, English, mixed text and figures."""
7+
# marimo.App (for directory scanner)
78

89
import marimo as mo
910

10-
app = mo.App(width="medium")
11+
app = mo.App(width="full")
1112

1213

1314
@app.cell
def _():
    # Dataset cell: exposes the embedded Iris rows (`iris_data`) and the
    # marimo module (`mo`) to the other cells of the notebook.
    import marimo as mo

    # Column order of every measurement tuple below.
    _FIELDS = ("sepal_length", "sepal_width", "petal_length", "petal_width")

    # Full Iris dataset (150 samples, 50 per species) — embedded, no external fetch.
    # Each tuple is (sepal_length, sepal_width, petal_length, petal_width).
    _samples = {
        "Setosa": [
            (5.1, 3.5, 1.4, 0.2), (4.9, 3.0, 1.4, 0.2),
            (4.7, 3.2, 1.3, 0.2), (4.6, 3.1, 1.5, 0.2),
            (5.0, 3.6, 1.4, 0.2), (5.4, 3.9, 1.7, 0.4),
            (4.6, 3.4, 1.4, 0.3), (5.0, 3.4, 1.5, 0.2),
            (4.4, 2.9, 1.4, 0.2), (4.9, 3.1, 1.5, 0.1),
            (5.4, 3.7, 1.5, 0.2), (4.8, 3.4, 1.6, 0.2),
            (4.8, 3.0, 1.4, 0.1), (4.3, 3.0, 1.1, 0.1),
            (5.8, 4.0, 1.2, 0.2), (5.7, 4.4, 1.5, 0.4),
            (5.4, 3.9, 1.3, 0.4), (5.1, 3.5, 1.4, 0.3),
            (5.7, 3.8, 1.7, 0.3), (5.1, 3.8, 1.5, 0.3),
            (5.4, 3.4, 1.7, 0.2), (5.1, 3.7, 1.5, 0.4),
            (4.6, 3.6, 1.0, 0.2), (5.1, 3.3, 1.7, 0.5),
            (4.8, 3.4, 1.9, 0.2), (5.0, 3.0, 1.6, 0.2),
            (5.0, 3.4, 1.6, 0.4), (5.2, 3.5, 1.5, 0.2),
            (5.2, 3.4, 1.4, 0.2), (4.7, 3.2, 1.6, 0.2),
            (4.8, 3.1, 1.6, 0.2), (5.4, 3.4, 1.5, 0.4),
            (5.2, 4.1, 1.5, 0.1), (5.5, 4.2, 1.4, 0.2),
            (4.9, 3.1, 1.5, 0.1), (5.0, 3.2, 1.2, 0.2),
            (5.5, 3.5, 1.3, 0.2), (4.9, 3.1, 1.5, 0.1),
            (4.4, 3.0, 1.3, 0.2), (5.1, 3.4, 1.5, 0.2),
            (5.0, 3.5, 1.3, 0.3), (4.5, 2.3, 1.3, 0.3),
            (4.4, 3.2, 1.3, 0.2), (5.0, 3.5, 1.6, 0.6),
            (5.1, 3.8, 1.9, 0.4), (4.8, 3.0, 1.4, 0.3),
            (5.1, 3.8, 1.6, 0.2), (4.6, 3.2, 1.4, 0.2),
            (5.3, 3.7, 1.5, 0.2), (5.0, 3.3, 1.4, 0.2),
        ],
        "Versicolor": [
            (7.0, 3.2, 4.7, 1.4), (6.4, 3.2, 4.5, 1.5),
            (6.9, 3.1, 4.9, 1.5), (5.5, 2.3, 4.0, 1.3),
            (6.5, 2.8, 4.6, 1.5), (5.7, 2.8, 4.5, 1.3),
            (6.3, 3.3, 4.7, 1.6), (4.9, 2.4, 3.3, 1.0),
            (6.6, 2.9, 4.6, 1.3), (5.2, 2.7, 3.9, 1.4),
            (5.0, 2.0, 3.5, 1.0), (5.9, 3.0, 4.2, 1.5),
            (6.0, 2.2, 4.0, 1.0), (6.1, 2.9, 4.7, 1.4),
            (5.6, 2.9, 3.6, 1.3), (6.7, 3.1, 4.4, 1.4),
            (5.6, 3.0, 4.5, 1.5), (5.8, 2.7, 4.1, 1.0),
            (6.2, 2.2, 4.5, 1.5), (5.6, 2.5, 3.9, 1.1),
            (5.9, 3.2, 4.8, 1.8), (6.1, 2.8, 4.0, 1.3),
            (6.3, 2.5, 4.9, 1.5), (6.1, 2.8, 4.7, 1.2),
            (6.4, 2.9, 4.3, 1.3), (6.6, 3.0, 4.4, 1.4),
            (6.8, 2.8, 4.8, 1.4), (6.7, 3.0, 5.0, 1.7),
            (6.0, 2.9, 4.5, 1.5), (5.7, 2.6, 3.5, 1.0),
            (5.5, 2.4, 3.8, 1.1), (5.5, 2.4, 3.7, 1.0),
            (5.8, 2.7, 3.9, 1.2), (6.0, 2.7, 5.1, 1.6),
            (5.4, 3.0, 4.5, 1.5), (6.0, 3.4, 4.5, 1.6),
            (6.7, 3.1, 4.7, 1.5), (6.3, 2.3, 4.4, 1.3),
            (5.6, 3.0, 4.1, 1.3), (5.5, 2.5, 4.0, 1.3),
            (5.5, 2.6, 4.4, 1.2), (6.1, 3.0, 4.6, 1.4),
            (5.8, 2.6, 4.0, 1.2), (5.0, 2.3, 3.3, 1.0),
            (5.6, 2.7, 4.2, 1.3), (5.7, 3.0, 4.2, 1.2),
            (5.7, 2.9, 4.2, 1.3), (6.2, 2.9, 4.3, 1.3),
            (5.1, 2.5, 3.0, 1.1), (5.7, 2.8, 4.1, 1.3),
        ],
        "Virginica": [
            (6.3, 3.3, 6.0, 2.5), (5.8, 2.7, 5.1, 1.9),
            (7.1, 3.0, 5.9, 2.1), (6.3, 2.9, 5.6, 1.8),
            (6.5, 3.0, 5.8, 2.2), (7.6, 3.0, 6.6, 2.1),
            (4.9, 2.5, 4.5, 1.7), (7.3, 2.9, 6.3, 1.8),
            (6.7, 2.5, 5.8, 1.8), (7.2, 3.6, 6.1, 2.5),
            (6.5, 3.2, 5.1, 2.0), (6.4, 2.7, 5.3, 1.9),
            (6.8, 3.0, 5.5, 2.1), (5.7, 2.5, 5.0, 2.0),
            (5.8, 2.8, 5.1, 2.4), (6.4, 3.2, 5.3, 2.3),
            (6.5, 3.0, 5.5, 1.8), (7.7, 3.8, 6.7, 2.2),
            (7.7, 2.6, 6.9, 2.3), (6.0, 2.2, 5.0, 1.5),
            (6.9, 3.2, 5.7, 2.3), (5.6, 2.8, 4.9, 2.0),
            (7.7, 2.8, 6.7, 2.0), (6.3, 2.7, 4.9, 1.8),
            (6.7, 3.3, 5.7, 2.1), (7.2, 3.2, 6.0, 1.8),
            (6.2, 2.8, 4.8, 1.8), (6.1, 3.0, 4.9, 1.8),
            (6.4, 2.8, 5.6, 2.1), (7.2, 3.0, 5.8, 1.6),
            (7.4, 2.8, 6.1, 1.9), (7.9, 3.8, 6.4, 2.0),
            (6.4, 2.8, 5.6, 2.2), (6.3, 2.8, 5.1, 1.5),
            (6.1, 2.6, 5.6, 1.4), (7.7, 3.0, 6.1, 2.3),
            (6.3, 3.4, 5.6, 2.4), (6.4, 3.1, 5.5, 1.8),
            (6.0, 3.0, 4.8, 1.8), (6.9, 3.1, 5.4, 2.1),
            (6.7, 3.1, 5.6, 2.4), (6.9, 3.1, 5.1, 2.3),
            (5.8, 2.7, 5.1, 1.9), (6.8, 3.2, 5.9, 2.3),
            (6.7, 3.3, 5.7, 2.5), (6.7, 3.0, 5.2, 2.3),
            (6.3, 2.5, 5.0, 1.9), (6.5, 3.0, 5.2, 2.0),
            (6.2, 3.4, 5.4, 2.3), (5.9, 3.0, 5.1, 1.8),
        ],
    }

    # Expand the table into one dict per sample; the four measurements come
    # first and "species" last, which fixes the column order of any table
    # built from these rows.
    iris_data = [
        {**dict(zip(_FIELDS, _values)), "species": _label}
        for _label, _group in _samples.items()
        for _values in _group
    ]
    return (iris_data, mo)
26100

27101

28102
@app.cell
def _(iris_data, mo):
    # Intro cell: headline, a short dataset overview and an inline-SVG bar
    # chart of the number of samples per species. Exposes `intro`.
    _features = ("sepal_length", "sepal_width", "petal_length", "petal_width")
    _n = len(iris_data)

    # Samples per species, in first-seen order.
    _counts = {}
    for _r in iris_data:
        _counts[_r["species"]] = _counts.get(_r["species"], 0) + 1

    # Overall mean of every feature; an empty dataset reports 0.0 instead of
    # raising ZeroDivisionError.
    _avgs = {}
    for _k in _features:
        _avgs[_k] = sum(_row[_k] for _row in iris_data) / _n if _n else 0.0

    # `default=1` keeps max() from raising on an empty dataset.
    _max_c = max(_counts.values(), default=1)
    _pal = {"Setosa": "#e11d48", "Versicolor": "#0891b2", "Virginica": "#4f46e5"}
    _default_fill = "#64748b"

    # Chart geometry. The SVG must be wider than bar start + longest bar so
    # the count printed to the right of each bar stays inside the viewport
    # (inline <svg> clips overflow); the former fixed width of 280 put the
    # label of a full-length bar exactly on the right edge.
    _bar_x = 95
    _bar_max_w = 180
    _label_room = 45
    _svg_w = _bar_x + _bar_max_w + _label_room

    _lines = []
    _y = 28
    for _s, _c in _counts.items():
        _w = int((_c / _max_c) * _bar_max_w)
        _fill = _pal.get(_s, _default_fill)
        _lines.append(
            f'<text x="8" y="{_y}" font-size="12" fill="#334155">{_s}</text>'
            f'<rect x="{_bar_x}" y="{_y - 12}" rx="4" width="{_w}" height="18" fill="{_fill}"/>'
            f'<text x="{_bar_x + 5 + _w}" y="{_y}" font-size="11" fill="#0f172a">{_c}</text>'
        )
        _y += 32

    _svg_intro = (
        f'<svg width="{_svg_w}" height="{_y + 8}" xmlns="http://www.w3.org/2000/svg">'
        '<rect width="100%" height="100%" fill="#f8fafc"/>'
        '<text x="8" y="16" font-size="12" font-weight="bold" fill="#1e293b">Species count</text>'
        + "".join(_lines)
        + "</svg>"
    )

    _species_txt = ", ".join(f"{_name} ({_cnt})" for _name, _cnt in _counts.items())
    # Each overview line ends with two spaces before the newline: that is the
    # Markdown hard line break. A single trailing space would let the lines
    # run together into one paragraph.
    _overview = mo.md(
        "**Dataset overview**  \n"
        f"Samples: **{_n}**  \n"
        "Features: sepal length/width, petal length/width (cm)  \n"
        f"Species: **{_species_txt}**  \n"
        f"Overall means: sepal length {_avgs['sepal_length']:.2f}, "
        f"petal length {_avgs['petal_length']:.2f}."
    )

    intro = mo.vstack([
        mo.md(
            "# Iris Dataset Analysis\n\n"
            "Classic **Fisher's Iris** dataset: 150 samples (50 per species), 4 numeric features. "
            "Data is **fully embedded** in this notebook — no external fetch."
        ),
        mo.hstack([
            _overview,
            mo.Html(_svg_intro),
        ], widths=[0.55, 0.45], gap=1.0),
    ], gap=0.75)
    return (intro,)
33143

34144

35145
@app.cell
def _(iris_data, mo):
    # Data preview cell: heading, a table with the leading rows of the
    # dataset and a caption. Exposes `data_section`.
    _preview_limit = 20
    _preview = iris_data[:_preview_limit]

    # Row counts in the label and caption are derived from the data so the
    # text cannot drift from what the table actually shows.
    _shown = len(_preview)
    _total = len(iris_data)
    _caption = mo.md(
        f" *Table: first {_shown} rows of the embedded dataset ({_total} rows total).*"
    )
    data_section = mo.vstack([
        mo.md("## Data preview"),
        mo.ui.table(_preview, label=f"Iris (first {_shown})", selection=None),
        _caption,
    ], gap=0.5)
    return (data_section,)
44154

45-
for row in iris_rows:
46-
species_counts[row["species"]] = species_counts.get(row["species"], 0) + 1
47-
for feature in feature_totals:
48-
feature_totals[feature] += row[feature]
49155

50-
row_count = len(iris_rows)
51-
feature_averages = {feature: total / row_count for feature, total in feature_totals.items()}
52-
return (feature_averages, species_counts)
156+
@app.cell
def _(iris_data, mo):
    # Scatter cell: inline-SVG plot of petal length (x) against petal width
    # (y), one dot per sample coloured by species. Exposes `scatter_block`.
    _colors = {"Setosa": "#e11d48", "Versicolor": "#0891b2", "Virginica": "#4f46e5"}

    _lengths = [_row["petal_length"] for _row in iris_data]
    _widths = [_row["petal_width"] for _row in iris_data]
    _min_pl = min(_lengths)
    _max_pl = max(_lengths)
    _min_pw = min(_widths)
    _max_pw = max(_widths)

    def _scale(value, lo, hi, out_lo, out_hi):
        # Linear map of `value` from [lo, hi] onto [out_lo, out_hi]; a
        # degenerate input range collapses to out_lo.
        if hi == lo:
            return out_lo
        return out_lo + (value - lo) / (hi - lo) * (out_hi - out_lo)

    _dots = []
    for _sample in iris_data:
        _cx = _scale(_sample["petal_length"], _min_pl, _max_pl, 50, 420)
        # lo/hi are passed swapped so the largest width lands at the top
        # (SVG y grows downwards).
        _cy = _scale(_sample["petal_width"], _max_pw, _min_pw, 35, 300)
        _fill = _colors.get(_sample["species"], "#999")
        _dots.append(
            f'<circle cx="{_cx:.1f}" cy="{_cy:.1f}" r="5" fill="{_fill}" '
            'stroke="white" stroke-width="1" opacity="0.85"/>'
        )

    # One legend entry (dot + species name) per palette colour.
    _legend_parts = []
    for _idx, (_name, _hex) in enumerate(_colors.items()):
        _legend_parts.append(
            f'<circle cx="{60 + _idx * 130}" cy="318" r="4" fill="{_hex}"/>'
            f'<text x="{70 + _idx * 130}" y="322" font-size="10" fill="#334155">{_name}</text>'
        )
    _leg = "".join(_legend_parts)

    _svg = "".join([
        '<svg width="480" height="340" xmlns="http://www.w3.org/2000/svg">',
        '<rect width="100%" height="100%" fill="#f8fafc"/>',
        '<text x="160" y="18" font-size="13" font-weight="bold" fill="#1e293b">Petal length vs width</text>',
        # x axis, then y axis
        '<line x1="50" y1="300" x2="430" y2="300" stroke="#cbd5e1" stroke-width="1"/>',
        '<line x1="50" y1="35" x2="50" y2="300" stroke="#cbd5e1" stroke-width="1"/>',
        # axis extremes and x-axis title
        f'<text x="50" y="315" font-size="9" fill="#64748b">{_min_pl:.1f}</text>',
        f'<text x="415" y="315" font-size="9" fill="#64748b">{_max_pl:.1f}</text>',
        '<text x="220" y="332" font-size="10" fill="#334155">Petal length (cm)</text>',
        f'<text x="18" y="300" font-size="9" fill="#64748b">{_min_pw:.1f}</text>',
        f'<text x="18" y="40" font-size="9" fill="#64748b">{_max_pw:.1f}</text>',
        "".join(_dots),
        _leg,
        "</svg>",
    ])

    scatter_block = mo.vstack([
        mo.md("## Petal dimensions\n\nPetal length and width separate **Setosa** (small petals) from the other two species; **Versicolor** and **Virginica** overlap but remain partly separable."),
        mo.Html(_svg),
    ], gap=0.5)
    return (scatter_block,)
53182

54183

55184
@app.cell
def _(iris_data, mo):
    # Means cell: inline-SVG grouped bar chart of the mean of every feature
    # per species. Exposes `means_block`.
    _fnames = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    _labels = ["Sepal length", "Sepal width", "Petal length", "Petal width"]
    _cols = {"Setosa": "#e11d48", "Versicolor": "#0891b2", "Virginica": "#4f46e5"}
    _fallback = "#64748b"  # colour for a species missing from the palette

    # Group rows by species in first-seen order. Groups are created on
    # demand, so an unexpected species no longer raises KeyError and every
    # group holds at least one row.
    _by_species = {}
    for _r in iris_data:
        _by_species.setdefault(_r["species"], []).append(_r)

    _means = {}
    for _sp, _rows in _by_species.items():
        _means[_sp] = {_name: sum(_x[_name] for _x in _rows) / len(_rows) for _name in _fnames}

    # One group of bars per feature, 105 px apart; inside a group one 18 px
    # bar per species at a 22 px pitch. Bar height maps 0..8 cm to 0..200 px.
    _x0 = 55
    _bars = []
    for _i, (_f, _label) in enumerate(zip(_fnames, _labels)):
        _gx = _x0 + _i * 105
        # Plain label text: swapping the space for a zero-width space only
        # made the two words run together in the rendered chart.
        _g = f'<text x="{_gx + 8}" y="268" font-size="9" fill="#334155">{_label}</text>'
        # Only species that actually have means are drawn, so the lookup
        # cannot fail.
        for _j, (_sp, _m) in enumerate(_means.items()):
            _ht = int((_m[_f] / 8) * 200)
            _fill = _cols.get(_sp, _fallback)
            _g += f'<rect x="{_gx + _j * 22}" y="{248 - _ht}" width="18" height="{_ht}" fill="{_fill}" rx="2"/>'
        _bars.append(_g)

    # Legend sits on its own row under the title. The former position
    # (x = 380 + i*65 at the title's height) pushed the third entry to
    # x = 510, outside the 480 px wide SVG, where it was clipped.
    _leg_parts = []
    for _i, _s in enumerate(_means):
        _fill = _cols.get(_s, _fallback)
        _leg_parts.append(
            f'<rect x="{130 + _i * 85}" y="28" width="10" height="10" fill="{_fill}" rx="1"/>'
            f'<text x="{143 + _i * 85}" y="36" font-size="9" fill="#334155">{_s}</text>'
        )
    _leg2 = "".join(_leg_parts)

    _svg2 = (
        '<svg width="480" height="290" xmlns="http://www.w3.org/2000/svg">'
        '<rect width="100%" height="100%" fill="#f8fafc"/>'
        '<text x="140" y="18" font-size="13" font-weight="bold" fill="#1e293b">Mean feature value by species (cm)</text>'
        '<line x1="45" y1="248" x2="45" y2="48" stroke="#cbd5e1" stroke-width="1"/>'
        '<line x1="45" y1="248" x2="430" y2="248" stroke="#cbd5e1" stroke-width="1"/>'
        + "".join(_bars)
        + _leg2
        + "</svg>"
    )

    means_block = mo.vstack([
        mo.md("## Feature means by species\n\nAll four features differ across species; petal dimensions show the largest gaps and are most useful for classification."),
        mo.Html(_svg2),
    ], gap=0.5)
    return (means_block,)
69212

70213

71214
@app.cell
def _(data_section, intro, means_block, mo, scatter_block):
    # Page cell: stacks the sections built by the other cells, puts the two
    # charts side by side and closes with a summary. Exposes `page`.
    _summary_lines = [
        "- **Setosa** is linearly separable (small petals).",
        "- **Versicolor** and **Virginica** overlap; classification is harder but still feasible.",
        "- The dataset is well-suited for teaching classification (e.g. KNN, SVM, decision trees) and exploratory visualization.",
        "---",
        "*Visualizations use inline SVG; the full 150-row dataset is embedded in this notebook.*",
    ]
    # Every line but the last ends with a space followed by a newline.
    _summary = mo.md("## Summary\n\n" + " \n".join(_summary_lines))

    _charts = mo.hstack([scatter_block, means_block], widths=[0.5, 0.5], gap=1.0)
    _sections = [intro, data_section, _charts, _summary]
    page = mo.vstack(_sections, gap=1.5)
    # Bare expression: the last expression of the cell is its displayed output.
    page
    return (page,)
76232

77233

78234
if __name__ == "__main__":

0 commit comments

Comments
 (0)