Skip to content

Commit 243af5e

Browse files
authored
Merge pull request #7 from marimo-team/aka/templatize-example
Templatize span comparison example
2 parents bda43b5 + 12adbae commit 243af5e

File tree

2 files changed

+88
-54
lines changed

2 files changed

+88
-54
lines changed

nlp_span_comparison/README.md

+16-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,22 @@
55
This notebook can be used as a template for comparing NLP models that predict
66
spans. Given two models and a sequence of text examples from which to extract
77
spans, the notebook presents the model predictions on each example and
8-
lets you indicate which model yielded the better prediction.
8+
lets you indicate which model yielded the better prediction. Your preferences
9+
are saved to (and loaded from) storage, letting you use this as a real tool.
10+
11+
To use this notebook for your own data, just replace the implementations
12+
of the following three functions:
13+
14+
* `load_examples`: Load your own examples (strings) from a file or database.
15+
* `model_a_predictor`: Predict a span for a given example using model A.
16+
* `model_b_predictor`: Predict a span for a given example using model B.
17+
18+
The notebook keeps track of your preferences in a JSON file. To track
19+
preferences in a different way, such as in a database, replace the implementations
20+
of the following two functions:
21+
22+
* `load_choices`
23+
* `save_choices`
924

1025
## Running this notebook
1126

nlp_span_comparison/nlp_span_comparison.py

+72-53
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import marimo
99

10-
__generated_with = "0.10.12"
10+
__generated_with = "0.10.13"
1111
app = marimo.App()
1212

1313

@@ -17,6 +17,48 @@ def _(mo):
1717
return
1818

1919

20+
@app.cell
21+
def _(textwrap, urllib):
22+
# Modify this function to load your own examples
23+
def load_examples():
    """Download the text of Hamlet and return it as a list of example strings.

    The text is split on blank lines (``"\\n\\n"``); each block is dedented,
    stripped of surrounding whitespace and truncated to at most 1000
    characters.

    Returns:
        A list of non-empty strings, one per block of the downloaded text.

    Raises:
        Whatever ``urllib.request.urlopen`` raises when the download fails
        (not handled here).
    """
    hamlet_url = "https://gist.githubusercontent.com/provpup/2fc41686eab7400b796b/raw/b575bd01a58494dfddc1d6429ef0167e709abf9b/hamlet.txt"

    with urllib.request.urlopen(hamlet_url) as response:
        hamlet = response.read().decode("utf-8")

    # Normalise every block *before* filtering: a block that contains only
    # whitespace is truthy as-is but collapses to "" once stripped, and an
    # empty example has no span to predict or annotate.
    stripped_blocks = (
        textwrap.dedent(block).strip() for block in hamlet.split("\n\n")
    )
    return [block[:1000] for block in stripped_blocks if block]
34+
35+
return (load_examples,)
36+
37+
38+
@app.cell
39+
def _(random):
40+
# Replace with your predictor for model A
41+
def model_a_predictor(text: str) -> tuple[int, int]:
    """Return a pseudo-random ``(start, end)`` span for ``text``.

    Placeholder predictor for model A: the span is drawn from a random
    generator seeded with ``len(text)``, so texts of equal length always get
    the same span.

    Args:
        text: The example to "predict" a span for.

    Returns:
        A ``(start, end)`` pair with ``0 <= start < end <= len(text) - 1``
        when ``text`` has at least two characters; ``(0, len(text))`` for
        shorter texts.
    """
    length = len(text)
    if length < 2:
        # randint(0, length - 2) would raise ValueError on an empty range;
        # fall back to a span covering whatever text there is.
        return 0, length

    # A private generator yields the same sequence as random.seed(length)
    # followed by module-level calls, without reseeding the shared RNG.
    rng = random.Random(length)
    start = rng.randint(0, length - 2)
    end = rng.randint(start + 1, length - 1)
    return start, end
46+
47+
return (model_a_predictor,)
48+
49+
50+
@app.cell
51+
def _(random):
52+
# Replace with your predictor for model B
53+
def model_b_predictor(text: str) -> tuple[int, int]:
    """Return a pseudo-random ``(start, end)`` span for ``text``.

    Placeholder predictor for model B: the span is drawn from a random
    generator seeded with ``len(text) / 2``, so texts of equal length always
    get the same span.

    Args:
        text: The example to "predict" a span for.

    Returns:
        A ``(start, end)`` pair with ``0 <= start < end <= len(text) - 1``
        when ``text`` has at least two characters; ``(0, len(text))`` for
        shorter texts.
    """
    length = len(text)
    if length < 2:
        # randint(0, length - 2) would raise ValueError on an empty range;
        # fall back to a span covering whatever text there is.
        return 0, length

    # A private generator yields the same sequence as random.seed(length / 2)
    # followed by module-level calls, without reseeding the shared RNG.
    rng = random.Random(length / 2)
    start = rng.randint(0, length - 2)
    end = rng.randint(start + 1, length - 1)
    return start, end
58+
59+
return (model_b_predictor,)
60+
61+
2062
@app.cell(hide_code=True)
2163
def _(mo):
2264
mo.md(
@@ -28,6 +70,12 @@ def _(mo):
2870
return
2971

3072

73+
@app.cell
74+
def _(load_examples):
75+
EXAMPLES = load_examples()
76+
return (EXAMPLES,)
77+
78+
3179
@app.cell
3280
def _(NUMBER_OF_EXAMPLES, mo):
3381
index = mo.ui.number(
@@ -73,28 +121,32 @@ def _(index):
73121

74122
@app.cell
75123
def _(CHOICES_PATH, get_choices, index, mo, write_choices):
76-
preference = get_choices()[index.value]["model"]
77-
mo.stop(preference is None, mo.md("**Choose the better model**.").center())
78-
write_choices(get_choices(), CHOICES_PATH)
79-
mo.md(f"You prefer **model {preference}**.").center()
80-
return (preference,)
124+
def _():
125+
preference = get_choices()[index.value]["model"]
126+
mo.stop(preference is None, mo.md("**Choose the better model**.").center())
127+
128+
write_choices(get_choices(), CHOICES_PATH)
129+
return mo.md(f"You prefer **model {preference}**.").center()
130+
131+
_()
132+
return
81133

82134

83135
@app.cell
84136
def _(annotate, mo):
85137
mo.hstack(
86138
[
87-
mo.md(annotate("Model A", [0, len("Model A")], "yellow")),
88-
mo.md(annotate("Model B", [0, len("Model B")], "lightblue")),
139+
annotate("Model A", [0, len("Model A")], "yellow"),
140+
annotate("Model B", [0, len("Model B")], "lightblue"),
89141
],
90142
justify="space-around",
91143
)
92144
return
93145

94146

95147
@app.cell
96-
def _(CHOICES_PATH, PARAGRAPHS, load_choices, mo):
97-
get_choices, set_choices = mo.state(load_choices(CHOICES_PATH, len(PARAGRAPHS)))
148+
def _(CHOICES_PATH, EXAMPLES, load_choices, mo):
149+
get_choices, set_choices = mo.state(load_choices(CHOICES_PATH, len(EXAMPLES)))
98150
return get_choices, set_choices
99151

100152

@@ -122,13 +174,13 @@ def _(index, mo, set_choices):
122174

123175

124176
@app.cell
125-
def _(PARAGRAPHS, SPANS, annotate, index, mo):
126-
model_A_prediction = mo.md(
127-
annotate(PARAGRAPHS[index.value], SPANS[index.value][0], color="yellow")
128-
)
177+
def _(EXAMPLES, annotate, index, model_a_predictor, model_b_predictor):
178+
_example = EXAMPLES[index.value]
179+
180+
model_A_prediction = annotate(_example, model_a_predictor(_example), color="yellow")
129181

130-
model_B_prediction = mo.md(
131-
annotate(PARAGRAPHS[index.value], SPANS[index.value][1], color="lightblue")
182+
model_B_prediction = annotate(
183+
_example, model_b_predictor(_example), color="lightblue"
132184
)
133185
return model_A_prediction, model_B_prediction
134186

@@ -174,34 +226,10 @@ def write_choices(choices, path):
174226

175227

176228
@app.cell
177-
def _(PARAGRAPHS, random):
178-
random.seed(0)
179-
180-
def predict_spans(text):
181-
first = [random.randint(0, len(text) - 2)]
182-
first.append(random.randint(first[0] + 1, len(text) - 1))
183-
second = [random.randint(0, len(text) - 2)]
184-
second.append(random.randint(second[0] + 1, len(text) - 1))
185-
186-
return first, second
187-
188-
SPANS = [predict_spans(p) for p in PARAGRAPHS]
189-
return SPANS, predict_spans
190-
191-
192-
@app.cell
193-
def _(HAMLET, textwrap):
194-
PARAGRAPHS = [
195-
textwrap.dedent(block).strip()[:1000] for block in HAMLET.split("\n\n") if block
196-
]
197-
return (PARAGRAPHS,)
198-
199-
200-
@app.cell
201-
def _():
229+
def _(mo):
202230
def annotate(text, span, color):
203231
mark_start = f"<mark style='background-color:{color}'>"
204-
return (
232+
return mo.md(
205233
text[: span[0]]
206234
+ mark_start
207235
+ text[span[0] : span[1]]
@@ -213,20 +241,11 @@ def annotate(text, span, color):
213241

214242

215243
@app.cell
216-
def _(PARAGRAPHS):
217-
NUMBER_OF_EXAMPLES = len(PARAGRAPHS)
244+
def _(EXAMPLES):
245+
NUMBER_OF_EXAMPLES = len(EXAMPLES)
218246
return (NUMBER_OF_EXAMPLES,)
219247

220248

221-
@app.cell
222-
def _(urllib):
223-
_hamlet_url = "https://gist.githubusercontent.com/provpup/2fc41686eab7400b796b/raw/b575bd01a58494dfddc1d6429ef0167e709abf9b/hamlet.txt"
224-
225-
with urllib.request.urlopen(_hamlet_url) as f:
226-
HAMLET = f.read().decode("utf-8")
227-
return HAMLET, f
228-
229-
230249
@app.cell
231250
def _():
232251
import marimo as mo

0 commit comments

Comments
 (0)