Merge pull request #7 from marimo-team/aka/templatize-example

akshayka · web-flow · commit 243af5e73e47 · 2025-01-15T14:26:51.000-08:00
Templatize span comparison example
diff --git a/nlp_span_comparison/README.md b/nlp_span_comparison/README.md
@@ -5,7 +5,22 @@
 This notebook can be used as a template for comparing NLP models that predict
 spans. Given two models and a sequence of text examples from which to extract
 spans, the notebook presents the model predictions on each example and
-lets you indicate which model yielded the better prediction.
+lets you indicate which model yielded the better prediction. Your preferences
+are saved to (and loaded from) storage, letting you use this as a real tool.
+
+To use this notebook for your own data, just replace the implementations
+of the following three functions:
+
+* `load_examples`: Load your own examples (strings) from a file or database.
+* `model_a_predictor`: Predict a span for a given example using model A.
+* `model_b_predictor`: Predict a span for a given example using model B.
+
+The notebook keeps track of your preferences in a JSON file. To track
+preferences in a different way, such as in a database, replace the implementations
+of the following two functions:
+
+* `load_choices`
+* `save_choices`
 
 ## Running this notebook
 
diff --git a/nlp_span_comparison/nlp_span_comparison.py b/nlp_span_comparison/nlp_span_comparison.py
@@ -7,7 +7,7 @@
 
 import marimo
 
-__generated_with = "0.10.12"
+__generated_with = "0.10.13"
 app = marimo.App()
 
 
@@ -17,6 +17,48 @@ def _(mo):
     return
 
 
+@app.cell
+def _(textwrap, urllib):
+    # Modify this function to load your own examples
+    def load_examples():
+        hamlet_url = "https://gist.githubusercontent.com/provpup/2fc41686eab7400b796b/raw/b575bd01a58494dfddc1d6429ef0167e709abf9b/hamlet.txt"
+
+        with urllib.request.urlopen(hamlet_url) as f:
+            HAMLET = f.read().decode("utf-8")
+
+        return [
+            textwrap.dedent(block).strip()[:1000]
+            for block in HAMLET.split("\n\n")
+            if block
+        ]
+
+    return (load_examples,)
+
+
+@app.cell
+def _(random):
+    # Replace with your predictor for model A
+    def model_a_predictor(text: str) -> tuple[int, int]:
+        random.seed(len(text))
+        start = random.randint(0, len(text) - 2)
+        end = random.randint(start + 1, len(text) - 1)
+        return start, end
+
+    return (model_a_predictor,)
+
+
+@app.cell
+def _(random):
+    # Replace with your predictor for model B
+    def model_b_predictor(text: str) -> tuple[int, int]:
+        random.seed(len(text) / 2)
+        start = random.randint(0, len(text) - 2)
+        end = random.randint(start + 1, len(text) - 1)
+        return start, end
+
+    return (model_b_predictor,)
+
+
 @app.cell(hide_code=True)
 def _(mo):
     mo.md(
@@ -28,6 +70,12 @@ def _(mo):
     return
 
 
+@app.cell
+def _(load_examples):
+    EXAMPLES = load_examples()
+    return (EXAMPLES,)
+
+
 @app.cell
 def _(NUMBER_OF_EXAMPLES, mo):
     index = mo.ui.number(
@@ -73,28 +121,32 @@ def _(index):
 
 @app.cell
 def _(CHOICES_PATH, get_choices, index, mo, write_choices):
-    preference = get_choices()[index.value]["model"]
-    mo.stop(preference is None, mo.md("**Choose the better model**.").center())
-    write_choices(get_choices(), CHOICES_PATH)
-    mo.md(f"You prefer **model {preference}**.").center()
-    return (preference,)
+    def _():
+        preference = get_choices()[index.value]["model"]
+        mo.stop(preference is None, mo.md("**Choose the better model**.").center())
+
+        write_choices(get_choices(), CHOICES_PATH)
+        return mo.md(f"You prefer **model {preference}**.").center()
+
+    _()
+    return
 
 
 @app.cell
 def _(annotate, mo):
     mo.hstack(
         [
-            mo.md(annotate("Model A", [0, len("Model A")], "yellow")),
-            mo.md(annotate("Model B", [0, len("Model B")], "lightblue")),
+            annotate("Model A", [0, len("Model A")], "yellow"),
+            annotate("Model B", [0, len("Model B")], "lightblue"),
         ],
         justify="space-around",
     )
     return
 
 
 @app.cell
-def _(CHOICES_PATH, PARAGRAPHS, load_choices, mo):
-    get_choices, set_choices = mo.state(load_choices(CHOICES_PATH, len(PARAGRAPHS)))
+def _(CHOICES_PATH, EXAMPLES, load_choices, mo):
+    get_choices, set_choices = mo.state(load_choices(CHOICES_PATH, len(EXAMPLES)))
     return get_choices, set_choices
 
 
@@ -122,13 +174,13 @@ def _(index, mo, set_choices):
 
 
 @app.cell
-def _(PARAGRAPHS, SPANS, annotate, index, mo):
-    model_A_prediction = mo.md(
-        annotate(PARAGRAPHS[index.value], SPANS[index.value][0], color="yellow")
-    )
+def _(EXAMPLES, annotate, index, model_a_predictor, model_b_predictor):
+    _example = EXAMPLES[index.value]
+
+    model_A_prediction = annotate(_example, model_a_predictor(_example), color="yellow")
 
-    model_B_prediction = mo.md(
-        annotate(PARAGRAPHS[index.value], SPANS[index.value][1], color="lightblue")
+    model_B_prediction = annotate(
+        _example, model_b_predictor(_example), color="lightblue"
     )
     return model_A_prediction, model_B_prediction
 
@@ -174,34 +226,10 @@ def write_choices(choices, path):
 
 
 @app.cell
-def _(PARAGRAPHS, random):
-    random.seed(0)
-
-    def predict_spans(text):
-        first = [random.randint(0, len(text) - 2)]
-        first.append(random.randint(first[0] + 1, len(text) - 1))
-        second = [random.randint(0, len(text) - 2)]
-        second.append(random.randint(second[0] + 1, len(text) - 1))
-
-        return first, second
-
-    SPANS = [predict_spans(p) for p in PARAGRAPHS]
-    return SPANS, predict_spans
-
-
-@app.cell
-def _(HAMLET, textwrap):
-    PARAGRAPHS = [
-        textwrap.dedent(block).strip()[:1000] for block in HAMLET.split("\n\n") if block
-    ]
-    return (PARAGRAPHS,)
-
-
-@app.cell
-def _():
+def _(mo):
     def annotate(text, span, color):
         mark_start = f"<mark style='background-color:{color}'>"
-        return (
+        return mo.md(
             text[: span[0]]
             + mark_start
             + text[span[0] : span[1]]
@@ -213,20 +241,11 @@ def annotate(text, span, color):
 
 
 @app.cell
-def _(PARAGRAPHS):
-    NUMBER_OF_EXAMPLES = len(PARAGRAPHS)
+def _(EXAMPLES):
+    NUMBER_OF_EXAMPLES = len(EXAMPLES)
     return (NUMBER_OF_EXAMPLES,)
 
 
-@app.cell
-def _(urllib):
-    _hamlet_url = "https://gist.githubusercontent.com/provpup/2fc41686eab7400b796b/raw/b575bd01a58494dfddc1d6429ef0167e709abf9b/hamlet.txt"
-
-    with urllib.request.urlopen(_hamlet_url) as f:
-        HAMLET = f.read().decode("utf-8")
-    return HAMLET, f
-
-
 @app.cell
 def _():
     import marimo as mo