pyviz · Dec 2, 2023
diff --git a/‎.gitignore
+2 b/‎.gitignore
+2
diff --git a/‎README.md
+47-1 b/‎README.md
+47-1
diff --git a/‎sampling-dashboard/README.md
+26 b/‎sampling-dashboard/README.md
+26
diff --git a/‎sampling-dashboard/dash/app.py
+86 b/‎sampling-dashboard/dash/app.py
+86
diff --git a/‎sampling-dashboard/dash/nyc-taxi.csv
+1,000 b/‎sampling-dashboard/dash/nyc-taxi.csv
+1,000
diff --git a/‎sampling-dashboard/dash/plots.py
+34 b/‎sampling-dashboard/dash/plots.py
+34
diff --git a/‎sampling-dashboard/gradio/app.py
+64 b/‎sampling-dashboard/gradio/app.py
+64
diff --git a/‎sampling-dashboard/gradio/nyc-taxi.csv
+1,000 b/‎sampling-dashboard/gradio/nyc-taxi.csv
+1,000
diff --git a/‎sampling-dashboard/gradio/plots.py
+34 b/‎sampling-dashboard/gradio/plots.py
+34
diff --git a/‎sampling-dashboard/panel/app.py
+44 b/‎sampling-dashboard/panel/app.py
+44
diff --git a/‎sampling-dashboard/panel/nyc-taxi.csv
+1,000 b/‎sampling-dashboard/panel/nyc-taxi.csv
+1,000
diff --git a/‎sampling-dashboard/panel/plots.py
+34 b/‎sampling-dashboard/panel/plots.py
+34
diff --git a/‎sampling-dashboard/requirements.txt
+9 b/‎sampling-dashboard/requirements.txt
+9
diff --git a/‎sampling-dashboard/shiny/app.py
+42 b/‎sampling-dashboard/shiny/app.py
+42
diff --git a/‎sampling-dashboard/shiny/nyc-taxi.csv
+1,000 b/‎sampling-dashboard/shiny/nyc-taxi.csv
+1,000
diff --git a/‎sampling-dashboard/shiny/plots.py
+34 b/‎sampling-dashboard/shiny/plots.py
+34
diff --git a/‎sampling-dashboard/streamlit/app.py
+42 b/‎sampling-dashboard/streamlit/app.py
+42
diff --git a/‎sampling-dashboard/streamlit/nyc-taxi.csv
+1,000 b/‎sampling-dashboard/streamlit/nyc-taxi.csv
+1,000
diff --git a/‎sampling-dashboard/streamlit/plots.py
+34 b/‎sampling-dashboard/streamlit/plots.py
+34
@@ -158,3 +158,5 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+.DS_Store
@@ -1 +1,47 @@
-# python-data-app-challenge
+# Data App Comparison
+
+This repo illustrates some differences between various web application frameworks.
+The purpose is to provide minimal, concrete examples of how to accomplish common development tasks in various Python web application frameworks, and to use those examples to help people learn their APIs.
+The frameworks we have so far are:
+
+-   [Dash](https://plotly.com/dash/)
+
+-   [Gradio](https://www.gradio.app/)
+
+-   [Panel](https://panel.holoviz.org/reference/index.html)
+
+-   [Shiny](https://shiny.posit.co/)
+
+-   [Streamlit](https://streamlit.io/)
+
+## Running the examples
+
+Navigate to the example folder and install the dependencies in a virtual environment with
+
+``` bash
+pip install -r requirements.txt
+```
+
+| Framework | Command                |
+|-----------|------------------------|
+| Dash      | `python app.py`        |
+| Gradio    | `python app.py`        |
+| Panel     | `panel serve app.py`   |
+| Streamlit | `streamlit run app.py` |
+| Shiny     | `shiny run app.py`     |
+
+# Submitting a new problem
+
+Please raise an issue to discuss and clarify the problem statement, and then submit a pull request with the problem statement in a README file.
+Ideally problems should have the following qualities:
+
+-   Problems should be small and clear
+
+-   Successful apps should stand alone and not require external APIs or system setup
+
+-   Problems should focus on the capabilities of the web framework
+
+-   For inspiration see [7guis](https://eugenkiss.github.io/7guis/) or [TodoMVC](https://todomvc.com/)
+
+# Submitting a new solution
+
+We want only one solution per framework, but please submit PRs with either solutions from a new framework, or improvements to the existing solution.
+Your solution should focus on the framework's capabilities, and ideally have fairly few dependencies.
+For example it's not a good idea to include a lot of JavaScript code in your Streamlit solution because that will tell the reader more about how to do something in JavaScript than it will about what they can do in Streamlit.
@@ -0,0 +1,26 @@
+# Problem description
+
+This exercise illustrates the common problem of sampling from a dataset and interrogating that dataset with matplotlib plots.
+You could imagine the sample being taken from a database or a larger-than-memory dataset, but in this case it's based on a small sample of the NYC Taxi data.
+
+## Requirements
+
+1.  The application should have the following components:
+
+    -   A proportion input which selects the proportion of the dataset to sample
+
+    -   A log-scale input which selects whether the tip plot is on a log scale
+
+    -   A plot showing the relationship between tips and prices
+
+    -   A plot showing a histogram of prices
+
+2.  The app should use matplotlib plots (which can be found in `plots.py`)
+
+3.  The histogram plot should not rerender if the log-scale selector is changed
+
+4.  The sample should only be retaken if the proportion slider changes
+
+5.  Each time the proportion slider changes the app should take a new sample
+
+# 
@@ -0,0 +1,86 @@
+import dash
+import dash_bootstrap_components as dbc
+import pandas as pd
+import plotly.express as px
+from dash import Input, Output, dcc, html
+
+# Dash application instance, styled with the Bootstrap theme shipped by
+# dash_bootstrap_components.
+app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
+# the style arguments for the sidebar. We use position:fixed and a fixed width
+SIDEBAR_STYLE = {
+    "position": "fixed",
+    "top": 0,
+    "left": 0,
+    "bottom": 0,
+    "width": "16rem",
+    "padding": "2rem 1rem",
+    "background-color": "#f8f9fa",
+}
+
+# Style for the main content area. The 18rem left margin matches the sidebar's
+# 16rem width plus its 1rem horizontal padding on each side.
+CONTENT_STYLE = {
+    "margin-left": "18rem",
+    "margin-right": "2rem",
+    "padding": "2rem 1rem",
+}
+
+# Sidebar holding the two user controls: the sample proportion (0 to 1 in
+# steps of 0.01) and the scale used by the scatter plot.
+sidebar = html.Div(
+    children=[
+        dcc.Input(id="sample", type="number", min=0, max=1, value=0.1, step=0.01),
+        html.Div("Plot scale"),
+        # Preselect "Linear" so the radio group shows the scale the scatter
+        # plot actually starts with (any value other than "Log" is linear).
+        dcc.RadioItems(["Linear", "Log"], value="Linear", id="scale"),
+    ],
+    style=SIDEBAR_STYLE,
+)
+
+# Main content area: a text line for the first taxi id, the two graphs, and a
+# dcc.Store that holds the serialized sample read by the callbacks below.
+content = html.Div(
+    id="page-content",
+    style=CONTENT_STYLE,
+    children=[
+        html.Div(id="max-value", style={"padding-top": "50px"}),
+        dcc.Graph(id="scatter-plot"),
+        dcc.Graph(id="histogram"),
+        dcc.Store(id="sampled-dataset"),
+    ],
+)
+
+# Page layout: URL location component, fixed sidebar and main content.
+# NOTE(review): no callback in this file reads the "url" component.
+app.layout = html.Div([dcc.Location(id="url"), sidebar, content])
+
+
+@app.callback(Output("sampled-dataset", "data"), Input("sample", "value"))
+def cache_dataset(sample):
+    """Draw a fresh random sample of the taxi data and store it as JSON.
+
+    Only the sample input triggers this callback, so the sample is retaken
+    exactly when the proportion changes. The plot callbacks read the stored
+    JSON instead of sampling themselves.
+
+    Args:
+        sample: Proportion of rows to keep, or ``None`` when the input holds
+            no usable number (for example while the field is cleared).
+
+    Returns:
+        The sampled frame serialized with ``orient="split"``, or
+        ``dash.no_update`` when there is no proportion to sample with.
+    """
+    # DataFrame.sample(frac=None) falls back to returning a single row, which
+    # would silently replace the sample; keep the previous one instead.
+    if sample is None:
+        return dash.no_update
+
+    df = pd.read_csv("nyc-taxi.csv")
+    df = df.sample(frac=sample)
+
+    # To cache data in this way we need to serialize it to JSON
+    return df.to_json(date_format="iso", orient="split")
+
+
+@app.callback(Output("max-value", "children"), Input("sampled-dataset", "data"))
+def update_max_value(sampled_df):
+    """Show the taxi id of the first row of the stored sample.
+
+    Args:
+        sampled_df: JSON string (``orient="split"``) held in the
+            ``sampled-dataset`` store.
+
+    Returns:
+        The text for the ``max-value`` element; ``NA`` stands in for the id
+        when the sample has no rows.
+    """
+    from io import StringIO
+
+    # Passing a literal JSON string to read_json is deprecated; wrap it in a
+    # file-like object instead.
+    df = pd.read_json(StringIO(sampled_df), orient="split")
+    # A proportion of 0 is allowed by the input and yields an empty sample,
+    # for which iloc[0] would raise IndexError.
+    if df.empty:
+        return "First taxi id: NA"
+    return f'First taxi id: {df["taxi_id"].iloc[0]}'
+
+
+@app.callback(
+    Output("scatter-plot", "figure"),
+    Input("sampled-dataset", "data"),
+    Input("scale", "value"),
+)
+def update_scatter(sampled_df, scale):
+    """Redraw the tip-versus-total scatter plot.
+
+    Runs when either the stored sample or the scale selector changes.
+
+    Args:
+        sampled_df: JSON string (``orient="split"``) held in the
+            ``sampled-dataset`` store.
+        scale: Selected radio option; only ``"Log"`` switches both axes to
+            a log scale.
+
+    Returns:
+        A Plotly scatter figure of ``tip_amount`` against ``total_amount``.
+    """
+    from io import StringIO
+
+    # Passing a literal JSON string to read_json is deprecated; wrap it in a
+    # file-like object instead.
+    df = pd.read_json(StringIO(sampled_df), orient="split")
+    use_log = scale == "Log"
+    fig = px.scatter(
+        df, x="total_amount", y="tip_amount", log_x=use_log, log_y=use_log
+    )
+    fig.update_layout(transition_duration=500)
+    return fig
+
+
+@app.callback(Output("histogram", "figure"), Input("sampled-dataset", "data"))
+def update_histogram(sampled_df):
+    """Redraw the histogram of total amounts.
+
+    The stored sample is the only input, so changing the scale selector does
+    not trigger this callback.
+
+    Args:
+        sampled_df: JSON string (``orient="split"``) held in the
+            ``sampled-dataset`` store.
+
+    Returns:
+        A Plotly histogram figure of ``total_amount``.
+    """
+    from io import StringIO
+
+    # Passing a literal JSON string to read_json is deprecated; wrap it in a
+    # file-like object instead.
+    df = pd.read_json(StringIO(sampled_df), orient="split")
+    fig = px.histogram(df, x="total_amount")
+    fig.update_layout(transition_duration=500)
+    return fig
+
+
+if __name__ == "__main__":
+    # Start the development server. `run` is the current entry point;
+    # `run_server` is its legacy alias and is not available in newer Dash
+    # releases.
+    app.run(debug=True)
@@ -0,0 +1,34 @@
+from matplotlib.pyplot import close
+from plotnine import (
+    aes,
+    geom_histogram,
+    geom_point,
+    ggplot,
+    scale_x_log10,
+    scale_y_log10,
+    theme_bw,
+)
+
+
+def plot_tips(sampled_data, log, color="black"):
+    """Draw a scatter plot of total amount against tip amount.
+
+    Args:
+        sampled_data: Data frame with ``tip_amount`` and ``total_amount``
+            columns.
+        log: If truthy, put both axes on a log10 scale.
+        color: Colour of the points.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes("tip_amount", "total_amount"))
+        + geom_point(color=color)
+        + theme_bw()
+    )
+    if log:
+        plot = plot + scale_x_log10() + scale_y_log10()
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
+
+
+def plot_hist(sampled_data, color="black"):
+    """Draw a histogram of total amounts using bins of width 5.
+
+    Args:
+        sampled_data: Data frame with a ``total_amount`` column.
+        color: Outline and fill colour of the bars.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes(x="total_amount"))
+        + geom_histogram(binwidth=5, color=color, fill=color)
+        + theme_bw()
+    )
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
@@ -0,0 +1,64 @@
+import time
+
+from pandas import read_csv
+from plotnine import (
+    aes,
+    geom_histogram,
+    geom_point,
+    ggplot,
+    scale_x_log10,
+    scale_y_log10,
+    theme_bw,
+)
+
+import gradio as gr
+
+# Full dataset, read once at import time; every sample is drawn from it.
+taxi = read_csv("nyc-taxi.csv")
+
+
+def sample_data(slider):
+    """Take a new random sample of the taxi data.
+
+    Args:
+        slider: Proportion of rows to keep, as reported by the slider.
+
+    Returns:
+        A mapping from the ``sampled_data`` state component to the sampled
+        frame, which Gradio uses to update that component.
+    """
+    # NOTE(review): the one-second pause presumably stands in for a slow data
+    # source - confirm before removing.
+    time.sleep(1)
+    return {sampled_data: taxi.sample(frac=slider)}
+
+
+def plot_tips(sampled_data, log):
+    """Draw a scatter plot of total amount against tip amount.
+
+    Args:
+        sampled_data: Data frame with ``tip_amount`` and ``total_amount``
+            columns.
+        log: If truthy, put both axes on a log10 scale.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    from matplotlib.pyplot import close
+
+    plot = (
+        ggplot(sampled_data, aes("tip_amount", "total_amount"))
+        + geom_point()
+        + theme_bw()
+    )
+    if log:
+        plot = plot + scale_x_log10() + scale_y_log10()
+    fig = plot.draw()
+    # Release the figure from pyplot's registry; otherwise every render
+    # leaves another open figure behind for the life of the process.
+    close(fig)
+    return fig
+
+
+def plot_hist(sampled_data):
+    """Draw a histogram of total amounts using bins of width 5.
+
+    Args:
+        sampled_data: Data frame with a ``total_amount`` column.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    from matplotlib.pyplot import close
+
+    plot = (
+        ggplot(sampled_data, aes(x="total_amount"))
+        + geom_histogram(binwidth=5)
+        + theme_bw()
+    )
+    fig = plot.draw()
+    # Release the figure from pyplot's registry; otherwise every render
+    # leaves another open figure behind for the life of the process.
+    close(fig)
+    return fig
+
+
+# Layout and event wiring: a narrow column with the controls next to a wide
+# column with the two plots.
+with gr.Blocks() as demo:
+    # Holds the current sample between events; it starts out as None.
+    # NOTE(review): toggling the checkbox before the slider has ever changed
+    # would pass None to plot_tips - confirm whether an initial load event is
+    # needed.
+    sampled_data = gr.State(None)
+    with gr.Row():
+        with gr.Column(scale=2):
+            slider = gr.Slider(0, 1, value=0.1, step=0.01)
+            log_scale = gr.Checkbox(label="Log Scale")
+        with gr.Column(scale=10):
+            tip_plot = gr.Plot()
+            hist_plot = gr.Plot()
+
+    # A slider change resamples first, then redraws both plots from the new
+    # sample.
+    slider.change(sample_data, [slider], [sampled_data]).then(
+        plot_tips, [sampled_data, log_scale], [tip_plot]
+    ).then(plot_hist, [sampled_data], [hist_plot])
+
+    # A checkbox change only redraws the tip plot from the stored sample; the
+    # histogram and the sample itself are left alone.
+    log_scale.change(plot_tips, [sampled_data, log_scale], [tip_plot])
+
+
+# Launch the app when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()
@@ -0,0 +1,34 @@
+from matplotlib.pyplot import close
+from plotnine import (
+    aes,
+    geom_histogram,
+    geom_point,
+    ggplot,
+    scale_x_log10,
+    scale_y_log10,
+    theme_bw,
+)
+
+
+def plot_tips(sampled_data, log, color="black"):
+    """Draw a scatter plot of total amount against tip amount.
+
+    Args:
+        sampled_data: Data frame with ``tip_amount`` and ``total_amount``
+            columns.
+        log: If truthy, put both axes on a log10 scale.
+        color: Colour of the points.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes("tip_amount", "total_amount"))
+        + geom_point(color=color)
+        + theme_bw()
+    )
+    if log:
+        plot = plot + scale_x_log10() + scale_y_log10()
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
+
+
+def plot_hist(sampled_data, color="black"):
+    """Draw a histogram of total amounts using bins of width 5.
+
+    Args:
+        sampled_data: Data frame with a ``total_amount`` column.
+        color: Outline and fill colour of the bars.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes(x="total_amount"))
+        + geom_histogram(binwidth=5, color=color, fill=color)
+        + theme_bw()
+    )
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
@@ -0,0 +1,44 @@
+from pandas import read_csv
+import panel as pn
+
+from plots import plot_hist, plot_tips
+
+
+def first_taxi(data):
+    """Return a Markdown heading naming the first taxi id in ``data``.
+
+    Args:
+        data: Data frame with a ``taxi_id`` column.
+
+    Returns:
+        A level-two Markdown heading with the id of the first row in
+        italics, or ``NA`` in its place when ``data`` has no rows.
+    """
+    if not data.empty:
+        first_id = data["taxi_id"].iloc[0]
+        return f'## First taxi id: *{first_id}*'
+
+    return '## First taxi id: *NA*'
+
+# Load the Panel extension and make components stretch to the available width.
+pn.extension(
+    sizing_mode="stretch_width",
+)
+
+# Read the CSV through Panel's state cache under the key "nyc-taxi".
+data = pn.state.as_cached(
+    key="nyc-taxi", fn=read_csv, filepath_or_buffer="nyc-taxi.csv"
+)
+# Wrap the plotting helpers with Panel's cache decorator.
+plot_hist = pn.cache(plot_hist)
+plot_tips = pn.cache(plot_tips)
+
+# Controls: sample proportion (0 to 1 in steps of 0.01) and the log-scale toggle.
+sample_input = pn.widgets.FloatSlider(
+    value=0.1, start=0, end=1, step=0.01, name="Sample"
+)
+scale_input = pn.widgets.Checkbox(name="Use Log Scale", margin=(20, 10, 0, 10))
+
+# Bound function that samples the data with the slider's current value.
+# NOTE(review): this bound function feeds three separate consumers below; it
+# looks like each consumer may evaluate it on its own and so see a different
+# random sample - confirm against the Panel documentation for pn.bind.
+sample_data = pn.bind(data.sample, frac=sample_input)
+
+# Page template: controls in the sidebar; first taxi id, tip plot and
+# histogram in the main area. Only the tip plot is bound to scale_input.
+pn.template.FastListTemplate(
+    site="Panel",
+    title="NYC Taxi Data",
+    sidebar=[
+        "## NYC Taxi Data",
+        sample_input,
+        scale_input,
+    ],
+    main=[
+        pn.bind(first_taxi, sample_data),
+        pn.pane.Matplotlib(pn.bind(plot_tips, sample_data, scale_input), height=600),
+        pn.pane.Matplotlib(pn.bind(plot_hist, sample_data), height=600  ),
+    ],
+    main_max_width="850px",
+).servable()
@@ -0,0 +1,34 @@
+from matplotlib.pyplot import close
+from plotnine import (
+    aes,
+    geom_histogram,
+    geom_point,
+    ggplot,
+    scale_x_log10,
+    scale_y_log10,
+    theme_bw,
+)
+
+
+def plot_tips(sampled_data, log, color="black"):
+    """Draw a scatter plot of total amount against tip amount.
+
+    Args:
+        sampled_data: Data frame with ``tip_amount`` and ``total_amount``
+            columns.
+        log: If truthy, put both axes on a log10 scale.
+        color: Colour of the points.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes("tip_amount", "total_amount"))
+        + geom_point(color=color)
+        + theme_bw()
+    )
+    if log:
+        plot = plot + scale_x_log10() + scale_y_log10()
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
+
+
+def plot_hist(sampled_data, color="black"):
+    """Draw a histogram of total amounts using bins of width 5.
+
+    Args:
+        sampled_data: Data frame with a ``total_amount`` column.
+        color: Outline and fill colour of the bars.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes(x="total_amount"))
+        + geom_histogram(binwidth=5, color=color, fill=color)
+        + theme_bw()
+    )
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
@@ -0,0 +1,9 @@
+dash
+dash_bootstrap_components
+gradio
+pandas
+panel
+plotnine
+ruff
+shiny
+streamlit
@@ -0,0 +1,42 @@
+from pandas import read_csv
+from shiny import App, reactive, render, ui
+
+from plots import plot_hist, plot_tips
+
+# Page layout: a sidebar with the sample slider (0 to 1, initially 0.1) and the
+# log-scale checkbox, followed by the first-taxi heading and one card per plot.
+# The output ids here must match the function names defined in `server`.
+app_ui = ui.page_sidebar(
+    ui.sidebar(
+        ui.input_slider("sample", "Sample Size", 0, 1, value=0.1, ticks=False),
+        ui.input_checkbox("log", "Log Scale"),
+    ),
+    ui.h3(ui.output_text("first_taxi_id")),
+    ui.card(ui.output_plot("tip_plot")),
+    ui.card(ui.output_plot("amount_histogram")),
+    title="Shiny",
+)
+
+
+def server(input, output, session):
+    """Define the reactive calculations and outputs for one session.
+
+    Args:
+        input: Accessor for the UI inputs (``sample`` and ``log``).
+        output: Decorator that registers the rendered outputs.
+        session: Session object supplied by Shiny; not used here.
+    """
+
+    @reactive.Calc
+    def dat():
+        # Full dataset, read from the working directory.
+        return read_csv("nyc-taxi.csv")
+
+    @reactive.Calc
+    def sampled_dat():
+        # Depends only on the slider, so the log checkbox never causes a
+        # new sample to be drawn.
+        return dat().sample(frac=input.sample())
+
+    @output
+    @render.text
+    def first_taxi_id():
+        sample = sampled_dat()
+        # The slider allows 0, which yields an empty sample; iloc[0] would
+        # raise IndexError on it.
+        if sample.empty:
+            return "Sample ID: NA"
+        return f'Sample ID: {sample["taxi_id"].iloc[0]}'
+
+    @output
+    @render.plot
+    def tip_plot():
+        # The only output that reads input.log().
+        return plot_tips(sampled_dat(), input.log())
+
+    @output
+    @render.plot
+    def amount_histogram():
+        return plot_hist(sampled_dat())
+
+
+# Application object combining the UI definition with the server function.
+app = App(app_ui, server)
@@ -0,0 +1,34 @@
+from matplotlib.pyplot import close
+from plotnine import (
+    aes,
+    geom_histogram,
+    geom_point,
+    ggplot,
+    scale_x_log10,
+    scale_y_log10,
+    theme_bw,
+)
+
+
+def plot_tips(sampled_data, log, color="black"):
+    """Draw a scatter plot of total amount against tip amount.
+
+    Args:
+        sampled_data: Data frame with ``tip_amount`` and ``total_amount``
+            columns.
+        log: If truthy, put both axes on a log10 scale.
+        color: Colour of the points.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes("tip_amount", "total_amount"))
+        + geom_point(color=color)
+        + theme_bw()
+    )
+    if log:
+        plot = plot + scale_x_log10() + scale_y_log10()
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
+
+
+def plot_hist(sampled_data, color="black"):
+    """Draw a histogram of total amounts using bins of width 5.
+
+    Args:
+        sampled_data: Data frame with a ``total_amount`` column.
+        color: Outline and fill colour of the bars.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes(x="total_amount"))
+        + geom_histogram(binwidth=5, color=color, fill=color)
+        + theme_bw()
+    )
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
@@ -0,0 +1,42 @@
+import streamlit as st
+from pandas import read_csv
+
+from plots import plot_hist, plot_tips
+
+# Initialise the change counter once per session; it is later passed to
+# take_sample_busted as part of the cache key.
+if "count" not in st.session_state:
+    st.session_state.count = 0
+
+
+def increment_counter():
+    """Bump the session counter; wired to the sample input's on_change."""
+    st.session_state.count += 1
+
+
+# Sidebar controls: the sample proportion (0.0 to 1.0 in steps of 0.01) and the
+# log-scale checkbox. Only the proportion input bumps the counter.
+with st.sidebar:
+    sample_ui = st.number_input(
+        "sample", 0.0, 1.0, value=0.1, step=0.01, on_change=increment_counter
+    )
+    log = st.checkbox("Log Scale")
+
+
+@st.cache_data
+def load_data():
+    """Read the NYC taxi CSV from the working directory."""
+    return read_csv("nyc-taxi.csv")
+
+
+# Full dataset that every sample is drawn from.
+data = load_data()
+
+
+@st.cache_data(max_entries=2)
+def take_sample_busted(df, fraction, counter):
+    """Return a random sample of ``df``, cached per argument combination.
+
+    Args:
+        df: Data frame to sample from.
+        fraction: Proportion of rows to keep.
+        counter: Not used in the body; it only takes part in the cache key
+            so that a new counter value forces a fresh sample.
+
+    Returns:
+        A new data frame holding the sampled rows.
+    """
+    # DataFrame.sample already returns a new object and leaves `df`
+    # untouched, so copying the whole frame first is unnecessary.
+    return df.sample(frac=fraction)
+
+
+# We need to use this cache busting approach because otherwise the
+# sample will be retrieved from cache instead of taking a new sample each
+# time the sample size changes.
+busted_sample = take_sample_busted(data, sample_ui, st.session_state.count)
+
+# The input allows a proportion of 0.0, which yields an empty sample; guard the
+# positional lookup (and skip the plots) instead of raising IndexError.
+if busted_sample.empty:
+    st.subheader("Sample id: NA")
+else:
+    st.subheader(f'Sample id: {busted_sample["taxi_id"].iloc[0]}')
+    st.pyplot(plot_tips(busted_sample, log))
+    st.pyplot(plot_hist(busted_sample))
@@ -0,0 +1,34 @@
+from matplotlib.pyplot import close
+from plotnine import (
+    aes,
+    geom_histogram,
+    geom_point,
+    ggplot,
+    scale_x_log10,
+    scale_y_log10,
+    theme_bw,
+)
+
+
+def plot_tips(sampled_data, log, color="black"):
+    """Draw a scatter plot of total amount against tip amount.
+
+    Args:
+        sampled_data: Data frame with ``tip_amount`` and ``total_amount``
+            columns.
+        log: If truthy, put both axes on a log10 scale.
+        color: Colour of the points.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes("tip_amount", "total_amount"))
+        + geom_point(color=color)
+        + theme_bw()
+    )
+    if log:
+        plot = plot + scale_x_log10() + scale_y_log10()
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig
+
+
+def plot_hist(sampled_data, color="black"):
+    """Draw a histogram of total amounts using bins of width 5.
+
+    Args:
+        sampled_data: Data frame with a ``total_amount`` column.
+        color: Outline and fill colour of the bars.
+
+    Returns:
+        The figure produced by ``plot.draw()``.
+    """
+    plot = (
+        ggplot(sampled_data, aes(x="total_amount"))
+        + geom_histogram(binwidth=5, color=color, fill=color)
+        + theme_bw()
+    )
+    fig = plot.draw()
+    # Close this specific figure rather than whichever one pyplot considers
+    # current, so the figure just drawn is always the one released.
+    close(fig)
+    return fig