diff --git a/examples/docker/chat-with-csv-solara/Dockerfile b/examples/docker/chat-with-csv-solara/Dockerfile
new file mode 100644
index 00000000..6c90608c
--- /dev/null
+++ b/examples/docker/chat-with-csv-solara/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.11
+
+COPY app.py app.py
+COPY chat.py chat.py
+COPY static/ static/
+RUN pip install git+https://github.com/ploomber/jupysql.git@master
+# app.py calls openai.ChatCompletion, which openai>=1.0 no longer provides.
+RUN pip install requests solara "openai<1" pandas duckdb duckdb-engine matplotlib
+
+
+ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port=80"]
\ No newline at end of file
diff --git a/examples/docker/chat-with-csv-solara/README.md b/examples/docker/chat-with-csv-solara/README.md
new file mode 100644
index 00000000..733cec23
--- /dev/null
+++ b/examples/docker/chat-with-csv-solara/README.md
@@ -0,0 +1,24 @@
+# Data querying and visualisation App
+
+This query and data visualisation application is designed to provide a user-friendly chatbot interface to interact with your data, making data exploration and analysis a breeze.
+
+
+## Getting Started
+
+To get started with this app, follow these steps:
+
+1. Log in to your [Ploomber Cloud](https://ploomber.io/) account.
+
+2. Follow the [steps](https://docs.cloud.ploomber.io/en/latest/apps/solara.html) for deploying a Solara application and upload the `app.zip` file provided in the example. Be sure to add your own OpenAI API key in the `app.py` file.
+
+## How to use
+
+1. **Dataset**: Click the `SAMPLE DATASET` button to load a sample csv file, or upload your own content by dragging a file to the drop area. You may also clear the loaded data by clicking the `Clear Dataset` button.
+
+2. **Number of preview rows**: Input the desired number of preview rows to be displayed.
+
+3. **Interaction**: You may ask the chatbot natural language queries like: `top 20 rows of table`, `unique values of column with counts`, etc.
+
+4. **Data visualisation**: Visualize your data on the fly.
Currently, the app supports histogram and box plot on a specific column, e.g., `histogram on column`.
+
+5. **Export Results**: The app allows you to export the charts, or query results.
\ No newline at end of file
diff --git a/examples/docker/chat-with-csv-solara/app.py b/examples/docker/chat-with-csv-solara/app.py
new file mode 100644
index 00000000..39e284f5
--- /dev/null
+++ b/examples/docker/chat-with-csv-solara/app.py
@@ -0,0 +1,390 @@
+"""Solara app for querying and plotting a CSV file through a chat interface.
+
+The sample or uploaded CSV is loaded into a DuckDB table named ``my_data``.
+Each prompt is sent to OpenAI, and the reply (SQL or a ``%sqlplot`` command)
+is executed and rendered as a chat message.
+"""
+
+import os
+import uuid
+import requests
+from functools import partial
+from pathlib import Path
+
+import openai
+import pandas as pd
+import solara
+import solara.lab
+from solara.components.file_drop import FileDrop
+
+from sql import inspect
+from sql.run import run
+from sqlalchemy import create_engine
+from sql.connection import SQLAlchemyConnection
+from sql.magic import SqlMagic, load_ipython_extension
+from IPython.core.interactiveshell import InteractiveShell
+from sql.plot import boxplot, histogram
+
+from chat import ChatBox, Message
+
+from matplotlib import pyplot as plt
+from matplotlib.figure import Figure
+
+plt.switch_backend("agg")
+
+css = """
+    .main {
+        width: 100%;
+        height: 100%;
+        max-width: 1200px;
+        margin: auto;
+        padding: 1em;
+    }
+
+    #app > div > div:nth-child(2) > div:nth-child(2) {
+        display: none;
+    }
+"""
+
+# Read the key from the OPENAI_API_KEY environment variable when it is set so
+# it does not have to be written into this file; otherwise edit the placeholder.
+openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")
+
+SAMPLE_DATA_URL = (
+    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
+)
+# requests.get() has no default timeout and would otherwise block indefinitely.
+REQUEST_TIMEOUT = 30
+
+prompt_template = """
+This is the schema for the my_data table:
+
+{}
+
+I'll start prompting you and I want you to return SQL code.
+
+If you're asked to plot a histogram, you can return: %sqlplot histogram NAME
+If you're asked to plot a boxplot, you can return: %sqlplot boxplot NAME
+
+And replace NAME with the column name, do not include the table name
+"""
+
+
+def gen_name():
+    """Return a random 8-character ``.csv`` file name for a scratch file."""
+    return str(uuid.uuid4())[:8] + ".csv"
+
+
+def delete_data():
+    """Drop the my_data table if it exists."""
+    run.run_statements(conn, "drop table if exists my_data", sqlmagic)
+
+
+def load_data(name):
+    """Recreate the my_data table from the CSV file *name*; return its columns."""
+    delete_data()
+    run.run_statements(
+        conn, f"create table my_data as (select * from '{name}')", sqlmagic
+    )
+    return inspect.get_columns("my_data")
+
+
+def preview_rows(limit):
+    """Return the first *limit* rows of my_data (invalid limits become 0)."""
+    try:
+        limit = max(int(limit), 0)
+    except (TypeError, ValueError):
+        limit = 0
+    return run.run_statements(conn, f"select * from my_data limit {limit}", sqlmagic)
+
+
+ip = InteractiveShell()
+
+sqlmagic = SqlMagic(shell=ip)
+sqlmagic.feedback = 1
+sqlmagic.autopandas = True
+load_ipython_extension(ip)
+
+conn = SQLAlchemyConnection(create_engine("duckdb://"), config=sqlmagic)
+
+
+class State:
+    """Reactive application state shared by all components in this module."""
+
+    initial_prompt = solara.reactive("")
+    sample_data_loaded = solara.reactive(False)
+    upload_data = solara.reactive(False)
+    upload_data_error = solara.reactive("")
+    results = solara.reactive(20)
+    input = solara.reactive("")
+    loading_data = solara.reactive(False)
+
+    @staticmethod
+    def load_sample():
+        """Download the sample CSV, load it into my_data and build the prompt."""
+        State.reset()
+        name = gen_name()
+        State.loading_data.value = True
+        try:
+            response = requests.get(SAMPLE_DATA_URL, timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()
+            with open(name, "wb") as f:
+                f.write(response.content)
+            cols = load_data(name)
+        except requests.RequestException:
+            # Event handlers run outside a render, so the failure is reported
+            # through state that Page displays rather than by creating a widget.
+            State.upload_data_error.value = (
+                "Failed to fetch the data. Check the URL and try again."
+            )
+            return
+        except Exception as e:
+            State.upload_data_error.value = str(e)
+            return
+        finally:
+            # Always clear the spinner and remove the scratch file; the table
+            # created by load_data() holds its own copy of the rows.
+            State.loading_data.value = False
+            Path(name).unlink(missing_ok=True)
+        State.sample_data_loaded.value = True
+        State.initial_prompt.value = prompt_template.format(cols)
+
+    @staticmethod
+    def load_from_file(file):
+        """Load an uploaded CSV (the FileDrop payload *file*) into my_data."""
+        if not file["name"].lower().endswith(".csv"):
+            State.upload_data_error.value = "Only csv files are supported"
+            return
+        State.reset()
+        name = gen_name()
+        State.loading_data.value = True
+        try:
+            df = pd.read_csv(file["file_obj"])
+            # Strip header whitespace and replace inner spaces with underscores.
+            df.columns = df.columns.str.strip().str.replace(" ", "_")
+            df.to_csv(name, index=False)
+            cols = load_data(name)
+        except Exception as e:
+            State.upload_data_error.value = str(e)
+            return
+        finally:
+            State.loading_data.value = False
+            Path(name).unlink(missing_ok=True)
+        State.upload_data.value = True
+        State.initial_prompt.value = prompt_template.format(cols)
+        State.upload_data_error.value = ""
+
+    @staticmethod
+    def reset():
+        """Drop the loaded table and return the state to "no data loaded"."""
+        State.sample_data_loaded.value = False
+        State.upload_data.value = False
+        delete_data()
+        State.initial_prompt.value = ""
+        State.upload_data_error.value = ""
+
+    @staticmethod
+    def chat_with_gpt3(prompts):
+        """Stream a reply to *prompts*, yielding the text accumulated so far."""
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": State.initial_prompt.value},
+                {"role": "user", "content": "Show me the first 5 rows"},
+                {"role": "assistant", "content": "SELECT * FROM my_data LIMIT 5"},
+            ]
+            + [{"role": prompt.role, "content": prompt.content} for prompt in prompts],
+            temperature=0.1,
+            stream=True,
+        )
+
+        total = ""
+        for chunk in response:
+            part = chunk["choices"][0]["delta"].get("content", "")
+            total += part
+            yield total
+
+
+@solara.component
+def Chat() -> None:
+    """Render the conversation and answer each submitted query."""
+    solara.Style(
+        """
+        .chat-input {
+            max-width: 800px;
+        }
+        """
+    )
+
+    messages, set_messages = solara.use_state(
+        [
+            Message(
+                role="assistant",
+                content="Welcome. Please post your queries!",
+                df=None,
+                fig=None,
+            )
+        ]
+    )
+
+    def ask_chatgpt():
+        user_input = State.input.value
+        question = Message(role="user", content=user_input, df=None, fig=None)
+        history = messages + [question]
+        State.input.value = ""
+        set_messages(history)
+
+        def reply(content="", df=None, fig=None):
+            answer = Message(role="assistant", content=content, df=df, fig=fig)
+            set_messages(history + [answer])
+
+        if not State.initial_prompt.value:
+            reply("Please load some data first!")
+            return
+
+        final = None
+        try:
+            for command in State.chat_with_gpt3([question]):
+                final = command
+        except Exception as e:
+            reply(f"Sorry, the request to OpenAI failed: {e}")
+            return
+        # An empty stream leaves nothing to run (and None has no startswith).
+        final = (final or "").strip()
+        if not final:
+            reply("Sorry, we couldn't run your query on the data")
+            return
+
+        if final.startswith("%sqlplot"):
+            fn_map = {"histogram": partial(histogram, bins=50), "boxplot": boxplot}
+            try:
+                # maxsplit keeps a column name containing spaces in one piece;
+                # an unknown plot type raises KeyError and is reported the same way.
+                _, name, column = final.split(maxsplit=2)
+                fn = fn_map[name]
+            except (ValueError, KeyError):
+                reply(
+                    "Sorry, we couldn't run your query on the data. "
+                    "Please ensure you specify a relevant column."
+                )
+                return
+
+            fig = Figure()
+            ax = fig.subplots()
+            try:
+                fn("my_data", column, ax=ax)
+            except Exception:
+                reply("Please pass relevant columns")
+                return
+            reply(fig=fig)
+        else:
+            # NOTE(review): the model's SQL is executed as-is, so a prompt can
+            # make it run any statement the DuckDB connection permits.
+            try:
+                query_result = run.run_statements(conn, final, sqlmagic)
+            except Exception:
+                reply("Sorry, we couldn't run your query on the data")
+                return
+            reply(df=query_result)
+
+    with solara.VBox():
+        for message in messages:
+            ChatBox(message)
+
+    with solara.Row(justify="center"):
+        with solara.HBox(align_items="center", classes=["chat-input"]):
+            solara.InputText(label="Query", value=State.input, continuous_update=False)
+
+    # NOTE(review): this runs the request during render; it relies on
+    # ask_chatgpt() clearing State.input so the next render skips it.
+    if State.input.value:
+        ask_chatgpt()
+
+
+@solara.component
+def Page():
+    """Render the sidebar controls, the data preview and the chat."""
+    initial_prompt = State.initial_prompt.value
+    sample_data_loaded = State.sample_data_loaded.value
+    upload_data = State.upload_data.value
+    upload_data_error = State.upload_data_error.value
+    results = State.results.value
+
+    with solara.AppBarTitle():
+        solara.Text("Data Querying and Visualisation App")
+
+    with solara.Card(title="About", elevation=6, style="background-color: #f5f5f5;"):
+        solara.Markdown(
+            """
+        Interact with your data using natural language.
+
+        Examples:
+        - show me the unique values of column {column name}
+        - create a histogram of {column name}
+        - create a boxplot of {column name}"""
+        )
+
+    with solara.Sidebar():
+        with solara.Card("Controls", margin=0, elevation=0):
+            with solara.Column():
+                with solara.Row():
+                    solara.Button(
+                        "Sample dataset",
+                        color="primary",
+                        text=True,
+                        outlined=True,
+                        on_click=State.load_sample,
+                    )
+                    solara.Button(
+                        "Clear dataset",
+                        color="primary",
+                        text=True,
+                        outlined=True,
+                        on_click=State.reset,
+                    )
+                FileDrop(
+                    on_file=State.load_from_file,
+                    on_total_progress=lambda *args: None,
+                    label="Drag a .csv file here",
+                )
+                if State.loading_data.value:
+                    with solara.Div():
+                        solara.Text("Loading csv...")
+                        solara.ProgressLinear(True)
+                if initial_prompt:
+                    solara.InputInt(
+                        "Number of preview rows",
+                        value=State.results,
+                        continuous_update=True,
+                    )
+
+                solara.Markdown("Hosted in [Ploomber Cloud](https://ploomber.io/)")
+
+    if sample_data_loaded:
+        solara.Info("Sample data is loaded")
+        solara.DataFrame(preview_rows(results), items_per_page=10)
+
+    if upload_data:
+        solara.Info("Data is successfully uploaded")
+        solara.DataFrame(preview_rows(results), items_per_page=10)
+
+    if upload_data_error:
+        solara.Error(f"Error uploading data: {upload_data_error}")
+
+    if initial_prompt == "":
+        solara.Info("No data loaded")
+
+    solara.Style(css)
+    with solara.VBox(classes=["main"]):
+        solara.HTML(
+            tag="h3", style="margin: auto;", unsafe_innerHTML="Chat with your data"
+        )
+
+        Chat()
+
+
+@solara.component
+def Layout(children):
+    """Wrap the page in Solara's default application layout."""
+    route, routes = solara.use_route()
+    return solara.AppLayout(children=children)
diff --git a/examples/docker/chat-with-csv-solara/app.zip b/examples/docker/chat-with-csv-solara/app.zip
new file mode 100644
index 00000000..be5cb320
Binary files /dev/null and b/examples/docker/chat-with-csv-solara/app.zip differ
diff --git a/examples/docker/chat-with-csv-solara/chat.py
b/examples/docker/chat-with-csv-solara/chat.py
new file mode 100644
index 00000000..db03c744
--- /dev/null
+++ b/examples/docker/chat-with-csv-solara/chat.py
@@ -0,0 +1,102 @@
+"""Chat message model and the helper that renders one message."""
+
+import io
+from pathlib import Path
+from dataclasses import dataclass
+from typing import Optional
+
+import pandas as pd
+import solara as sl
+from matplotlib.figure import Figure
+from matplotlib import pyplot as plt
+
+plt.switch_backend("agg")
+
+# Resolve the avatar images next to this file so they are found regardless of
+# the directory the app is started from.
+STATIC_DIR = Path(__file__).parent / "static"
+
+
+chatbox_css = """
+.message {
+    max-width: 450px;
+    width: 100%;
+}
+
+.user-message, .user-message > * {
+    background-color: #f0f0f0 !important;
+}
+
+.assistant-message, .assistant-message > * {
+    background-color: #9ab2e9 !important;
+}
+
+.avatar {
+    width: 50px;
+    height: 50px;
+    border-radius: 50%;
+    border: 2px solid transparent;
+    overflow: hidden;
+    display: flex;
+}
+
+.avatar img {
+    width: 100%;
+    height: 100%;
+    object-fit: cover;
+}
+"""
+
+
+@dataclass
+class Message:
+    """One chat entry from *role*: text, a query result or a figure."""
+
+    role: str
+    content: str
+    # Callers pass None when a message carries no table or no figure.
+    df: Optional[pd.DataFrame] = None
+    fig: Optional[Figure] = None
+
+
+def ChatBox(message: Message) -> None:
+    """Render *message* as a chat bubble followed by the sender's avatar.
+
+    Text content takes precedence; otherwise the DataFrame or the figure is
+    shown together with a download button.
+    """
+    sl.Style(chatbox_css)
+
+    align = "start" if message.role == "assistant" else "end"
+    with sl.Column(align=align):
+        with sl.Card(classes=["message", f"{message.role}-message"]):
+            if message.content:
+                sl.Markdown(message.content)
+            elif message.df is not None:
+                with sl.Card():
+                    sl.DataFrame(message.df)
+                with sl.Card():
+                    sl.FileDownload(
+                        message.df.to_csv(index=False),
+                        filename="data.csv",
+                        label="Download file",
+                    )
+            elif message.fig is not None:
+                with sl.Card():
+                    sl.FigureMatplotlib(message.fig)
+                with sl.Card():
+                    # Hand the encoded image to FileDownload as bytes; nothing
+                    # is written to disk and no file handle is left open.
+                    with io.BytesIO() as buf:
+                        message.fig.savefig(buf, format="jpg")
+                        image_bytes = buf.getvalue()
+                    sl.FileDownload(
+                        image_bytes, filename="chart.jpg", mime_type="image/jpeg"
+                    )
+
+        # Image reference: https://www.flaticon.com/free-icons/bot;
+        # https://www.flaticon.com/free-icons/use
+
+        with sl.HBox(align_items="center"):
+            sl.Image(str(STATIC_DIR / f"{message.role}-logo.png"), classes=["avatar"])
+            sl.Text(message.role.capitalize())
diff --git a/examples/docker/chat-with-csv-solara/static/assistant-logo.png b/examples/docker/chat-with-csv-solara/static/assistant-logo.png
new file mode 100644
index 00000000..7fc877fd
Binary files /dev/null and b/examples/docker/chat-with-csv-solara/static/assistant-logo.png differ
diff --git a/examples/docker/chat-with-csv-solara/static/user-logo.png b/examples/docker/chat-with-csv-solara/static/user-logo.png
new file mode 100644
index 00000000..f33a440d
Binary files /dev/null and b/examples/docker/chat-with-csv-solara/static/user-logo.png differ