Skip to content

Commit 97c505b

Browse files
committed
chat with csv
solara notebook voila removed files Python app reverted deleted folder Import changed plots chat based lates version zip readme
1 parent 2bdfdb5 commit 97c505b

File tree

7 files changed

+418
-0
lines changed

7 files changed

+418
-0
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
FROM python:3.11

# Install dependencies before copying the application sources so that
# source-only changes reuse the cached dependency layers.
RUN pip install --no-cache-dir git+https://github.com/ploomber/jupysql.git@master
# app.py calls openai.ChatCompletion, which exists only in the pre-1.0 openai
# client, so the package is pinned below 1.0.
RUN pip install --no-cache-dir requests solara "openai<1" pandas duckdb duckdb-engine matplotlib

COPY app.py app.py
COPY chat.py chat.py
COPY static/ static/

ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port=80"]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Data Querying and Visualisation App
2+
3+
This query and data visualisation application is designed to provide a user-friendly chatbot interface to interact with your data, making data exploration and analysis a breeze.
4+
5+
6+
## Getting Started
7+
8+
To get started with this app, follow these steps:
9+
10+
1. Login to your [Ploomber Cloud](https://ploomber.io/) account.
11+
12+
2. Follow the [steps](https://docs.cloud.ploomber.io/en/latest/apps/solara.html) for deploying a Solara application and upload the `app.zip` file provided in the example. Be sure to add your own OpenAI API key to the `app.py` file.
13+
14+
## How to use
15+
16+
1. **Dataset**: Click the `SAMPLE DATASET` button to load a sample CSV file, or upload your own CSV file by dragging it onto the drop area. You can also clear the loaded data by clicking the `CLEAR DATASET` button.
17+
18+
2. **Number of preview rows**: Input the desired number of preview rows to be displayed.
19+
20+
3. **Interaction**: Ask the chatbot natural-language queries such as `top 20 rows of table` or `unique values of column with counts`.
21+
22+
4. **Data visualisation**: Visualise your data on the fly. Currently, the app supports histograms and box plots of a single column, e.g., `histogram on column`.
23+
24+
5. **Export Results**: The app allows you to export charts and query results.
Lines changed: 299 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
import os
import uuid
import requests
from functools import partial

import openai
import solara
import solara.lab
from solara.components.file_drop import FileDrop

from sql import inspect
from sql.run import run
from sql.connection import ConnectionManager
from sql.magic import SqlMagic, load_ipython_extension
from IPython.core.interactiveshell import InteractiveShell
from sql.plot import boxplot, histogram
from sqlalchemy.exc import ProgrammingError

from chat import *

from matplotlib import pyplot as plt
21+
# Render figures with matplotlib's "agg" backend (draws to in-memory buffers, no GUI window).
plt.switch_backend("agg")
22+
23+
# Page-level styles; injected with solara.Style inside Page.
css = """
.main {
width: 100%;
height: 100%;
max-width: 1200px;
margin: auto;
padding: 1em;
}

#app > div > div:nth-child(2) > div:nth-child(2) {
display: none;
}
"""

# Prefer the OPENAI_API_KEY environment variable so the secret does not have to
# be committed with the source. The placeholder remains the fallback, so
# editing the key directly in this file keeps working.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")

# System prompt for the model; "{}" is filled with the column listing of the
# my_data table once a dataset has been loaded.
prompt_template = """
This is the schema for the my_data table:

{}

I'll start prompting you and I want you to return SQL code.

If you're asked to plot a histogram, you can return: %sqlplot histogram NAME
If you're asked to plot a boxplot, you can return: %sqlplot boxplot NAME

And replace NAME with the column name, do not include the table name
"""
51+
52+
53+
def gen_name():
    """Return a random csv file name: eight hex characters plus ``.csv``."""
    token = uuid.uuid4().hex[:8]
    return f"{token}.csv"
55+
56+
57+
def load_data(name):
    """Rebuild the ``my_data`` table from the csv file *name*.

    Any existing ``my_data`` table is dropped first, then the table is
    recreated from the file's contents.

    Returns whatever ``inspect.get_columns("my_data")`` reports for the new
    table.
    """
    statements = (
        "drop table if exists my_data",
        f"create table my_data as (select * from '{name}')",
    )
    for statement in statements:
        run.run_statements(conn, statement, sqlmagic)
    return inspect.get_columns("my_data")
62+
63+
64+
def delete_data():
    """Drop the ``my_data`` table when it exists."""
    drop_statement = "drop table if exists my_data"
    run.run_statements(conn, drop_statement, sqlmagic)
66+
67+
# Stand-alone IPython shell that hosts the SQL magic outside of a notebook.
ip = InteractiveShell()

# SQL magic configuration shared by every run.run_statements call in this module.
sqlmagic = SqlMagic(shell=ip)
sqlmagic.autopandas = True
sqlmagic.feedback = 1
load_ipython_extension(ip)

# Single module-level DuckDB connection ("duckdb://" with no path).
conn = ConnectionManager.set(
    "duckdb://",
    config=sqlmagic,
    alias=None,
    creator=None,
    connect_args={},
    displaycon=True,
)
82+
83+
84+
class State:
    """Reactive application state and the actions that mutate it.

    All attributes are ``solara.reactive`` values read by the ``Page`` and
    ``Chat`` components; the static methods are used as UI callbacks.
    """

    # System prompt for the model; empty string means "no dataset loaded".
    initial_prompt = solara.reactive("")
    # True once the bundled sample dataset has been loaded.
    sample_data_loaded = solara.reactive(False)
    # True once a user-supplied csv has been loaded.
    upload_data = solara.reactive(False)
    # Last dataset-loading error message, or "" when there is none.
    upload_data_error = solara.reactive("")
    # Number of preview rows shown for the loaded dataset.
    results = solara.reactive(20)
    # Text currently submitted in the chat input box.
    input = solara.reactive("")
    # True while a dataset is being fetched/parsed/loaded.
    loading_data = solara.reactive(False)

    @staticmethod
    def _remove_quietly(path):
        """Delete the scratch csv at *path*; a missing file is not an error."""
        try:
            os.remove(path)
        except OSError:
            pass

    @staticmethod
    def load_sample():
        """Download the penguins sample csv and load it into ``my_data``.

        Failures (network error, non-200 response, load error) are reported
        through ``upload_data_error`` instead of being raised, and the
        ``loading_data`` flag is always cleared so the progress indicator
        cannot get stuck.
        """
        State.reset()
        name = gen_name()
        State.loading_data.value = True
        url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
        try:
            # A timeout keeps a stalled download from blocking the callback forever.
            response = requests.get(url, timeout=30)
            if response.status_code != 200:
                # Calling solara.Warning from a callback renders nothing, so
                # the failure is surfaced through the reactive error field.
                State.upload_data_error.value = (
                    "Failed to fetch the data. Check the URL and try again."
                )
                return
            with open(name, "wb") as f:
                f.write(response.content)
            cols = load_data(name)
            State.initial_prompt.value = prompt_template.format(cols)
            State.sample_data_loaded.value = True
        except Exception as e:
            State.upload_data_error.value = str(e)
        finally:
            State.loading_data.value = False
            # my_data is created with "create table ... as select", so the
            # scratch file is no longer needed once loading has finished.
            State._remove_quietly(name)

    @staticmethod
    def load_from_file(file):
        """Load an uploaded csv (the FileDrop ``on_file`` payload) into ``my_data``.

        *file* is indexed with ``"name"`` and ``"file_obj"``. Column names are
        stripped and spaces replaced with underscores before loading. Errors
        are reported through ``upload_data_error``.
        """
        # Accept ".CSV" as well as ".csv".
        if not file["name"].lower().endswith(".csv"):
            State.upload_data_error.value = "Only csv files are supported"
            return
        State.reset()
        name = gen_name()
        State.loading_data.value = True
        try:
            # NOTE(review): `pd` is not imported explicitly in this module; it
            # presumably arrives through `from chat import *` -- confirm.
            df = pd.read_csv(file["file_obj"])
            df.columns = df.columns.str.strip().str.replace(' ', '_')
            df.to_csv(name, index=False)
            cols = load_data(name)
            State.initial_prompt.value = prompt_template.format(cols)
            State.upload_data.value = True
        except Exception as e:
            State.upload_data_error.value = str(e)
        else:
            State.upload_data_error.value = ""
        finally:
            # Always clear the flag so a failed upload does not leave the
            # "Loading csv..." indicator on screen.
            State.loading_data.value = False
            State._remove_quietly(name)

    @staticmethod
    def reset():
        """Forget the loaded dataset: clear flags, drop ``my_data``, clear prompt and error."""
        State.sample_data_loaded.value = False
        State.upload_data.value = False
        delete_data()
        State.initial_prompt.value = ""
        State.upload_data_error.value = ""

    @staticmethod
    def chat_with_gpt3(prompts):
        """Stream a completion for *prompts*, yielding the accumulated text.

        *prompts* is an iterable of objects exposing ``role`` and ``content``.
        The request is prefixed with the system prompt and one worked example.
        Each yielded value is the full text received so far, so the last value
        is the complete answer.
        """
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": State.initial_prompt.value},
                {"role": "user", "content": "Show me the first 5 rows"},
                {"role": "assistant", "content": "SELECT * FROM my_data LIMIT 5"},
            ] + [{"role": prompt.role, "content": prompt.content} for prompt in prompts],
            temperature=0.1,
            stream=True
        )

        total = ""
        for chunk in response:
            # Treat a missing or None content field as an empty fragment.
            part = chunk['choices'][0]['delta'].get("content") or ""
            total += part
            yield total
158+
159+
160+
@solara.component
def Chat() -> None:
    """Chat panel: message history plus a query box backed by the model.

    A submitted query is sent to ``State.chat_with_gpt3``. A reply starting
    with ``%sqlplot`` is rendered as a histogram/boxplot of one ``my_data``
    column; any other reply is executed as SQL and shown as a table.
    Failures are reported as assistant messages rather than raised.
    """
    # The original literal ended with "})"; the stray ")" made the rule invalid CSS.
    solara.Style("""
.chat-input {
max-width: 800px;
}
""")

    messages, set_messages = solara.use_state([
        Message(
            role="assistant",
            content="Welcome. Please post your queries!",
            df=None,
            fig=None)
    ])

    def reply(history, content="", df=None, fig=None):
        """Publish *history* followed by one assistant message."""
        set_messages(history + [Message(role="assistant", content=content, df=df, fig=fig)])

    def ask_chatgpt():
        """Handle the query currently held in ``State.input``."""
        user_input = State.input.value
        user_message = Message(role="user", content=user_input, df=None, fig=None)
        history = messages + [user_message]
        # Clear the box first so the query is not processed again on re-render.
        State.input.value = ""
        set_messages(history)

        if not State.initial_prompt.value:
            reply(history, content="Please load some data first!")
            return

        error = "Sorry, we couldn't run your query on the data"

        # The generator yields the accumulated text; only the last value matters.
        final = ""
        try:
            for accumulated in State.chat_with_gpt3([user_message]):
                final = accumulated
        except Exception:
            # API/network failures must not crash the render.
            reply(history, content="Sorry, we couldn't get a response from the model")
            return

        final = final.strip()
        if not final:
            reply(history, content=error)
            return

        if final.startswith("%sqlplot"):
            fn_map = {"histogram": partial(histogram, bins=50),
                      "boxplot": boxplot}
            # Expected shape: "%sqlplot <kind> <column>". Splitting on any
            # whitespace tolerates extra spaces or a trailing newline.
            parts = final.split()
            fn = fn_map.get(parts[1]) if len(parts) == 3 else None
            if fn is None:
                reply(history, content="Please pass relevant columns")
                return

            fig = Figure()
            ax = fig.subplots()
            try:
                fn("my_data", parts[2], ax=ax)
            except Exception:
                reply(history, content="Please pass relevant columns")
            else:
                reply(history, fig=fig)
            return

        try:
            query_result = run.run_statements(conn, final, sqlmagic)
        except Exception:
            reply(history, content=error)
        else:
            reply(history, df=query_result)

    with solara.VBox():
        for message in messages:
            ChatBox(message)

    with solara.Row(justify="center"):
        with solara.HBox(align_items="center", classes=["chat-input"]):
            solara.InputText(label="Query", value=State.input, continuous_update=False)

    # A non-empty State.input means a query was just submitted.
    if State.input.value:
        ask_chatgpt()
232+
233+
234+
@solara.component
def Page():
    """Top-level page: about card, sidebar controls, dataset preview and chat."""
    initial_prompt = State.initial_prompt.value
    sample_data_loaded = State.sample_data_loaded.value
    upload_data = State.upload_data.value
    upload_data_error = State.upload_data_error.value
    results = State.results.value

    def show_preview(banner):
        """Render *banner* followed by the first rows of ``my_data``."""
        solara.Info(banner)
        # The row count is interpolated into SQL: force a non-negative int so a
        # negative or empty input cannot yield an invalid LIMIT clause.
        limit = max(int(results or 0), 0)
        sql_output = run.run_statements(conn, f"select * from my_data limit {limit}", sqlmagic)
        solara.DataFrame(sql_output, items_per_page=10)

    with solara.AppBarTitle():
        solara.Text("Data Querying and Visualisation App")

    with solara.Card(title="About", elevation=6, style="background-color: #f5f5f5;"):
        solara.Markdown("""This Solara app is designed for chatting with your data. <br> <br>
Examples of queries :
unique column-name values ;
select top 20 rows from table ; <br> <br>
Example of queries that will return a plot :
histogram on column ;
boxplot on column""")

    with solara.Sidebar():
        with solara.Card("Controls", margin=0, elevation=0):
            with solara.Column():
                with solara.Row():
                    solara.Button("Sample dataset", color="primary", text=True, outlined=True,
                                  on_click=State.load_sample)
                    solara.Button("Clear dataset", color="primary", text=True, outlined=True, on_click=State.reset)
                FileDrop(on_file=State.load_from_file, on_total_progress=lambda *args: None,
                         label="Drag a .csv file here")
                if State.loading_data.value:
                    with solara.Div():
                        solara.Text("Loading csv...")
                        solara.ProgressLinear(True)
                if initial_prompt:
                    solara.InputInt("Number of preview rows", value=State.results, continuous_update=True)

            solara.Markdown("Hosted in [Ploomber Cloud](https://ploomber.io/)")

    if sample_data_loaded:
        show_preview("Sample data is loaded")

    if upload_data:
        show_preview("Data is successfully uploaded")

    if upload_data_error:
        solara.Error(f"Error uploading data: {upload_data_error}")

    if initial_prompt == "":
        solara.Info("No data loaded")

    solara.Style(css)
    with solara.VBox(classes=["main"]):
        solara.HTML(tag="h3", style="margin: auto;", unsafe_innerHTML="Chat with your data")

        Chat()
293+
294+
295+
@solara.component
def Layout(children):
    """Wrap the page's *children* in Solara's standard ``AppLayout``."""
    # The routing hook is still invoked on every render; its results are unused here.
    _route, _routes = solara.use_route()
    layout = solara.AppLayout(children=children)
    return layout
299+
47.8 KB
Binary file not shown.

0 commit comments

Comments
 (0)