-
Notifications
You must be signed in to change notification settings - Fork 5
/
dataset_describer.py
106 lines (78 loc) · 3.9 KB
/
dataset_describer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import openai
import streamlit as st
import pandas as pd
import re
import uuid
def read_csv_to_string(file_path):
    """Return the complete text of the file at ``file_path``.

    The file is decoded as UTF-8 and opened with ``newline=''`` so that line
    endings are returned exactly as stored, without newline translation.
    """
    with open(file_path, mode='r', encoding='utf-8', newline='') as handle:
        return handle.read()
# Matches one ```-fenced block. A language tag such as "python" placed directly
# after the opening fence and terminated by a newline is consumed but not
# captured, so it never ends up in the extracted code.
_FENCED_BLOCK_RE = re.compile(
    r'```(?:[A-Za-z0-9_+#-]+[ \t]*\r?\n)?(.*?)```',
    re.DOTALL,
)


def extract_string_between_triple_backticks(text):
    """Return the contents of every triple-backtick fenced block in ``text``.

    Blocks are returned in order of appearance. Matching is non-greedy and
    spans newlines, so each opening fence pairs with the nearest closing one.
    A language tag on the opening fence line (for example the ``python`` in
    a fence opened as three backticks followed by ``python``) is dropped;
    otherwise it would become the first line of the extracted code and raise
    ``NameError`` when that code is executed.

    Args:
        text: The string to scan.

    Returns:
        A list of strings, one per fenced block; empty when there is none.
    """
    return _FENCED_BLOCK_RE.findall(text)
def save_to_py_file(strings, file_path):
    """Write each item of ``strings`` to ``file_path``, followed by a newline.

    The target is opened in write mode, so existing content is replaced, and
    the text is encoded as UTF-8.
    """
    with open(file_path, mode='w', encoding='utf-8') as out:
        out.writelines(snippet + '\n' for snippet in strings)
# NOTE(review): the cache decorator is commented out. This function writes a
# file as a side effect, which a cache hit would skip - confirm before
# re-enabling it.
#@st.cache_data
def load_data(uploaded_file, file_name, delimiter=None):
    """Parse an uploaded CSV and store a comma-separated copy under ``datasets/``.

    Args:
        uploaded_file: Path or file-like object accepted by ``pandas.read_csv``.
        file_name: Base name (without extension) of the copy, which is written
            to ``datasets/<file_name>.csv``.
        delimiter: Field separator of the uploaded file. When omitted, the
            module-level ``csv_delimiter`` is used.

    Returns:
        A ``(dataframe, csv_text)`` tuple: the parsed DataFrame and the exact
        text that was written to the stored copy.
    """
    if delimiter is None:
        delimiter = csv_delimiter
    dataframe_version = pd.read_csv(uploaded_file, encoding='utf-8', delimiter=delimiter)

    # The target folder may not exist yet (e.g. on a fresh checkout).
    os.makedirs('datasets', exist_ok=True)

    # index=False keeps the DataFrame's row index out of the output; with the
    # default it would appear as an extra unnamed first column in both the
    # returned text and the stored file.
    string_version = dataframe_version.to_csv(index=False)

    # Render once and write that same text, instead of writing the file and
    # reading it back. newline='' stops the text layer from translating the
    # line endings pandas already produced.
    with open('datasets/' + file_name + '.csv', 'w', newline='', encoding='utf-8') as file:
        file.write(string_version)
    return dataframe_version, string_version
# Generation settings shared by both ChatCompletion requests below.
max_token = 3000
summ_temprature = 0.5

# Sidebar inputs: the CSV file to analyse and the delimiter it uses.
# `csv_delimiter` has to stay a module-level name because load_data() reads it.
with st.sidebar:
    uploaded_file = st.file_uploader("Choose a CSV file from your computer", accept_multiple_files=False)
    csv_delimiter = st.radio("choose delimiter", (';', ','))

if uploaded_file is not None:
    # Each run stores the upload under a fresh random name.
    unique_filename = str(uuid.uuid4())
    dataset_path = 'datasets/' + unique_filename + '.csv'
    try:
        df, st_version = load_data(uploaded_file, unique_filename)
        st.dataframe(df.head(5))

        # Set up OpenAI API key
        openai.api_key = st.secrets['open_ai_key']['OPENAI_API_KEY']

        with st.form(key='my_form'):
            plot_description = st.text_area("Describe the plot you want to see.")
            submit_button = st.form_submit_button(label='Submit')

        if plot_description:
            queries = st.secrets['chatgpt_queries']
            # Both prompts start the same way; only the first 200 characters
            # of the CSV text are sent as a sample of the data.
            dataset_preview = queries['datadescribe_query1'] + st_version[0:200] + "\n"

            # Request 1: a description of the dataset, shown in the sidebar.
            response_dataset_description = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "user", "content": dataset_preview + queries['datadescribe_query2']},
                ],
                max_tokens=max_token,
                temperature=summ_temprature,
            )

            # Request 2: the user's plot description plus the path of the
            # stored dataset; fenced code blocks are extracted from the reply.
            response_dataset_summary = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "user", "content": dataset_preview +
                        plot_description + "\n" +
                        queries['datadescribe_query3'] +
                        queries['datadescribe_query4'] +
                        queries['datadescribe_query5'] +
                        queries['datadescribe_query6'] +
                        dataset_path +
                        queries['datadescribe_query7']},
                ],
                max_tokens=max_token,
                temperature=summ_temprature,
            )

            with st.sidebar:
                st.write(response_dataset_description.choices[0].message.content)

            created_code = extract_string_between_triple_backticks(
                response_dataset_summary.choices[0].message.content
            )
            if created_code:
                save_to_py_file(created_code, 'plotter.py')
                # save_to_py_file() writes UTF-8, so read it back as UTF-8
                # instead of relying on the platform's default encoding.
                with open("plotter.py", encoding='utf-8') as f:
                    # SECURITY: this executes model-generated code in this
                    # process with full privileges, and the prompt contains
                    # user-supplied text. Only run this app in a trusted or
                    # sandboxed environment.
                    exec(f.read())
            else:
                # Nothing to execute: tell the user instead of silently
                # rendering no plot.
                st.warning("The model response did not contain a code block to run.")
    finally:
        # Always remove this run's dataset copy, including when a request or
        # the generated code raised; otherwise stale files pile up.
        if os.path.exists(dataset_path):
            os.remove(dataset_path)