Commit cc85707 (merge of parents d2e1f7e and ba1cbb1)

File tree: 3 files changed (+94, -2 lines)

ai_ta_backend/export_data.py (+66)

@@ -0,0 +1,66 @@
+import os
+import uuid
+import pandas as pd
+import supabase
+from flask import send_file
+
+def export_convo_history_csv(course_name: str, from_date='', to_date=''):
+  """
+  Export conversation history to csv file.
+  Optional args: from_date, to_date
+  """
+  print("Exporting conversation history to csv file...")
+  supabase_client = supabase.create_client(  # type: ignore
+      supabase_url=os.getenv('SUPABASE_URL'),  # type: ignore
+      supabase_key=os.getenv('SUPABASE_API_KEY'))  # type: ignore
+
+  if from_date == '' and to_date == '':
+    # Get all data
+    print("No dates")
+    response = supabase_client.table("llm-convo-monitor").select("id", count='exact').eq("course_name", course_name).order('id', desc=False).execute()
+  elif from_date != '' and to_date == '':
+    print("only from_date")
+    # Get data from from_date to now
+    response = supabase_client.table("llm-convo-monitor").select("id", count='exact').eq("course_name", course_name).gte('created_at', from_date).order('id', desc=False).execute()
+  elif from_date == '' and to_date != '':
+    print("only to_date")
+    # Get data from beginning to to_date
+    response = supabase_client.table("llm-convo-monitor").select("id", count='exact').eq("course_name", course_name).lte('created_at', to_date).order('id', desc=False).execute()
+  else:
+    print("both from_date and to_date")
+    # Get data from from_date to to_date
+    response = supabase_client.table("llm-convo-monitor").select("id", count='exact').eq("course_name", course_name).gte('created_at', from_date).lte('created_at', to_date).order('id', desc=False).execute()
+
+  # Fetch data
+  if response.count > 0:
+    print("id count greater than zero")
+    first_id = response.data[0]['id']
+    last_id = response.data[-1]['id']
+
+    filename = course_name + '_' + str(uuid.uuid4()) + '_convo_history.csv'
+    file_path = os.path.join(os.getcwd(), filename)
+    # Fetch data in batches of 25 from first_id to last_id
+    while first_id <= last_id:
+      print("Fetching data from id: ", first_id)
+      response = supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).gte('id', first_id).lte('id', last_id).order('id', desc=False).limit(25).execute()
+      # Convert to pandas dataframe
+      df = pd.DataFrame(response.data)
+      # Append to csv file
+      if not os.path.isfile(file_path):
+        df.to_csv(file_path, mode='a', header=True, index=False)
+      else:
+        df.to_csv(file_path, mode='a', header=False, index=False)
+
+      # Update first_id
+      first_id = response.data[-1]['id'] + 1
+      print("updated first_id: ", first_id)
+
+    # Download file
+    try:
+      return (file_path, filename, os.getcwd())
+    except Exception as e:
+      print(e)
+      return "Error downloading file"
+  else:
+    return "No data found between the dates"
+
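For context, a minimal sketch of calling the new helper directly, outside Flask. This is not part of the commit: the course name and dates are hypothetical placeholders, and the SUPABASE_URL / SUPABASE_API_KEY environment variables are assumed to be set.

# Hypothetical usage sketch (not from the commit); placeholder course name and dates.
from ai_ta_backend.export_data import export_convo_history_csv

result = export_convo_history_csv('example-course', from_date='2023-09-01', to_date='2023-09-30')
if isinstance(result, tuple):
  # Success path: the function returns (absolute CSV path, bare filename, working directory).
  file_path, filename, working_dir = result
  print('CSV written to', file_path)
else:
  # Failure path: the function returns an error string instead of a tuple.
  print(result)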

ai_ta_backend/main.py (+26, -1)

@@ -5,7 +5,7 @@
 from typing import List

 from dotenv import load_dotenv
-from flask import Flask, Response, abort, jsonify, request
+from flask import Flask, Response, abort, jsonify, request, send_file, make_response, send_from_directory
 from flask_cors import CORS
 from flask_executor import Executor
 from sqlalchemy import JSON
@@ -14,6 +14,7 @@
 from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic
 from ai_ta_backend.vector_database import Ingest
 from ai_ta_backend.web_scrape import WebScrape, mit_course_download
+from ai_ta_backend.export_data import export_convo_history_csv

 app = Flask(__name__)
 CORS(app)
@@ -470,6 +471,30 @@ def logToNomic():
   response.headers.add('Access-Control-Allow-Origin', '*')
   return response

+@app.route('/export-convo-history-csv', methods=['GET'])
+def export_convo_history():
+  course_name: str = request.args.get('course_name', default='', type=str)
+  from_date: str = request.args.get('from_date', default='', type=str)
+  to_date: str = request.args.get('to_date', default='', type=str)
+
+  if course_name == '':
+    # proper web error "400 Bad request"
+    abort(
+        400,
+        description=
+        f"Missing required parameter: 'course_name' must be provided. Course name: `{course_name}`"
+    )
+
+  export_status = export_convo_history_csv(course_name, from_date, to_date)
+  print("EXPORT FILE LINKS: ", export_status)
+
+  response = make_response(send_from_directory(export_status[2], export_status[1], as_attachment=True))
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  response.headers["Content-Disposition"] = f"attachment; filename={export_status[1]}"
+
+  os.remove(export_status[0])
+  return response
+

 if __name__ == '__main__':
   app.run(debug=True, port=int(os.getenv("PORT", default=8000)))
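As a usage note, here is a hedged client-side sketch of exercising the new GET route. The base URL, course name, and dates are illustrative placeholders (port 8000 is only the default from app.run above), and the requests library is assumed to be available.

# Hypothetical client sketch (not from the commit).
import requests

params = {
    'course_name': 'example-course',  # required; the route aborts with 400 if missing
    'from_date': '2023-09-01',        # optional
    'to_date': '2023-09-30',          # optional
}
resp = requests.get('http://localhost:8000/export-convo-history-csv', params=params)
resp.raise_for_status()

# The response body is the CSV attachment streamed back by send_from_directory.
with open('convo_history.csv', 'wb') as f:
    f.write(resp.content)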

ai_ta_backend/vector_database.py (+2, -1)

@@ -1200,6 +1200,7 @@ def format_for_json(self, found_docs: List[Document]) -> List[Dict]:
     } for doc in found_docs]

   return contexts
-
+
+
 if __name__ == '__main__':
   pass
