Skip to content

Commit

Permalink
Merge branch 'main' into file-update
Browse files Browse the repository at this point in the history
  • Loading branch information
star-nox authored Nov 21, 2023
2 parents 290c616 + ba1cbb1 commit 7a5cc3a
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 1 deletion.
66 changes: 66 additions & 0 deletions ai_ta_backend/export_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import uuid
import pandas as pd
import supabase
from flask import send_file

def export_convo_history_csv(course_name: str, from_date: str = '', to_date: str = ''):
    """
    Export a course's conversation history to a CSV file on disk.

    Args:
        course_name: Course identifier used to filter rows in `llm-convo-monitor`.
        from_date: Optional date string; only rows with `created_at` on/after it
            are included. Empty string means "no lower bound".
        to_date: Optional date string; only rows with `created_at` on/before it
            are included. Empty string means "no upper bound".

    Returns:
        On success, a tuple ``(file_path, filename, directory)`` locating the CSV.
        When no rows match, the string ``"No data found between the dates"``.
    """
    print("Exporting conversation history to csv file...")
    supabase_client = supabase.create_client(  # type: ignore
        supabase_url=os.getenv('SUPABASE_URL'),  # type: ignore
        supabase_key=os.getenv('SUPABASE_API_KEY'))  # type: ignore

    # Build a single query and apply date bounds only when provided —
    # replaces the original's four duplicated branches.
    query = supabase_client.table("llm-convo-monitor").select(
        "id", count='exact').eq("course_name", course_name)
    if from_date != '':
        query = query.gte('created_at', from_date)
    if to_date != '':
        query = query.lte('created_at', to_date)
    response = query.order('id', desc=False).execute()

    # `count` may be None (not just 0) when nothing matches; `not` covers both.
    if not response.count:
        return "No data found between the dates"

    print("id count greater than zero")
    first_id = response.data[0]['id']
    last_id = response.data[-1]['id']

    filename = course_name + '_' + str(uuid.uuid4()) + '_convo_history.csv'
    file_path = os.path.join(os.getcwd(), filename)

    # Fetch full rows in batches of 25 from first_id to last_id, appending
    # each batch to the CSV. The header is written only for the first batch
    # (i.e. while the file does not exist yet).
    while first_id <= last_id:
        print("Fetching data from id: ", first_id)
        batch = supabase_client.table("llm-convo-monitor").select("*").eq(
            "course_name", course_name).gte('id', first_id).lte(
                'id', last_id).order('id', desc=False).limit(25).execute()
        if not batch.data:
            # Defensive: an empty batch would otherwise loop forever because
            # first_id would never advance.
            break
        df = pd.DataFrame(batch.data)
        # Evaluate the header flag BEFORE to_csv creates the file.
        df.to_csv(file_path, mode='a',
                  header=not os.path.isfile(file_path), index=False)
        # Advance past the last id just written.
        first_id = batch.data[-1]['id'] + 1
        print("updated first_id: ", first_id)

    # The original wrapped this return in a try/except that could never fire
    # (returning a tuple of already-computed values cannot raise); removed.
    return (file_path, filename, os.getcwd())

27 changes: 26 additions & 1 deletion ai_ta_backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import List

from dotenv import load_dotenv
from flask import Flask, Response, abort, jsonify, request
from flask import Flask, Response, abort, jsonify, request, send_file, make_response, send_from_directory
from flask_cors import CORS
from flask_executor import Executor
from sqlalchemy import JSON
Expand All @@ -15,6 +15,7 @@
from ai_ta_backend.vector_database import Ingest
from ai_ta_backend.web_scrape import WebScrape, mit_course_download
from ai_ta_backend.canvas import CanvasAPI
from ai_ta_backend.export_data import export_convo_history_csv

app = Flask(__name__)
CORS(app)
Expand Down Expand Up @@ -471,6 +472,30 @@ def logToNomic():
response.headers.add('Access-Control-Allow-Origin', '*')
return response

@app.route('/export-convo-history-csv', methods=['GET'])
def export_convo_history():
    """
    Download a course's conversation history as a CSV attachment.

    Query parameters:
        course_name (required): course to export.
        from_date (optional): lower bound on `created_at`.
        to_date (optional): upper bound on `created_at`.

    Returns a 400 when `course_name` is missing, a JSON message when the
    export produced no file, and otherwise the CSV as an attachment.
    """
    course_name: str = request.args.get('course_name', default='', type=str)
    from_date: str = request.args.get('from_date', default='', type=str)
    to_date: str = request.args.get('to_date', default='', type=str)

    if course_name == '':
        # proper web error "400 Bad request"
        abort(
            400,
            description=
            f"Missing required parameter: 'course_name' must be provided. Course name: `{course_name}`"
        )

    export_status = export_convo_history_csv(course_name, from_date, to_date)
    print("EXPORT FILE LINKS: ", export_status)

    # Bug fix: the exporter returns a plain error STRING when no data matches;
    # the original indexed it as a tuple, serving single characters as a
    # directory/filename and calling os.remove on one character of the message.
    if isinstance(export_status, str):
        response = jsonify({"message": export_status})
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response

    file_path, filename, directory = export_status
    response = make_response(
        send_from_directory(directory, filename, as_attachment=True))
    response.headers.add('Access-Control-Allow-Origin', '*')
    response.headers["Content-Disposition"] = f"attachment; filename={filename}"

    # Delete the temp file once its contents are packaged into the response.
    os.remove(file_path)
    return response


if __name__ == '__main__':
app.run(debug=True, port=int(os.getenv("PORT", default=8000)))
1 change: 1 addition & 0 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -1283,5 +1283,6 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]
return False



if __name__ == '__main__':
pass

0 comments on commit 7a5cc3a

Please sign in to comment.