Skip to content

Commit

Permalink
Merge branch 'main' into file-update
Browse files Browse the repository at this point in the history
  • Loading branch information
star-nox authored Nov 21, 2023
2 parents 290c616 + ba1cbb1 commit 7a5cc3a
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 1 deletion.
66 changes: 66 additions & 0 deletions ai_ta_backend/export_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import uuid
import pandas as pd
import supabase
from flask import send_file

def export_convo_history_csv(course_name: str, from_date: str = '', to_date: str = ''):
    """
    Export a course's conversation history to a CSV file on disk.

    Args:
        course_name: Course identifier used to filter rows in `llm-convo-monitor`.
        from_date: Optional date string; only rows with `created_at` on/after it
            are included. Empty string means "no lower bound".
        to_date: Optional date string; only rows with `created_at` on/before it
            are included. Empty string means "no upper bound".

    Returns:
        On success, a tuple ``(file_path, filename, directory)`` locating the CSV.
        When no rows match, the string ``"No data found between the dates"``.
    """
    print("Exporting conversation history to csv file...")
    supabase_client = supabase.create_client(  # type: ignore
        supabase_url=os.getenv('SUPABASE_URL'),  # type: ignore
        supabase_key=os.getenv('SUPABASE_API_KEY'))  # type: ignore

    # Build a single query and apply date bounds only when provided —
    # replaces the original's four duplicated branches.
    query = supabase_client.table("llm-convo-monitor").select(
        "id", count='exact').eq("course_name", course_name)
    if from_date != '':
        query = query.gte('created_at', from_date)
    if to_date != '':
        query = query.lte('created_at', to_date)
    response = query.order('id', desc=False).execute()

    # `count` may be None (not just 0) when nothing matches; `not` covers both.
    if not response.count:
        return "No data found between the dates"

    print("id count greater than zero")
    first_id = response.data[0]['id']
    last_id = response.data[-1]['id']

    filename = course_name + '_' + str(uuid.uuid4()) + '_convo_history.csv'
    file_path = os.path.join(os.getcwd(), filename)

    # Fetch full rows in batches of 25 from first_id to last_id, appending
    # each batch to the CSV. The header is written only for the first batch
    # (i.e. while the file does not exist yet).
    while first_id <= last_id:
        print("Fetching data from id: ", first_id)
        batch = supabase_client.table("llm-convo-monitor").select("*").eq(
            "course_name", course_name).gte('id', first_id).lte(
                'id', last_id).order('id', desc=False).limit(25).execute()
        if not batch.data:
            # Defensive: an empty batch would otherwise loop forever because
            # first_id would never advance.
            break
        df = pd.DataFrame(batch.data)
        # Evaluate the header flag BEFORE to_csv creates the file.
        df.to_csv(file_path, mode='a',
                  header=not os.path.isfile(file_path), index=False)
        # Advance past the last id just written.
        first_id = batch.data[-1]['id'] + 1
        print("updated first_id: ", first_id)

    # The original wrapped this return in a try/except that could never fire
    # (returning a tuple of already-computed values cannot raise); removed.
    return (file_path, filename, os.getcwd())

27 changes: 26 additions & 1 deletion ai_ta_backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import List

from dotenv import load_dotenv
from flask import Flask, Response, abort, jsonify, request
from flask import Flask, Response, abort, jsonify, request, send_file, make_response, send_from_directory
from flask_cors import CORS
from flask_executor import Executor
from sqlalchemy import JSON
Expand All @@ -15,6 +15,7 @@
from ai_ta_backend.vector_database import Ingest
from ai_ta_backend.web_scrape import WebScrape, mit_course_download
from ai_ta_backend.canvas import CanvasAPI
from ai_ta_backend.export_data import export_convo_history_csv

app = Flask(__name__)
CORS(app)
Expand Down Expand Up @@ -471,6 +472,30 @@ def logToNomic():
response.headers.add('Access-Control-Allow-Origin', '*')
return response

@app.route('/export-convo-history-csv', methods=['GET'])
def export_convo_history():
    """
    Download a course's conversation history as a CSV attachment.

    Query parameters:
        course_name (required): course to export.
        from_date (optional): lower bound on `created_at`.
        to_date (optional): upper bound on `created_at`.

    Returns a 400 when `course_name` is missing, a JSON message when the
    export produced no file, and otherwise the CSV as an attachment.
    """
    course_name: str = request.args.get('course_name', default='', type=str)
    from_date: str = request.args.get('from_date', default='', type=str)
    to_date: str = request.args.get('to_date', default='', type=str)

    if course_name == '':
        # proper web error "400 Bad request"
        abort(
            400,
            description=
            f"Missing required parameter: 'course_name' must be provided. Course name: `{course_name}`"
        )

    export_status = export_convo_history_csv(course_name, from_date, to_date)
    print("EXPORT FILE LINKS: ", export_status)

    # Bug fix: the exporter returns a plain error STRING when no data matches;
    # the original indexed it as a tuple, serving single characters as a
    # directory/filename and calling os.remove on one character of the message.
    if isinstance(export_status, str):
        response = jsonify({"message": export_status})
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response

    file_path, filename, directory = export_status
    response = make_response(
        send_from_directory(directory, filename, as_attachment=True))
    response.headers.add('Access-Control-Allow-Origin', '*')
    response.headers["Content-Disposition"] = f"attachment; filename={filename}"

    # Delete the temp file once its contents are packaged into the response.
    os.remove(file_path)
    return response


if __name__ == '__main__':
app.run(debug=True, port=int(os.getenv("PORT", default=8000)))
1 change: 1 addition & 0 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -1283,5 +1283,6 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]
return False



if __name__ == '__main__':
pass

0 comments on commit 7a5cc3a

Please sign in to comment.