Skip to content

Commit

Permalink
completed export function
Browse files Browse the repository at this point in the history
  • Loading branch information
star-nox committed Nov 13, 2023
1 parent 1e350da commit 2f03517
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 2 deletions.
50 changes: 49 additions & 1 deletion ai_ta_backend/export_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,57 @@
import pandas as pd
import supabase

def export_convo_history_csv(course_name: str, from_date= None, to_date= None):
def export_convo_history_csv(course_name: str, from_date: str = '', to_date: str = '') -> str:
    """
    Export conversation history for a course to a CSV file.

    Fetches rows from the Supabase ``llm-convo-monitor`` table in batches of
    25 and writes them to ``<course_name>_convo_history.csv`` in the current
    working directory. Any file left over from a previous export is removed
    first so the output never mixes runs.

    Args:
        course_name: Course whose conversations are exported.
        from_date: Optional lower bound on ``created_at`` (inclusive);
            empty string means no lower bound.
        to_date: Optional upper bound on ``created_at`` (inclusive);
            empty string means no upper bound.

    Returns:
        ``"success"`` when at least one row was exported, otherwise
        ``"No data found between the dates"``.
    """
    print("Exporting conversation history to csv file...")
    supabase_client = supabase.create_client(  # type: ignore
        supabase_url=os.getenv('SUPABASE_URL'),  # type: ignore
        supabase_key=os.getenv('SUPABASE_API_KEY'))  # type: ignore

    # Build one query and attach the date filters only when supplied, instead
    # of four near-identical branches differing only in .gte()/.lte().
    query = supabase_client.table("llm-convo-monitor").select(
        "id", count='exact').eq("course_name", course_name)
    if from_date != '':
        query = query.gte('created_at', from_date)
    if to_date != '':
        query = query.lte('created_at', to_date)
    response = query.order('id', desc=False).execute()

    # response.count can be None when the backend omits the exact count;
    # the old `response.count > 0` would raise TypeError in that case.
    if not response.count or not response.data:
        print("No data found between the dates")
        return "No data found between the dates"

    first_id = response.data[0]['id']
    last_id = response.data[-1]['id']

    filename = course_name + '_convo_history.csv'
    # Remove stale output from an earlier export; otherwise the new rows are
    # appended after the old ones (the isfile() check in the original only
    # suppressed the header, it did not prevent mixing two exports).
    if os.path.isfile(filename):
        os.remove(filename)

    # Page through the id range in batches of 25, appending each batch.
    while first_id <= last_id:
        print("Fetching data from id: ", first_id)
        batch = supabase_client.table("llm-convo-monitor").select("*").eq(
            "course_name", course_name).gte('id', first_id).lte(
            'id', last_id).order('id', desc=False).limit(25).execute()
        if not batch.data:
            # Defensive: an empty page would otherwise loop forever, because
            # first_id would never advance.
            break
        df = pd.DataFrame(batch.data)
        # Write the header only on the first batch (file does not exist yet).
        df.to_csv(filename, mode='a',
                  header=not os.path.isfile(filename), index=False)
        first_id = batch.data[-1]['id'] + 1

    return "success"
3 changes: 2 additions & 1 deletion ai_ta_backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic
from ai_ta_backend.vector_database import Ingest
from ai_ta_backend.web_scrape import WebScrape, mit_course_download
from ai_ta_backend.export_data import export_convo_history_csv

app = Flask(__name__)
CORS(app)
Expand Down Expand Up @@ -468,7 +469,7 @@ def logToNomic():
response = executor.submit(log_convo_to_nomic, course_name, data)
response = jsonify({'outcome': 'success'})
response.headers.add('Access-Control-Allow-Origin', '*')
return
return response

@app.route('/export-convo-history-csv', methods=['GET'])
def export_convo_history():
Expand Down

0 comments on commit 2f03517

Please sign in to comment.