From 9752a1fdc29ac53d1b9115bd0a1535dfbe3ef98d Mon Sep 17 00:00:00 2001
From: Vira Kasprova
Date: Wed, 23 Oct 2024 23:37:32 -0500
Subject: [PATCH 1/2] added fetch functions

---
 ai_ta_backend/database/sql.py              |  5 +-
 ai_ta_backend/main.py                      | 25 ++++++++
 ai_ta_backend/service/retrieval_service.py | 75 ++++++++++++++++++++++
 3 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/ai_ta_backend/database/sql.py b/ai_ta_backend/database/sql.py
index 576226d8..dd712751 100644
--- a/ai_ta_backend/database/sql.py
+++ b/ai_ta_backend/database/sql.py
@@ -155,4 +155,7 @@ def insertProject(self, project_info):
     return self.supabase_client.table("projects").insert(project_info).execute()
 
   def getPreAssignedAPIKeys(self, email: str):
-    return self.supabase_client.table("pre_authorized_api_keys").select("*").contains("emails", '["' + email + '"]').execute()
\ No newline at end of file
+    return self.supabase_client.table("pre_authorized_api_keys").select("*").contains("emails", '["' + email + '"]').execute()
+
+  def getConversationsCreatedAtByCourse(self, course_name: str):
+    return self.supabase_client.table("llm-convo-monitor").select("created_at").eq("course_name", course_name).order("created_at", desc=False).execute()
\ No newline at end of file
diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py
index 6282541c..a11e9c91 100644
--- a/ai_ta_backend/main.py
+++ b/ai_ta_backend/main.py
@@ -519,6 +519,31 @@ def switch_workflow(service: WorkflowService) -> Response:
     else:
       abort(400, description=f"Bad request: {e}")
 
+@app.route('/getConversationStats', methods=['GET'])
+def get_conversation_stats(service: RetrievalService) -> Response:
+  course_name = request.args.get('course_name', default='', type=str)
+
+  if course_name == '':
+    abort(400, description="Missing required parameter: 'course_name' must be provided.")
+
+  conversation_stats = service.getConversationStats(course_name)
+
+  response = jsonify(conversation_stats)
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  return response
+
+@app.route('/getConversationHeatmapByHour', methods=['GET'])
+def get_questions_heatmap_by_hour(service: RetrievalService) -> Response:
+  course_name = request.args.get('course_name', default='', type=str)
+
+  if not course_name:
+    abort(400, description="Missing required parameter: 'course_name' must be provided.")
+
+  heatmap_data = service.getConversationHeatmapByHour(course_name)
+
+  response = jsonify(heatmap_data)
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  return response
 
 @app.route('/run_flow', methods=['POST'])
 def run_flow(service: WorkflowService) -> Response:
diff --git a/ai_ta_backend/service/retrieval_service.py b/ai_ta_backend/service/retrieval_service.py
index b098345b..0304c7f9 100644
--- a/ai_ta_backend/service/retrieval_service.py
+++ b/ai_ta_backend/service/retrieval_service.py
@@ -3,7 +3,10 @@
 import os
 import time
 import traceback
+import pytz
 from typing import Dict, List, Union
+from dateutil import parser
+from collections import defaultdict
 
 import openai
 from injector import inject
@@ -541,3 +544,75 @@ def format_for_json(self, found_docs: List[Document]) -> List[Dict]:
     ]
 
     return contexts
+
+  def getConversationStats(self, course_name: str):
+    """
+    Fetches conversation data from the database and groups it by day, hour, and weekday.
+    """
+    response = self.sqlDb.getConversationsCreatedAtByCourse(course_name)
+
+    central_tz = pytz.timezone('America/Chicago')
+
+    grouped_data = {
+        'per_day': defaultdict(int),
+        'per_hour': defaultdict(int),
+        'per_weekday': defaultdict(int),
+    }
+
+    if response and hasattr(response, 'data') and response.data:
+      for record in response.data:
+        created_at = record['created_at']
+
+        parsed_date = parser.parse(created_at)
+
+        central_time = parsed_date.astimezone(central_tz)
+
+        day = central_time.date()
+        hour = central_time.hour
+        day_of_week = central_time.strftime('%A')
+
+        grouped_data['per_day'][str(day)] += 1
+        grouped_data['per_hour'][hour] += 1
+        grouped_data['per_weekday'][day_of_week] += 1
+    else:
+      print("No valid response data. Check if the query is correct or if the response is empty.")
+      return {}
+
+    return {
+        'per_day': dict(grouped_data['per_day']),
+        'per_hour': dict(grouped_data['per_hour']),
+        'per_weekday': dict(grouped_data['per_weekday']),
+    }
+
+  def getConversationHeatmapByHour(self, course_name: str):
+    """
+    Fetches conversation data and groups it into a heatmap by day of the week and hour (Central Time).
+
+    Args:
+        course_name (str): The name of the course.
+
+    Returns:
+        dict: A nested dictionary where the outer key is the day of the week (e.g., "Monday"),
+              and the inner key is the hour of the day (0-23), and the value is the count of conversations.
+    """
+    response = self.sqlDb.getConversationsCreatedAtByCourse(course_name)
+    central_tz = pytz.timezone('America/Chicago')
+
+    heatmap_data = defaultdict(lambda: defaultdict(int))
+
+    if response and hasattr(response, 'data') and response.data:
+      for record in response.data:
+        created_at = record['created_at']
+
+        parsed_date = parser.parse(created_at)
+        central_time = parsed_date.astimezone(central_tz)
+
+        day_of_week = central_time.strftime('%A')
+        hour = central_time.hour
+
+        heatmap_data[day_of_week][hour] += 1
+    else:
+      print("No valid response data. Check if the query is correct or if the response is empty.")
+      return {}
+
+    return dict(heatmap_data)
\ No newline at end of file

From d1e6642bbb742f7a830017b5aa404447c07844ac Mon Sep 17 00:00:00 2001
From: Vira Kasprova
Date: Thu, 24 Oct 2024 11:08:50 -0500
Subject: [PATCH 2/2] fixed timezone

---
 ai_ta_backend/database/sql.py              |  2 +-
 ai_ta_backend/service/retrieval_service.py | 14 +++++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/ai_ta_backend/database/sql.py b/ai_ta_backend/database/sql.py
index dd712751..5224e3b8 100644
--- a/ai_ta_backend/database/sql.py
+++ b/ai_ta_backend/database/sql.py
@@ -158,4 +158,4 @@ def getPreAssignedAPIKeys(self, email: str):
     return self.supabase_client.table("pre_authorized_api_keys").select("*").contains("emails", '["' + email + '"]').execute()
 
   def getConversationsCreatedAtByCourse(self, course_name: str):
-    return self.supabase_client.table("llm-convo-monitor").select("created_at").eq("course_name", course_name).order("created_at", desc=False).execute()
\ No newline at end of file
+    return self.supabase_client.table("llm-convo-monitor").select("created_at").eq("course_name", course_name).execute()
\ No newline at end of file
diff --git a/ai_ta_backend/service/retrieval_service.py b/ai_ta_backend/service/retrieval_service.py
index 0304c7f9..506d4af1 100644
--- a/ai_ta_backend/service/retrieval_service.py
+++ b/ai_ta_backend/service/retrieval_service.py
@@ -548,6 +548,15 @@ def format_for_json(self, found_docs: List[Document]) -> List[Dict]:
   def getConversationStats(self, course_name: str):
     """
     Fetches conversation data from the database and groups it by day, hour, and weekday.
+
+    Args:
+        course_name (str)
+
+    Returns:
+        dict: Aggregated conversation counts:
+            - 'per_day': By date (YYYY-MM-DD).
+            - 'per_hour': By hour (0-23).
+            - 'per_weekday': By weekday (Monday-Sunday).
     """
     response = self.sqlDb.getConversationsCreatedAtByCourse(course_name)
 
@@ -589,11 +598,10 @@ def getConversationHeatmapByHour(self, course_name: str):
     Fetches conversation data and groups it into a heatmap by day of the week and hour (Central Time).
 
     Args:
-        course_name (str): The name of the course.
+        course_name (str)
 
     Returns:
-        dict: A nested dictionary where the outer key is the day of the week (e.g., "Monday"),
-              and the inner key is the hour of the day (0-23), and the value is the count of conversations.
+        dict: A nested dictionary with days of the week as outer keys and hours (0-23) as inner keys, where values are conversation counts.
     """
     response = self.sqlDb.getConversationsCreatedAtByCourse(course_name)
     central_tz = pytz.timezone('America/Chicago')
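
Note: a minimal usage sketch of the two endpoints added in this series, assuming the backend is reachable at a hypothetical http://localhost:8000 and that a course named "ece220" exists. The route names, the course_name query parameter, and the response shapes come from the handlers above; the base URL, the course name, and the requests dependency are illustrative assumptions, not part of the patches.

import requests  # third-party HTTP client, assumed to be installed

BASE_URL = "http://localhost:8000"  # hypothetical deployment of ai_ta_backend
course = "ece220"                   # hypothetical course name

# /getConversationStats returns {'per_day': {...}, 'per_hour': {...}, 'per_weekday': {...}}.
stats = requests.get(f"{BASE_URL}/getConversationStats", params={"course_name": course}).json()
print("conversations per weekday:", stats.get("per_weekday", {}))

# /getConversationHeatmapByHour returns {day_of_week: {hour: count}}; note that jsonify
# serializes the integer hour keys as JSON strings ("0".."23").
heatmap = requests.get(f"{BASE_URL}/getConversationHeatmapByHour", params={"course_name": course}).json()
for day, hours in heatmap.items():
  busiest = max(hours, key=hours.get)
  print(f"{day}: busiest hour {busiest} with {hours[busiest]} conversations")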