Skip to content

Commit

Permalink
Clean up and format all code
Browse files Browse the repository at this point in the history
  • Loading branch information
KastanDay committed Mar 5, 2024
1 parent 8cce02b commit 2f30285
Show file tree
Hide file tree
Showing 6 changed files with 823 additions and 802 deletions.
2 changes: 1 addition & 1 deletion ai_ta_backend/canvas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import shutil

import requests
from canvasapi import Canvas
import sentry_sdk
from canvasapi import Canvas

from ai_ta_backend.aws import upload_data_files_to_s3
from ai_ta_backend.vector_database import Ingest
Expand Down
36 changes: 18 additions & 18 deletions ai_ta_backend/emails.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def send_email(subject, body_text, sender, receipients):
# Create message content
message = MIMEMultipart("alternative")
message["Subject"] = subject
message["From"] = sender
# Create message content
message = MIMEMultipart("alternative")
message["Subject"] = subject
message["From"] = sender

if len(receipients) == 1:
message["To"] = receipients[0]
else:
message["To"] = ", ".join(receipients)
if len(receipients) == 1:
message["To"] = receipients[0]
else:
message["To"] = ", ".join(receipients)

# Add plain text part
part1 = MIMEText(body_text, "plain")
message.attach(part1)
# Add plain text part
part1 = MIMEText(body_text, "plain")
message.attach(part1)

# Add additional parts for HTML, attachments, etc. (optional)
# Add additional parts for HTML, attachments, etc. (optional)

# Connect to SMTP server
with smtplib.SMTP_SSL(os.getenv('SES_HOST'), os.getenv('SES_PORT')) as server: # type: ignore
server.login(os.getenv('USERNAME_SMTP'), os.getenv('PASSWORD_SMTP')) # type: ignore
server.sendmail(sender, receipients, message.as_string())
# Connect to SMTP server
with smtplib.SMTP_SSL(os.getenv('SES_HOST'), os.getenv('SES_PORT')) as server: # type: ignore
server.login(os.getenv('USERNAME_SMTP'), os.getenv('PASSWORD_SMTP')) # type: ignore
server.sendmail(sender, receipients, message.as_string())

return "Email sent successfully!"
return "Email sent successfully!"
85 changes: 49 additions & 36 deletions ai_ta_backend/export_data.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import json
import os
import uuid
import zipfile
import io
import pandas as pd
import supabase
import sentry_sdk
from concurrent.futures import ProcessPoolExecutor

import boto3
import botocore
from concurrent.futures import ProcessPoolExecutor
import pandas as pd
import requests
import json
import sentry_sdk
import supabase

from ai_ta_backend.emails import send_email

# Initialize Supabase client
SUPABASE_CLIENT = supabase.create_client(supabase_url=os.getenv('SUPABASE_URL'), # type: ignore
supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore
SUPABASE_CLIENT = supabase.create_client(
supabase_url=os.getenv('SUPABASE_URL'), # type: ignore
supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore


def export_documents_json(course_name: str, from_date='', to_date=''):
Expand All @@ -24,7 +26,7 @@ def export_documents_json(course_name: str, from_date='', to_date=''):
course_name (str): The name of the course.
from_date (str, optional): The start date for the data export. Defaults to ''.
to_date (str, optional): The end date for the data export. Defaults to ''.
"""
"""

if from_date != '' and to_date != '':
# query between the dates
Expand Down Expand Up @@ -55,7 +57,7 @@ def export_documents_json(course_name: str, from_date='', to_date=''):
# add a condition to route to direct download or s3 download
if response.count > 1000:
# call background task to upload to s3

filename = course_name + '_' + str(uuid.uuid4()) + '_documents.zip'
s3_filepath = s3_file = f"courses/{course_name}/{filename}"
# background task of downloading data - map it with above ID
Expand All @@ -74,17 +76,18 @@ def export_documents_json(course_name: str, from_date='', to_date=''):
print("total_doc_count: ", total_doc_count)
print("first_id: ", first_id)
print("last_id: ", last_id)

curr_doc_count = 0
filename = course_name + '_' + str(uuid.uuid4()) + '_documents.json'
file_path = os.path.join(os.getcwd(), filename)

while curr_doc_count < total_doc_count:
print("Fetching data from id: ", first_id)
response = SUPABASE_CLIENT.table("documents").select("*").eq("course_name", course_name).gte('id', first_id).order('id', desc=False).limit(100).execute()
response = SUPABASE_CLIENT.table("documents").select("*").eq("course_name", course_name).gte(
'id', first_id).order('id', desc=False).limit(100).execute()
df = pd.DataFrame(response.data)
curr_doc_count += len(response.data)

# writing to file
if not os.path.isfile(file_path):
df.to_json(file_path, orient='records')
Expand All @@ -93,7 +96,7 @@ def export_documents_json(course_name: str, from_date='', to_date=''):

if len(response.data) > 0:
first_id = response.data[-1]['id'] + 1

# Download file
try:
# zip file
Expand All @@ -111,7 +114,7 @@ def export_documents_json(course_name: str, from_date='', to_date=''):
return {"response": "Error downloading file."}
else:
return {"response": "No data found between the given dates."}


def export_data_in_bg(response, download_type, course_name, s3_path):
"""
Expand Down Expand Up @@ -139,12 +142,14 @@ def export_data_in_bg(response, download_type, course_name, s3_path):
while curr_doc_count < total_doc_count:
print("Fetching data from id: ", first_id)
if download_type == 'documents':
response = SUPABASE_CLIENT.table("documents").select("*").eq("course_name", course_name).gte('id', first_id).order('id', desc=False).limit(100).execute()
response = SUPABASE_CLIENT.table("documents").select("*").eq("course_name", course_name).gte(
'id', first_id).order('id', desc=False).limit(100).execute()
else:
response = SUPABASE_CLIENT.table("llm-convo-monitor").select("*").eq("course_name", course_name).gte('id', first_id).order('id', desc=False).limit(100).execute()
response = SUPABASE_CLIENT.table("llm-convo-monitor").select("*").eq("course_name", course_name).gte(
'id', first_id).order('id', desc=False).limit(100).execute()
df = pd.DataFrame(response.data)
curr_doc_count += len(response.data)

# writing to file
if not os.path.isfile(file_path):
df.to_json(file_path, orient='records')
Expand All @@ -153,7 +158,7 @@ def export_data_in_bg(response, download_type, course_name, s3_path):

if len(response.data) > 0:
first_id = response.data[-1]['id'] + 1

# zip file
zip_filename = filename.split('.')[0] + '.zip'
zip_file_path = os.path.join(os.getcwd(), zip_filename)
Expand All @@ -174,7 +179,7 @@ def export_data_in_bg(response, download_type, course_name, s3_path):
#s3_file = f"courses/{course_name}/exports/{os.path.basename(zip_file_path)}"
s3_file = f"courses/{course_name}/{os.path.basename(zip_file_path)}"
s3.upload_file(zip_file_path, os.getenv('S3_BUCKET_NAME'), s3_file)

# remove local files
os.remove(file_path)
os.remove(zip_file_path)
Expand All @@ -183,16 +188,18 @@ def export_data_in_bg(response, download_type, course_name, s3_path):

# pre-signed URL
s3_object = s3.head_object(Bucket=os.getenv('S3_BUCKET_NAME'), Key=s3_path)

# generate presigned URL
s3_url = s3.generate_presigned_url('get_object', Params={'Bucket': os.getenv('S3_BUCKET_NAME'), 'Key': s3_path}, ExpiresIn=3600)

s3_url = s3.generate_presigned_url('get_object',
Params={
'Bucket': os.getenv('S3_BUCKET_NAME'),
'Key': s3_path
},
ExpiresIn=3600)

# get admin email IDs
headers = {
"Authorization": f"Bearer {os.getenv('VERCEL_READ_ONLY_API_KEY')}",
"Content-Type": "application/json"
}

headers = {"Authorization": f"Bearer {os.getenv('VERCEL_READ_ONLY_API_KEY')}", "Content-Type": "application/json"}

hget_url = str(os.getenv('VERCEL_BASE_URL')) + "course_metadatas/" + course_name
response = requests.get(hget_url, headers=headers)
course_metadata = response.json()
Expand All @@ -214,6 +221,7 @@ def export_data_in_bg(response, download_type, course_name, s3_path):
print(e)
return "Error: " + str(e)


def check_s3_path_and_download(s3_path):
"""
This function checks if the file exists in S3 and downloads it.
Expand All @@ -229,12 +237,17 @@ def check_s3_path_and_download(s3_path):
try:
print("Checking if file exists in S3...", s3_path)
s3_object = s3.head_object(Bucket=os.getenv('S3_BUCKET_NAME'), Key=s3_path)

# generate presigned URL
s3_url = s3.generate_presigned_url('get_object', Params={'Bucket': os.getenv('S3_BUCKET_NAME'), 'Key': s3_path}, ExpiresIn=172800)
s3_url = s3.generate_presigned_url('get_object',
Params={
'Bucket': os.getenv('S3_BUCKET_NAME'),
'Key': s3_path
},
ExpiresIn=172800)
print("Presigned URL: ", s3_url)
return {"response": s3_url}
return {"response": s3_url}

except botocore.exceptions.ClientError as e:
if e.response['Error']['Code'] == "404":
# The object does not exist.
Expand All @@ -243,7 +256,7 @@ def check_s3_path_and_download(s3_path):
# Something else has gone wrong.
sentry_sdk.capture_exception(e)
return {"response": "Error downloading file."}


def export_convo_history_json(course_name: str, from_date='', to_date=''):
"""
Expand All @@ -254,7 +267,7 @@ def export_convo_history_json(course_name: str, from_date='', to_date=''):
to_date (str, optional): The end date for the data export. Defaults to ''.
"""
print("Exporting conversation history to csv file...")

if from_date == '' and to_date == '':
# Get all data
print("No dates")
Expand All @@ -276,7 +289,7 @@ def export_convo_history_json(course_name: str, from_date='', to_date=''):
response = SUPABASE_CLIENT.table("llm-convo-monitor").select("id", count='exact').eq(
"course_name", course_name).gte('created_at', from_date).lte('created_at', to_date).order('id',
desc=False).execute()

if response.count > 1000:
# call background task to upload to s3
filename = course_name + '_' + str(uuid.uuid4()) + '_convo_history.zip'
Expand Down
3 changes: 2 additions & 1 deletion ai_ta_backend/filtering_contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
import openai
import ray
import requests
import sentry_sdk

# from langchain import hub
# import replicate
from posthog import Posthog
import sentry_sdk

# from dotenv import load_dotenv
# load_dotenv(override=True)
Expand Down
25 changes: 14 additions & 11 deletions ai_ta_backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import threading
import time
from typing import List
import requests
from threading import Thread


import ray
import requests
import sentry_sdk
from dotenv import load_dotenv
from flask import (
Flask,
Expand All @@ -21,13 +21,14 @@
from flask_cors import CORS
from flask_executor import Executor
from posthog import Posthog
import ray
import sentry_sdk

from ai_ta_backend.canvas import CanvasAPI

from ai_ta_backend.export_data import export_convo_history_json, export_documents_json, check_s3_path_and_download
from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic, create_document_map
from ai_ta_backend.export_data import export_convo_history_json
from ai_ta_backend.nomic_logging import (
create_document_map,
get_nomic_map,
log_convo_to_nomic,
)
from ai_ta_backend.vector_database import Ingest
from ai_ta_backend.web_scrape import WebScrape, mit_course_download

Expand Down Expand Up @@ -561,6 +562,7 @@ def nomic_map():
response.headers.add('Access-Control-Allow-Origin', '*')
return response


@app.route('/createDocumentMap', methods=['GET'])
def createDocumentMap():
course_name: str = request.args.get('course_name', default='', type=str)
Expand Down Expand Up @@ -624,11 +626,12 @@ def export_convo_history():
response.headers.add('Access-Control-Allow-Origin', '*')

else:
response = make_response(send_from_directory(export_status['response'][2], export_status['response'][1], as_attachment=True))
response = make_response(
send_from_directory(export_status['response'][2], export_status['response'][1], as_attachment=True))
response.headers.add('Access-Control-Allow-Origin', '*')
response.headers["Content-Disposition"] = f"attachment; filename={export_status['response'][1]}"
os.remove(export_status['response'][0])

return response


Expand Down Expand Up @@ -674,8 +677,8 @@ def resource_report() -> Response:
# https://manpages.debian.org/bookworm/manpages-dev/getrlimit.2.en.html
"""
import resource
from resource import getrusage, RUSAGE_SELF, RUSAGE_CHILDREN
import subprocess
from resource import RUSAGE_CHILDREN, RUSAGE_SELF, getrusage

print("👇👇👇👇👇👇👇👇👇 <RESOURCE REPORT> 👇👇👇👇👇👇👇👇👇")

Expand Down
Loading

0 comments on commit 2f30285

Please sign in to comment.