Commit cf22849
cleanup prints, looks good to me
KastanDay committed Sep 15, 2023
1 parent adb1b50 commit cf22849
Showing 2 changed files with 21 additions and 57 deletions.
24 changes: 4 additions & 20 deletions ai_ta_backend/main.py
@@ -1,20 +1,18 @@
 import gc
-import gc
 import json
 import os
 import time
 from typing import List
-import json
 
 from dotenv import load_dotenv
-from flask import Flask, Response, Response, abort, jsonify, request
+from flask import Flask, Response, abort, jsonify, request
 from flask_cors import CORS
+from flask_executor import Executor
 from sqlalchemy import JSON
 
+from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic
 from ai_ta_backend.vector_database import Ingest
 from ai_ta_backend.web_scrape import main_crawler, mit_course_download
-from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic
-from flask_executor import Executor
 
 app = Flask(__name__)
 CORS(app)
@@ -397,36 +395,22 @@ def nomic_map():

 @app.route('/onResponseCompletion', methods=['POST'])
 def logToNomic():
-  course_name: str = request.args.get('course_name', default='', type=str)
-  conversation: str = request.args.get('conversation', default='', type=str)
-  print("In /onResponseCompletion")
-
-  # print("print json: ", request.get_json())
   data = request.get_json()
-  print(len(data))
-  print(type(data))
-
   course_name = data['course_name']
   conversation = data['conversation']
 
-  # print("course_name: ", course_name)
-  # print("conversation: ", conversation)
-
   if course_name == '' or conversation == '':
     # proper web error "400 Bad request"
     abort(
         400,
         description=
         f"Missing one or more required parameters: 'course_name' and 'conversation' must be provided. Course name: `{course_name}`, Conversation: `{conversation}`"
     )
+  print(f"In /onResponseCompletion for course: {course_name}")
 
-  #conversation_json = json.loads(conversation)
-
   # background execution of tasks!!
   response = executor.submit(log_convo_to_nomic, course_name, data)
   response = jsonify({'outcome': 'success'})
   response.headers.add('Access-Control-Allow-Origin', '*')
 
   return response
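For reference, the shape this endpoint settles into after the cleanup: parse the JSON body, validate it, hand the slow Nomic logging to Flask-Executor, and return immediately so the UI never waits on embeddings. Below is a minimal, self-contained sketch of that pattern; the app setup and the stub logger are illustrative, not the project's full main.py. Note the diff assigns the future from executor.submit to response and then immediately overwrites it with jsonify(...), so the future is deliberately discarded; the sketch drops it outright.

from flask import Flask, abort, jsonify, request
from flask_executor import Executor

app = Flask(__name__)
executor = Executor(app)

def log_convo_to_nomic(course_name: str, data: dict) -> None:
  # Stand-in for the real logger: embedding + Nomic upload happen here,
  # off the request thread.
  pass

@app.route('/onResponseCompletion', methods=['POST'])
def log_to_nomic():
  data = request.get_json()
  course_name = data['course_name']
  conversation = data['conversation']
  if course_name == '' or conversation == '':
    abort(400, description="Missing 'course_name' or 'conversation'")
  print(f"In /onResponseCompletion for course: {course_name}")
  executor.submit(log_convo_to_nomic, course_name, data)  # fire-and-forget
  response = jsonify({'outcome': 'success'})
  response.headers.add('Access-Control-Allow-Origin', '*')
  return response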


54 changes: 17 additions & 37 deletions ai_ta_backend/nomic_logging.py
@@ -1,13 +1,13 @@
+import datetime
 import os
+import time
 
 import nomic
-from nomic import atlas
-from nomic import AtlasProject
-from langchain.embeddings import OpenAIEmbeddings
 import numpy as np
-import time
-import datetime
 import pandas as pd
 import supabase
+from langchain.embeddings import OpenAIEmbeddings
+from nomic import AtlasProject, atlas
 
 nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app
 NOMIC_MAP_NAME_PREFIX = 'Conversation Map for '
@@ -22,16 +22,12 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
     - if no, add new data point
   3. Keep current logic for map doesn't exist - update metadata
   """
-  print("in log_convo_to_nomic()")
-
-  print("conversation: ", conversation)
+  print(f"in log_convo_to_nomic() for course: {course_name}")
 
   messages = conversation['conversation']['messages']
   user_email = conversation['conversation']['user_email']
   conversation_id = conversation['conversation']['id']
 
-  #print("conversation: ", conversation)
-
   # we have to upload whole conversations
   # check what the fetched data looks like - pandas df or pyarrow table
   # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it.
@@ -44,33 +40,25 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
   try:
     # fetch project metadata and embeddings
     project = AtlasProject(name=project_name, add_datums_if_exists=True)
-    map_metadata_df = project.maps[1].data.df
+    map_metadata_df = project.maps[1].data.df # type: ignore
     map_embeddings_df = project.maps[1].embeddings.latent
     map_metadata_df['id'] = map_metadata_df['id'].astype(int)
     last_id = map_metadata_df['id'].max()
-    print("last_id: ", last_id)
 
     if conversation_id in map_metadata_df.values:
-      print("conversation_id exists")
-
       # store that convo metadata locally
       prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id]
       prev_index = prev_data.index.values[0]
-      print("prev_index: ", prev_index)
       embeddings = map_embeddings_df[prev_index - 1].reshape(1, 1536)
       prev_convo = prev_data['conversation'].values[0]
       prev_id = prev_data['id'].values[0]
-      print("prev_id: ", prev_id)
       created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S')
-      print("prev_created_at: ", created_at)
-      print("before delete")
 
-      # delete that convo data point from Nomic
-      print(project.delete_data([str(prev_id)]))
+      # delete that convo data point from Nomic, and print result
+      print("Deleting point from nomic:", project.delete_data([str(prev_id)]))
 
       # prep for new point
       first_message = prev_convo.split("\n")[1].split(": ")[1]
-      print("first_message: ", first_message)
 
       # select the last 2 messages and append new convo to prev convo
       messages_to_be_logged = messages[-2:]
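In prose, the branch above is an upsert: find the conversation's existing point in the map metadata, delete it from Nomic, then re-add a single point whose text now includes the latest messages. A hedged sketch of that flow, using calls that appear in this file (AtlasProject, maps[1].data.df, delete_data, wait_for_project_lock, rebuild_maps); add_embeddings and the exact signatures are assumptions about the 2023-era nomic client:

import numpy as np
from nomic import AtlasProject

def upsert_convo_point(project_name: str, conversation_id: str, new_text: str,
                       new_embedding: np.ndarray, new_id: int) -> None:
  project = AtlasProject(name=project_name, add_datums_if_exists=True)
  metadata_df = project.maps[1].data.df  # one row of metadata per map point
  prev = metadata_df[metadata_df['conversation_id'] == conversation_id]
  if not prev.empty:
    # Drop the stale point before re-adding the grown conversation.
    project.delete_data([str(prev['id'].values[0])])
  point = {'id': new_id, 'conversation_id': conversation_id, 'conversation': new_text}
  with project.wait_for_project_lock():
    project.add_embeddings(embeddings=new_embedding.reshape(1, 1536), data=[point])
    project.rebuild_maps()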
@@ -127,7 +115,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
     }]
 
     # create embeddings
-    embeddings_model = OpenAIEmbeddings()
+    embeddings_model = OpenAIEmbeddings() # type: ignore
     embeddings = embeddings_model.embed_documents(user_queries)
 
     # add embeddings to the project
@@ -137,7 +125,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:

   except Exception as e:
     # if project doesn't exist, create it
-    print(e)
+    print("ERROR in log_convo_to_nomic():", e)
     result = create_nomic_map(course_name, conversation)
     if result is None:
       print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e)
@@ -167,10 +155,6 @@ def get_nomic_map(course_name: str):
     print(err)
     return {"map_id": None, "map_link": None}
 
-  # Moved this to the logging function to keep our UI fast.
-  # with project.wait_for_project_lock() as project:
-  #   project.rebuild_maps()
-
   map = project.get_map(project_name)
 
   print(f"⏰ Nomic Full Map Retrieval: {(time.monotonic() - start_time):.2f} seconds")
@@ -185,7 +169,7 @@ def create_nomic_map(course_name: str, log_data: list):
   2. appends current embeddings and metadata to it
   3. creates map if there are at least 20 queries
   """
-  print("in create_nomic_map()")
+  print(f"in create_nomic_map() for {course_name}")
   # initialize supabase
   supabase_client = supabase.create_client( # type: ignore
       supabase_url=os.getenv('SUPABASE_URL'), # type: ignore
@@ -206,9 +190,9 @@ def create_nomic_map(course_name: str, log_data: list):
   conversation_exists = False
 
   # current log details
-  log_messages = log_data['conversation']['messages']
-  log_user_email = log_data['conversation']['user_email']
-  log_conversation_id = log_data['conversation']['id']
+  log_messages = log_data['conversation']['messages'] # type: ignore
+  log_user_email = log_data['conversation']['user_email'] # type: ignore
+  log_conversation_id = log_data['conversation']['id'] # type: ignore
 
   for index, row in df.iterrows():
     user_email = row['user_email']
@@ -220,7 +204,7 @@ def create_nomic_map(course_name: str, log_data: list):

       # create metadata for multi-turn conversation
       conversation = ""
-      if message['role'] == 'user':
+      if message['role'] == 'user': # type: ignore
         emoji = "🙋 "
       else:
         emoji = "🤖 "
@@ -231,7 +215,7 @@ def create_nomic_map(course_name: str, log_data: list):
       # append current chat to previous chat if convo already exists
       if convo['id'] == log_conversation_id:
         conversation_exists = True
-        if m['role'] == 'user':
+        if m['role'] == 'user': # type: ignore
           emoji = "🙋 "
         else:
           emoji = "🤖 "
@@ -281,16 +265,13 @@ def create_nomic_map(course_name: str, log_data: list):
     }
     metadata.append(metadata_row)
 
-  print("length of metadata: ", len(metadata))
   metadata = pd.DataFrame(metadata)
 
   embeddings_model = OpenAIEmbeddings() # type: ignore
   embeddings = embeddings_model.embed_documents(user_queries)
 
   # create Atlas project
   project_name = NOMIC_MAP_NAME_PREFIX + course_name
   index_name = course_name + "_convo_index"
-  print("project_name: ", project_name)
   project = atlas.map_embeddings(
       embeddings=np.array(embeddings),
       data=metadata, # type: ignore -- this is actually the correct type; the function signature from Nomic is incomplete
       topic_label_field="first_query",
       name=project_name,
       colorable_fields=['conversation_id', 'first_query'])
   project.create_index(index_name, build_topic_model=True)
-  print("project: ", project)
   return f"Successfully created Nomic map for {course_name}"

