
Commit fe0de52

Yapf format ONLY
KastanDay committed Sep 15, 2023
1 parent 5a84764 commit fe0de52
Showing 1 changed file with 77 additions and 42 deletions.
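Every hunk in this diff is a whitespace or line-wrapping change of the kind yapf applies mechanically: two spaces before trailing comments, spaces around binary operators, and dict literals split one entry per line. As a rough sketch of how such a format-only pass is produced (the style options below are assumptions inferred from the diff, not the repository's actual yapf config), yapf's Python API can be driven like this:

    # Hypothetical reproduction of a format-only pass; style values are guesses
    # inferred from the wrapped lines below, not taken from the repo.
    from yapf.yapflib.yapf_api import FormatCode

    with open('ai_ta_backend/nomic_logging.py') as f:
      source = f.read()

    formatted, changed = FormatCode(source, style_config={'based_on_style': 'google', 'column_limit': 120})
    if changed:
      with open('ai_ta_backend/nomic_logging.py', 'w') as f:
        f.write(formatted)
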
119 changes: 77 additions & 42 deletions ai_ta_backend/nomic_logging.py
@@ -9,9 +9,10 @@
import pandas as pd
import supabase

-nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app
+nomic.login(os.getenv('NOMIC_API_KEY'))  # login during start of flask app
NOMIC_MAP_NAME_PREFIX = 'Conversation Map for '


def log_convo_to_nomic(course_name: str, conversation) -> str:
"""
Logs conversation to Nomic.
@@ -24,22 +25,22 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
print("in log_convo_to_nomic()")

print("conversation: ", conversation)

messages = conversation['conversation']['messages']
user_email = conversation['conversation']['user_email']
conversation_id = conversation['conversation']['id']

#print("conversation: ", conversation)

# we have to upload whole conversations
# check what the fetched data looks like - pandas df or pyarrow table
-  # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it.
+  # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it.
# will have current QA and historical QA from Nomic, append new data and add_embeddings()

project_name = NOMIC_MAP_NAME_PREFIX + course_name
start_time = time.monotonic()
emoji = ""

try:
    # fetch project metadata and embeddings
project = AtlasProject(name=project_name, add_datums_if_exists=True)
@@ -48,46 +49,53 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
map_metadata_df['id'] = map_metadata_df['id'].astype(int)
last_id = map_metadata_df['id'].max()
print("last_id: ", last_id)

if conversation_id in map_metadata_df.values:
print("conversation_id exists")

# store that convo metadata locally
prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id]
prev_index = prev_data.index.values[0]
print("prev_index: ", prev_index)
-        embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536)
+        embeddings = map_embeddings_df[prev_index - 1].reshape(1, 1536)
prev_convo = prev_data['conversation'].values[0]
prev_id = prev_data['id'].values[0]
print("prev_id: ", prev_id)
created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S')
print("prev_created_at: ", created_at)
print("before delete")

# delete that convo data point from Nomic
print(project.delete_data([str(prev_id)]))

# prep for new point
first_message = prev_convo.split("\n")[1].split(": ")[1]
print("first_message: ", first_message)

# select the last 2 messages and append new convo to prev convo
messages_to_be_logged = messages[-2:]
for message in messages_to_be_logged:
if message['role'] == 'user':
emoji = "🙋 "
else:
emoji = "🤖 "

prev_convo += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n"

# modified timestamp
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# update metadata
metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id,
"id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": created_at,
"modified_at": current_time}]
metadata = [{
"course": course_name,
"conversation": prev_convo,
"conversation_id": conversation_id,
"id": last_id + 1,
"user_email": user_email,
"first_query": first_message,
"created_at": created_at,
"modified_at": current_time
}]
else:
print("conversation_id does not exist")

@@ -107,19 +115,26 @@ def log_convo_to_nomic(course_name: str, conversation) -> str:
# modified timestamp
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id,
"id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": current_time,
"modified_at": current_time}]

metadata = [{
"course": course_name,
"conversation": conversation_string,
"conversation_id": conversation_id,
"id": last_id + 1,
"user_email": user_email,
"first_query": first_message,
"created_at": current_time,
"modified_at": current_time
}]

# create embeddings
embeddings_model = OpenAIEmbeddings()
embeddings = embeddings_model.embed_documents(user_queries)

# add embeddings to the project
project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True)
project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata))
project.rebuild_maps()

except Exception as e:
# if project doesn't exist, create it
print(e)
@@ -155,13 +170,13 @@ def get_nomic_map(course_name: str):
# Moved this to the logging function to keep our UI fast.
# with project.wait_for_project_lock() as project:
# project.rebuild_maps()

map = project.get_map(project_name)

print(f"⏰ Nomic Full Map Retrieval: {(time.monotonic() - start_time):.2f} seconds")

return {"map_id": f"iframe{map.id}",
"map_link": map.map_link}
return {"map_id": f"iframe{map.id}", "map_link": map.map_link}


def create_nomic_map(course_name: str, log_data: list):
"""
@@ -173,14 +188,14 @@ def create_nomic_map(course_name: str, log_data: list):
print("in create_nomic_map()")
# initialize supabase
supabase_client = supabase.create_client( # type: ignore
-      supabase_url=os.getenv('SUPABASE_URL'), # type: ignore
-      supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore
+      supabase_url=os.getenv('SUPABASE_URL'),  # type: ignore
+      supabase_key=os.getenv('SUPABASE_API_KEY'))  # type: ignore

# fetch all conversations with this new course (we expect <=20 conversations, because otherwise the map should be made already)
response = supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).execute()
data = response.data
df = pd.DataFrame(data)

if len(data) < 19:
return None
else:
@@ -197,7 +212,7 @@ def create_nomic_map(course_name: str, log_data: list):

for index, row in df.iterrows():
user_email = row['user_email']
-      created_at = pd.to_datetime(row['created_at']).strftime('%Y-%m-%d %H:%M:%S')
+      created_at = pd.to_datetime(row['created_at']).strftime('%Y-%m-%d %H:%M:%S')
convo = row['convo']
messages = convo['messages']
first_message = messages[0]['content']
@@ -206,13 +221,13 @@ def create_nomic_map(course_name: str, log_data: list):
      # create metadata for multi-turn conversation
      conversation = ""
      for message in messages:
        # string of role: content, role: content, ...
        if message['role'] == 'user':
          emoji = "🙋 "
        else:
          emoji = "🤖 "
        conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n"

# append current chat to previous chat if convo already exists
if convo['id'] == log_conversation_id:
conversation_exists = True
@@ -225,11 +240,18 @@ def create_nomic_map(course_name: str, log_data: list):

# adding modified timestamp
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# add to metadata
metadata_row = {"course": row['course_name'], "conversation": conversation, "conversation_id": convo['id'],
"id": i, "user_email": user_email, "first_query": first_message, "created_at": created_at,
"modified_at": current_time}
metadata_row = {
"course": row['course_name'],
"conversation": conversation,
"conversation_id": convo['id'],
"id": i,
"user_email": user_email,
"first_query": first_message,
"created_at": created_at,
"modified_at": current_time
}
metadata.append(metadata_row)
i += 1

@@ -247,27 +269,40 @@ def create_nomic_map(course_name: str, log_data: list):
# adding timestamp
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

metadata_row = {"course": course_name, "conversation": conversation, "conversation_id": log_conversation_id,
"id": i, "user_email": log_user_email, "first_query": log_messages[0]['content'], "created_at": current_time,
"modified_at": current_time}
metadata_row = {
"course": course_name,
"conversation": conversation,
"conversation_id": log_conversation_id,
"id": i,
"user_email": log_user_email,
"first_query": log_messages[0]['content'],
"created_at": current_time,
"modified_at": current_time
}
metadata.append(metadata_row)

print("length of metadata: ", len(metadata))
metadata = pd.DataFrame(metadata)

-    embeddings_model = OpenAIEmbeddings() # type: ignore
+    embeddings_model = OpenAIEmbeddings()  # type: ignore
embeddings = embeddings_model.embed_documents(user_queries)

# create Atlas project
project_name = NOMIC_MAP_NAME_PREFIX + course_name
index_name = course_name + "_convo_index"
print("project_name: ", project_name)
-    project = atlas.map_embeddings(embeddings=np.array(embeddings), data=metadata, # type: ignore -- this is actually the correct type; the function signature from Nomic is incomplete
-                                   id_field='id', build_topic_model=True, topic_label_field='first_query',
-                                   name=project_name, colorable_fields=['conversation_id', 'first_query'])
+    project = atlas.map_embeddings(
+        embeddings=np.array(embeddings),
+        data=metadata,  # type: ignore -- this is actually the correct type; the function signature from Nomic is incomplete
+        id_field='id',
+        build_topic_model=True,
+        topic_label_field='first_query',
+        name=project_name,
+        colorable_fields=['conversation_id', 'first_query'])
project.create_index(index_name, build_topic_model=True)
print("project: ", project)
return f"Successfully created Nomic map for {course_name}"


if __name__ == '__main__':
pass
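
The comments at the top of log_convo_to_nomic describe the update strategy this file relies on: Atlas has no in-place update here, so an existing conversation's data point is deleted and re-added with the new messages appended. A minimal sketch of that pattern, using only the calls that appear in the diff above, with a placeholder id and a random vector standing in for the real OpenAI embedding (1536 dims, matching the reshape above):

    import numpy as np
    import pandas as pd
    from nomic import AtlasProject

    # Sketch only: the id and metadata are placeholders; the real code derives
    # them from the project's map data and Supabase rows.
    project = AtlasProject(name='Conversation Map for example-course', add_datums_if_exists=True)
    project.delete_data(['7'])  # drop the stale point for this conversation
    project.add_embeddings(
        embeddings=np.random.rand(1, 1536),  # stand-in for OpenAIEmbeddings output
        data=pd.DataFrame([{'id': 8, 'conversation': '...updated convo...'}]))
    project.rebuild_maps()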
