Skip to content

Commit f230d0c

Browse files
committed
added function for logging single-turn conversation
1 parent 6d21c8f commit f230d0c

File tree

3 files changed

+1635
-121
lines changed

3 files changed

+1635
-121
lines changed

ai_ta_backend/main.py

+6-23
Original file line numberDiff line numberDiff line change
@@ -396,38 +396,21 @@ def nomic_map():
396396
response.headers.add('Access-Control-Allow-Origin', '*')
397397
return response
398398

399-
@app.route('/onResponseCompletion', methods=['GET'])
def logToNomic():
    """
    Queue a finished conversation for background logging to Nomic.

    Query parameters:
      course_name: course the conversation belongs to (defaults to '').
      conversation: the conversation, serialized as a JSON object string.

    Returns:
      A 400 JSON error if `conversation` is missing, is not valid JSON, or
      does not decode to a JSON object. Otherwise the logging job is handed
      to the background executor and a small JSON acknowledgement is
      returned immediately, without waiting for the job to finish.
    """
    course_name: str = request.args.get('course_name', default='', type=str)
    conversation: str = request.args.get('conversation', default='', type=str)

    print("In /onResponseCompletion")

    # The parameter defaults to '', which json.loads rejects; turn that (and
    # any other malformed payload) into a client error instead of a 500.
    try:
        conversation_json = json.loads(conversation)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        conversation_json = None

    if not isinstance(conversation_json, dict):
        response = jsonify({'error': "Invalid parameters: 'conversation' must be a JSON object."})
        response.status_code = 400
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response

    # Background execution: the returned Future is intentionally not kept,
    # because it is not JSON-serializable and the request must not block on it.
    executor.submit(log_convo_to_nomic, course_name, conversation_json)

    response = jsonify({'status': 'submitted'})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
431414

432415

433416
if __name__ == '__main__':

ai_ta_backend/nomic_logging.py

+73-98
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import nomic
33
from nomic import atlas
4+
from nomic import AtlasProject
45
from langchain.embeddings import OpenAIEmbeddings
56
import numpy as np
67
import time
@@ -10,127 +11,101 @@
1011
nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app
1112
NOMIC_MAP_NAME_PREFIX = 'Queries for '
1213

13-
def log_query_to_nomic(course_name: str, search_query: str) -> str:
14+
def log_convo_to_nomic(course_name: str, conversation) -> str:
    """
    Log a conversation to Nomic.

    1. Fetch the conversation map project.
    2. Check whether the conversation ID already exists in the map:
       - if yes, append the new messages to the stored conversation, delete
         the old data point and add the merged one (reusing its embedding);
       - if no, embed the first message and add a new data point.
    3. If anything above fails (typically because the project does not exist
       yet), fall back to creating the map via `create_nomic_map`.

    Args:
      course_name: name of the course the conversation belongs to.
      conversation: mapping with a 'conversation' entry that holds
        'messages' (each with 'role' and 'content'), 'user_email' and 'id'.

    Returns:
      A short status string.
    """
    print("in log_convo_to_nomic()")

    convo = conversation['conversation']
    messages = convo['messages']
    user_email = convo['user_email']
    conversation_id = convo['id']

    # Nothing to embed or upload; messages[0] below would otherwise fail.
    if not messages:
        return f"No messages to log for {course_name}"

    # Local import keeps this function self-contained even if the module
    # header does not import pandas.
    import pandas as pd

    start_time = time.monotonic()
    # NOTE(review): the project name is hard-coded rather than derived from
    # NOMIC_MAP_NAME_PREFIX + course_name, while the fallback below creates a
    # map from course_name. This looks like a development leftover; confirm
    # the intended project before changing it.
    project_name = "Conversation Map for NCSA"

    # Text for the messages of this request, in the stored ">>> role: content" format.
    new_turns = "".join("\n>>> " + m['role'] + ": " + m['content'] + "\n" for m in messages)

    def build_metadata(convo_text, first_query, point_id):
        """Build the single-row metadata payload for one conversation data point."""
        return [{"course": course_name, "conversation": convo_text, "conversation_id": conversation_id,
                 "id": point_id, "user_email": user_email, "first_query": first_query}]

    embeddings = None
    metadata = None
    try:
        # fetch project metadata and embeddings
        project = AtlasProject(name=project_name, add_datums_if_exists=True)
        map_metadata_df = project.maps[1].data.df
        map_embeddings_df = project.maps[1].embeddings.latent

        # NOTE(review): len()+1 can repeat an id once points have been deleted
        # and re-added; confirm how Nomic treats duplicate ids.
        next_id = len(map_metadata_df) + 1

        # Match on the conversation_id column only, not on any cell of the frame.
        prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id]

        if not prev_data.empty:
            print("conversation_id exists")

            # store that convo metadata locally
            prev_index = prev_data.index.values[0]
            prev_convo = prev_data['conversation'].values[0]
            prev_id = prev_data['id'].values[0]
            # assumes the dataframe index is 1-based relative to the embedding rows -- TODO confirm
            embeddings = map_embeddings_df[prev_index - 1].reshape(1, 1536)

            # The first stored line is ">>> role: content"; split once so a
            # message that itself contains ": " is not truncated.
            first_message = prev_convo.split("\n")[1].split(": ", 1)[1]

            # append new convo to prev convo
            metadata = build_metadata(prev_convo + new_turns, first_message, next_id)

            # Delete the old point only once its replacement is fully built,
            # so a failure above cannot lose the stored conversation.
            print("Prev point deleted: ", project.delete_data([prev_id]))
        else:
            print("conversation_id does not exist")

            # add new data point, embedded by its first message
            first_message = messages[0]['content']
            embeddings = np.array(OpenAIEmbeddings().embed_documents([first_message]))
            metadata = build_metadata(new_turns, first_message, next_id)

        # add embeddings to project
        project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata))
        project.rebuild_maps()

    except Exception as e:
        # If the project doesn't exist, create it. When the failure happened
        # before a data point was prepared (e.g. the project lookup itself
        # failed), build a fresh first point so the fallback has data to use.
        if embeddings is None or metadata is None:
            first_message = messages[0]['content']
            embeddings = np.array(OpenAIEmbeddings().embed_documents([first_message]))
            metadata = build_metadata(new_turns, first_message, 1)

        result = create_nomic_map(course_name, np.array(embeddings), pd.DataFrame(metadata))
        if result is None:
            print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e)

    print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds")
    return f"Successfully logged for {course_name}"
131107

132108

133-
134109
def get_nomic_map(course_name: str):
135110
"""
136111
Returns the variables necessary to construct an iframe of the Nomic map given a course name.

0 commit comments

Comments
 (0)