|
import os
import time

import nomic
import numpy as np
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from nomic import atlas
from nomic import AtlasProject
|
|
# Authenticate with Nomic once at module import time.
nomic.login(os.getenv('NOMIC_API_KEY'))  # login during start of flask app

# Atlas project names are derived as NOMIC_MAP_NAME_PREFIX + course_name.
NOMIC_MAP_NAME_PREFIX = 'Queries for '
|
def log_convo_to_nomic(course_name: str, conversation) -> str:
    """Log a conversation to the course's Nomic Atlas map.

    1. Fetch the map for the given course (``NOMIC_MAP_NAME_PREFIX + course_name``).
    2. If the conversation ID already has a data point on the map, delete that
       point and re-add it with the new messages appended to the stored text.
    3. If the map/project doesn't exist yet, fall back to ``create_nomic_map()``.

    Args:
        course_name: Course the conversation belongs to; determines the map name.
        conversation: Payload dict; ``conversation['conversation']`` must contain
            ``'messages'`` (list of ``{'role', 'content'}`` dicts), ``'user_email'``
            and ``'id'``.

    Returns:
        A status string ("Successfully logged for <course>"); prints timing info
        as a side effect.
    """
    print("in log_convo_to_nomic()")

    messages = conversation['conversation']['messages']
    user_email = conversation['conversation']['user_email']
    conversation_id = conversation['conversation']['id']

    # Whole conversations are stored as one text blob per map point, formatted
    # as "\n>>> <role>: <content>\n" per message.
    project_name = NOMIC_MAP_NAME_PREFIX + course_name
    # NOTE(review): removed the debug leftover
    # `project_name = "Conversation Map for NCSA"` which clobbered the computed
    # per-course name and sent every course's conversations to one project.
    start_time = time.monotonic()

    try:
        # Fetch project metadata and embeddings (slow network calls).
        project = AtlasProject(name=project_name, add_datums_if_exists=True)
        map_metadata_df = project.maps[1].data.df
        map_embeddings_df = project.maps[1].embeddings.latent

        if conversation_id in map_metadata_df.values:
            print("conversation_id exists")

            # Store that convo's metadata locally before deleting it from Nomic.
            prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id]
            prev_index = prev_data.index.values[0]
            prev_convo = prev_data['conversation'].values[0]
            prev_id = prev_data['id'].values[0]
            # Reuse the stored embedding for this conversation.
            # NOTE(review): `prev_index - 1` assumes the metadata row index is
            # offset by one from the latent-embedding array — confirm against
            # Nomic's data layout.
            embeddings = map_embeddings_df[prev_index - 1].reshape(1, 1536)

            # Delete the stale point; it is re-added below with merged metadata.
            print("Prev point deleted: ", project.delete_data([prev_id]))

            # First user query, recovered from the stored "\n>>> role: content\n" format.
            first_message = prev_convo.split("\n")[1].split(": ")[1]

            # Append the new messages to the previous conversation text.
            for message in messages:
                prev_convo += "\n>>> " + message['role'] + ": " + message['content'] + "\n"
            conversation_string = prev_convo
        else:
            print("conversation_id does not exist")

            # New conversation: build the text blob and embed its first user query.
            first_message = messages[0]['content']
            conversation_string = ""
            for message in messages:
                conversation_string += "\n>>> " + message['role'] + ": " + message['content'] + "\n"

            embeddings_model = OpenAIEmbeddings()  # type: ignore
            embeddings = np.array(embeddings_model.embed_documents([first_message])).reshape(1, 1536)

        # Shared re-add path: both branches now reach add_embeddings (previously
        # the merged conversation was deleted but never written back).
        metadata = [{
            "course": course_name,
            "conversation": conversation_string,
            "conversation_id": conversation_id,
            "id": len(map_metadata_df) + 1,
            "user_email": user_email,
            "first_query": first_message,
        }]

        project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata))
        # Required to keep maps fresh (or we could put on fetch side, but then our UI is slow).
        project.rebuild_maps()
    except Exception as e:
        # Project probably doesn't exist yet — create it from this conversation.
        # Build metadata/embeddings HERE rather than reusing try-block locals,
        # which are unbound when AtlasProject() itself raised.
        first_message = messages[0]['content']
        conversation_string = ""
        for message in messages:
            conversation_string += "\n>>> " + message['role'] + ": " + message['content'] + "\n"
        metadata = [{
            "course": course_name,
            "conversation": conversation_string,
            "conversation_id": conversation_id,
            "id": 1,  # first point on a brand-new map
            "user_email": user_email,
            "first_query": first_message,
        }]
        embeddings_model = OpenAIEmbeddings()  # type: ignore
        embeddings = np.array(embeddings_model.embed_documents([first_message])).reshape(1, 1536)

        result = create_nomic_map(course_name, embeddings, pd.DataFrame(metadata))
        if result is None:
            print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e)
        else:
            print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds")
            return f"Successfully logged for {course_name}"

    print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds")
    return f"Successfully logged for {course_name}"
| - |
134 | 109 | def get_nomic_map(course_name: str):
|
135 | 110 | """
|
136 | 111 | Returns the variables necessary to construct an iframe of the Nomic map given a course name.
|
|
0 commit comments