diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index f643f5b7..9e816302 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -5,6 +5,7 @@ from langchain.embeddings import OpenAIEmbeddings import numpy as np import time +import datetime import pandas as pd import supabase @@ -57,6 +58,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] + prev_created_at = prev_data['created_at'].values[0] # delete that convo data point from Nomic project.delete_data([prev_id]) @@ -69,15 +71,21 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: messages_to_be_logged = messages[-2:] for message in messages_to_be_logged: if message['role'] == 'user': - emoji = "πŸ™‹" + emoji = "πŸ™‹ " else: - emoji = "πŸ€–" + emoji = "πŸ€– " prev_convo += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" + # modified timestamp + current_time = time.time() + dt_object = datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + # update metadata metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message}] + "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": prev_created_at, + "modified_at": current_timestamp}] else: print("conversation_id does not exist") @@ -89,13 +97,19 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: for message in messages: if message['role'] == 'user': - emoji = "πŸ™‹" + emoji = "πŸ™‹ " else: - emoji = "πŸ€–" + emoji = "πŸ€– " conversation_string += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" + # modified timestamp + current_time = time.time() + dt_object = 
datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message}] + "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": current_timestamp, + "modified_at": current_timestamp}] # create embeddings embeddings_model = OpenAIEmbeddings() @@ -183,6 +197,7 @@ def create_nomic_map(course_name: str, log_data: list): for index, row in df.iterrows(): user_email = row['user_email'] + created_at = row['created_at'] convo = row['convo'] messages = convo['messages'] first_message = messages[0]['content'] @@ -191,9 +206,9 @@ def create_nomic_map(course_name: str, log_data: list): # create metadata for multi-turn conversation conversation = "" if message['role'] == 'user': - emoji = "πŸ™‹" + emoji = "πŸ™‹ " else: - emoji = "πŸ€–" + emoji = "πŸ€– " for message in messages: # string of role: content, role: content, ... 
conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" @@ -202,15 +217,21 @@ def create_nomic_map(course_name: str, log_data: list): if convo['id'] == log_conversation_id: conversation_exists = True if m['role'] == 'user': - emoji = "πŸ™‹" + emoji = "πŸ™‹ " else: - emoji = "πŸ€–" + emoji = "πŸ€– " for m in log_messages: conversation += "\n>>> " + emoji + m['role'] + ": " + m['content'] + "\n" + # adding timestamp + current_time = time.time() + dt_object = datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + # add to metadata metadata_row = {"course": row['course_name'], "conversation": conversation, "conversation_id": convo['id'], - "id": i, "user_email": user_email, "first_query": first_message} + "id": i, "user_email": user_email, "first_query": first_message, "created_at": created_at, + "modified_at": current_timestamp} metadata.append(metadata_row) i += 1 @@ -220,12 +241,19 @@ def create_nomic_map(course_name: str, log_data: list): conversation = "" for message in log_messages: if message['role'] == 'user': - emoji = "πŸ™‹" + emoji = "πŸ™‹ " else: - emoji = "πŸ€–" + emoji = "πŸ€– " conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" + + # adding timestamp + current_time = time.time() + dt_object = datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + metadata_row = {"course": course_name, "conversation": conversation, "conversation_id": log_conversation_id, - "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content']} + "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content'], "created_at": current_timestamp, + "modified_at": current_timestamp} metadata.append(metadata_row) print("length of metadata: ", len(metadata)) diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb index 
aeecd4bd..c11b80db 100644 --- a/ai_ta_backend/nomic_map_creation.ipynb +++ b/ai_ta_backend/nomic_map_creation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -29,7 +29,7 @@ "True" ] }, - "execution_count": 7, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -81,85 +81,310 @@ " \n", " \n", " \n", + " id\n", + " created_at\n", + " convo\n", + " convo_id\n", " course_name\n", + " user_email\n", " \n", " \n", " \n", " \n", " 0\n", - " gpt4\n", + " 5193\n", + " 2023-09-06T23:21:36.639848+00:00\n", + " {'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',...\n", + " d6e83182-56d8-4151-b84e-a09dd84b8836\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", " 1\n", - " gpt4\n", + " 5192\n", + " 2023-09-06T23:04:50.982857+00:00\n", + " {'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',...\n", + " 1390b226-2115-4078-a594-ed4bad0fb4e0\n", + " cropwizard-beta\n", + " kastanday@live.com\n", " \n", " \n", " 2\n", - " gpt4\n", + " 5174\n", + " 2023-09-06T22:22:44.107536+00:00\n", + " {'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',...\n", + " fca0cf0b-6bac-4cf6-bd4d-d3501827c947\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", " 3\n", - " gpt4\n", + " 5184\n", + " 2023-09-06T23:01:06.796384+00:00\n", + " {'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',...\n", + " 1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", " 4\n", - " gpt4\n", + " 5182\n", + " 
2023-09-06T22:58:21.66316+00:00\n", + " {'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',...\n", + " 0c9d9873-2c52-4b12-90ec-d4a495cbf4e0\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", + " \n", + " \n", + " 5\n", + " 5194\n", + " 2023-09-06T23:22:06.786717+00:00\n", + " {'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',...\n", + " d51bbdd8-c5c8-4e5b-a003-556a8ac74726\n", + " cropwizard-beta\n", + " avd6@illinois.edu\n", + " \n", + " \n", + " 6\n", + " 5113\n", + " 2023-09-05T21:34:17.944623+00:00\n", + " {'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',...\n", + " 2a39551b-7b6c-4ba0-aa77-ffc9b1316718\n", + " cropwizard-beta\n", + " hetarth2@illinois.edu\n", + " \n", + " \n", + " 7\n", + " 5333\n", + " 2023-09-07T22:25:52.858867+00:00\n", + " {'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',...\n", + " bd3c9174-4512-4ac6-ae05-8aafa4099215\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", + " \n", + " \n", + " 8\n", + " 5108\n", + " 2023-09-05T21:11:51.289178+00:00\n", + " {'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',...\n", + " a49941c4-2049-478b-b43e-9c574cadbd05\n", + " cropwizard-beta\n", + " vadve@illinois.edu\n", + " \n", + " \n", + " 9\n", + " 5104\n", + " 2023-09-05T20:38:27.687893+00:00\n", + " {'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',...\n", + " 1be7956c-d5c8-4b1f-a62f-145421a2e7f7\n", + " cropwizard-beta\n", + " kastanday@live.com\n", + " \n", + " \n", + " 10\n", + " 5103\n", + " 2023-09-05T20:11:51.810222+00:00\n", + " {'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',...\n", + " 20e6e160-6fd3-4e0a-82b7-98457f2ff1c3\n", + " cropwizard-beta\n", + " kastanday@live.com\n", + " \n", + " \n", + " 11\n", + " 5098\n", + " 2023-09-05T19:17:36.190976+00:00\n", + " {'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',...\n", + " fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", + " \n", + " \n", + " 12\n", + " 5042\n", + " 2023-09-02T14:43:02.160608+00:00\n", + " {'id': 
'8df8a436-adbf-441a-92ec-f6d926aee789',...\n", + " 8df8a436-adbf-441a-92ec-f6d926aee789\n", + " cropwizard-beta\n", + " None\n", " \n", " \n", - " ...\n", - " ...\n", + " 13\n", + " 5009\n", + " 2023-09-01T18:26:57.628766+00:00\n", + " {'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',...\n", + " b401031c-5540-43fc-baff-7c6db90ff3ae\n", + " cropwizard-beta\n", + " rohan13@illinois.edu\n", " \n", " \n", - " 1789\n", - " FIN574-GT\n", + " 14\n", + " 5006\n", + " 2023-09-01T18:06:33.350981+00:00\n", + " {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...\n", + " c42e70a8-03c0-4c9e-ac7f-ee718b4ac968\n", + " cropwizard-beta\n", + " hetarth2@illinois.edu\n", " \n", " \n", - " 1790\n", - " NCSA\n", + " 15\n", + " 5045\n", + " 2023-09-03T20:07:34.895841+00:00\n", + " {'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',...\n", + " cbecc7f4-a94c-49d6-ae1a-e42f20136676\n", + " cropwizard-beta\n", + " None\n", " \n", " \n", - " 1791\n", - " gpt4\n", + " 16\n", + " 5014\n", + " 2023-09-01T18:36:04.202716+00:00\n", + " {'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',...\n", + " 3303ec4f-84d5-4eec-a4b3-af020e62b79d\n", + " cropwizard-beta\n", + " vadve@illinois.edu\n", " \n", " \n", - " 1792\n", - " NCSA\n", + " 17\n", + " 5013\n", + " 2023-09-01T18:35:32.12103+00:00\n", + " {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...\n", + " fc91f681-54e3-4df5-8fd4-952d02b8feba\n", + " cropwizard-beta\n", + " rohan13@illinois.edu\n", " \n", " \n", - " 1793\n", - " NCSA\n", + " 18\n", + " 5011\n", + " 2023-09-01T18:28:24.588312+00:00\n", + " {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...\n", + " 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3\n", + " cropwizard-beta\n", + " rohan13@illinois.edu\n", + " \n", + " \n", + " 19\n", + " 5007\n", + " 2023-09-01T18:15:08.636935+00:00\n", + " {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...\n", + " d7191543-c517-4007-a2fb-ae28660ef37c\n", + " cropwizard-beta\n", + " None\n", + " \n", + " \n", + " 20\n", + " 5004\n", + " 2023-09-01T16:28:23.202471+00:00\n", + " {'id': 
'edabed51-e581-48f7-865c-89a4b1d9d120',...\n", + " edabed51-e581-48f7-865c-89a4b1d9d120\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", "\n", - "

1794 rows × 1 columns

\n", "" ], "text/plain": [ - " course_name\n", - "0 gpt4\n", - "1 gpt4\n", - "2 gpt4\n", - "3 gpt4\n", - "4 gpt4\n", - "... ...\n", - "1789 FIN574-GT\n", - "1790 NCSA\n", - "1791 gpt4\n", - "1792 NCSA\n", - "1793 NCSA\n", + " id created_at \\\n", + "0 5193 2023-09-06T23:21:36.639848+00:00 \n", + "1 5192 2023-09-06T23:04:50.982857+00:00 \n", + "2 5174 2023-09-06T22:22:44.107536+00:00 \n", + "3 5184 2023-09-06T23:01:06.796384+00:00 \n", + "4 5182 2023-09-06T22:58:21.66316+00:00 \n", + "5 5194 2023-09-06T23:22:06.786717+00:00 \n", + "6 5113 2023-09-05T21:34:17.944623+00:00 \n", + "7 5333 2023-09-07T22:25:52.858867+00:00 \n", + "8 5108 2023-09-05T21:11:51.289178+00:00 \n", + "9 5104 2023-09-05T20:38:27.687893+00:00 \n", + "10 5103 2023-09-05T20:11:51.810222+00:00 \n", + "11 5098 2023-09-05T19:17:36.190976+00:00 \n", + "12 5042 2023-09-02T14:43:02.160608+00:00 \n", + "13 5009 2023-09-01T18:26:57.628766+00:00 \n", + "14 5006 2023-09-01T18:06:33.350981+00:00 \n", + "15 5045 2023-09-03T20:07:34.895841+00:00 \n", + "16 5014 2023-09-01T18:36:04.202716+00:00 \n", + "17 5013 2023-09-01T18:35:32.12103+00:00 \n", + "18 5011 2023-09-01T18:28:24.588312+00:00 \n", + "19 5007 2023-09-01T18:15:08.636935+00:00 \n", + "20 5004 2023-09-01T16:28:23.202471+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',... \n", + "1 {'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',... \n", + "2 {'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',... \n", + "3 {'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',... \n", + "4 {'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',... \n", + "5 {'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',... \n", + "6 {'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',... \n", + "7 {'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',... \n", + "8 {'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',... \n", + "9 {'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',... \n", + "10 {'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',... 
\n", + "11 {'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',... \n", + "12 {'id': '8df8a436-adbf-441a-92ec-f6d926aee789',... \n", + "13 {'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',... \n", + "14 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", + "15 {'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',... \n", + "16 {'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',... \n", + "17 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", + "18 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", + "19 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", + "20 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... \n", "\n", - "[1794 rows x 1 columns]" + " convo_id course_name \\\n", + "0 d6e83182-56d8-4151-b84e-a09dd84b8836 cropwizard-beta \n", + "1 1390b226-2115-4078-a594-ed4bad0fb4e0 cropwizard-beta \n", + "2 fca0cf0b-6bac-4cf6-bd4d-d3501827c947 cropwizard-beta \n", + "3 1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e cropwizard-beta \n", + "4 0c9d9873-2c52-4b12-90ec-d4a495cbf4e0 cropwizard-beta \n", + "5 d51bbdd8-c5c8-4e5b-a003-556a8ac74726 cropwizard-beta \n", + "6 2a39551b-7b6c-4ba0-aa77-ffc9b1316718 cropwizard-beta \n", + "7 bd3c9174-4512-4ac6-ae05-8aafa4099215 cropwizard-beta \n", + "8 a49941c4-2049-478b-b43e-9c574cadbd05 cropwizard-beta \n", + "9 1be7956c-d5c8-4b1f-a62f-145421a2e7f7 cropwizard-beta \n", + "10 20e6e160-6fd3-4e0a-82b7-98457f2ff1c3 cropwizard-beta \n", + "11 fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c cropwizard-beta \n", + "12 8df8a436-adbf-441a-92ec-f6d926aee789 cropwizard-beta \n", + "13 b401031c-5540-43fc-baff-7c6db90ff3ae cropwizard-beta \n", + "14 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", + "15 cbecc7f4-a94c-49d6-ae1a-e42f20136676 cropwizard-beta \n", + "16 3303ec4f-84d5-4eec-a4b3-af020e62b79d cropwizard-beta \n", + "17 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", + "18 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", + "19 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", + "20 edabed51-e581-48f7-865c-89a4b1d9d120 
cropwizard-beta \n", + "\n", + " user_email \n", + "0 dabholkar.asmita@gmail.com \n", + "1 kastanday@live.com \n", + "2 dabholkar.asmita@gmail.com \n", + "3 dabholkar.asmita@gmail.com \n", + "4 dabholkar.asmita@gmail.com \n", + "5 avd6@illinois.edu \n", + "6 hetarth2@illinois.edu \n", + "7 dabholkar.asmita@gmail.com \n", + "8 vadve@illinois.edu \n", + "9 kastanday@live.com \n", + "10 kastanday@live.com \n", + "11 dabholkar.asmita@gmail.com \n", + "12 None \n", + "13 rohan13@illinois.edu \n", + "14 hetarth2@illinois.edu \n", + "15 None \n", + "16 vadve@illinois.edu \n", + "17 rohan13@illinois.edu \n", + "18 rohan13@illinois.edu \n", + "19 None \n", + "20 dabholkar.asmita@gmail.com " ] }, - "execution_count": 21, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# query data for one course for testing\n", - "course = 'ece120'\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"course_name\", count='exact').execute()\n", + "course = 'cropwizard-beta'\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", "data = response.data\n", "df = pd.DataFrame(data)\n", "df" @@ -167,340 +392,276 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id int64\n", + "created_at object\n", + "convo object\n", + "convo_id object\n", + "course_name object\n", + "user_email object\n", + "dtype: object\n", + "id int64\n", + "created_at datetime64[ns, UTC]\n", + "convo object\n", + "convo_id object\n", + "course_name object\n", + "user_email object\n", + "dtype: object\n" + ] + } + ], "source": [ - "course_list = df['course_name'].unique()\n" + "\n" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atconvoconvo_idcourse_nameuser_email
050042023-09-01 16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
150062023-09-01 18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
250072023-09-01 18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
350112023-09-01 18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
450132023-09-01 18:35:32.121030+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 5004 2023-09-01 16:28:23.202471+00:00 \n", + "1 5006 2023-09-01 18:06:33.350981+00:00 \n", + "2 5007 2023-09-01 18:15:08.636935+00:00 \n", + "3 5011 2023-09-01 18:28:24.588312+00:00 \n", + "4 5013 2023-09-01 18:35:32.121030+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... \n", + "1 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", + "2 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", + "3 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", + "4 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", + "\n", + " convo_id course_name \\\n", + "0 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", + "1 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", + "2 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", + "3 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", + "4 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", + "\n", + " user_email \n", + "0 dabholkar.asmita@gmail.com \n", + "1 hetarth2@illinois.edu \n", + "2 None \n", + "3 rohan13@illinois.edu \n", + "4 rohan13@illinois.edu " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 10, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2023-09-01 16:28:23.202471+0000', tz='UTC')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Mapping Conversations to Nomic" + "df.iloc[0]['created_at']" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "course_list = df['course_name'].unique()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "gpt4\n", - "623\n", - 
"badm_550_ashley\n", - "17\n", - "None\n", - "0\n", - "ece120\n", - "154\n", - "test-video-ingest\n", - "13\n", - "badm-567-v3\n", - "15\n", - "badm-567\n", - "3\n", - "new-weather\n", - "65\n", - "gies-online-mba-v2\n", - "course_name: gies-online-mba-v2\n", - "error: The read operation timed out\n", - "frontend\n", - "8\n", - "test-video-ingest-28\n", - "2\n", - "ECE220FA23\n", - "74\n", - "ECE408FA23\n", - "259\n", - "pdeploy999\n", - "2\n", - "badm-350-summer\n", - "5\n", - "previewtesting1\n", - "1\n", - "localtest2\n", - "2\n", - "your-favorite-url\n", - "1\n", - "mantine\n", - "6\n", - "ece408\n", - "27\n", - "27\n", - "27\n", - "(27, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:42:49.002 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for ece408` in organization `dabholkar.asmita`\n", - "2023-09-12 15:42:50.721 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:42:50.721 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 20 null values for field user_email with string 'null'. This behavior will change in a future version.\n", - "2023-09-12 15:42:50.721 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:02, 2.16s/it]\n", - "2023-09-12 15:42:52.900 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:42:52.908 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:42:56.033 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for ece408` in project `Conversation Map for ece408`: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b\n", - "2023-09-12 15:42:56.046 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for ece408: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for ece408: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:43:00.951 | INFO | nomic.project:create_index:1132 - Created map `ece408_convo_index` in project `Conversation Map for ece408`: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/49bd2ab9-db8a-45ab-b399-5039c7b7e736\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "test-video-ingest-2\n", - "2\n", - "Snowmass\n", - "19\n", - "badm_567_v2\n", - "11\n", - "erpnext\n", - "1\n", - "mip\n", - "1\n", - "farmdoc_test_kastan-v1\n", - "9\n", - "personalstatement\n", - "2\n", - "test-canvas\n", - "4\n", - "hrc\n", - "3\n", - "csv\n", - "4\n", - "star_nox\n", - "3\n", - "badm_567\n", - "3\n", - "SPaRCEd\n", - "2\n", - "NPRE247\n", - "13\n", - "localdemo8\n", - "2\n", - "badm_567_thumbnails\n", - "2\n", - "your-awesome-course\n", - "course_name: your-awesome-course\n", - "error: The read operation timed out\n", - "chatbot\n", - "3\n", - "erp\n", - "2\n", - "extreme\n", - "3\n", - "rohan_atree\n", - "4\n", - "zotero-extreme\n", - "9\n", - 
"pract\n", - "18\n", - "test-video-ingest-20\n", - "3\n", - "gies-online-mba2\n", - "2\n", - "gies-online-mba\n", - "3\n", - "ece120FL22\n", - "15\n", - "careerassistant\n", - "7\n", - "weather\n", - "4\n", - "lillian-wang-blog\n", - "2\n", - "local-test5\n", - "4\n", - "demo-for-vyriad\n", - "6\n", - "ag-gpt-beta\n", - "5\n", - "rohan_atree_demo\n", - "2\n", - "cropwizard\n", - "25\n", - "25\n", - "25\n", - "(25, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:43:49.933 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", - "2023-09-12 15:43:50.980 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:43:50.980 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:03, 3.94s/it]\n", - "2023-09-12 15:43:54.938 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:43:54.953 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:43:58.534 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c\n", - "2023-09-12 15:43:58.534 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:44:00.949 | INFO | nomic.project:create_index:1132 - Created map 
`cropwizard_convo_index` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/5eb008c1-5a10-4f20-ab7d-c42a238e1595\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "rtest\n", - "1\n", - "previewdeploy\n", - "3\n", - "r2test\n", - "1\n", - "Law794-TransactionalDraftingAlam\n", - "4\n", - "personal-statement\n", - "2\n", - "rohan_excel\n", - "1\n", - "langchain-python\n", - "1\n", - "langchain\n", - "4\n", - "ncsa-live-demo\n", - "1\n", - "rohan_atree_individual\n", - "2\n", - "meta11-test\n", - "14\n", - "ceesd-mirgecom\n", - "2\n", - "NCSADelta\n", - "10\n", - "HealthyLivingGuide\n", - "3\n", - "rohan\n", - "2\n", - "NCSA\n", - "40\n", - "40\n", - "40\n", - "(40, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:44:35.409 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for NCSA` in organization `dabholkar.asmita`\n", - "2023-09-12 15:44:36.768 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:44:36.778 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 10 null values for field user_email with string 'null'. This behavior will change in a future version.\n", - "2023-09-12 15:44:36.783 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:01, 1.99s/it]\n", - "2023-09-12 15:44:38.783 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:44:38.784 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:44:40.137 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for NCSA` in project `Conversation Map for NCSA`: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447\n", - "2023-09-12 15:44:40.146 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for NCSA: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for NCSA: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:44:45.686 | INFO | nomic.project:create_index:1132 - Created map `NCSA_convo_index` in project `Conversation Map for NCSA`: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/331ba551-f6b4-4c79-a31c-4cc5390bfac7\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FIN574-GT\n", - "24\n", - "24\n", - "24\n", - "(24, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:45:00.655 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for FIN574-GT` in organization `dabholkar.asmita`\n", - "2023-09-12 15:45:04.369 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:45:04.385 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:06, 6.08s/it]\n", - "2023-09-12 15:45:10.475 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:45:10.475 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:45:13.721 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for FIN574-GT` in project `Conversation Map for FIN574-GT`: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3\n", - "2023-09-12 15:45:13.723 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for FIN574-GT: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3\n" + "21\n", + "21\n" ] - }, + } + ], + "source": [ + "import datetime\n", + "\n", + "metadata = []\n", + "user_queries = []\n", + "i = 1\n", + "\n", + "# log conversation instead of individual messages\n", + "for index, row in df.iterrows():\n", + " user_email = row['user_email']\n", + " created_at = row['created_at']\n", + "\n", + " current_time = time.time()\n", + " dt_object = datetime.datetime.fromtimestamp(current_time)\n", + " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", + "\n", + "\n", + "\n", + " convo = row['convo']\n", + " messages = convo['messages']\n", + " first_message = messages[0]['content']\n", + " \n", + " user_queries.append(first_message)\n", + " # create metadata for multi-turn conversation\n", + " conversation = \"\"\n", + " for message in messages:\n", + " # string of role: content, role: content, ...\n", + " if message['role'] == 'user':\n", + " emoji = \"πŸ™‹\"\n", + " else:\n", + " emoji = \"πŸ€–\"\n", + " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", + " # add to metadata\n", + " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", + " \"id\": i, \"user_email\": user_email, 
\"first_query\": first_message, \"created_at\": created_at,\n", + " \"modified_at\": formatted_timestamp}\n", + " metadata.append(metadata_row)\n", + " i += 1\n", + "\n", + "print(len(user_queries))\n", + "print(len(metadata))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[Conversation Map for FIN574-GT: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3]\n" + "{'course': 'cropwizard-beta', 'conversation': \"\\n>>> πŸ™‹user: What is the best approach to prevent weeds in my soybean field?\\n\\n>>> πŸ€–assistant: Weed management in soybean fields involves a combination of cultural, mechanical, and chemical strategies. Here are some recommended practices:\\n\\n1. **Crop Rotation**: Rotating crops can help reduce the presence of certain weed species that are more prevalent in soybean fields. \\n\\n2. **Cover Crops**: Planting cover crops can suppress weed growth by competing for sunlight, water, and nutrients. \\n\\n3. **Tillage**: Tillage can be used to disrupt weed growth. However, it should be used judiciously to prevent soil erosion.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop compete better against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergent and post-emergent herbicides. Pre-emergent herbicides are applied before the weeds germinate, while post-emergent herbicides are applied after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: Rotate herbicides with different modes of action to prevent weeds from developing resistance.\\n\\n7. **Scouting and Monitoring**: Regularly scout and monitor your fields to identify weed problems early and respond quickly.\\n\\n8. 
**Clean Equipment**: Clean your farming equipment to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. It's also essential to consider the specific types of weeds you're dealing with, as different species may require different control methods.\\n\\n>>> πŸ™‹user: What is the best approach to prevent weeds in my soybean field?\\n\\n\\n\\n\\n>>> πŸ€–assistant: Managing weeds in soybean fields requires a comprehensive and integrated approach. Here are some best practices:\\n\\n1. **Crop Rotation**: This practice can help break the life cycle of certain weeds. By alternating the types of crops in each field, you can naturally reduce the presence of weeds that thrive in soybean fields.\\n\\n2. **Cover Crops**: Planting cover crops in the off-season can suppress weed growth by outcompeting them for resources such as sunlight, water, and nutrients.\\n\\n3. **Tillage**: Tillage can help control weeds by physically removing or burying them. However, excessive tillage can lead to soil erosion, so it should be used judiciously.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop establish itself and compete more effectively against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergence and post-emergence herbicides to control weeds. Pre-emergence herbicides are used before the weeds germinate, while post-emergence herbicides are used after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: To prevent herbicide resistance, rotate the types of herbicides used, ensuring they have different modes of action.\\n\\n7. **Regular Scouting**: Regularly inspect your fields to identify weed problems early. This allows for timely intervention and can prevent the spread of weeds.\\n\\n8. 
**Clean Equipment**: Ensure that your farming equipment is cleaned regularly to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. Tailor your approach based on the specific types of weeds prevalent in your field, as different species may require different control strategies.\\n\", 'conversation_id': 'd6e83182-56d8-4151-b84e-a09dd84b8836', 'id': 1, 'user_email': 'dabholkar.asmita@gmail.com', 'first_query': 'What is the best approach to prevent weeds in my soybean field?', 'created_at': '2023-09-06T23:21:36.639848+00:00', 'modified_at': '2023-09-12 18:17:49.784979+00:00'}\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:45:19.164 | INFO | nomic.project:create_index:1132 - Created map `FIN574-GT_convo_index` in project `Conversation Map for FIN574-GT`: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/092d7d2c-b792-4304-ae04-d8f09ffbba5d\n" - ] - }, + } + ], + "source": [ + "print(metadata[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Conversations to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "babreu\n", - "1\n", - "test-video-ingest-31\n", - "1\n", - "p\n", - "1\n", - "new_test_course\n", - "1\n", "cropwizard-beta\n", "21\n", "21\n", @@ -512,159 +673,35 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-12 15:45:30.549 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", - "2023-09-12 15:45:32.134 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:45:32.150 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. 
This behavior will change in a future version.\n", - "2023-09-12 15:45:32.150 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.08it/s]\n", - "2023-09-12 15:45:33.088 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:45:33.092 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:45:34.335 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd\n", - "2023-09-12 15:45:34.335 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd\n" + "2023-09-12 22:36:00.418 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", + "2023-09-12 22:36:01.434 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 22:36:01.454 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. This behavior will change in a future version.\n", + "2023-09-12 22:36:01.459 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.11it/s]\n", + "2023-09-12 22:36:02.373 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 22:36:02.374 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 22:36:03.570 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n", + "2023-09-12 22:36:03.571 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd]\n" + "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-09-12 15:45:37.865 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/20a567c6-056b-49b3-a421-f0e49f348cda\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "maxlindsey\n", - "1\n", - "Gies-graduate-hub\n", - "4\n", - "test-video-ingest-17\n", - "2\n", - "summary\n", - "10\n", - "test-video-ingest-3\n", - "8\n", - "test-video-ingest-27\n", - "2\n", - "lillian-wang-blog-2\n", - "1\n", - "python-magic\n", - "1\n", - "ansible2\n", - "1\n", - "ece408fa23\n", - "7\n", - "farmdoc_test_josh_v2\n", - "1\n", - "local-test3\n", - "1\n", - "automata\n", - "1\n", - "SpaceFlorida-GT\n", - "5\n", - "GBSI-GT\n", - "4\n", - "test-video-ingest-21\n", - "8\n", - 
"newnew_ncsa\n", - "1\n", - "canvas\n", - "1\n", - "gbsi-gt\n", - "3\n", - "meditation-tutorial\n", - "1\n", - "profit\n", - "1\n", - "ansible\n", - "8\n", - "langchain-docs\n", - "9\n", - "testing_url_metadata_josh\n", - "1\n", - "test-india-biodiversity\n", - "1\n", - "vyriad\n", - "10\n", - "irc-workplans\n", - "1\n", - "kastanasdfasdfasdf\n", - "1\n", - "testing_refactor\n", - "2\n", - "BADM-567-GT\n", - "3\n", - "mdt\n", - "1\n", - "vercel\n", - "1\n", - "gies-graduate-hub\n", - "course_name: gies-graduate-hub\n", - "error: The read operation timed out\n", - "test-video-ingest-12\n", - "3\n", - "NuclGPT-v1\n", - "2\n", - "test-video-ingest-13\n", - "1\n", - "test_new_supa_scrape\n", - "1\n", - "doe-ascr-2023\n", - "1\n", - "arize\n", - "2\n", - "final-meta-test\n", - "1\n", - "preview-meta-test\n", - "1\n", - "gies-online-mba-V3\n", - "2\n", - "FoF-Drawdown-from-INVPEIV-5-24-23\n", - "1\n", - "test-video-ingest-30\n", - "1\n", - "test\n", - "1\n", - "NCSA-v2\n", - "3\n", - "conversational\n", - "1\n", - "clowder-docs\n", - "5\n", - "DA\n", - "1\n", - "test-video-ingest-25\n", - "1\n", - "test-ingest-10\n", - "1\n", - "eric-test-course\n", - "1\n", - "farmdoc-test\n", - "1\n", - "test-video-ingest-22\n", - "2\n", - "Academic-paper\n", - "1\n", - "starwars\n", - "1\n", - "AdamDemo\n", - "1\n", - "OpenCI-ACCESS\n", - "1\n", - "clockkit-github\n", - "1\n" + "2023-09-12 22:36:05.071 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/9507bda2-b846-4e48-90d8-684a40fb3220\n" ] } ], "source": [ "embeddings_model = OpenAIEmbeddings()\n", + "course_list = ['cropwizard-beta']\n", "\n", "for course in course_list:\n", " print(course)\n", @@ -674,7 +711,7 @@ " course_df = pd.DataFrame(data)\n", " print(len(course_df))\n", "\n", - " if len(course_df) < 20 or course in ['gpt4', 'badm_550_ashley', 'ece120', 'new-weather', 
'ECE220FA23', 'ECE408FA23']:\n", + " if len(course_df) < 20:\n", " continue\n", " else:\n", " \n", @@ -685,9 +722,15 @@ " # log conversation instead of individual messages\n", " for index, row in course_df.iterrows():\n", " user_email = row['user_email']\n", + " created_at = row['created_at']\n", " convo = row['convo']\n", " messages = convo['messages']\n", " first_message = messages[0]['content']\n", + "\n", + " current_time = time.time()\n", + " dt_object = datetime.datetime.fromtimestamp(current_time)\n", + " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", + " \n", " user_queries.append(first_message)\n", " # create metadata for multi-turn conversation\n", " conversation = \"\"\n", @@ -700,7 +743,8 @@ " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", " # add to metadata\n", " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message}\n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", + " \"modified_at\": formatted_timestamp}\n", " metadata.append(metadata_row)\n", " i += 1\n", "\n",