From dbdd24ad7c68bb94e6e8089bfdfc13f5c2b875f4 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 29 Aug 2023 12:23:58 -0500 Subject: [PATCH 01/61] new PR --- ai_ta_backend/nomic_logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index bcef5fe9..6b333d34 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -19,6 +19,7 @@ def log_query_to_nomic(course_name: str, search_query: str) -> str: embeddings_model = OpenAIEmbeddings() # type: ignore embeddings = np.array(embeddings_model.embed_query(search_query)).reshape(1, 1536) + # add model responses here data = [{'course_name': course_name, 'query': search_query, 'id': time.time()}] try: From fb1e0eafeeabe532bdb9d14ba93d55160d272304 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 29 Aug 2023 21:44:06 -0500 Subject: [PATCH 02/61] added onResponseCompletion endpoint --- ai_ta_backend/main.py | 25 ++++++++++++++++++++++-- ai_ta_backend/nomic_logging.py | 35 +++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index f87ea3aa..235891c0 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -9,7 +9,7 @@ from ai_ta_backend.vector_database import Ingest from ai_ta_backend.web_scrape import main_crawler, mit_course_download -from ai_ta_backend.nomic_logging import log_query_to_nomic, get_nomic_map, create_nomic_map +from ai_ta_backend.nomic_logging import log_query_to_nomic, get_nomic_map, create_nomic_map, log_query_response_to_nomic from flask_executor import Executor app = Flask(__name__) @@ -124,7 +124,7 @@ def getTopContexts(): abort( 400, description= - f"Missing one or me required parameters: 'search_query' and 'course_name' must be provided. Search query: `{search_query}`, Course name: `{course_name}`" + f"Missing one or more required parameters: 'search_query' and 'course_name' must be provided. Search query: `{search_query}`, Course name: `{course_name}`" ) ingester = Ingest() @@ -332,6 +332,27 @@ def nomic_map(): response.headers.add('Access-Control-Allow-Origin', '*') return response +@app.route('/onResponseCompletion', methods=['GET']) +def logToNomic(): + course_name: str = request.args.get('course_name', default='', type=str) + search_query: str = request.args.get('search_query', default='', type=str) + response: str = request.args.get('response', default='', type=str) + + if course_name == '' or search_query == '' or response == '': + # proper web error "400 Bad request" + abort( + 400, + description= + f"Missing one or more required parameters: 'course_name', 'search_query', and 'response' must be provided. Course name: `{course_name}`" + ) + + # background execution of tasks!! 
+ response = executor.submit(log_query_response_to_nomic, course_name, search_query) + + response = jsonify(response) + response.headers.add('Access-Control-Allow-Origin', '*') + return response + if __name__ == '__main__': app.run(debug=True, port=os.getenv("PORT", default=8000)) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 6b333d34..a96e3b38 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -19,7 +19,7 @@ def log_query_to_nomic(course_name: str, search_query: str) -> str: embeddings_model = OpenAIEmbeddings() # type: ignore embeddings = np.array(embeddings_model.embed_query(search_query)).reshape(1, 1536) - # add model responses here + data = [{'course_name': course_name, 'query': search_query, 'id': time.time()}] try: @@ -42,6 +42,39 @@ def log_query_to_nomic(course_name: str, search_query: str) -> str: print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") return f"Successfully logged for {course_name}" +def log_query_response_to_nomic(course_name: str, search_query: str, response: str) -> str: + """ + Logs user query and model responses to Nomic. Must have more than 20 queries to get a map, + otherwise we'll show nothing for now. + """ + project_name = NOMIC_MAP_NAME_PREFIX + course_name + start_time = time.monotonic() + + embeddings_model = OpenAIEmbeddings() # type: ignore + embeddings = np.array(embeddings_model.embed_query(search_query)).reshape(1, 1536) + + data = [{'course_name': course_name, 'query': search_query, 'response': response, 'id': time.time()}] + + try: + # slow call, about 0.6 sec + project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) + # mostly async call (0.35 to 0.5 sec) + project.add_embeddings(embeddings=embeddings, data=data) + + # required to keep maps fresh (or we could put on fetch side, but then our UI is slow) + project.rebuild_maps() + except Exception as e: + # if project doesn't exist, create it + result = create_nomic_map(course_name, embeddings, data) + if result is None: + print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e) + else: + print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") + return f"Successfully logged for {course_name}" + + print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") + return f"Successfully logged for {course_name}" + def get_nomic_map(course_name: str): """ Returns the variables necessary to construct an iframe of the Nomic map given a course name. 
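Both logging helpers in the patches above fall back to `create_nomic_map(course_name, embeddings, data)` when the Atlas project does not exist yet, but that helper is not shown in this series. Below is a minimal sketch of what such a helper could look like, assuming the `nomic` client's `atlas.map_embeddings` entry point and the same `NOMIC_MAP_NAME_PREFIX` naming convention used elsewhere in `nomic_logging.py`; the exact fields and error handling are illustrative assumptions, not the repository's actual implementation.

```python
# Hypothetical sketch of the create_nomic_map() helper referenced above.
# Assumes the nomic client's atlas.map_embeddings API; field names are illustrative.
import os

import nomic
import numpy as np
from nomic import atlas

NOMIC_MAP_NAME_PREFIX = 'Queries for '
nomic.login(os.getenv('NOMIC_API_KEY'))


def create_nomic_map(course_name: str, embeddings: np.ndarray, data: list):
  """Create a new Atlas map the first time a course is logged.

  Returns the created project, or None if creation fails (for example,
  when there are too few data points to build a map).
  """
  project_name = NOMIC_MAP_NAME_PREFIX + course_name
  try:
    project = atlas.map_embeddings(
        embeddings=embeddings,  # shape (n, 1536) OpenAI query embeddings
        data=data,              # list of dicts; each dict carries an 'id' plus metadata
        id_field='id',
        name=project_name,
    )
    return project
  except Exception as e:
    print(f"Could not create Nomic map for {course_name}: {e}")
    return None
```

In these patches the helper is only reached from the `except` branch, so a `None` return simply means the course has too few logged queries (the docstrings mention roughly 20) and no map is shown yet.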
From 4ee29ff2e796bd89b4932ff0f569b3802e2c9010 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 5 Sep 2023 11:44:46 -0500 Subject: [PATCH 03/61] nomic log test push --- ai_ta_backend/main.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 235891c0..ac084ca6 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -335,8 +335,14 @@ def nomic_map(): @app.route('/onResponseCompletion', methods=['GET']) def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) - search_query: str = request.args.get('search_query', default='', type=str) - response: str = request.args.get('response', default='', type=str) + #search_query: str = request.args.get('search_query', default='', type=str) + response: str = request.args.get('conversation', default='', type=str) + + print("In /onResponseCompletion") + print("course_name: ", course_name) + print("response: ", response) + + search_query = "dummy search query" if course_name == '' or search_query == '' or response == '': # proper web error "400 Bad request" From 5f39b3a0201d9e86f3e57245b9aad517cd5ec580 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 5 Sep 2023 12:03:19 -0500 Subject: [PATCH 04/61] nomic log test push 2 --- ai_ta_backend/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index ac084ca6..3d971ad8 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -339,6 +339,7 @@ def logToNomic(): response: str = request.args.get('conversation', default='', type=str) print("In /onResponseCompletion") + print(request.headers) print("course_name: ", course_name) print("response: ", response) From 1d40910cf7e691a876b6dac7f83b74bf3f5f1790 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 5 Sep 2023 13:35:46 -0500 Subject: [PATCH 05/61] nomic changed to POST --- ai_ta_backend/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 3d971ad8..78de33e6 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -332,7 +332,7 @@ def nomic_map(): response.headers.add('Access-Control-Allow-Origin', '*') return response -@app.route('/onResponseCompletion', methods=['GET']) +@app.route('/onResponseCompletion', methods=['POST']) def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) #search_query: str = request.args.get('search_query', default='', type=str) @@ -340,6 +340,7 @@ def logToNomic(): print("In /onResponseCompletion") print(request.headers) + print(request.get_json()) print("course_name: ", course_name) print("response: ", response) From 82bf14341fd48eecc209f201c4d36b2b1589978d Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 5 Sep 2023 13:51:58 -0500 Subject: [PATCH 06/61] nomic changed back to GET --- ai_ta_backend/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 78de33e6..4f287916 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -332,7 +332,7 @@ def nomic_map(): response.headers.add('Access-Control-Allow-Origin', '*') return response -@app.route('/onResponseCompletion', methods=['POST']) +@app.route('/onResponseCompletion', methods=['GET']) def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) #search_query: str = request.args.get('search_query', default='', type=str) From 62b5b1b2490d09db2f469ba9a974f853cc1b0b1c Mon Sep 17 
00:00:00 2001 From: star-nox Date: Tue, 5 Sep 2023 14:11:53 -0500 Subject: [PATCH 07/61] nomic changed to POST --- ai_ta_backend/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 4f287916..78de33e6 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -332,7 +332,7 @@ def nomic_map(): response.headers.add('Access-Control-Allow-Origin', '*') return response -@app.route('/onResponseCompletion', methods=['GET']) +@app.route('/onResponseCompletion', methods=['POST']) def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) #search_query: str = request.args.get('search_query', default='', type=str) From 39d95f9dd6c6d8dba8e94dab4e636767e4af4d16 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 5 Sep 2023 14:19:34 -0500 Subject: [PATCH 08/61] nomic moved print statements --- ai_ta_backend/main.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 78de33e6..0342c467 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -336,17 +336,16 @@ def nomic_map(): def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) #search_query: str = request.args.get('search_query', default='', type=str) - response: str = request.args.get('conversation', default='', type=str) + conversation: str = request.args.get('conversation', default='', type=str) print("In /onResponseCompletion") - print(request.headers) - print(request.get_json()) print("course_name: ", course_name) - print("response: ", response) + print("conversation: ", conversation) - search_query = "dummy search query" + print(request.headers) + print(request.get_json()) - if course_name == '' or search_query == '' or response == '': + if course_name == '' or conversation == '': # proper web error "400 Bad request" abort( 400, From 36df49f07dddae322cf93e2709a69f3ab559dfbb Mon Sep 17 00:00:00 2001 From: Kastan Day Date: Tue, 5 Sep 2023 12:53:12 -0700 Subject: [PATCH 09/61] adding proper variables to /web-scrape endpoint --- ai_ta_backend/main.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 0342c467..1bad08f1 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -1,9 +1,10 @@ +import gc import os import time from typing import Any, List, Union from dotenv import load_dotenv -from flask import Flask, abort, jsonify, request +from flask import Flask, Response, abort, jsonify, request from flask_cors import CORS from sqlalchemy import JSON @@ -279,13 +280,21 @@ def delete(): return response @app.route('/web-scrape', methods=['GET']) -def scrape(): - url: str = request.args.get('url') - max_urls: int = request.args.get('max_urls') - max_depth: int = request.args.get('max_depth') - timeout: int = request.args.get('timeout') - course_name: str = request.args.get('course_name') - base_url_bool: str = request.args.get('base_url_on') +def scrape() -> Response: + url: str = request.args.get('url', default='', type=str) + max_urls: int = request.args.get('max_urls', default=-1, type=int) + max_depth: int = request.args.get('max_depth', default=-1, type=int) + timeout: int = request.args.get('timeout', default=-1, type=int) + course_name: str = request.args.get('course_name', default='', type=str) + stay_on_baseurl: bool | None = request.args.get('stay_on_baseurl', type=bool) + + if url == '' or max_urls == -1 
or max_depth == -1 or timeout == -1 or course_name == '' or stay_on_baseurl is None: + # proper web error "400 Bad request" + abort( + 400, + description= + f"Missing one or more required parameters: 'url', 'max_urls', 'max_depth', 'timeout', 'course_name', and 'stay_on_baseurl' must be provided. url: `{url}`, max_urls: `{max_urls}`, max_depth: `{max_depth}`, timeout: `{timeout}`, course_name: `{course_name}`, stay_on_baseurl: `{stay_on_baseurl}`" + ) # print all input params print(f"Web scrape!") @@ -293,14 +302,15 @@ def scrape(): print(f"Max Urls: {max_urls}") print(f"Max Depth: {max_depth}") print(f"Timeout in Seconds ⏰: {timeout}") + print(f"Stay on baseurl: {stay_on_baseurl}") - success_fail_dict = main_crawler(url, course_name, max_urls, max_depth, timeout, base_url_bool) + success_fail_dict = main_crawler(url, course_name, max_urls, max_depth, timeout, stay_on_baseurl) response = jsonify(success_fail_dict) response.headers.add('Access-Control-Allow-Origin', '*') + gc.collect() # manually invoke garbage collection, try to reduce memory on Railway $$$ return response - @app.route('/mit-download', methods=['GET']) def mit_download_course(): url: str = request.args.get('url') From b447c1351a852050f55608a78b324a7518133d2e Mon Sep 17 00:00:00 2001 From: Kastan Day Date: Tue, 5 Sep 2023 15:00:24 -0700 Subject: [PATCH 10/61] fix formatting in code formatting action (use double quotes) --- .github/workflows/yapf-format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/yapf-format.yml b/.github/workflows/yapf-format.yml index 52d2df31..3c0c1321 100644 --- a/.github/workflows/yapf-format.yml +++ b/.github/workflows/yapf-format.yml @@ -14,7 +14,7 @@ jobs: - name: pip install yapf run: pip install yapf - name: Format code with yapf - run: yapf --in-place --recursive --parallel --style='{based_on_style: google, column_limit: 140, indent_width: 2}' --exclude '*.env' . + run: yapf --in-place --recursive --parallel --style="{based_on_style: google, column_limit: 140, indent_width: 2}" --exclude '*.env' . - name: Commit changes uses: EndBug/add-and-commit@v4 with: From dc262bbb212e4b63cb22b4181b4e35d6c0f0dc49 Mon Sep 17 00:00:00 2001 From: star-nox Date: Wed, 6 Sep 2023 11:49:00 -0500 Subject: [PATCH 11/61] added prints for test --- ai_ta_backend/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 0342c467..61294dce 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -342,8 +342,8 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) - print(request.headers) - print(request.get_json()) + exit() + if course_name == '' or conversation == '': # proper web error "400 Bad request" From c1dafa4e629cdafaeca96ef174938cf1f051a07c Mon Sep 17 00:00:00 2001 From: star-nox Date: Wed, 6 Sep 2023 13:02:58 -0500 Subject: [PATCH 12/61] added prints for test --- ai_ta_backend/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 4d87d4f1..bdace68a 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -405,7 +405,6 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) - exit() if course_name == '' or conversation == '': @@ -417,7 +416,8 @@ def logToNomic(): ) # background execution of tasks!! 
- response = executor.submit(log_query_response_to_nomic, course_name, search_query) + #response = executor.submit(log_query_response_to_nomic, course_name, search_query) + response = {'status': 'success'} response = jsonify(response) response.headers.add('Access-Control-Allow-Origin', '*') From 772cd0b0a7c76afdc3901954b64abdd7fb486c71 Mon Sep 17 00:00:00 2001 From: star-nox Date: Wed, 6 Sep 2023 13:19:38 -0500 Subject: [PATCH 13/61] added print for response body --- ai_ta_backend/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index bdace68a..37cd8f7b 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -405,7 +405,7 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) - + print("response body: ", request.get_json()) if course_name == '' or conversation == '': # proper web error "400 Bad request" From c5c2bf8a49079f5c0812fc9c05c8f216b997793a Mon Sep 17 00:00:00 2001 From: star-nox Date: Wed, 6 Sep 2023 14:21:42 -0500 Subject: [PATCH 14/61] modified logging function WIP --- ai_ta_backend/main.py | 17 ++++++++++---- ai_ta_backend/nomic_logging.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 37cd8f7b..36232057 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -3,6 +3,7 @@ import os import time from typing import List +import json from dotenv import load_dotenv from flask import Flask, Response, Response, abort, jsonify, request @@ -13,7 +14,7 @@ from ai_ta_backend.nomic_logging import get_nomic_map, log_query_to_nomic from ai_ta_backend.vector_database import Ingest from ai_ta_backend.web_scrape import main_crawler, mit_course_download -from ai_ta_backend.nomic_logging import log_query_to_nomic, get_nomic_map, create_nomic_map, log_query_response_to_nomic +from ai_ta_backend.nomic_logging import log_query_to_nomic, get_nomic_map, log_convo_to_nomic from flask_executor import Executor app = Flask(__name__) @@ -406,18 +407,24 @@ def logToNomic(): print("conversation: ", conversation) print("response body: ", request.get_json()) + response = request.get_json() + + if json.loads(response) == {}: + print("response body is empty") + else: + print("response body is not empty") - if course_name == '' or conversation == '': + if json.loads(response) == {}: # proper web error "400 Bad request" abort( 400, description= - f"Missing one or more required parameters: 'course_name', 'search_query', and 'response' must be provided. Course name: `{course_name}`" + f"Missing parameters: 'response' must be provided." ) # background execution of tasks!! - #response = executor.submit(log_query_response_to_nomic, course_name, search_query) - response = {'status': 'success'} + response = executor.submit(log_convo_to_nomic, response) + response = jsonify(response) response.headers.add('Access-Control-Allow-Origin', '*') diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index e09220af..c23a6bb5 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -75,6 +75,48 @@ def log_query_response_to_nomic(course_name: str, search_query: str, response: s print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") return f"Successfully logged for {course_name}" +def log_convo_to_nomic(response: dict) -> str: + """ + Logs conversation to Nomic. + 1. Check if map exists for given course + 2. 
Check if conversation ID exists + - if yes, delete and add new data point + - if no, add new data point + 3. Keep current logic for map doesn't exist - update metadata + """ + print("in log_convo_to_nomic()") + course_name = response['course_name'] + user_email = response['user_email'] + conversation = response['conversation'] + conversation_id = conversation['id'] + + print("course_name: ", course_name) + print("user_email: ", user_email) + print("conversation: ", conversation) + + # we have to upload whole conversations + + # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it. + # will have current QA and historical QA from Nomic, append new data and add_embeddings() + + project_name = "Conversation Map for NCSA" + try: + project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) + map = project.maps[0] + data = map.data + + print("map: ", map) + print("2nd map: ", project.maps[1]) + print("data: ", data) + + except Exception as e: + print(e) + + + return "Successfully logged conversation to Nomic" + + + def get_nomic_map(course_name: str): """ Returns the variables necessary to construct an iframe of the Nomic map given a course name. From 0686045ea6be927d11ce1279139463a8c67ee6b0 Mon Sep 17 00:00:00 2001 From: star-nox Date: Wed, 6 Sep 2023 14:39:31 -0500 Subject: [PATCH 15/61] modified logging function WIP --- ai_ta_backend/main.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 36232057..fe32e074 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -414,13 +414,13 @@ def logToNomic(): else: print("response body is not empty") - if json.loads(response) == {}: - # proper web error "400 Bad request" - abort( - 400, - description= - f"Missing parameters: 'response' must be provided." - ) + # if json.loads(response) == {}: + # # proper web error "400 Bad request" + # abort( + # 400, + # description= + # f"Missing parameters: 'response' must be provided." + # ) # background execution of tasks!! 
response = executor.submit(log_convo_to_nomic, response) From a051bf13b8baaf9b45c801872b7133ff1e3b7aad Mon Sep 17 00:00:00 2001 From: star-nox Date: Wed, 6 Sep 2023 14:43:27 -0500 Subject: [PATCH 16/61] modified logging function WIP --- ai_ta_backend/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index fe32e074..240343b0 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -409,10 +409,10 @@ def logToNomic(): print("response body: ", request.get_json()) response = request.get_json() - if json.loads(response) == {}: - print("response body is empty") - else: - print("response body is not empty") + # if json.loads(response) == {}: + # print("response body is empty") + # else: + # print("response body is not empty") # if json.loads(response) == {}: # # proper web error "400 Bad request" From 92942312530f7a546447dcbb2d4c46c8388d1e9d Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 16:05:23 -0500 Subject: [PATCH 17/61] added print for response body --- ai_ta_backend/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 240343b0..4d4505d8 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -406,7 +406,7 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) - print("response body: ", request.get_json()) + print("response body: ", request.body) response = request.get_json() # if json.loads(response) == {}: From b56af3b8cb7a661eb9d9f2a4ad8de8f2e1c037a4 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 16:25:32 -0500 Subject: [PATCH 18/61] added print for response json --- ai_ta_backend/main.py | 2 +- ai_ta_backend/nomic_logging.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 4d4505d8..240343b0 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -406,7 +406,7 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) - print("response body: ", request.body) + print("response body: ", request.get_json()) response = request.get_json() # if json.loads(response) == {}: diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index c23a6bb5..a1c4d385 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -85,6 +85,7 @@ def log_convo_to_nomic(response: dict) -> str: 3. 
Keep current logic for map doesn't exist - update metadata """ print("in log_convo_to_nomic()") + print("response: ", response) course_name = response['course_name'] user_email = response['user_email'] conversation = response['conversation'] From 6ae44594603a239d93c127e7a9b85c9996f7cc4c Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 16:37:26 -0500 Subject: [PATCH 19/61] added print statements --- ai_ta_backend/main.py | 3 +-- ai_ta_backend/nomic_logging.py | 6 ++++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 240343b0..48252807 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -403,8 +403,7 @@ def logToNomic(): conversation: str = request.args.get('conversation', default='', type=str) print("In /onResponseCompletion") - print("course_name: ", course_name) - print("conversation: ", conversation) + print("\n---------------------------------\n") print("response body: ", request.get_json()) response = request.get_json() diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index a1c4d385..015825da 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -84,11 +84,13 @@ def log_convo_to_nomic(response: dict) -> str: - if no, add new data point 3. Keep current logic for map doesn't exist - update metadata """ + print("\n--------------------------------------------\n") print("in log_convo_to_nomic()") - print("response: ", response) + #print("response: ", response) course_name = response['course_name'] user_email = response['user_email'] conversation = response['conversation'] + messages = conversation['messages'] conversation_id = conversation['id'] print("course_name: ", course_name) @@ -114,7 +116,7 @@ def log_convo_to_nomic(response: dict) -> str: print(e) - return "Successfully logged conversation to Nomic" + return f"Successfully logged for {course_name}" From 1f57cafa8d6c1972c3b6201eb6d467bb4330d23d Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 16:46:29 -0500 Subject: [PATCH 20/61] added print statements --- ai_ta_backend/nomic_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 015825da..3a2c011d 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -86,7 +86,7 @@ def log_convo_to_nomic(response: dict) -> str: """ print("\n--------------------------------------------\n") print("in log_convo_to_nomic()") - #print("response: ", response) + print("response: ", len(response)) course_name = response['course_name'] user_email = response['user_email'] conversation = response['conversation'] From e0fa4892bd3bf8eb3cdde7ad89c11818dcee53ce Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 16:52:58 -0500 Subject: [PATCH 21/61] added print statements --- ai_ta_backend/main.py | 2 +- ai_ta_backend/nomic_logging.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 48252807..70218adb 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -405,7 +405,7 @@ def logToNomic(): print("In /onResponseCompletion") print("\n---------------------------------\n") - print("response body: ", request.get_json()) + #print("response body: ", request.get_json()) response = request.get_json() # if json.loads(response) == {}: diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 
3a2c011d..82f6dd49 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -87,15 +87,18 @@ def log_convo_to_nomic(response: dict) -> str: print("\n--------------------------------------------\n") print("in log_convo_to_nomic()") print("response: ", len(response)) - course_name = response['course_name'] - user_email = response['user_email'] - conversation = response['conversation'] - messages = conversation['messages'] - conversation_id = conversation['id'] - - print("course_name: ", course_name) - print("user_email: ", user_email) - print("conversation: ", conversation) + print(response[0]) + print("\n--------------------------------------------\n") + print(response[1]) + # course_name = response['course_name'] + # user_email = response['user_email'] + # conversation = response['conversation'] + # messages = conversation['messages'] + # conversation_id = conversation['id'] + + # print("course_name: ", course_name) + # print("user_email: ", user_email) + # print("conversation: ", conversation) # we have to upload whole conversations From e0e94f05318f570ef7e3b3c312a7c6100381a410 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 17:10:14 -0500 Subject: [PATCH 22/61] added print statements --- ai_ta_backend/nomic_logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 82f6dd49..f2ec72c1 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -87,9 +87,9 @@ def log_convo_to_nomic(response: dict) -> str: print("\n--------------------------------------------\n") print("in log_convo_to_nomic()") print("response: ", len(response)) - print(response[0]) + #print(response[0]) print("\n--------------------------------------------\n") - print(response[1]) + print(response) # course_name = response['course_name'] # user_email = response['user_email'] # conversation = response['conversation'] From 6485cbe23c520bdd0a30cff99f133929d224b4e6 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 17:28:41 -0500 Subject: [PATCH 23/61] iterating over response --- ai_ta_backend/nomic_logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index f2ec72c1..cddc5560 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -89,7 +89,8 @@ def log_convo_to_nomic(response: dict) -> str: print("response: ", len(response)) #print(response[0]) print("\n--------------------------------------------\n") - print(response) + print(response['course_name']) + # course_name = response['course_name'] # user_email = response['user_email'] # conversation = response['conversation'] From 85828f5a884a38a49f180d305c438ef3ee3659ca Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 17:38:44 -0500 Subject: [PATCH 24/61] extracting relevant data --- ai_ta_backend/nomic_logging.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index cddc5560..e8fdb46a 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -91,15 +91,15 @@ def log_convo_to_nomic(response: dict) -> str: print("\n--------------------------------------------\n") print(response['course_name']) - # course_name = response['course_name'] - # user_email = response['user_email'] - # conversation = response['conversation'] - # messages = conversation['messages'] - # 
conversation_id = conversation['id'] - - # print("course_name: ", course_name) - # print("user_email: ", user_email) - # print("conversation: ", conversation) + course_name = response['course_name'] + user_email = response['user_email'] + conversation = response['conversation'] + messages = conversation['messages'] + conversation_id = conversation['id'] + + print("course_name: ", course_name) + print("user_email: ", user_email) + print("conversation: ", conversation) # we have to upload whole conversations From 2e827b02c3cd76b070848e8be2d30edb77a2a7ac Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 17:49:52 -0500 Subject: [PATCH 25/61] extracting relevant data --- ai_ta_backend/nomic_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index e8fdb46a..a3b084a0 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -86,7 +86,7 @@ def log_convo_to_nomic(response: dict) -> str: """ print("\n--------------------------------------------\n") print("in log_convo_to_nomic()") - print("response: ", len(response)) + print("response: ", response) #print(response[0]) print("\n--------------------------------------------\n") print(response['course_name']) From 315ea1dbb43cb58855fef4759c0411650ce691c4 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 18:03:51 -0500 Subject: [PATCH 26/61] printing response data --- ai_ta_backend/nomic_logging.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index a3b084a0..8124f70b 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -90,6 +90,10 @@ def log_convo_to_nomic(response: dict) -> str: #print(response[0]) print("\n--------------------------------------------\n") print(response['course_name']) + print(response['user_email']) + print(response['conversation']) + + print("\n--------------------------------------------\n") course_name = response['course_name'] user_email = response['user_email'] From e47b5a5ec41d48271a2f772d939934e6286d2ceb Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 18:13:59 -0500 Subject: [PATCH 27/61] printing response data type --- ai_ta_backend/nomic_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 8124f70b..cdc28a98 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -86,7 +86,7 @@ def log_convo_to_nomic(response: dict) -> str: """ print("\n--------------------------------------------\n") print("in log_convo_to_nomic()") - print("response: ", response) + print("response: ", type(response)) #print(response[0]) print("\n--------------------------------------------\n") print(response['course_name']) From 683afb270bbb49bcea4d1622829631aa10aec336 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 18:21:05 -0500 Subject: [PATCH 28/61] iterating over response --- ai_ta_backend/nomic_logging.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index cdc28a98..d31f9942 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -90,10 +90,11 @@ def log_convo_to_nomic(response: dict) -> str: #print(response[0]) print("\n--------------------------------------------\n") print(response['course_name']) - print(response['user_email']) - 
print(response['conversation']) + for key, value in response.items(): + print(key, value) + print("\n--------------------------------------------\n") - print("\n--------------------------------------------\n") + course_name = response['course_name'] user_email = response['user_email'] From b17876c40eef4596232501e43b253659d2bf2cf6 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 18:27:53 -0500 Subject: [PATCH 29/61] extracting messages --- ai_ta_backend/nomic_logging.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index d31f9942..9073023a 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -91,10 +91,11 @@ def log_convo_to_nomic(response: dict) -> str: print("\n--------------------------------------------\n") print(response['course_name']) for key, value in response.items(): - print(key, value) + print(key + "----->" + value) print("\n--------------------------------------------\n") - + print(response['conversation']['messages'][0]['content']) + print("\n--------------------------------------------\n") course_name = response['course_name'] user_email = response['user_email'] From 6d21c8f5881f6342a8b04a8cc83c39dede360bec Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 7 Sep 2023 18:50:41 -0500 Subject: [PATCH 30/61] extracting messages --- ai_ta_backend/nomic_logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 9073023a..52ef34fb 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -92,6 +92,7 @@ def log_convo_to_nomic(response: dict) -> str: print(response['course_name']) for key, value in response.items(): print(key + "----->" + value) + print(key) print("\n--------------------------------------------\n") print(response['conversation']['messages'][0]['content']) From f230d0ca883c652634c2e7deba0fe3d9d53573c8 Mon Sep 17 00:00:00 2001 From: star-nox Date: Fri, 8 Sep 2023 20:17:28 -0500 Subject: [PATCH 31/61] added function for logging single-turn conversation --- ai_ta_backend/main.py | 29 +- ai_ta_backend/nomic_logging.py | 171 ++- ai_ta_backend/nomic_map_creation.ipynb | 1556 ++++++++++++++++++++++++ 3 files changed, 1635 insertions(+), 121 deletions(-) create mode 100644 ai_ta_backend/nomic_map_creation.ipynb diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 70218adb..dae2442f 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -396,38 +396,21 @@ def nomic_map(): response.headers.add('Access-Control-Allow-Origin', '*') return response -@app.route('/onResponseCompletion', methods=['POST']) +@app.route('/onResponseCompletion', methods=['GET']) def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) - #search_query: str = request.args.get('search_query', default='', type=str) conversation: str = request.args.get('conversation', default='', type=str) print("In /onResponseCompletion") - print("\n---------------------------------\n") - - #print("response body: ", request.get_json()) - response = request.get_json() - # if json.loads(response) == {}: - # print("response body is empty") - # else: - # print("response body is not empty") - - # if json.loads(response) == {}: - # # proper web error "400 Bad request" - # abort( - # 400, - # description= - # f"Missing parameters: 'response' must be provided." - # ) + conversation_json = json.loads(conversation) # background execution of tasks!! 
- response = executor.submit(log_convo_to_nomic, response) - + response = executor.submit(log_convo_to_nomic, course_name, conversation_json) - response = jsonify(response) - response.headers.add('Access-Control-Allow-Origin', '*') - return response + #response = jsonify(response) + #response.headers.add('Access-Control-Allow-Origin', '*') + return "response" if __name__ == '__main__': diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 52ef34fb..04845579 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -1,6 +1,7 @@ import os import nomic from nomic import atlas +from nomic import AtlasProject from langchain.embeddings import OpenAIEmbeddings import numpy as np import time @@ -10,127 +11,101 @@ nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app NOMIC_MAP_NAME_PREFIX = 'Queries for ' -def log_query_to_nomic(course_name: str, search_query: str) -> str: +def log_convo_to_nomic(course_name: str, conversation) -> str: """ - Logs user query and retrieved contexts to Nomic. Must have more than 20 queries to get a map, otherwise we'll show nothing for now. + Logs conversation to Nomic. + 1. Check if map exists for given course + 2. Check if conversation ID exists + - if yes, delete and add new data point + - if no, add new data point + 3. Keep current logic for map doesn't exist - update metadata """ - project_name = NOMIC_MAP_NAME_PREFIX + course_name - start_time = time.monotonic() - - embeddings_model = OpenAIEmbeddings() # type: ignore - embeddings = np.array(embeddings_model.embed_query(search_query)).reshape(1, 1536) + print("in log_convo_to_nomic()") + + messages = conversation['conversation']['messages'] + user_email = conversation['conversation']['user_email'] + conversation_id = conversation['conversation']['id'] - data = [{'course_name': course_name, 'query': search_query, 'id': time.time()}] + # we have to upload whole conversations + # check what the fetched data looks like - pandas df or pyarrow table + # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it. 
+ # will have current QA and historical QA from Nomic, append new data and add_embeddings() + project_name = NOMIC_MAP_NAME_PREFIX + course_name + start_time = time.monotonic() + project_name = "Conversation Map for NCSA" try: - # slow call, about 0.6 sec - project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) - # mostly async call (0.35 to 0.5 sec) - project.add_embeddings(embeddings=embeddings, data=data) + # fetch project metadata and embbeddings + project = AtlasProject(name=project_name, add_datums_if_exists=True) + map_metadata_df = project.maps[1].data.df + map_embeddings_df = project.maps[1].embeddings.latent + + if conversation_id in map_metadata_df.values: + print("conversation_id exists") + + # store that convo metadata locally + prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id] + prev_index = prev_data.index.values[0] + prev_convo = prev_data['conversation'].values[0] + prev_id = prev_data['id'].values[0] + embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) + + # delete that convo data point from Nomic + print("Prev point deleted: ", project.delete_data([prev_id])) + + # prep for new point + first_message = prev_convo.split("\n")[1].split(": ")[1] + + # append new convo to prev convo + for message in messages: + prev_convo += "\n>>> " + message['role'] + ": " + message['content'] + "\n" - # required to keep maps fresh (or we could put on fetch side, but then our UI is slow) - project.rebuild_maps() - except Exception as e: - # if project doesn't exist, create it - result = create_nomic_map(course_name, embeddings, data) - if result is None: - print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e) + # update metadata + metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, + "id": len(map_metadata_df)+1, "user_email": user_email, "first_query": first_message}] + else: - print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") - return f"Successfully logged for {course_name}" - - print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") - return f"Successfully logged for {course_name}" + print("conversation_id does not exist") -def log_query_response_to_nomic(course_name: str, search_query: str, response: str) -> str: - """ - Logs user query and model responses to Nomic. Must have more than 20 queries to get a map, - otherwise we'll show nothing for now. 
- """ - project_name = NOMIC_MAP_NAME_PREFIX + course_name - start_time = time.monotonic() + # add new data point + user_queries = [] + conversation_string = "" + first_message = messages[0]['content'] + user_queries.append(first_message) - embeddings_model = OpenAIEmbeddings() # type: ignore - embeddings = np.array(embeddings_model.embed_query(search_query)).reshape(1, 1536) - - data = [{'course_name': course_name, 'query': search_query, 'response': response, 'id': time.time()}] + for message in messages: + conversation_string += "\n>>> " + message['role'] + ": " + message['content'] + "\n" - try: - # slow call, about 0.6 sec - project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) - # mostly async call (0.35 to 0.5 sec) - project.add_embeddings(embeddings=embeddings, data=data) + metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, + "id": len(map_metadata_df)+1, "user_email": user_email, "first_query": first_message}] - # required to keep maps fresh (or we could put on fetch side, but then our UI is slow) + print("metadata: ", metadata) + print("user_queries: ", user_queries) + print(len(metadata)) + print(len(user_queries)) + + # create embeddings + embeddings_model = OpenAIEmbeddings() + embeddings = embeddings_model.embed_documents(user_queries) + + # add embeddings to project + project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) + project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata)) project.rebuild_maps() + except Exception as e: # if project doesn't exist, create it - result = create_nomic_map(course_name, embeddings, data) + result = create_nomic_map(course_name, embeddings, pd.DataFrame(metadata)) if result is None: print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e) else: print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") return f"Successfully logged for {course_name}" - - print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") - return f"Successfully logged for {course_name}" - -def log_convo_to_nomic(response: dict) -> str: - """ - Logs conversation to Nomic. - 1. Check if map exists for given course - 2. Check if conversation ID exists - - if yes, delete and add new data point - - if no, add new data point - 3. Keep current logic for map doesn't exist - update metadata - """ - print("\n--------------------------------------------\n") - print("in log_convo_to_nomic()") - print("response: ", type(response)) - #print(response[0]) - print("\n--------------------------------------------\n") - print(response['course_name']) - for key, value in response.items(): - print(key + "----->" + value) - print(key) - print("\n--------------------------------------------\n") - - print(response['conversation']['messages'][0]['content']) - print("\n--------------------------------------------\n") - - course_name = response['course_name'] - user_email = response['user_email'] - conversation = response['conversation'] - messages = conversation['messages'] - conversation_id = conversation['id'] - - print("course_name: ", course_name) - print("user_email: ", user_email) - print("conversation: ", conversation) - - # we have to upload whole conversations - - # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it. 
- # will have current QA and historical QA from Nomic, append new data and add_embeddings() - - project_name = "Conversation Map for NCSA" - try: - project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) - map = project.maps[0] - data = map.data - - print("map: ", map) - print("2nd map: ", project.maps[1]) - print("data: ", data) - - except Exception as e: - print(e) - + print(f"⏰ Nomic logging runtime: {(time.monotonic() - start_time):.2f} seconds") return f"Successfully logged for {course_name}" - def get_nomic_map(course_name: str): """ Returns the variables necessary to construct an iframe of the Nomic map given a course name. diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb new file mode 100644 index 00000000..23924157 --- /dev/null +++ b/ai_ta_backend/nomic_map_creation.ipynb @@ -0,0 +1,1556 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 11:57:29,274:INFO - Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", + "2023-09-07 11:57:29,274:INFO - NumExpr defaulting to 8 threads.\n" + ] + } + ], + "source": [ + "# import required libraries\n", + "\n", + "import os\n", + "import supabase\n", + "from nomic import atlas\n", + "from dotenv import load_dotenv\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "import numpy as np\n", + "import time\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# loading environment variables\n", + "\n", + "env_path = \"../.env\"\n", + "load_dotenv(dotenv_path=env_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize supabase client\n", + "\n", + "url = os.environ.get(\"SUPABASE_URL\")\n", + "key = os.environ.get(\"SUPABASE_API_KEY\")\n", + "\n", + "supabase_client = supabase.create_client(url, key)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atconvoconvo_idcourse_nameuser_email
052002023-09-07T17:03:47.705812+00:00{'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...abd2e044-fbff-455e-8c60-755cc7635182cropwizardavd6@illinois.edu
152012023-09-07T17:05:25.863567+00:00{'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...3e5d4861-b128-4c64-96ac-87c74f3217e5cropwizardavd6@illinois.edu
252162023-09-07T17:18:32.197379+00:00{'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...43ee631a-cb58-43f5-b2af-a5b91b7585cdcropwizardavd6@illinois.edu
352122023-09-07T17:16:34.267931+00:00{'id': '0129ea46-207f-47e3-be90-da143857000f',...0129ea46-207f-47e3-be90-da143857000fcropwizardavd6@illinois.edu
452172023-09-07T17:19:00.681823+00:00{'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...c6b4e4d8-4de7-4387-b4e9-411084dffea6cropwizardavd6@illinois.edu
552232023-09-07T17:22:38.970643+00:00{'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',...b5500763-7e7b-4b23-9031-cc320a51ccbfcropwizardavd6@illinois.edu
652272023-09-07T17:24:10.362647+00:00{'id': 'd410955f-4398-4869-b395-e6b659cc2d06',...d410955f-4398-4869-b395-e6b659cc2d06cropwizardavd6@illinois.edu
752092023-09-07T17:14:43.518617+00:00{'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',...0ecd2c05-772a-42aa-b29a-0a892bd0e9abcropwizardavd6@illinois.edu
852222023-09-07T17:21:29.223343+00:00{'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',...c82056a0-2d67-4ce8-82e3-86a30f1f6dc0cropwizardavd6@illinois.edu
952242023-09-07T17:22:54.856839+00:00{'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',...2316bbd7-61f3-44aa-a79e-bb42bd688c47cropwizardavd6@illinois.edu
1052262023-09-07T17:23:27.644745+00:00{'id': '66abfe85-bb04-456e-8709-89f9aafe5508',...66abfe85-bb04-456e-8709-89f9aafe5508cropwizardavd6@illinois.edu
1152282023-09-07T17:24:41.32465+00:00{'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',...175ad6b2-3bf2-4889-b2de-a18961ee8ecbcropwizardavd6@illinois.edu
1252322023-09-07T17:30:05.770146+00:00{'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',...f9859e36-bf76-40ab-9413-91ef6663dbd6cropwizardavd6@illinois.edu
1352332023-09-07T17:30:52.749867+00:00{'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',...bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2cropwizardavd6@illinois.edu
1452342023-09-07T17:31:19.801611+00:00{'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',...ecaf3228-78f3-49f7-b46d-3a5c3d5b62fdcropwizardavd6@illinois.edu
1552372023-09-07T17:36:14.68431+00:00{'id': 'edead825-12df-417c-af40-059e83067c69',...edead825-12df-417c-af40-059e83067c69cropwizardavd6@illinois.edu
1652382023-09-07T17:36:42.984907+00:00{'id': 'bc44d229-327a-452d-a386-8868216a1bd2',...bc44d229-327a-452d-a386-8868216a1bd2cropwizardavd6@illinois.edu
1752412023-09-07T17:37:22.134543+00:00{'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',...ff7a1c27-e126-49db-be79-6deaefcffec3cropwizardavd6@illinois.edu
1853042023-09-07T19:45:21.73541+00:00{'id': '6226b153-356a-408c-9483-49ef5808538c',...6226b153-356a-408c-9483-49ef5808538ccropwizardavd6@illinois.edu
1953052023-09-07T19:46:03.626639+00:00{'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',...e9edae6b-b7e1-46a8-b5e8-6215890a2a01cropwizardavd6@illinois.edu
2053062023-09-07T19:46:36.076704+00:00{'id': 'b2116035-da7b-4136-878d-66a10098a756',...b2116035-da7b-4136-878d-66a10098a756cropwizardavd6@illinois.edu
2151952023-09-06T23:43:38.201481+00:00{'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',...543ee10e-faf0-47a8-bb1c-c040aec44ed1cropwizarddabholkar.asmita@gmail.com
\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 5200 2023-09-07T17:03:47.705812+00:00 \n", + "1 5201 2023-09-07T17:05:25.863567+00:00 \n", + "2 5216 2023-09-07T17:18:32.197379+00:00 \n", + "3 5212 2023-09-07T17:16:34.267931+00:00 \n", + "4 5217 2023-09-07T17:19:00.681823+00:00 \n", + "5 5223 2023-09-07T17:22:38.970643+00:00 \n", + "6 5227 2023-09-07T17:24:10.362647+00:00 \n", + "7 5209 2023-09-07T17:14:43.518617+00:00 \n", + "8 5222 2023-09-07T17:21:29.223343+00:00 \n", + "9 5224 2023-09-07T17:22:54.856839+00:00 \n", + "10 5226 2023-09-07T17:23:27.644745+00:00 \n", + "11 5228 2023-09-07T17:24:41.32465+00:00 \n", + "12 5232 2023-09-07T17:30:05.770146+00:00 \n", + "13 5233 2023-09-07T17:30:52.749867+00:00 \n", + "14 5234 2023-09-07T17:31:19.801611+00:00 \n", + "15 5237 2023-09-07T17:36:14.68431+00:00 \n", + "16 5238 2023-09-07T17:36:42.984907+00:00 \n", + "17 5241 2023-09-07T17:37:22.134543+00:00 \n", + "18 5304 2023-09-07T19:45:21.73541+00:00 \n", + "19 5305 2023-09-07T19:46:03.626639+00:00 \n", + "20 5306 2023-09-07T19:46:36.076704+00:00 \n", + "21 5195 2023-09-06T23:43:38.201481+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',... \n", + "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',... \n", + "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',... \n", + "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',... \n", + "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',... \n", + "5 {'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',... \n", + "6 {'id': 'd410955f-4398-4869-b395-e6b659cc2d06',... \n", + "7 {'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',... \n", + "8 {'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',... \n", + "9 {'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',... \n", + "10 {'id': '66abfe85-bb04-456e-8709-89f9aafe5508',... \n", + "11 {'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',... \n", + "12 {'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',... \n", + "13 {'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',... \n", + "14 {'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',... \n", + "15 {'id': 'edead825-12df-417c-af40-059e83067c69',... \n", + "16 {'id': 'bc44d229-327a-452d-a386-8868216a1bd2',... \n", + "17 {'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',... \n", + "18 {'id': '6226b153-356a-408c-9483-49ef5808538c',... \n", + "19 {'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',... \n", + "20 {'id': 'b2116035-da7b-4136-878d-66a10098a756',... \n", + "21 {'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',... 
\n", + "\n", + " convo_id course_name \\\n", + "0 abd2e044-fbff-455e-8c60-755cc7635182 cropwizard \n", + "1 3e5d4861-b128-4c64-96ac-87c74f3217e5 cropwizard \n", + "2 43ee631a-cb58-43f5-b2af-a5b91b7585cd cropwizard \n", + "3 0129ea46-207f-47e3-be90-da143857000f cropwizard \n", + "4 c6b4e4d8-4de7-4387-b4e9-411084dffea6 cropwizard \n", + "5 b5500763-7e7b-4b23-9031-cc320a51ccbf cropwizard \n", + "6 d410955f-4398-4869-b395-e6b659cc2d06 cropwizard \n", + "7 0ecd2c05-772a-42aa-b29a-0a892bd0e9ab cropwizard \n", + "8 c82056a0-2d67-4ce8-82e3-86a30f1f6dc0 cropwizard \n", + "9 2316bbd7-61f3-44aa-a79e-bb42bd688c47 cropwizard \n", + "10 66abfe85-bb04-456e-8709-89f9aafe5508 cropwizard \n", + "11 175ad6b2-3bf2-4889-b2de-a18961ee8ecb cropwizard \n", + "12 f9859e36-bf76-40ab-9413-91ef6663dbd6 cropwizard \n", + "13 bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2 cropwizard \n", + "14 ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd cropwizard \n", + "15 edead825-12df-417c-af40-059e83067c69 cropwizard \n", + "16 bc44d229-327a-452d-a386-8868216a1bd2 cropwizard \n", + "17 ff7a1c27-e126-49db-be79-6deaefcffec3 cropwizard \n", + "18 6226b153-356a-408c-9483-49ef5808538c cropwizard \n", + "19 e9edae6b-b7e1-46a8-b5e8-6215890a2a01 cropwizard \n", + "20 b2116035-da7b-4136-878d-66a10098a756 cropwizard \n", + "21 543ee10e-faf0-47a8-bb1c-c040aec44ed1 cropwizard \n", + "\n", + " user_email \n", + "0 avd6@illinois.edu \n", + "1 avd6@illinois.edu \n", + "2 avd6@illinois.edu \n", + "3 avd6@illinois.edu \n", + "4 avd6@illinois.edu \n", + "5 avd6@illinois.edu \n", + "6 avd6@illinois.edu \n", + "7 avd6@illinois.edu \n", + "8 avd6@illinois.edu \n", + "9 avd6@illinois.edu \n", + "10 avd6@illinois.edu \n", + "11 avd6@illinois.edu \n", + "12 avd6@illinois.edu \n", + "13 avd6@illinois.edu \n", + "14 avd6@illinois.edu \n", + "15 avd6@illinois.edu \n", + "16 avd6@illinois.edu \n", + "17 avd6@illinois.edu \n", + "18 avd6@illinois.edu \n", + "19 avd6@illinois.edu \n", + "20 avd6@illinois.edu \n", + "21 dabholkar.asmita@gmail.com " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# query data for one course for testing\n", + "course = 'cropwizard'\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq(\"course_name\", course).execute()\n", + "data = response.data\n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...\n", + "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...\n", + "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...\n", + "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',...\n", + "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...\n", + "Name: convo, dtype: object" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "course_df = df[df['course_name'] == course]['convo']\n", + "course_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'role': 'assistant', 'content': \"The U.S. Environmental Protection Agency (EPA)'s mission is to protect human health and the environment. As part of this mission, the EPA is involved in efforts such as developing strategies to protect endangered and threatened species from potential impacts of agricultural practices, including the use of herbicides. 
For instance, the EPA has released a draft Herbicide Strategy for public comment, aimed at proposing early mitigations for more than 900 listed species and designated critical habitats to reduce potential impacts from the agricultural use of herbicides(1^,2^,3^,4^).\\n\\n1. University of Illinois Extension\\n2. EPA releases draft herbicide strategy\\n3. EPA releases draft herbicide strategy\\n4. extension.pdf, page: 3\", 'contexts': [{'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'University of Illinois Extension', 's3_path': 'courses/cropwizard/University_of_Illinois_Extension.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental infrastructure…\\n\\n\\n \\nAddress health challenges with holistic solutions\\nSeptember 1, 2023\\n\\nURBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed the Autumn Health Picks 2023 webinar series. This series is part of the Community Seminar Series, and it provides an opportunity for…\\n\\n\\n \\nDo artificial roosts help bats? Illinois experts say more research needed\\nSeptember 1, 2023\\n\\nURBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of Illinois Urbana-Champaign\\xa0suggests the structures…\\n\\n\\nMore news\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\nUpcoming Events\\n\\n\\n\\n\\n \\n\\nRead Before You Sign: Renting & Leasing \\n\\n\\nSeptember 6, 2023\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nTechnology & Soil Health Field Day\\n\\n\\nSeptember 6, 2023\\n\\n\\nCounty\\n\\nHenry\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nPollinator Plants to Span the Season\\n\\n\\nSeptember 6, 2023\\n\\n\\nCounty\\n\\nMacoupin\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nOrr Beef Research Center Field Day\\n\\n\\nSeptember 6, 2023\\n\\n\\n\\n\\n\\n\\nMore Events\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSubtitle\\nOpens the Door to New Opportunities\\n\\n\\n\\n\\nTitle\\nLearn @ Illinois Extension\\n\\n\\n\\n\\nBody\\nThere is so much you want to do, but the challenges of time, distance, and cost create barriers to achieving those goals. You need a program that's flexible to your schedule.\\xa0Learn @ Illinois Extension\\xa0helps remove those challenge by offering\\xa0flexible online learning programs that meet your personal interests and continuing education requirements. We provide learning on your terms so you can be who you were meant to be.\\xa0\\n\\n\\n\\nOnline Courses\\n\\n\\n\\n\\n\\n \\n\\n\\nLatest Podcast\\n\\n\\n\\n\\nGood Growing\\n\\n\\nGardenbite: Three tips for a healthier lawn | #GoodGrowingThis week on the Good Growing podcast Chris shares a Gardenbite of when retired horticulture educator Richard Hentschel visited the show in 2021 to talk about fall lawn care. 
During the show, Richard spoke about three things we could all do to reduce our lawn inputs.\\xa0Want to see or...\\n\\n\\n Your browser does not support iframes, but you can visit \\n\\n\\n\\n\\n\\nMore Podcasts\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\nDig Deeper\\n\\n\\nTopics we address\\n\\n4-H Youth Development\\nBeef Cattle\\nClimate\\nCommunity Gardens\\nCommunity Planning\\nCottage Food\\nCrops\\nDisasters\\nEconomic Development\\nEnergy\\nEnvironmental Quality\\nFamily\\nFinances\\nFirewood\\nFlowers\\nFood\\nForestry\\nHealth\\nHemp\\nHerbs\\nHome Vegetable Gardening\\nIllinois Grasses\\nInsects\\nInvasives\\nLivestock\\nLocal Food Systems and Small Farms\\nLocal Government Education\\nMental Health\\nMushrooms\\nNatural Resources\\nPlant Problems\\nPlants\\nRainfall Management\\nSoil\\nSpecialty Crops\\nVaccines\\nWeather\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nShare with us\\n\\n\\n \\n\\nBody\\n\\n\\n\\n\\xa0\\n\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\n\\nView this profile on Instagram\\n\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\xa0\\n\\n\\n\\n\\xa0\\n\\xa0\\n\\nIllinois Extension (@ilextension) • Instagram photos and videos\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nBody\\n\\xa0\\n\\nUniversity of Illinois Extension\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nVolunteer with Extension\\nLearn Something New\\nRead a Blog\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIllinois Extension\\n\\n 101 Mumford Hall (MC-710)\\n \\n1301 W\", 'url': 'https://extension.illinois.edu/'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': '. The draft framework describes how EPA would apply the mitigations in the Strategy compared to mitigations in the other initiatives.\\nWhat can you do? Submit comments! Learn more!\\nThe draft herbicide framework and accompanying documents are available in docket EPA-HQ-OPP-2023-0365 for public comment for 60 days.\\xa0Comments are due September 22, 2023. Agricultural pesticide users are encouraged to learn about EPA’s plan and to start thinking about how these mitigation measures could apply to herbicide use in their operation. While extensive recordkeeping is not currently required for the mitigation factors described in the strategy, it is highly recommended that users begin thinking about how to incorporate these new elements into their current record systems. If you are applying according to label directions, proper records can only assist your defense should you need it. To help guide you, watch for shared comments from professional organizations such as the Weed Science Society of America (WSSA). In April, a WSSA press release linked their comments to EPA and encouraged growers to act now to understand the impact of ESA’s new compliance initiatives. One good suggestion they offered to growers is to learn how to use EPA’s Bulletins Live! Two which is where important application instructions will be found.\\nEPA’s Office of Pesticide Programs will present a webinar on this draft herbicide Strategy on August 10th at Noon Central Time. EPA plans to walk through the framework and take questions from grower groups and other stakeholders. Register today. 
Questions may be submitted in advance of the webinar by emailing sm.opmp.pesticides@usda.gov.\\nTo learn more about EPA’s comprehensive ESA workplan Check out our article, “Change Coming to How EPA Protects Endangered Species from Pesticides – Feedback Needed” in the November/December 2022 issue of this newsletter. Proposed mitigation measures are discussed in more general terms in this comprehensive workplan. Please note that the comment period discussed there has ended.\\nVisit EPA’s website to learn more about how EPA’s pesticide program is protecting endangered species.\\nAdapted slightly from an EPA press release, “EPA Releases Draft Strategy to Better Protect Endangered Species from Herbicide Use” and related EPA documents. \\nABOUT THE AUTHOR: Michelle Wiesbrook\\xa0provides subject matter expertise and training in pesticide safety with an emphasis on horticultural weed science. She serves as the Illinois Pesticide Review newsletter editor, collecting and organizing material; and co-coordinates social media information for the PSEP program and ensures its timely publication.\\n\\nPesticide News\\n\\n\\n\\n\\nKeywords\\n\\nPesticide\\nHerbicide\\nInsecticide\\nFungicide\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nRelated Content\\n\\nUnintended herbicide injury on trees: A growing concernAugust 28, 2023\\n\\nTips to help employees succeedAugust 2, 2023\\n\\nParaquat certification valid 3 years: Are you due for training?August 2, 2023\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIllinois Extension\\n\\n 101 Mumford Hall (MC-710)\\n \\n1301 W. Gregory Dr.\\n Urbana,\\n IL\\n 61801\\n \\nEmail: extension@illinois.edu\\n\\n\\n\\nInstagram\\nFacebook\\nTwitter\\nYouTube\\nLinkedIn\\n\\n\\n\\nCollege of Agricultural, Consumer & Environmental Sciences\\n\\n\\n\\n\\n\\nDig Deeper\\n\\n\\nTake an Online Course\\n\\n\\nRead a Blog\\n\\n\\nRead a Newsletter\\n\\n\\nListen to a Podcast\\n\\n\\nWatch a Video\\n\\n\\nBuy a Publication\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nExtension Network\\n\\n\\nEat.Move.Save.\\n\\n\\nIllinois 4-H\\n\\n\\nIllini Science Policy Program\\n\\n\\nIllinois Indiana Sea Grant\\n\\n\\nIllinois Master Gardeners\\n\\n\\nIllinois Master Naturalists\\n\\n\\nIllinois Nutrition Education Programs\\n\\n\\nPesticide Safety Education Program\\n\\n\\nResearch Centers\\n\\n\\nSafe Electricity\\n\\n\\nU of I Plant Clinic\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nAdditional links\\n\\nAbout Cookies\\nPrivacy Policy\\n© 2023 University of Illinois Board of Trustees\\nEEO\\nAccessibility\\nmyExtension\\nLogin', 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': \". 
The Strategy — which is primarily designed to provide early mitigations that minimize impacts to over 900 listed species — is one of EPA’s most significant proposals to help overcome these challenges.\\nEPA focused the Strategy on agricultural crop uses in the lower 48 states because hundreds of millions of pounds of herbicides (and plant growth regulators) are applied each year, which is substantially more than for non-agricultural uses of herbicides and for other pesticide classes (e.g., insecticides, fungicides). Additionally, hundreds of listed species in the lower 48 states live in habitats adjacent to agricultural areas. The proposed mitigations in the Strategy would address the most common ways that conventional agricultural herbicides might impact these listed species. More specifically, EPA developed potential mitigation options for conventional agricultural herbicides to reduce pesticide transport via spray drift and runoff/erosion that could result in exposure to listed plants and listed animals that depend on plants.\\nEPA expects that the Strategy will increase the efficiency of future ESA consultations on herbicides with the U.S. Fish and Wildlife Service (FWS), which has authority over most listed species that could benefit from the proposed mitigations. Under the Strategy, EPA proposes to identify and begin mitigating for potential impacts even before EPA completes ESA consultations. These early mitigations should expedite EPA’s ability to fully comply with the ESA by reducing impacts to listed species before EPA conducts most of its ESA analysis. Adopting mitigations earlier will also allow EPA and FWS to use their resources more efficiently in ESA consultations.\\nThe Strategy’s proposed mitigations to reduce spray drift, runoff, and erosion and thereby reduce the potential exposure reflect practices that can be readily implemented by growers and identified by pesticide applicators and that provide flexibility for growers to select the mitigations that work best for them. The Strategy also gives credit to landowners who are already implementing certain measures to reduce pesticide runoff. For example, existing vegetated ditches and water retention ponds will qualify for credits that reduce the need for additional mitigation. Similarly, the Strategy would require less mitigation on flat lands, which are less prone to runoff, and in many western states, which typically experience less rain to carry pesticides off fields. The Strategy also describes how the Agency could add other mitigation practices to the menu of mitigation options in the future, particularly to incorporate emerging technology or new information on the effectiveness of specific practices.\\nDraft Herbicide Framework Document\\nThe draft framework document titled, “Draft Herbicide Strategy Framework to Reduce Exposure of Federally Listed Endangered and Threatened Species and Designated Critical Habitats from the Use of Conventional Agricultural Herbicides” is 97 pages long and includes a discussion of both the proposed scope of the Herbicide Strategy and the proposed decision framework to determine the level of mitigation needed for a particular conventional agricultural herbicide. The draft framework document also includes examples of how the proposed herbicide mitigation would apply to some of the herbicides for which EPA has conducted case studies as well as EPA's proposed implementation plan.\\nSome of the accompanying documents are quite lengthy. 
The “Herbicide Strategy Case Study Summary and Process” is 666 pages!\\xa0 Coincidence on the number? I’m not sure. I haven’t made it through it all yet. The primary thing I gathered from perusing through the spreadsheet files was that managing these complexities must be a nightmare. The document, “Application of EPA’s Draft Herbicide Strategy Framework Through Scenarios that Represent Crop Production Systems” is only 17 pages long and includes possible scenarios. Examples 1 and 2 would be particularly fitting for Illinois corn and soybean producers. These are shared to help producers better understand how these mitigation practices may be used.\\nIn its ESA Workplan and ESA Workplan Update, EPA outlined this and other ESA initiatives to develop early mitigations that provide listed species with practical protections from pesticides. The Strategy complements those other initiatives, such as targeted mitigations for listed species particularly vulnerable to pesticides and Interim Ecological Mitigations that EPA has begun incorporating under the Federal Insecticide, Fungicide, and Rodenticide Act. The draft framework describes how EPA would apply the mitigations in the Strategy compared to mitigations in the other initiatives.\\nWhat can you do? Submit comments! Learn more!\\nThe draft herbicide framework and accompanying documents are available in docket EPA-HQ-OPP-2023-0365 for public comment for 60 days.\\xa0Comments are due September 22, 2023. Agricultural pesticide users are encouraged to learn about EPA’s plan and to start thinking about how these mitigation measures could apply to herbicide use in their operation. While extensive recordkeeping is not currently required for the mitigation factors described in the strategy, it is highly recommended that users begin thinking about how to incorporate these new elements into their current record systems\", 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': 'EPA releases draft herbicide strategy; public comment period open | Illinois Extension | UIUC\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\nCollege of Agricultural, Consumer & Environmental Sciences\\n\\nIllinois Extension\\n\\n\\n\\n\\n\\nGive\\nVolunteer\\nCareers\\n\\n\\n\\n\\nTopics\\n\\n\\nAll Topics\\n\\n\\nCottage Food\\n\\n\\nFood\\n\\n\\nForestry\\n\\n\\nLocal Government Education\\n\\n\\nPlants\\n\\n\\nRainfall Management\\n\\n\\nSoil\\n\\n\\nVaccines\\n\\n\\nVegetable Gardening\\n\\n\\n\\n\\nLearn\\n\\n\\nOnline Courses\\n\\n\\nBlogs\\n\\n\\nNewsletters\\n\\n\\nPodcasts\\n\\n\\nVideos\\n\\n\\nPublications\\n\\n\\nSummer Resources\\n\\n\\n\\n\\nEvents\\n\\n\\nStatewide Webinars\\n\\n\\n\\n\\nNews\\n\\n\\nConnect\\n\\n\\nContact Staff\\n\\n\\nFind an Office\\n\\n\\nSocial Media\\n\\n\\nAdministration and Educator Teams\\n\\n\\nCommunications and Information Technology\\n\\n\\nIllini Science Policy Program\\n\\n\\nIllinois Indiana Sea Grant\\n\\n\\nMaster Gardeners\\n\\n\\nMaster Naturalists\\n\\n\\nPlant Clinic\\n\\n\\nResearch and Education Centers\\n\\n\\nSea Grant\\n\\n\\nEnergy Education Council\\n\\n\\nHome and Community Education\\n\\n\\nPlanning, 
Reporting, and Evaluation\\n\\n\\n\\n\\nImpact\\n\\n\\n2024 Extension Collaboration Grants\\n\\n\\nEconomic and Functional Impact\\n\\n\\nOur Impact in Agriculture and AgriBusiness\\n\\n\\nSNAP-Education Impact\\n\\n\\nExtension Funded Research Projects\\n\\n\\nOur Impact in Agriculture and Natural Resources\\n\\n\\nOur Impact in Community & Economic Development\\n\\n\\nOur Impact in Family and Consumer Sciences\\n\\n\\nOur Impact in Integrated Health Disparities\\n\\n\\n\\n\\nAbout\\n\\n\\nStrategic Planning\\n\\n\\nExtension Councils\\n\\n\\nCareers\\n\\n\\nProfessional Associations\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBreadcrumb\\n\\n\\nHome\\n\\n\\nBlogs\\n\\n\\nPesticide News\\n\\n\\n EPA releases draft herbicide strategy; public comment period open \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPesticide News\\n\\n\\nEPA releases draft herbicide strategy; public comment period open \\n\\n\\n\\n\\n\\n\\nAugust 2, 2023\\n\\n\\n\\nMichelle Wiesbrook\\n\\n\\n \\n\\nStrategy aims to increase efficiencies while supporting farmers, herbicide users with continued use of important pesticide tools. \\xa0\\nThe U.S. Environmental Protection Agency (EPA) released the draft Herbicide Strategy for public comment, a major milestone in the Agency’s work to protect federally endangered and threatened (listed) species from conventional agricultural herbicides. The Strategy describes proposed early mitigations for more than 900 listed species and designated critical habitats to reduce potential impacts from the agricultural use of these herbicides while helping to ensure the continued availability of these important pesticide tools.\\n“Ensuring safe use of herbicides is an important part of EPA’s mission to protect the environment,” said Deputy Assistant Administrator for Pesticide Programs for the Office of Chemical Safety and Pollution Prevention Jake Li. “This strategy reflects one of our biggest steps to support farmers and other herbicide users with tools for managing weeds, while accelerating EPA’s ability to protect many endangered species that live near agricultural areas.”\\nThe Strategy is part of EPA’s ongoing efforts to develop a multichemical, multispecies approach toward meeting its obligations under the Endangered Species Act (ESA). EPA’s traditional chemical-by-chemical, species-by-species approach to meeting these obligations is slow and costly.\\xa0 As a result, EPA has completed its ESA obligations for less than 5% of its actions, creating legal vulnerabilities for the Agency, increased litigation, and uncertainty for farmers and other pesticide users about their continued ability to use many pesticides. The Strategy — which is primarily designed to provide early mitigations that minimize impacts to over 900 listed species — is one of EPA’s most significant proposals to help overcome these challenges.\\nEPA focused the Strategy on agricultural crop uses in the lower 48 states because hundreds of millions of pounds of herbicides (and plant growth regulators) are applied each year, which is substantially more than for non-agricultural uses of herbicides and for other pesticide classes (e.g., insecticides, fungicides). Additionally, hundreds of listed species in the lower 48 states live in habitats adjacent to agricultural areas. 
The proposed mitigations in the Strategy would address the most common ways that conventional agricultural herbicides might impact these listed species', 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'News', 's3_path': 'courses/cropwizard/News.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental infrastructure…\\n\\n\\n \\nAddress health challenges with holistic solutions\\nSeptember 1, 2023\\n\\nURBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed the Autumn Health Picks 2023 webinar series. This series is part of the Community Seminar Series, and it provides an opportunity for…\\n\\n\\n \\nDo artificial roosts help bats? Illinois experts say more research needed\\nSeptember 1, 2023\\n\\nURBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of Illinois Urbana-Champaign\\xa0suggests the structures…\\n\\n\\nMore news\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nBlock Reference\\n\\nNews\\n\\n\\n\\nBy topic\\n- Any -4-H4-H alumni4-H FoundationAccessibilityAdministrationAgingAgricultural safetyAgricultureAnimal scienceAnnie's ProjectAnnualsAutismBeefBeef CattleBeekeepingBeesBeneficial InsectsBirdsBrain healthBulbsBullyingBusiness developmentBusiness retention and expansionButterflyCampingCaregivingCensusCertified Crop AdvisorCertified Livestock ManagerChild nutritionChronic diseasesCitizen ScienceCivic engagementClimateCollege ReadinessCommercial agricultureCommercial Vegetable ProductionCommunicationCommunity developmentCommunity gardenCommunity healthCommunity planningCommunity resiliencyCompostingConservationConsumer economicsCornCover cropsCreditCrop diseaseCropsDairy CattleDebt managementDementia Alzheimer’s diseaseDiabetesDicambaDisaster preparednessDiversity Equity InclusionDowntown developmentDrainageDronesEarly childhoodEconomic developmentEDEN Ready BusinessEFNEPElder careEmergency foodEnergyEnergy conservationEnergy efficiencyEntomologyEntrepreneurshipEnvironmentEstate planningExpensesFacultyFamily lifeFarm business managementFarm safetyFarmers marketsFinancial ExploitationFinancial planningFinancial wellnessFlowersFood accessFood PreservationFood safetyFood sanitationForestryFruitsFungicideGardeningGrassesHayHealthHealth CareHealthy cookingHealthy eatingHempHerbicideHerbsHolidaysHome OwnershipHorticultureHouseplantsIdentity TheftInclusionINEPInformation TechnologyInsect PestsInsecticideInsects and pestsInsuranceIntegrated Health DisparitiesIntegrated pest managementInvasive speciesInvestingLandscape architectureLandscape designLawn careLeadershipLeadership developmentLife skillsLivestockLocal foods and small farmsLocal governmentManaging stressManure managementMarketingMaster GardenersMaster NaturalistMeeting ManagementMental healthMindfulnessMoney MentorsMyPINative plantsNavigating 
differenceNutritionNutrition educationObesity preventionOrnamentalOutdoor SkillsParentingPasturePerennialsPesticidePesticide LabelPhysical ActivityPlant ClinicPlant diseasePlant health carePollinator HabitatPondsPoultryPoverty simulationPrivate/Commercial ApplicatorProfessional Development CEU CPDUPSEP trainingReal ColorsRecyclingRelationshipsResilienceRoboticsRosesSafetyShooting sportsShrubsSmall farmsSmart MeterSNAP-EdSocial-emotional healthSoilSoybeansSpecialty CropsSpendingState 4-H OfficeSTEMSubstance UseSustainable agricultureSwineTaxesTeam buildingTeenagersTime managementTrauma informed Adverse Childhood ExperiencesTree fruitsTreesTurfUrban AgricultureUrban gardeningVegetable gardeningVegetablesVolunteersWaterWeatherWeedsWellnessWheatWhole grainsWildlifeWorkforce developmentWorkplace wellnessYouth and MoneyYouth development\\n\\n\\nSearch\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 6, 2023\\n\\nIllinois Extension selected to establish environmental assistance center to help Illinois communities\\n\\n \\n URBANA, Ill. — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nAddress health challenges with holistic solutions\\n\\n \\n URBANA, Ill\", 'url': 'https://extension.illinois.edu/global/news-releases'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': 4, 'readable_filename': 'extension.pdf', 's3_path': 'courses/cropwizard/extension.pdf', 'text': \"Illinois Pesticide Review \\nVolume 36, No. 4, July/August \\n4\\nproposed mitigations. Under the Strategy, EPA \\nproposes to identify and begin mitigating for \\npotential impacts even before EPA completes \\nESA consultations. These early mitigations \\nshould expedite EPAs ability to fully comply \\nwith the ESA by reducing impacts to listed \\nspecies before EPA conducts most of its ESA \\nanalysis. Adopting mitigations earlier will \\nalso allow EPA and FWS to use their resources \\nmore efficiently in ESA consultations.\\nThe Strategys proposed mitigations to reduce \\nspray drift, runoff, and erosion and thereby \\nreduce the potential exposure reflect practices \\nthat can be readily implemented by growers \\nand identified by pesticide applicators and \\nthat provide flexibility for growers to select \\nthe mitigations that work best for them. The \\nStrategy also gives credit to landowners who \\nare already implementing certain measures to \\nreduce pesticide runoff. For example, existing \\nvegetated ditches and water retention ponds \\nwill qualify for credits that reduce the need for \\nadditional mitigation. Similarly, the Strategy \\nwould require less mitigation on flat lands, \\nwhich are less prone to runoff, and in many \\nwestern states, which typically experience \\nless rain to carry pesticides off fields. 
The \\nStrategy also describes how the Agency could \\nadd other mitigation practices to the menu of \\nmitigation options in the future, particularly \\nto incorporate emerging technology or new \\ninformation on the effectiveness of specific \\npractices.\\nDraft Herbicide Framework \\nDocument\\nThe draft framework document titled, Draft \\nHerbicide Strategy Framework to Reduce \\nExposure of Federally Listed Endangered \\nand Threatened Species and Designated \\nCritical Habitats from the Use of Conventional \\nAgricultural Herbicides is 97 pages long and \\nincludes a discussion of both the proposed \\nscope of the Herbicide Strategy and the \\nproposed decision framework to determine \\nthe level of mitigation needed for a particular \\nconventional agricultural herbicide. The draft \\nframework document also includes examples \\nof how the proposed herbicide mitigation \\nwould apply to some of the herbicides for \\nwhich EPA has conducted case studies as well \\nas EPA's proposed implementation plan.\\nSome of the accompanying documents are \\nquite lengthy. The Herbicide Strategy Case \\nStudy Summary and Process is 666 pages! \\nCoincidence on the number? Im not sure. I \\nhavent made it through it all yet. The primary \\nthing I gathered from perusing through \\nthe spreadsheet files was that managing \\nthese complexities must be a nightmare. \\nThe document, Application of EPAs Draft \\nHerbicide Strategy Framework Through \\nScenarios that Represent Crop Production \\nSystems is only 17 pages long and includes \\npossible scenarios. Examples 1 and 2 would \\nbe particularly fitting for Illinois corn and \\nsoybean producers. These are shared to \\nhelp producers better understand how these \\nmitigation practices may be used. \\nIn its ESA Workplan and ESA Workplan \\nUpdate, EPA outlined this and other ESA \\ninitiatives to develop early mitigations \\nthat provide listed species with practical \\nprotections from pesticides. The Strategy \\ncomplements those other initiatives, such \\nas targeted mitigations for listed species \\nparticularly vulnerable to pesticides and \\nInterim Ecological Mitigations that EPA \\nhas begun incorporating under the Federal \\nInsecticide, Fungicide, and Rodenticide Act. \\nThe draft framework describes how EPA would \\napply the mitigations in the Strategy compared \\nto mitigations in the other initiatives. \\nWhat can you do? Submit \\ncomments! Learn more!\\nThe draft herbicide framework and \\naccompanying documents are available in \\ndocket EPA-HQ-OPP-2023-0365 for public \\ncomment for 60 days. Comments are due \\nSeptember 22, 2023. Agricultural pesticide \\nusers are encouraged to learn about EPAs\", 'url': 'https://extension.illinois.edu/sites/default/files/2023-08/IPR%20Volume%2036%20Issue%204%20July%20August%20SECURE.pdf'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': 3, 'readable_filename': 'extension.pdf', 's3_path': 'courses/cropwizard/extension.pdf', 'text': 'Illinois Pesticide Review \\nVolume 36, No. 4, July/August \\n3\\nIts important to consider that one should \\ntake the test in the language he or she is \\nmost comfortable with. If someone has \\nbeen studying the material in English, dont \\nbe surprised if they opt to take the exam in \\nEnglish too. \\nIn the end, it all comes down to good \\ncommunication between you and your \\nemployees. 
It could be that they dont know \\nyet which learning method would work best \\nfor them and theyll need to try a few things. \\nTheyll appreciate you taking the time to ask \\nthem and work with them to help ensure their \\nsuccess.\\nMichelle Wiesbrook \\nEPA Releases Draft \\nHerbicide Strategy, Public \\nComment Period Open \\nStrategy aims to increase \\nefficiencies while supporting \\nfarmers, herbicide users with \\ncontinued use of important \\npesticide tools \\nThe U.S. Environmental Protection Agency \\n(EPA) released the draft Herbicide Strategy \\nfor public comment, a major milestone in the \\nAgencys work to protect federally endangered \\nand threatened (listed) species from conven-\\ntional agricultural herbicides. The Strategy \\ndescribes proposed early mitigations for more \\nthan 900 listed species and designated criti-\\ncal habitats to reduce potential impacts from \\nthe agricultural use of these herbicides while \\nhelping to ensure the continued availability of \\nthese important pesticide tools.\\nEnsuring safe use of herbicides is an \\nimportant part of EPAs mission to protect \\nthe environment, said Deputy Assistant \\nAdministrator for Pesticide Programs for \\nthe Office of Chemical Safety and Pollution \\nPrevention Jake Li. This strategy reflects one \\nof our biggest steps to support farmers and \\nother herbicide users with tools for managing \\nweeds, while accelerating EPAs ability to \\nprotect many endangered species that live near \\nagricultural areas.\\nThe Strategy is part of EPAs ongoing efforts \\nto develop a multichemical, multispecies \\napproach toward meeting its obligations \\nunder the Endangered Species Act (ESA). \\nEPAs traditional chemical-by-chemical, \\nspecies-by-species approach to meeting these \\nobligations is slow and costly. As a result, EPA \\nhas completed its ESA obligations for less than \\n5% of its actions, creating legal vulnerabilities \\nfor the Agency, increased litigation, and \\nuncertainty for farmers and other pesticide \\nusers about their continued ability to use many \\npesticides. The Strategy which is primarily \\ndesigned to provide early mitigations that \\nminimize impacts to over 900 listed species \\nis one of EPAs most significant proposals to \\nhelp overcome these challenges.\\nEPA focused the Strategy on agricultural crop \\nuses in the lower 48 states because hundreds \\nof millions of pounds of herbicides (and plant \\ngrowth regulators) are applied each year, \\nwhich is substantially more than for non-\\nagricultural uses of herbicides and for other \\npesticide classes (e.g., insecticides, fungicides). \\nAdditionally, hundreds of listed species in \\nthe lower 48 states live in habitats adjacent to \\nagricultural areas. The proposed mitigations \\nin the Strategy would address the most \\ncommon ways that conventional agricultural \\nherbicides might impact these listed \\nspecies. More specifically, EPA developed \\npotential mitigation options for conventional \\nagricultural herbicides to reduce pesticide \\ntransport via spray drift and runoff/erosion \\nthat could result in exposure to listed plants \\nand listed animals that depend on plants.\\nEPA expects that the Strategy will increase \\nthe efficiency of future ESA consultations \\non herbicides with the U.S. 
Fish and Wildlife \\nService (FWS), which has authority over most \\nlisted species that could benefit from the', 'url': 'https://extension.illinois.edu/sites/default/files/2023-08/IPR%20Volume%2036%20Issue%204%20July%20August%20SECURE.pdf'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'News', 's3_path': 'courses/cropwizard/News.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nAddress health challenges with holistic solutions\\n\\n \\n URBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nDo artificial roosts help bats? Illinois experts say more research needed\\n\\n \\n URBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 29, 2023\\n\\nButterflies can eat to live, live to eat in a balanced garden\\n\\n \\n URBANA, Ill. — A favorite thing about visiting gardens in the summer is catching sight of a butterfly enjoying nectar from a brightly colored zinnia or a monarch caterpillar munching on a milkweed leaf. When designing a butterfly garden, expand and balance plant selection to provide more than...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 24, 2023\\n\\nField Day event plans to keep beef cattle producers up on trends for their herds\\n\\n \\n URBANA, Ill. — Beef cattle producers will gain insights and stay up to date on current research from cow/calf patterns to alternative forages and more at the Orr Beef Research Center's Field Day on September 6.\\xa0The meeting will be held at the John Wood Community College Ag Center located west of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 23, 2023\\n\\nBuild drought-tolerant gardens this fall for next summer’s blooms\\n\\n \\n URBANA, Ill. — Many Illinois gardens are entering the fall stressed from the lack of summer rains combined with scorching hot temperatures. These conditions easily stress some plants; however, many plants quickly adapt to hot, dry conditions. Drought-tolerant plants are not only tough and...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 23, 2023\\n\\nIllinois Extension exhibits research, programs, innovation at 2023 Farm Progress Show\\n\\n \\n DECATUR, Ill. — The Farm Progress Show returns to Decatur, Aug. 29-31, and\\xa0University of Illinois Extension will be on-site in the College of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nBuild privacy with plants for secret gardens\\n\\n \\n URBANA, Ill.\\xa0— Plants serve a lot of purposes in the landscape. One of which is to add some privacy. Screening plants can help define and give purpose to a space. 
Homeowners may wish to screen a particular area or transparency in the landscape, creating interest in what lies beyond.\\xa0\\n\\n...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nIllinois Extension investing in the future of farming across the state\\n\\n \\n URBANA, Ill. — Helping Illinois farmers grow thriving crops and livestock has always been at the heart of University of Illinois Extension’s mission. Using feedback received from farmers and other agricultural stakeholders through a 2022 survey,...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nExtraordinary 4-H volunteers honored\\n\\n \\n SPRINGFIELD, Ill\", 'url': 'https://extension.illinois.edu/global/news-releases'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'Extension Funded Research Projects', 's3_path': 'courses/cropwizard/Extension_Funded_Research_Projects.html', 'text': '. Today’s most pressing issues are related to climate change. An understanding of how human activity impacts the environment helps us make sense of how a zoonotic transfer of a virus to humans causes a global pandemic, and how rising temperatures increase the frequency and intensity of wildfires and storms. Environmental Education (EE) connects people to their environments, instilling values and knowledge that leads to better environmental stewardship of local environments and natural resources. There are several well-established EE programs offered throughout Cook County by Illinois\\xa0Extension and the Forest Preserve of Cook County (FPCC). Missing from these opportunities are programs available to middle school-aged children, the age best suited for EE experiences because their environmental sensitivities are still developing and early experiences in nature have been shown to have impacts throughout adulthood (Boom, 2017). This proposal seeks to develop a program, Illinois Inquiry Adventures in Nature (IIAN), for middle school children and their families, suitable for small groups during the pandemic\\xa0and expanding in scope to include classrooms when safe. A series of four seasonal activities\\xa0and teacher workshops\\xa0will be created to bring groups to their local green spaces, including FPCC sites. Groups will engage in open-ended investigations based on their own observations and questions, complete activities at home\\xa0and enact local community conservation projects. Research will be conducted to examine how individuals’ connections to nature and environmental stewardship change over the course of their participation. This program fills a local need in Cook County, creating a continuum of opportunities across ages, and will be made available to all residents in Illinois, and nationwide, encouraging the next generation of environmental leaders.\\n\\n\\nAssessing the Needs and Connecting Young & Beginning Farmers with Extension Resources in Northern Illinois\\nAwarded to: Illinois Extension in the College of ACES\\nPrincipal Investigator: Joseph Malual\\nCo-Investigators:\\nNikki Keltner, Extension program coordinator, Illinois Extension\\nGrant McCarty, Extension educator, Illinois Extension\\nHope Michelson, assistant professor,\\xa0Department of Agricultural & Consumer Economics\\nPROJECT SUMMARY\\nMore and more young people are engaging in small-scale farming, with many focusing on specialty crops and sustainable agricultural production. 
Despite this trend, entry into farming, which is a complex business, is challenging. Beginning farmers face serious obstacles in accessing critical assets, including startup capital to acquire land, farm equipment\\xa0and agricultural technical knowledge needed to develop a\\xa0successful agricultural practice and profitable business. The situation is complicated by lack of adequate research to understand the unique challenges facing this generation of farmers. In Illinois, there is limited research to understand how people new to farming navigate access to critical resources. This research project aims to provide a comprehensive assessment of the needs and opportunities facing young and beginning\\xa0farmers in northern Illinois. We will identify and map farms owned by young and beginning farmers, examine their experiences and strategies used to leverage critical startup assets, including farmland and equipment, financial capital\\xa0and agricultural technical assistance, as well as strategies for marketing agricultural products. This project will build relations and connect this new audience with Extension resources, which can help\\xa0beginning farmers develop the knowledge and skills necessary for solving critical problems. Through interdisciplinary collaboration between Extension educators and specialists with faculty at the University of Illinois at Urbana-Champaign, this research will generate useful knowledge that can help beginning farmers, businesses\\xa0and communities make informed decisions and plan for future support of those new to farming. The\\xa0knowledge and practices discovered and identified through this project will be shared with Extension across the state. Extension educators can build on this knowledge to plan and deliver educational programming that empowers farmers to develop financially viable and sustainable farms. Those successful endeavors will, in turn, help to revitalize their rural communities.\\n\\n\\nNew Immigrant Foodways\\nAwarded to: Department of History in the College of Liberal Arts and Sciences\\nPrincipal Investigator: Teresa Barnes\\nCo-Investigators:\\nGisela Sin, director, Center for Latin American and Caribbean Studies\\nMargarita Teran-Garcia, Extension specialist, Illinois Extension\\nPROJECT SUMMARY\\nThis project will leverage new and existing research with immigrant communities about challenges and strategies in adapting home foodways to American food systems to create short instructional videos related to nutrition and cooking. The project addresses a complex issue at the intersection of three critical areas of Extension’s mission: food, health\\xa0and environment. 
It addresses the public need of new immigrant families to access information and expertise and develop sustainable strategies when faced with the bewildering array of often unhealthy food options in the USA', 'url': 'https://extension.illinois.edu/global/extension-funded-research-projects'}]}\n" + ] + } + ], + "source": [ + "print(course_df[0]['messages'][1])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# user email is in DF, outside of convo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Conversations to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "22\n", + "22\n" + ] + } + ], + "source": [ + "user_queries = []\n", + "metadata = []\n", + "i = 1\n", + "\n", + "# log conversation instead of individual messages\n", + "for index, row in df.iterrows():\n", + " user_email = row['user_email']\n", + " convo = row['convo']\n", + " messages = convo['messages']\n", + " first_message = messages[0]['content']\n", + " user_queries.append(first_message)\n", + " # create metadata for multi-turn conversation\n", + " conversation = \"\"\n", + " for message in messages:\n", + " # string of role: content, role: content, ...\n", + " conversation += \"\\n>>> \" + message['role'] + \": \" + message['content'] + \"\\n\"\n", + " # add to metadata\n", + " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message}\n", + " metadata.append(metadata_row)\n", + " i += 1\n", + "\n", + "print(len(user_queries))\n", + "print(len(metadata))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'course': 'cropwizard', 'conversation': '\\n>>> user: What type of pest is most harmful to corn crops in Illinois and how can I prevent it?\\n\\n>>> assistant: The most harmful pest to corn crops in Illinois is the corn rootworm. Corn rootworms, specifically the western corn rootworm (Diabrotica virgifera virgifera), are the most impactful species in Illinois and the North Central U.S. These pests can cause significant damage to corn plants by feeding on the roots, leading to reduced plant vigor and yield.\\n\\nTo prevent corn rootworm infestations, several measures can be taken:\\n\\n1. Crop rotation: Implementing a crop rotation strategy by alternating corn with non-host crops, such as soybeans, can help reduce corn rootworm populations. The larvae are not capable of surviving on soybean or other broadleaf plants, so rotating crops disrupts their life cycle.\\n\\n2. Utilize Bt hybrids: Planting corn hybrids that have been genetically modified to express Bacillus thuringiensis (Bt) proteins targeting corn rootworm can provide effective control. These Bt hybrids produce toxins that are toxic to the pests, reducing their feeding and damage to corn plants.\\n\\n3. Insecticide treatments: Seed-applied insecticides containing neonicotinoid or diamide insecticides can be used to protect corn seedlings from early-season pests, including corn rootworms. Soil-applied insecticides can also be applied at planting to control corn rootworms and other early-season pests.\\n\\n4. 
Monitoring and scouting: Regular monitoring of corn fields for signs of corn rootworm damage, such as root feeding and lodging, is essential. Scouting helps determine if pest populations have reached economic thresholds that require action, such as insecticide application.\\n\\n5. Integrated Pest Management (IPM): Implementing an IPM approach involves combining multiple pest management strategies, including cultural practices, biological control, and chemical control, to minimize the impact of corn rootworms and other pests. This holistic approach aims to reduce reliance on chemical treatments and promote sustainable pest management.\\n\\nIt is important to consult with local agricultural extension services, seed dealers, or agricultural experts for specific recommendations tailored to your region and field conditions to effectively manage corn rootworm infestations.\\n', 'conversation_id': '3e5d4861-b128-4c64-96ac-87c74f3217e5', 'id': 2, 'user_email': 'avd6@illinois.edu', 'first_query': 'What type of pest is most harmful to corn crops in Illinois and how can I prevent it?'}\n" + ] + } + ], + "source": [ + "print(metadata[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(22, 1536)\n" + ] + } + ], + "source": [ + "# initialize langchain OpenAI embeddings model\n", + "embeddings_model = OpenAIEmbeddings()\n", + "metadata = pd.DataFrame(metadata)\n", + "embeddings = embeddings_model.embed_documents(user_queries)\n", + "embeddings = np.array(embeddings)\n", + "print(embeddings.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 14:53:44.900 | INFO | nomic.project:_create_project:779 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", + "2023-09-07 14:53:45.794 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-09-07 14:53:45.798 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-09-07 14:53:46.743 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-09-07 14:53:46.744 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-09-07 14:53:47.568 | INFO | nomic.project:create_index:1111 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n", + "2023-09-07 14:53:47.570 | INFO | nomic.atlas:map_embeddings:139 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 14:53:48.872 | INFO | nomic.project:create_index:1111 - Created map `cropwizard_convo_index_2` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71\n" + ] + }, + { + "data": { + "text/html": [ + "Atlas Projection cropwizard_convo_index_2. Status Topic Modeling. 
view online" + ], + "text/plain": [ + "cropwizard_convo_index_2: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an Atlas project\n", + "project_name = \"Conversation Map for \" + course\n", + "index_name = course + \"_convo_index_2\"\n", + "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='first_query',\n", + " name=project_name,\n", + " colorable_fields=['conversation_id', 'first_query'])\n", + "print(project.maps)\n", + "\n", + "project.create_index(index_name, build_topic_model=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Query-Response Pairs to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "user queries: 1\n", + "metadata 1\n", + "------------------------\n" + ] + } + ], + "source": [ + "user_queries = []\n", + "metadata = []\n", + "i = 1\n", + "for convo in course_df:\n", + " messages = convo['messages']\n", + " print(len(messages))\n", + " pair_check = 0\n", + " for message in messages:\n", + " if message['role'] == 'user' and pair_check == 0:\n", + " query = message['content']\n", + " metadata_row = {'course': course, 'query': message['content']}\n", + " #print(\"metadata row: \", metadata_row)\n", + " pair_check += 1\n", + " if message['role'] == 'assistant' and pair_check == 1:\n", + " metadata_row['response'] = message['content']\n", + " metadata_row['id'] = i \n", + " #print(\"response metadata row: \", metadata_row)\n", + " i += 1\n", + " pair_check += 1\n", + " if pair_check == 2:\n", + " # one conversation pair is complete\n", + " user_queries.append(query)\n", + " metadata.append(metadata_row)\n", + " pair_check = 0\n", + "\n", + " \n", + "print(\"user queries: \", len(user_queries))\n", + "print(\"metadata\", len(metadata))\n", + "print(\"------------------------\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(590, 1536)\n" + ] + } + ], + "source": [ + "# initialize langchain OpenAI embeddings model\n", + "embeddings_model = OpenAIEmbeddings()\n", + "metadata = pd.DataFrame(metadata)\n", + "embeddings = embeddings_model.embed_documents(user_queries)\n", + "embeddings = np.array(embeddings)\n", + "print(embeddings.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-31 19:55:40.276 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE408FA23` in organization `dabholkar.asmita`\n", + "2023-08-31 19:55:41.466 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-31 19:55:41.491 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:02, 2.43s/it]\n", + "2023-08-31 19:55:43.932 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-31 19:55:43.932 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-31 19:55:45.475 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE408FA23` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n", + "2023-08-31 19:55:45.480 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-31 19:55:47.813 | INFO | nomic.project:create_index:1111 - Created map `ECE408FA23_qr_index` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1\n" + ] + }, + { + "data": { + "text/html": [ + "Atlas Projection ECE408FA23_qr_index. Status Topic Modeling. view online" + ], + "text/plain": [ + "ECE408FA23_qr_index: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an Atlas project\n", + "project_name = \"Query-Response Map for \" + course\n", + "index_name = course + \"_qr_index\"\n", + "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='query',\n", + " name=project_name,\n", + " colorable_fields=['query'])\n", + "print(project.maps)\n", + "\n", + "project.create_index(index_name, build_topic_model=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1571\n" + ] + } + ], + "source": [ + "# cell for all course map creation\n", + "\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", + "data = response.data\n", + "print(len(data))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "126" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data)\n", + "course_names = df['course_name'].unique()\n", + "len(course_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: badm_550_ashley\n", + "(51, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:29.701 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm_550_ashley` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:31.242 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:31.255 
| WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.03it/s]\n", + "2023-08-30 15:26:32.239 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:32.241 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:33.498 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm_550_ashley` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n", + "2023-08-30 15:26:33.500 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:35.272 | INFO | nomic.project:create_index:1111 - Created map `badm_550_ashley_qr_index` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/2b997f7c-0084-4db7-8e9a-76eeb62d715b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ece120\n", + "(298, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:42.765 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:43.831 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:43.850 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:01, 1.20s/it]\n", + "2023-08-30 15:26:45.059 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:45.063 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:46.221 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n", + "2023-08-30 15:26:46.230 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:48.040 | INFO | nomic.project:create_index:1111 - Created map `ece120_qr_index` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/61277149-b12d-4b59-8bcd-e9dd29fc58a4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: badm-567-v3\n", + "(27, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:52.367 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm-567-v3` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:53.227 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:53.236 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-08-30 15:26:54.177 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:54.185 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:55.379 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm-567-v3` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n", + "2023-08-30 15:26:55.379 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:57.012 | INFO | nomic.project:create_index:1111 - Created map `badm-567-v3_qr_index` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/e6c9798b-c154-43e7-917e-dd5cb71f116f\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: new-weather\n", + "(98, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:02.087 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for new-weather` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:03.117 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:03.125 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-08-30 15:27:04.071 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:04.071 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:05.459 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for new-weather` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n", + "2023-08-30 15:27:05.461 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:07.001 | INFO | nomic.project:create_index:1111 - Created map `new-weather_qr_index` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/ff717c79-50cd-468b-9fcc-b391c8c167df\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: gies-online-mba-v2\n", + "(52, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:10.946 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for gies-online-mba-v2` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:11.862 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:11.868 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.33it/s]\n", + "2023-08-30 15:27:12.630 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:12.634 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:13.627 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for gies-online-mba-v2` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n", + "2023-08-30 15:27:13.627 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:15.546 | INFO | nomic.project:create_index:1111 - Created map `gies-online-mba-v2_qr_index` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/db13ea7a-f93d-4f97-b922-c51216d3d6e9\n", + "2023-08-30 15:27:15,670:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. 
Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:15,673:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:20,003:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:20,003:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: frontend\n", + "(24, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:28.373 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for frontend` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:29.396 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:29.405 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.11it/s]\n", + "2023-08-30 15:27:30.325 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:30.325 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:31.539 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for frontend` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n", + "2023-08-30 15:27:31.542 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:33.028 | INFO | nomic.project:create_index:1111 - Created map `frontend_qr_index` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/006903b0-bb82-4432-9975-ff7c9ca80af9\n", + "2023-08-30 15:27:33,166:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:33,166:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:37,279:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:37,281:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:41,477:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:41,481:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ECE220FA23\n", + "(193, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:50.988 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE220FA23` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:51.867 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:51.878 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.01s/it]\n", + "2023-08-30 15:27:52.904 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:52.908 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:53.929 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE220FA23` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n", + "2023-08-30 15:27:53.929 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:55.645 | INFO | nomic.project:create_index:1111 - Created map `ECE220FA23_qr_index` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/d8163c57-a2e8-41ca-90fc-043c8a9469b3\n", + "2023-08-30 15:27:55,758:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:55,759:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:59,841:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:59,841:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: Snowmass\n", + "(23, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:08.067 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Snowmass` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:09.006 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:09.014 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.20it/s]\n", + "2023-08-30 15:28:09.854 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:09.858 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:10.994 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Snowmass` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n", + "2023-08-30 15:28:10.994 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:12.968 | INFO | nomic.project:create_index:1111 - Created map `Snowmass_qr_index` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/4fdea35b-cda2-434e-afd1-e46e01430a97\n", + "2023-08-30 15:28:13,066:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:13,068:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:17,200:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:17,200:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:21,297:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:21,297:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NPRE247\n", + "(54, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:29.951 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NPRE247` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:31.043 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:31.051 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.64s/it]\n", + "2023-08-30 15:28:32.709 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:32.714 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:33.787 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NPRE247` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n", + "2023-08-30 15:28:33.790 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:35.347 | INFO | nomic.project:create_index:1111 - Created map `NPRE247_qr_index` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/03471048-46aa-473c-b599-0bc812c679c0\n", + "2023-08-30 15:28:35,479:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:35,484:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:39,590:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:39,594:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: your-awesome-course\n", + "(30, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:50.102 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for your-awesome-course` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:51.013 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:51.022 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.10it/s]\n", + "2023-08-30 15:28:51.943 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:51.945 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:52.904 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for your-awesome-course` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n", + "2023-08-30 15:28:52.907 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:54.405 | INFO | nomic.project:create_index:1111 - Created map `your-awesome-course_qr_index` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/e58f20e3-fa19-4c4c-8764-a185e0691c85\n", + "2023-08-30 15:28:54,549:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:54,549:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:58,646:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:58,653:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: pract\n", + "(44, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:07.007 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for pract` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:08.243 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:08.251 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.11s/it]\n", + "2023-08-30 15:29:09.368 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:09.368 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:10.392 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for pract` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n", + "2023-08-30 15:29:10.392 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:12.031 | INFO | nomic.project:create_index:1111 - Created map `pract_qr_index` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/b15570eb-4db4-4b6f-9b4d-e80309d2dcb3\n", + "2023-08-30 15:29:12,113:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:12,115:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:16,201:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:16,209:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:20,282:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:20,285:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ece120FL22\n", + "(53, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:28.994 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120FL22` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:29.838 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:29.846 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.18it/s]\n", + "2023-08-30 15:29:30.708 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:30.710 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:31.828 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120FL22` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n", + "2023-08-30 15:29:31.828 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:33.354 | INFO | nomic.project:create_index:1111 - Created map `ece120FL22_qr_index` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/eb4e553d-ecd3-4b11-9d75-468108ab08e2\n", + "2023-08-30 15:29:33,458:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:33,458:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:37,544:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:37,545:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:41,634:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:41,642:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: Law794-TransactionalDraftingAlam\n", + "(21, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:49.618 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Law794-TransactionalDraftingAlam` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:50.718 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:50.731 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.11s/it]\n", + "2023-08-30 15:29:51.849 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:51.851 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:53.034 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Law794-TransactionalDraftingAlam` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n", + "2023-08-30 15:29:53.034 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:54.605 | INFO | nomic.project:create_index:1111 - Created map `Law794-TransactionalDraftingAlam_qr_index` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/9792fd4e-2196-4e39-bded-cc2bfd42abbf\n", + "2023-08-30 15:29:54,728:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. 
Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:54,731:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:58,804:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:58,804:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NCSA\n", + "(84, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:07.528 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSA` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:08.422 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:08.431 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.09it/s]\n", + "2023-08-30 15:30:09.361 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:09.361 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:10.325 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSA` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n", + "2023-08-30 15:30:10.325 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:11.917 | INFO | nomic.project:create_index:1111 - Created map `NCSA_qr_index` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/2ce836ec-557b-4037-9ebd-d3e8982c0926\n", + "2023-08-30 15:30:12,004:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:12,004:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:16,092:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:16,092:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:20,157:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:20,164:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NCSADelta\n", + "(22, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:28.362 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSADelta` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:29.318 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:29.326 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.10it/s]\n", + "2023-08-30 15:30:30.246 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:30.251 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:31.253 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSADelta` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n", + "2023-08-30 15:30:31.254 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:33.104 | INFO | nomic.project:create_index:1111 - Created map `NCSADelta_qr_index` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/b6e64fef-a829-435f-89b5-ed1a44c05202\n", + "2023-08-30 15:30:33,214:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:33,214:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:37,289:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:37,290:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:41,376:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:41,382:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NuclGPT-v1\n", + "(25, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:49.297 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NuclGPT-v1` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:50.216 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:50.222 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.23it/s]\n", + "2023-08-30 15:30:51.043 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:51.043 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:52.360 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NuclGPT-v1` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n", + "2023-08-30 15:30:52.360 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:53.900 | INFO | nomic.project:create_index:1111 - Created map `NuclGPT-v1_qr_index` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/441b7ff6-00ef-47f3-98a9-e45d327a1414\n" + ] + } + ], + "source": [ + "low_volume_courses = []\n", + "high_volume_courses = []\n", + "for course in course_names:\n", + " if course is None or course == 'ece408':\n", + " continue\n", + " \n", + " user_queries = []\n", + " metadata = []\n", + " i = 1\n", + " course_df = df[df['course_name'] == course]['convo']\n", + " for convo in course_df: # iterate through all conversations in a course\n", + " messages = convo['messages']\n", + "\n", + " # form query-response pairs out of the messages\n", + " pair_check = 0\n", + " for message in messages:\n", + " if message['role'] == 'user' and pair_check == 0:\n", + " query = message['content']\n", + " metadata_row = {'course': course, 'query': message['content']}\n", + " #print(\"metadata row: \", metadata_row)\n", + " pair_check += 1\n", + " \n", + " if message['role'] == 'assistant' and pair_check == 1:\n", + " metadata_row['response'] = message['content']\n", + " metadata_row['id'] = i \n", + " #print(\"response metadata row: \", metadata_row)\n", + " i += 1\n", + " pair_check += 1\n", + "\n", + " if pair_check == 2:\n", + " # one conversation pair is complete\n", + " user_queries.append(query)\n", + " metadata.append(metadata_row)\n", + " pair_check = 0\n", + " # after iterating every conversation in a course, create the map\n", + " if len(user_queries) < 20:\n", + " low_volume_courses.append(course)\n", + " continue\n", + "\n", + " if len(user_queries) > 500:\n", + " high_volume_courses.append(course)\n", + " continue\n", + " \n", + " metadata = pd.DataFrame(metadata)\n", + " embeddings = embeddings_model.embed_documents(user_queries)\n", + " embeddings = np.array(embeddings)\n", + " print(\"course name: \", course)\n", + " print(embeddings.shape)\n", + "\n", + " # create an Atlas project\n", + " project_name = \"Query-Response Map for \" + course\n", + " index_name = course + \"_qr_index\"\n", + " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='query',\n", + " name=project_name,\n", + " colorable_fields=['query'])\n", + " print(project.maps)\n", + "\n", + " project.create_index(index_name, build_topic_model=True)\n", + "\n", + " \n" + ] + }, + { + "cell_type": 
"code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "low volume courses: ['test-video-ingest', 'badm-567', 'test-video-ingest-28', 'pdeploy999', 'badm-350-summer', 'previewtesting1', 'localtest2', 'your-favorite-url', 'mantine', 'test-video-ingest-2', 'badm_567_v2', 'erpnext', 'mip', 'farmdoc_test_kastan-v1', 'personalstatement', 'hrc', 'csv', 'star_nox', 'badm_567', 'SPaRCEd', 'localdemo8', 'badm_567_thumbnails', 'chatbot', 'erp', 'extreme', 'rohan_atree', 'zotero-extreme', 'test-video-ingest-20', 'gies-online-mba2', 'gies-online-mba', 'careerassistant', 'weather', 'lillian-wang-blog', 'local-test5', 'demo-for-vyriad', 'ag-gpt-beta', 'rtest', 'previewdeploy', 'r2test', 'personal-statement', 'rohan_excel', 'langchain-python', 'langchain', 'ncsa-live-demo', 'rohan_atree_individual', 'meta11-test', 'HealthyLivingGuide', 'rohan', 'babreu', 'test-video-ingest-31', 'p', 'test-video-ingest-17', 'summary', 'test-video-ingest-3', 'test-video-ingest-27', 'lillian-wang-blog-2', 'python-magic', 'ansible2', 'ece408fa23', 'farmdoc_test_josh_v2', 'local-test3', 'automata', 'SpaceFlorida-GT', 'GBSI-GT', 'newnew_ncsa', 'canvas', 'gbsi-gt', 'meditation-tutorial', 'profit', 'ansible', 'langchain-docs', 'testing_url_metadata_josh', 'test-india-biodiversity', 'vyriad', 'irc-workplans', 'kastanasdfasdfasdf', 'BADM-567-GT', 'mdt', 'vercel', 'gies-graduate-hub', 'test-video-ingest-12', 'test-video-ingest-13', 'Gies-graduate-hub', 'test_new_supa_scrape', 'doe-ascr-2023', 'arize', 'final-meta-test', 'preview-meta-test', 'gies-online-mba-V3', 'FoF-Drawdown-from-INVPEIV-5-24-23', 'FIN574-GT', 'test-video-ingest-30', 'test', 'NCSA-v2', 'conversational', 'clowder-docs', 'DA', 'test-video-ingest-21', 'test-video-ingest-25', 'test-ingest-10', 'eric-test-course', 'farmdoc-test', 'test-video-ingest-22', 'Academic-paper', 'starwars', 'AdamDemo']\n", + "high volume courses: ['gpt4', 'ECE408FA23']\n" + ] + } + ], + "source": [ + "print(\"low volume courses: \", low_volume_courses)\n", + "print(\"high volume courses: \", high_volume_courses)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9d03c0b01096e6eb384f9090c12fc4ab6d85faba Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 12:43:58 -0500 Subject: [PATCH 32/61] completed workflow for multi-turn and map creation --- ai_ta_backend/main.py | 23 +- ai_ta_backend/nomic_logging.py | 102 +- ai_ta_backend/nomic_map_creation.ipynb | 1556 ------------------------ 3 files changed, 78 insertions(+), 1603 deletions(-) delete mode 100644 ai_ta_backend/nomic_map_creation.ipynb diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index dae2442f..a456828a 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -11,10 +11,9 @@ from flask_executor import Executor from sqlalchemy import JSON -from ai_ta_backend.nomic_logging import get_nomic_map, log_query_to_nomic from ai_ta_backend.vector_database import Ingest from ai_ta_backend.web_scrape import main_crawler, mit_course_download -from ai_ta_backend.nomic_logging import log_query_to_nomic, get_nomic_map, log_convo_to_nomic +from 
ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic from flask_executor import Executor app = Flask(__name__) @@ -149,7 +148,7 @@ def getTopContexts() -> Response: del ingester # background execution of tasks!! - executor.submit(log_query_to_nomic, course_name, search_query) + #executor.submit(log_query_to_nomic, course_name, search_query) response = jsonify(found_documents) response.headers.add('Access-Control-Allow-Origin', '*') @@ -400,17 +399,23 @@ def nomic_map(): def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) conversation: str = request.args.get('conversation', default='', type=str) - print("In /onResponseCompletion") - + + if course_name == '' or conversation == '': + # proper web error "400 Bad request" + abort( + 400, + description= + f"Missing one or more required parameters: 'course_name' and 'conversation' must be provided. Course name: `{course_name}`, Conversation: `{conversation}`" + ) + conversation_json = json.loads(conversation) # background execution of tasks!! response = executor.submit(log_convo_to_nomic, course_name, conversation_json) - - #response = jsonify(response) - #response.headers.add('Access-Control-Allow-Origin', '*') - return "response" + response = jsonify(response) + response.headers.add('Access-Control-Allow-Origin', '*') + return response if __name__ == '__main__': diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 04845579..99893b3d 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -9,7 +9,7 @@ import supabase nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app -NOMIC_MAP_NAME_PREFIX = 'Queries for ' +NOMIC_MAP_NAME_PREFIX = 'Conversation Map for ' def log_convo_to_nomic(course_name: str, conversation) -> str: """ @@ -25,6 +25,8 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: messages = conversation['conversation']['messages'] user_email = conversation['conversation']['user_email'] conversation_id = conversation['conversation']['id'] + + print("conversation: ", conversation) # we have to upload whole conversations # check what the fetched data looks like - pandas df or pyarrow table @@ -33,37 +35,40 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: project_name = NOMIC_MAP_NAME_PREFIX + course_name start_time = time.monotonic() - project_name = "Conversation Map for NCSA" + try: # fetch project metadata and embbeddings project = AtlasProject(name=project_name, add_datums_if_exists=True) map_metadata_df = project.maps[1].data.df map_embeddings_df = project.maps[1].embeddings.latent + last_id = int(map_metadata_df['id'].values[-1]) + print("last_id: ", last_id) if conversation_id in map_metadata_df.values: print("conversation_id exists") - + # store that convo metadata locally prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id] prev_index = prev_data.index.values[0] + embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] - embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) # delete that convo data point from Nomic - print("Prev point deleted: ", project.delete_data([prev_id])) + project.delete_data([prev_id]) # prep for new point first_message = prev_convo.split("\n")[1].split(": ")[1] + print("first_message: ", first_message) - # append new convo to prev convo - for message in messages: + # select the last 2 messages and append new convo to 
prev convo + messages_to_be_logged = messages[-2:] + for message in messages_to_be_logged: prev_convo += "\n>>> " + message['role'] + ": " + message['content'] + "\n" # update metadata metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, - "id": len(map_metadata_df)+1, "user_email": user_email, "first_query": first_message}] - + "id": last_id+1, "user_email": user_email, "first_query": first_message}] else: print("conversation_id does not exist") @@ -77,25 +82,20 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: conversation_string += "\n>>> " + message['role'] + ": " + message['content'] + "\n" metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, - "id": len(map_metadata_df)+1, "user_email": user_email, "first_query": first_message}] - - print("metadata: ", metadata) - print("user_queries: ", user_queries) - print(len(metadata)) - print(len(user_queries)) - + "id": last_id+1, "user_email": user_email, "first_query": first_message}] + # create embeddings embeddings_model = OpenAIEmbeddings() embeddings = embeddings_model.embed_documents(user_queries) - # add embeddings to project + # add embeddings to the project project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata)) project.rebuild_maps() except Exception as e: # if project doesn't exist, create it - result = create_nomic_map(course_name, embeddings, pd.DataFrame(metadata)) + result = create_nomic_map(course_name, conversation) if result is None: print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e) else: @@ -135,7 +135,7 @@ def get_nomic_map(course_name: str): return {"map_id": f"iframe{map.id}", "map_link": map.map_link} -def create_nomic_map(course_name: str, log_embeddings: np.ndarray, log_data: list): +def create_nomic_map(course_name: str, log_data: list): """ Creates a Nomic map for new courses and those which previously had < 20 queries. 1. 
fetches supabase conversations for course @@ -150,6 +150,7 @@ def create_nomic_map(course_name: str, log_embeddings: np.ndarray, log_data: lis # fetch all conversations with this new course (we expect <=20 conversations, because otherwise the map should be made already) response = supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).execute() data = response.data + df = pd.DataFrame(data) if len(data) < 19: return None @@ -157,36 +158,61 @@ def create_nomic_map(course_name: str, log_embeddings: np.ndarray, log_data: lis # get all queries for course and create metadata user_queries = [] metadata = [] - course_df = pd.DataFrame(data) - course_df = course_df['convo'] - i = 1 - for convo in course_df: - # extract all messages from convo + conversation_exists = False + + # current log details + log_messages = log_data['conversation']['messages'] + log_user_email = log_data['conversation']['user_email'] + log_conversation_id = log_data['conversation']['id'] + + for index, row in df.iterrows(): + user_email = row['user_email'] + convo = row['convo'] messages = convo['messages'] + first_message = messages[0]['content'] + user_queries.append(first_message) - # extract queries for user role from messages + # create metadata for multi-turn conversation + conversation = "" for message in messages: - if message['role'] == 'user' and message['content'] != '': - user_queries.append(message['content']) - metadata.append({'course_name': course_name, 'query': message['content'], 'id': i}) - i += 1 - - # convert query and context to embeddings - metadata.append(log_data[0]) + # string of role: content, role: content, ... + conversation += "\n>>> " + message['role'] + ": " + message['content'] + "\n" + + # append current chat to previous chat if convo already exists + if convo['id'] == log_conversation_id: + conversation_exists = True + for m in log_messages: + conversation += "\n>>> " + m['role'] + ": " + m['content'] + "\n" + + # add to metadata + metadata_row = {"course": row['course_name'], "conversation": conversation, "conversation_id": convo['id'], + "id": i, "user_email": user_email, "first_query": first_message} + metadata.append(metadata_row) + i += 1 + + # add current log as a new data point if convo doesn't exist + if not conversation_exists: + user_queries.append(log_messages[0]['content']) + conversation = "" + for message in log_messages: + conversation += "\n>>> " + message['role'] + ": " + message['content'] + "\n" + metadata_row = {"course": course_name, "conversation": conversation, "conversation_id": log_conversation_id, + "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content']} + metadata.append(metadata_row) + + print(len(metadata)) metadata = pd.DataFrame(metadata) embeddings_model = OpenAIEmbeddings() # type: ignore embeddings = embeddings_model.embed_documents(user_queries) - embeddings = np.array(embeddings) - final_embeddings = np.concatenate((embeddings, log_embeddings), axis=0) # create Atlas project project_name = NOMIC_MAP_NAME_PREFIX + course_name - index_name = course_name + "_index" - project = atlas.map_embeddings(embeddings=final_embeddings, data=metadata, # type: ignore -- this is actually the correc type, the function signature from Nomic is incomplete - id_field='id', build_topic_model=True, topic_label_field='query', - name=project_name, colorable_fields=['query']) + index_name = course_name + "_convo_index" + project = atlas.map_embeddings(embeddings=np.array(embeddings), data=metadata, # type: ignore -- this is 
actually the correc type, the function signature from Nomic is incomplete + id_field='id', build_topic_model=True, topic_label_field='first_query', + name=project_name, colorable_fields=['conversation_id', 'first_query']) project.create_index(index_name, build_topic_model=True) return f"Successfully created Nomic map for {course_name}" diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb deleted file mode 100644 index 23924157..00000000 --- a/ai_ta_backend/nomic_map_creation.ipynb +++ /dev/null @@ -1,1556 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 11:57:29,274:INFO - Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", - "2023-09-07 11:57:29,274:INFO - NumExpr defaulting to 8 threads.\n" - ] - } - ], - "source": [ - "# import required libraries\n", - "\n", - "import os\n", - "import supabase\n", - "from nomic import atlas\n", - "from dotenv import load_dotenv\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "import numpy as np\n", - "import time\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# loading environment variables\n", - "\n", - "env_path = \"../.env\"\n", - "load_dotenv(dotenv_path=env_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# initialize supabase client\n", - "\n", - "url = os.environ.get(\"SUPABASE_URL\")\n", - "key = os.environ.get(\"SUPABASE_API_KEY\")\n", - "\n", - "supabase_client = supabase.create_client(url, key)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atconvoconvo_idcourse_nameuser_email
052002023-09-07T17:03:47.705812+00:00{'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...abd2e044-fbff-455e-8c60-755cc7635182cropwizardavd6@illinois.edu
152012023-09-07T17:05:25.863567+00:00{'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...3e5d4861-b128-4c64-96ac-87c74f3217e5cropwizardavd6@illinois.edu
252162023-09-07T17:18:32.197379+00:00{'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...43ee631a-cb58-43f5-b2af-a5b91b7585cdcropwizardavd6@illinois.edu
352122023-09-07T17:16:34.267931+00:00{'id': '0129ea46-207f-47e3-be90-da143857000f',...0129ea46-207f-47e3-be90-da143857000fcropwizardavd6@illinois.edu
452172023-09-07T17:19:00.681823+00:00{'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...c6b4e4d8-4de7-4387-b4e9-411084dffea6cropwizardavd6@illinois.edu
552232023-09-07T17:22:38.970643+00:00{'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',...b5500763-7e7b-4b23-9031-cc320a51ccbfcropwizardavd6@illinois.edu
652272023-09-07T17:24:10.362647+00:00{'id': 'd410955f-4398-4869-b395-e6b659cc2d06',...d410955f-4398-4869-b395-e6b659cc2d06cropwizardavd6@illinois.edu
752092023-09-07T17:14:43.518617+00:00{'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',...0ecd2c05-772a-42aa-b29a-0a892bd0e9abcropwizardavd6@illinois.edu
852222023-09-07T17:21:29.223343+00:00{'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',...c82056a0-2d67-4ce8-82e3-86a30f1f6dc0cropwizardavd6@illinois.edu
952242023-09-07T17:22:54.856839+00:00{'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',...2316bbd7-61f3-44aa-a79e-bb42bd688c47cropwizardavd6@illinois.edu
1052262023-09-07T17:23:27.644745+00:00{'id': '66abfe85-bb04-456e-8709-89f9aafe5508',...66abfe85-bb04-456e-8709-89f9aafe5508cropwizardavd6@illinois.edu
1152282023-09-07T17:24:41.32465+00:00{'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',...175ad6b2-3bf2-4889-b2de-a18961ee8ecbcropwizardavd6@illinois.edu
1252322023-09-07T17:30:05.770146+00:00{'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',...f9859e36-bf76-40ab-9413-91ef6663dbd6cropwizardavd6@illinois.edu
1352332023-09-07T17:30:52.749867+00:00{'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',...bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2cropwizardavd6@illinois.edu
1452342023-09-07T17:31:19.801611+00:00{'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',...ecaf3228-78f3-49f7-b46d-3a5c3d5b62fdcropwizardavd6@illinois.edu
1552372023-09-07T17:36:14.68431+00:00{'id': 'edead825-12df-417c-af40-059e83067c69',...edead825-12df-417c-af40-059e83067c69cropwizardavd6@illinois.edu
1652382023-09-07T17:36:42.984907+00:00{'id': 'bc44d229-327a-452d-a386-8868216a1bd2',...bc44d229-327a-452d-a386-8868216a1bd2cropwizardavd6@illinois.edu
1752412023-09-07T17:37:22.134543+00:00{'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',...ff7a1c27-e126-49db-be79-6deaefcffec3cropwizardavd6@illinois.edu
1853042023-09-07T19:45:21.73541+00:00{'id': '6226b153-356a-408c-9483-49ef5808538c',...6226b153-356a-408c-9483-49ef5808538ccropwizardavd6@illinois.edu
1953052023-09-07T19:46:03.626639+00:00{'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',...e9edae6b-b7e1-46a8-b5e8-6215890a2a01cropwizardavd6@illinois.edu
2053062023-09-07T19:46:36.076704+00:00{'id': 'b2116035-da7b-4136-878d-66a10098a756',...b2116035-da7b-4136-878d-66a10098a756cropwizardavd6@illinois.edu
2151952023-09-06T23:43:38.201481+00:00{'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',...543ee10e-faf0-47a8-bb1c-c040aec44ed1cropwizarddabholkar.asmita@gmail.com
\n", - "
" - ], - "text/plain": [ - " id created_at \\\n", - "0 5200 2023-09-07T17:03:47.705812+00:00 \n", - "1 5201 2023-09-07T17:05:25.863567+00:00 \n", - "2 5216 2023-09-07T17:18:32.197379+00:00 \n", - "3 5212 2023-09-07T17:16:34.267931+00:00 \n", - "4 5217 2023-09-07T17:19:00.681823+00:00 \n", - "5 5223 2023-09-07T17:22:38.970643+00:00 \n", - "6 5227 2023-09-07T17:24:10.362647+00:00 \n", - "7 5209 2023-09-07T17:14:43.518617+00:00 \n", - "8 5222 2023-09-07T17:21:29.223343+00:00 \n", - "9 5224 2023-09-07T17:22:54.856839+00:00 \n", - "10 5226 2023-09-07T17:23:27.644745+00:00 \n", - "11 5228 2023-09-07T17:24:41.32465+00:00 \n", - "12 5232 2023-09-07T17:30:05.770146+00:00 \n", - "13 5233 2023-09-07T17:30:52.749867+00:00 \n", - "14 5234 2023-09-07T17:31:19.801611+00:00 \n", - "15 5237 2023-09-07T17:36:14.68431+00:00 \n", - "16 5238 2023-09-07T17:36:42.984907+00:00 \n", - "17 5241 2023-09-07T17:37:22.134543+00:00 \n", - "18 5304 2023-09-07T19:45:21.73541+00:00 \n", - "19 5305 2023-09-07T19:46:03.626639+00:00 \n", - "20 5306 2023-09-07T19:46:36.076704+00:00 \n", - "21 5195 2023-09-06T23:43:38.201481+00:00 \n", - "\n", - " convo \\\n", - "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',... \n", - "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',... \n", - "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',... \n", - "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',... \n", - "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',... \n", - "5 {'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',... \n", - "6 {'id': 'd410955f-4398-4869-b395-e6b659cc2d06',... \n", - "7 {'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',... \n", - "8 {'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',... \n", - "9 {'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',... \n", - "10 {'id': '66abfe85-bb04-456e-8709-89f9aafe5508',... \n", - "11 {'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',... \n", - "12 {'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',... \n", - "13 {'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',... \n", - "14 {'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',... \n", - "15 {'id': 'edead825-12df-417c-af40-059e83067c69',... \n", - "16 {'id': 'bc44d229-327a-452d-a386-8868216a1bd2',... \n", - "17 {'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',... \n", - "18 {'id': '6226b153-356a-408c-9483-49ef5808538c',... \n", - "19 {'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',... \n", - "20 {'id': 'b2116035-da7b-4136-878d-66a10098a756',... \n", - "21 {'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',... 
\n", - "\n", - " convo_id course_name \\\n", - "0 abd2e044-fbff-455e-8c60-755cc7635182 cropwizard \n", - "1 3e5d4861-b128-4c64-96ac-87c74f3217e5 cropwizard \n", - "2 43ee631a-cb58-43f5-b2af-a5b91b7585cd cropwizard \n", - "3 0129ea46-207f-47e3-be90-da143857000f cropwizard \n", - "4 c6b4e4d8-4de7-4387-b4e9-411084dffea6 cropwizard \n", - "5 b5500763-7e7b-4b23-9031-cc320a51ccbf cropwizard \n", - "6 d410955f-4398-4869-b395-e6b659cc2d06 cropwizard \n", - "7 0ecd2c05-772a-42aa-b29a-0a892bd0e9ab cropwizard \n", - "8 c82056a0-2d67-4ce8-82e3-86a30f1f6dc0 cropwizard \n", - "9 2316bbd7-61f3-44aa-a79e-bb42bd688c47 cropwizard \n", - "10 66abfe85-bb04-456e-8709-89f9aafe5508 cropwizard \n", - "11 175ad6b2-3bf2-4889-b2de-a18961ee8ecb cropwizard \n", - "12 f9859e36-bf76-40ab-9413-91ef6663dbd6 cropwizard \n", - "13 bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2 cropwizard \n", - "14 ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd cropwizard \n", - "15 edead825-12df-417c-af40-059e83067c69 cropwizard \n", - "16 bc44d229-327a-452d-a386-8868216a1bd2 cropwizard \n", - "17 ff7a1c27-e126-49db-be79-6deaefcffec3 cropwizard \n", - "18 6226b153-356a-408c-9483-49ef5808538c cropwizard \n", - "19 e9edae6b-b7e1-46a8-b5e8-6215890a2a01 cropwizard \n", - "20 b2116035-da7b-4136-878d-66a10098a756 cropwizard \n", - "21 543ee10e-faf0-47a8-bb1c-c040aec44ed1 cropwizard \n", - "\n", - " user_email \n", - "0 avd6@illinois.edu \n", - "1 avd6@illinois.edu \n", - "2 avd6@illinois.edu \n", - "3 avd6@illinois.edu \n", - "4 avd6@illinois.edu \n", - "5 avd6@illinois.edu \n", - "6 avd6@illinois.edu \n", - "7 avd6@illinois.edu \n", - "8 avd6@illinois.edu \n", - "9 avd6@illinois.edu \n", - "10 avd6@illinois.edu \n", - "11 avd6@illinois.edu \n", - "12 avd6@illinois.edu \n", - "13 avd6@illinois.edu \n", - "14 avd6@illinois.edu \n", - "15 avd6@illinois.edu \n", - "16 avd6@illinois.edu \n", - "17 avd6@illinois.edu \n", - "18 avd6@illinois.edu \n", - "19 avd6@illinois.edu \n", - "20 avd6@illinois.edu \n", - "21 dabholkar.asmita@gmail.com " - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# query data for one course for testing\n", - "course = 'cropwizard'\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq(\"course_name\", course).execute()\n", - "data = response.data\n", - "df = pd.DataFrame(data)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...\n", - "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...\n", - "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...\n", - "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',...\n", - "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...\n", - "Name: convo, dtype: object" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "course_df = df[df['course_name'] == course]['convo']\n", - "course_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'role': 'assistant', 'content': \"The U.S. Environmental Protection Agency (EPA)'s mission is to protect human health and the environment. As part of this mission, the EPA is involved in efforts such as developing strategies to protect endangered and threatened species from potential impacts of agricultural practices, including the use of herbicides. 
For instance, the EPA has released a draft Herbicide Strategy for public comment, aimed at proposing early mitigations for more than 900 listed species and designated critical habitats to reduce potential impacts from the agricultural use of herbicides(1^,2^,3^,4^).\\n\\n1. University of Illinois Extension\\n2. EPA releases draft herbicide strategy\\n3. EPA releases draft herbicide strategy\\n4. extension.pdf, page: 3\", 'contexts': [{'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'University of Illinois Extension', 's3_path': 'courses/cropwizard/University_of_Illinois_Extension.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental infrastructure…\\n\\n\\n \\nAddress health challenges with holistic solutions\\nSeptember 1, 2023\\n\\nURBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed the Autumn Health Picks 2023 webinar series. This series is part of the Community Seminar Series, and it provides an opportunity for…\\n\\n\\n \\nDo artificial roosts help bats? Illinois experts say more research needed\\nSeptember 1, 2023\\n\\nURBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of Illinois Urbana-Champaign\\xa0suggests the structures…\\n\\n\\nMore news\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\nUpcoming Events\\n\\n\\n\\n\\n \\n\\nRead Before You Sign: Renting & Leasing \\n\\n\\nSeptember 6, 2023\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nTechnology & Soil Health Field Day\\n\\n\\nSeptember 6, 2023\\n\\n\\nCounty\\n\\nHenry\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nPollinator Plants to Span the Season\\n\\n\\nSeptember 6, 2023\\n\\n\\nCounty\\n\\nMacoupin\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nOrr Beef Research Center Field Day\\n\\n\\nSeptember 6, 2023\\n\\n\\n\\n\\n\\n\\nMore Events\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSubtitle\\nOpens the Door to New Opportunities\\n\\n\\n\\n\\nTitle\\nLearn @ Illinois Extension\\n\\n\\n\\n\\nBody\\nThere is so much you want to do, but the challenges of time, distance, and cost create barriers to achieving those goals. You need a program that's flexible to your schedule.\\xa0Learn @ Illinois Extension\\xa0helps remove those challenge by offering\\xa0flexible online learning programs that meet your personal interests and continuing education requirements. We provide learning on your terms so you can be who you were meant to be.\\xa0\\n\\n\\n\\nOnline Courses\\n\\n\\n\\n\\n\\n \\n\\n\\nLatest Podcast\\n\\n\\n\\n\\nGood Growing\\n\\n\\nGardenbite: Three tips for a healthier lawn | #GoodGrowingThis week on the Good Growing podcast Chris shares a Gardenbite of when retired horticulture educator Richard Hentschel visited the show in 2021 to talk about fall lawn care. 
During the show, Richard spoke about three things we could all do to reduce our lawn inputs.\\xa0Want to see or...\\n\\n\\n Your browser does not support iframes, but you can visit \\n\\n\\n\\n\\n\\nMore Podcasts\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\nDig Deeper\\n\\n\\nTopics we address\\n\\n4-H Youth Development\\nBeef Cattle\\nClimate\\nCommunity Gardens\\nCommunity Planning\\nCottage Food\\nCrops\\nDisasters\\nEconomic Development\\nEnergy\\nEnvironmental Quality\\nFamily\\nFinances\\nFirewood\\nFlowers\\nFood\\nForestry\\nHealth\\nHemp\\nHerbs\\nHome Vegetable Gardening\\nIllinois Grasses\\nInsects\\nInvasives\\nLivestock\\nLocal Food Systems and Small Farms\\nLocal Government Education\\nMental Health\\nMushrooms\\nNatural Resources\\nPlant Problems\\nPlants\\nRainfall Management\\nSoil\\nSpecialty Crops\\nVaccines\\nWeather\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nShare with us\\n\\n\\n \\n\\nBody\\n\\n\\n\\n\\xa0\\n\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\n\\nView this profile on Instagram\\n\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\xa0\\n\\n\\n\\n\\xa0\\n\\xa0\\n\\nIllinois Extension (@ilextension) • Instagram photos and videos\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nBody\\n\\xa0\\n\\nUniversity of Illinois Extension\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nVolunteer with Extension\\nLearn Something New\\nRead a Blog\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIllinois Extension\\n\\n 101 Mumford Hall (MC-710)\\n \\n1301 W\", 'url': 'https://extension.illinois.edu/'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': '. The draft framework describes how EPA would apply the mitigations in the Strategy compared to mitigations in the other initiatives.\\nWhat can you do? Submit comments! Learn more!\\nThe draft herbicide framework and accompanying documents are available in docket EPA-HQ-OPP-2023-0365 for public comment for 60 days.\\xa0Comments are due September 22, 2023. Agricultural pesticide users are encouraged to learn about EPA’s plan and to start thinking about how these mitigation measures could apply to herbicide use in their operation. While extensive recordkeeping is not currently required for the mitigation factors described in the strategy, it is highly recommended that users begin thinking about how to incorporate these new elements into their current record systems. If you are applying according to label directions, proper records can only assist your defense should you need it. To help guide you, watch for shared comments from professional organizations such as the Weed Science Society of America (WSSA). In April, a WSSA press release linked their comments to EPA and encouraged growers to act now to understand the impact of ESA’s new compliance initiatives. One good suggestion they offered to growers is to learn how to use EPA’s Bulletins Live! Two which is where important application instructions will be found.\\nEPA’s Office of Pesticide Programs will present a webinar on this draft herbicide Strategy on August 10th at Noon Central Time. EPA plans to walk through the framework and take questions from grower groups and other stakeholders. Register today. 
Questions may be submitted in advance of the webinar by emailing sm.opmp.pesticides@usda.gov.\\nTo learn more about EPA’s comprehensive ESA workplan Check out our article, “Change Coming to How EPA Protects Endangered Species from Pesticides – Feedback Needed” in the November/December 2022 issue of this newsletter. Proposed mitigation measures are discussed in more general terms in this comprehensive workplan. Please note that the comment period discussed there has ended.\\nVisit EPA’s website to learn more about how EPA’s pesticide program is protecting endangered species.\\nAdapted slightly from an EPA press release, “EPA Releases Draft Strategy to Better Protect Endangered Species from Herbicide Use” and related EPA documents. \\nABOUT THE AUTHOR: Michelle Wiesbrook\\xa0provides subject matter expertise and training in pesticide safety with an emphasis on horticultural weed science. She serves as the Illinois Pesticide Review newsletter editor, collecting and organizing material; and co-coordinates social media information for the PSEP program and ensures its timely publication.\\n\\nPesticide News\\n\\n\\n\\n\\nKeywords\\n\\nPesticide\\nHerbicide\\nInsecticide\\nFungicide\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nRelated Content\\n\\nUnintended herbicide injury on trees: A growing concernAugust 28, 2023\\n\\nTips to help employees succeedAugust 2, 2023\\n\\nParaquat certification valid 3 years: Are you due for training?August 2, 2023\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIllinois Extension\\n\\n 101 Mumford Hall (MC-710)\\n \\n1301 W. Gregory Dr.\\n Urbana,\\n IL\\n 61801\\n \\nEmail: extension@illinois.edu\\n\\n\\n\\nInstagram\\nFacebook\\nTwitter\\nYouTube\\nLinkedIn\\n\\n\\n\\nCollege of Agricultural, Consumer & Environmental Sciences\\n\\n\\n\\n\\n\\nDig Deeper\\n\\n\\nTake an Online Course\\n\\n\\nRead a Blog\\n\\n\\nRead a Newsletter\\n\\n\\nListen to a Podcast\\n\\n\\nWatch a Video\\n\\n\\nBuy a Publication\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nExtension Network\\n\\n\\nEat.Move.Save.\\n\\n\\nIllinois 4-H\\n\\n\\nIllini Science Policy Program\\n\\n\\nIllinois Indiana Sea Grant\\n\\n\\nIllinois Master Gardeners\\n\\n\\nIllinois Master Naturalists\\n\\n\\nIllinois Nutrition Education Programs\\n\\n\\nPesticide Safety Education Program\\n\\n\\nResearch Centers\\n\\n\\nSafe Electricity\\n\\n\\nU of I Plant Clinic\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nAdditional links\\n\\nAbout Cookies\\nPrivacy Policy\\n© 2023 University of Illinois Board of Trustees\\nEEO\\nAccessibility\\nmyExtension\\nLogin', 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': \". 
The Strategy — which is primarily designed to provide early mitigations that minimize impacts to over 900 listed species — is one of EPA’s most significant proposals to help overcome these challenges.\\nEPA focused the Strategy on agricultural crop uses in the lower 48 states because hundreds of millions of pounds of herbicides (and plant growth regulators) are applied each year, which is substantially more than for non-agricultural uses of herbicides and for other pesticide classes (e.g., insecticides, fungicides). Additionally, hundreds of listed species in the lower 48 states live in habitats adjacent to agricultural areas. The proposed mitigations in the Strategy would address the most common ways that conventional agricultural herbicides might impact these listed species. More specifically, EPA developed potential mitigation options for conventional agricultural herbicides to reduce pesticide transport via spray drift and runoff/erosion that could result in exposure to listed plants and listed animals that depend on plants.\\nEPA expects that the Strategy will increase the efficiency of future ESA consultations on herbicides with the U.S. Fish and Wildlife Service (FWS), which has authority over most listed species that could benefit from the proposed mitigations. Under the Strategy, EPA proposes to identify and begin mitigating for potential impacts even before EPA completes ESA consultations. These early mitigations should expedite EPA’s ability to fully comply with the ESA by reducing impacts to listed species before EPA conducts most of its ESA analysis. Adopting mitigations earlier will also allow EPA and FWS to use their resources more efficiently in ESA consultations.\\nThe Strategy’s proposed mitigations to reduce spray drift, runoff, and erosion and thereby reduce the potential exposure reflect practices that can be readily implemented by growers and identified by pesticide applicators and that provide flexibility for growers to select the mitigations that work best for them. The Strategy also gives credit to landowners who are already implementing certain measures to reduce pesticide runoff. For example, existing vegetated ditches and water retention ponds will qualify for credits that reduce the need for additional mitigation. Similarly, the Strategy would require less mitigation on flat lands, which are less prone to runoff, and in many western states, which typically experience less rain to carry pesticides off fields. The Strategy also describes how the Agency could add other mitigation practices to the menu of mitigation options in the future, particularly to incorporate emerging technology or new information on the effectiveness of specific practices.\\nDraft Herbicide Framework Document\\nThe draft framework document titled, “Draft Herbicide Strategy Framework to Reduce Exposure of Federally Listed Endangered and Threatened Species and Designated Critical Habitats from the Use of Conventional Agricultural Herbicides” is 97 pages long and includes a discussion of both the proposed scope of the Herbicide Strategy and the proposed decision framework to determine the level of mitigation needed for a particular conventional agricultural herbicide. The draft framework document also includes examples of how the proposed herbicide mitigation would apply to some of the herbicides for which EPA has conducted case studies as well as EPA's proposed implementation plan.\\nSome of the accompanying documents are quite lengthy. 
The “Herbicide Strategy Case Study Summary and Process” is 666 pages!\\xa0 Coincidence on the number? I’m not sure. I haven’t made it through it all yet. The primary thing I gathered from perusing through the spreadsheet files was that managing these complexities must be a nightmare. The document, “Application of EPA’s Draft Herbicide Strategy Framework Through Scenarios that Represent Crop Production Systems” is only 17 pages long and includes possible scenarios. Examples 1 and 2 would be particularly fitting for Illinois corn and soybean producers. These are shared to help producers better understand how these mitigation practices may be used.\\nIn its ESA Workplan and ESA Workplan Update, EPA outlined this and other ESA initiatives to develop early mitigations that provide listed species with practical protections from pesticides. The Strategy complements those other initiatives, such as targeted mitigations for listed species particularly vulnerable to pesticides and Interim Ecological Mitigations that EPA has begun incorporating under the Federal Insecticide, Fungicide, and Rodenticide Act. The draft framework describes how EPA would apply the mitigations in the Strategy compared to mitigations in the other initiatives.\\nWhat can you do? Submit comments! Learn more!\\nThe draft herbicide framework and accompanying documents are available in docket EPA-HQ-OPP-2023-0365 for public comment for 60 days.\\xa0Comments are due September 22, 2023. Agricultural pesticide users are encouraged to learn about EPA’s plan and to start thinking about how these mitigation measures could apply to herbicide use in their operation. While extensive recordkeeping is not currently required for the mitigation factors described in the strategy, it is highly recommended that users begin thinking about how to incorporate these new elements into their current record systems\", 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': 'EPA releases draft herbicide strategy; public comment period open | Illinois Extension | UIUC\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\nCollege of Agricultural, Consumer & Environmental Sciences\\n\\nIllinois Extension\\n\\n\\n\\n\\n\\nGive\\nVolunteer\\nCareers\\n\\n\\n\\n\\nTopics\\n\\n\\nAll Topics\\n\\n\\nCottage Food\\n\\n\\nFood\\n\\n\\nForestry\\n\\n\\nLocal Government Education\\n\\n\\nPlants\\n\\n\\nRainfall Management\\n\\n\\nSoil\\n\\n\\nVaccines\\n\\n\\nVegetable Gardening\\n\\n\\n\\n\\nLearn\\n\\n\\nOnline Courses\\n\\n\\nBlogs\\n\\n\\nNewsletters\\n\\n\\nPodcasts\\n\\n\\nVideos\\n\\n\\nPublications\\n\\n\\nSummer Resources\\n\\n\\n\\n\\nEvents\\n\\n\\nStatewide Webinars\\n\\n\\n\\n\\nNews\\n\\n\\nConnect\\n\\n\\nContact Staff\\n\\n\\nFind an Office\\n\\n\\nSocial Media\\n\\n\\nAdministration and Educator Teams\\n\\n\\nCommunications and Information Technology\\n\\n\\nIllini Science Policy Program\\n\\n\\nIllinois Indiana Sea Grant\\n\\n\\nMaster Gardeners\\n\\n\\nMaster Naturalists\\n\\n\\nPlant Clinic\\n\\n\\nResearch and Education Centers\\n\\n\\nSea Grant\\n\\n\\nEnergy Education Council\\n\\n\\nHome and Community Education\\n\\n\\nPlanning, 
Reporting, and Evaluation\\n\\n\\n\\n\\nImpact\\n\\n\\n2024 Extension Collaboration Grants\\n\\n\\nEconomic and Functional Impact\\n\\n\\nOur Impact in Agriculture and AgriBusiness\\n\\n\\nSNAP-Education Impact\\n\\n\\nExtension Funded Research Projects\\n\\n\\nOur Impact in Agriculture and Natural Resources\\n\\n\\nOur Impact in Community & Economic Development\\n\\n\\nOur Impact in Family and Consumer Sciences\\n\\n\\nOur Impact in Integrated Health Disparities\\n\\n\\n\\n\\nAbout\\n\\n\\nStrategic Planning\\n\\n\\nExtension Councils\\n\\n\\nCareers\\n\\n\\nProfessional Associations\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBreadcrumb\\n\\n\\nHome\\n\\n\\nBlogs\\n\\n\\nPesticide News\\n\\n\\n EPA releases draft herbicide strategy; public comment period open \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPesticide News\\n\\n\\nEPA releases draft herbicide strategy; public comment period open \\n\\n\\n\\n\\n\\n\\nAugust 2, 2023\\n\\n\\n\\nMichelle Wiesbrook\\n\\n\\n \\n\\nStrategy aims to increase efficiencies while supporting farmers, herbicide users with continued use of important pesticide tools. \\xa0\\nThe U.S. Environmental Protection Agency (EPA) released the draft Herbicide Strategy for public comment, a major milestone in the Agency’s work to protect federally endangered and threatened (listed) species from conventional agricultural herbicides. The Strategy describes proposed early mitigations for more than 900 listed species and designated critical habitats to reduce potential impacts from the agricultural use of these herbicides while helping to ensure the continued availability of these important pesticide tools.\\n“Ensuring safe use of herbicides is an important part of EPA’s mission to protect the environment,” said Deputy Assistant Administrator for Pesticide Programs for the Office of Chemical Safety and Pollution Prevention Jake Li. “This strategy reflects one of our biggest steps to support farmers and other herbicide users with tools for managing weeds, while accelerating EPA’s ability to protect many endangered species that live near agricultural areas.”\\nThe Strategy is part of EPA’s ongoing efforts to develop a multichemical, multispecies approach toward meeting its obligations under the Endangered Species Act (ESA). EPA’s traditional chemical-by-chemical, species-by-species approach to meeting these obligations is slow and costly.\\xa0 As a result, EPA has completed its ESA obligations for less than 5% of its actions, creating legal vulnerabilities for the Agency, increased litigation, and uncertainty for farmers and other pesticide users about their continued ability to use many pesticides. The Strategy — which is primarily designed to provide early mitigations that minimize impacts to over 900 listed species — is one of EPA’s most significant proposals to help overcome these challenges.\\nEPA focused the Strategy on agricultural crop uses in the lower 48 states because hundreds of millions of pounds of herbicides (and plant growth regulators) are applied each year, which is substantially more than for non-agricultural uses of herbicides and for other pesticide classes (e.g., insecticides, fungicides). Additionally, hundreds of listed species in the lower 48 states live in habitats adjacent to agricultural areas. 
The proposed mitigations in the Strategy would address the most common ways that conventional agricultural herbicides might impact these listed species', 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'News', 's3_path': 'courses/cropwizard/News.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental infrastructure…\\n\\n\\n \\nAddress health challenges with holistic solutions\\nSeptember 1, 2023\\n\\nURBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed the Autumn Health Picks 2023 webinar series. This series is part of the Community Seminar Series, and it provides an opportunity for…\\n\\n\\n \\nDo artificial roosts help bats? Illinois experts say more research needed\\nSeptember 1, 2023\\n\\nURBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of Illinois Urbana-Champaign\\xa0suggests the structures…\\n\\n\\nMore news\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nBlock Reference\\n\\nNews\\n\\n\\n\\nBy topic\\n- Any -4-H4-H alumni4-H FoundationAccessibilityAdministrationAgingAgricultural safetyAgricultureAnimal scienceAnnie's ProjectAnnualsAutismBeefBeef CattleBeekeepingBeesBeneficial InsectsBirdsBrain healthBulbsBullyingBusiness developmentBusiness retention and expansionButterflyCampingCaregivingCensusCertified Crop AdvisorCertified Livestock ManagerChild nutritionChronic diseasesCitizen ScienceCivic engagementClimateCollege ReadinessCommercial agricultureCommercial Vegetable ProductionCommunicationCommunity developmentCommunity gardenCommunity healthCommunity planningCommunity resiliencyCompostingConservationConsumer economicsCornCover cropsCreditCrop diseaseCropsDairy CattleDebt managementDementia Alzheimer’s diseaseDiabetesDicambaDisaster preparednessDiversity Equity InclusionDowntown developmentDrainageDronesEarly childhoodEconomic developmentEDEN Ready BusinessEFNEPElder careEmergency foodEnergyEnergy conservationEnergy efficiencyEntomologyEntrepreneurshipEnvironmentEstate planningExpensesFacultyFamily lifeFarm business managementFarm safetyFarmers marketsFinancial ExploitationFinancial planningFinancial wellnessFlowersFood accessFood PreservationFood safetyFood sanitationForestryFruitsFungicideGardeningGrassesHayHealthHealth CareHealthy cookingHealthy eatingHempHerbicideHerbsHolidaysHome OwnershipHorticultureHouseplantsIdentity TheftInclusionINEPInformation TechnologyInsect PestsInsecticideInsects and pestsInsuranceIntegrated Health DisparitiesIntegrated pest managementInvasive speciesInvestingLandscape architectureLandscape designLawn careLeadershipLeadership developmentLife skillsLivestockLocal foods and small farmsLocal governmentManaging stressManure managementMarketingMaster GardenersMaster NaturalistMeeting ManagementMental healthMindfulnessMoney MentorsMyPINative plantsNavigating 
differenceNutritionNutrition educationObesity preventionOrnamentalOutdoor SkillsParentingPasturePerennialsPesticidePesticide LabelPhysical ActivityPlant ClinicPlant diseasePlant health carePollinator HabitatPondsPoultryPoverty simulationPrivate/Commercial ApplicatorProfessional Development CEU CPDUPSEP trainingReal ColorsRecyclingRelationshipsResilienceRoboticsRosesSafetyShooting sportsShrubsSmall farmsSmart MeterSNAP-EdSocial-emotional healthSoilSoybeansSpecialty CropsSpendingState 4-H OfficeSTEMSubstance UseSustainable agricultureSwineTaxesTeam buildingTeenagersTime managementTrauma informed Adverse Childhood ExperiencesTree fruitsTreesTurfUrban AgricultureUrban gardeningVegetable gardeningVegetablesVolunteersWaterWeatherWeedsWellnessWheatWhole grainsWildlifeWorkforce developmentWorkplace wellnessYouth and MoneyYouth development\\n\\n\\nSearch\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 6, 2023\\n\\nIllinois Extension selected to establish environmental assistance center to help Illinois communities\\n\\n \\n URBANA, Ill. — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nAddress health challenges with holistic solutions\\n\\n \\n URBANA, Ill\", 'url': 'https://extension.illinois.edu/global/news-releases'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': 4, 'readable_filename': 'extension.pdf', 's3_path': 'courses/cropwizard/extension.pdf', 'text': \"Illinois Pesticide Review \\nVolume 36, No. 4, July/August \\n4\\nproposed mitigations. Under the Strategy, EPA \\nproposes to identify and begin mitigating for \\npotential impacts even before EPA completes \\nESA consultations. These early mitigations \\nshould expedite EPAs ability to fully comply \\nwith the ESA by reducing impacts to listed \\nspecies before EPA conducts most of its ESA \\nanalysis. Adopting mitigations earlier will \\nalso allow EPA and FWS to use their resources \\nmore efficiently in ESA consultations.\\nThe Strategys proposed mitigations to reduce \\nspray drift, runoff, and erosion and thereby \\nreduce the potential exposure reflect practices \\nthat can be readily implemented by growers \\nand identified by pesticide applicators and \\nthat provide flexibility for growers to select \\nthe mitigations that work best for them. The \\nStrategy also gives credit to landowners who \\nare already implementing certain measures to \\nreduce pesticide runoff. For example, existing \\nvegetated ditches and water retention ponds \\nwill qualify for credits that reduce the need for \\nadditional mitigation. Similarly, the Strategy \\nwould require less mitigation on flat lands, \\nwhich are less prone to runoff, and in many \\nwestern states, which typically experience \\nless rain to carry pesticides off fields. 
The \\nStrategy also describes how the Agency could \\nadd other mitigation practices to the menu of \\nmitigation options in the future, particularly \\nto incorporate emerging technology or new \\ninformation on the effectiveness of specific \\npractices.\\nDraft Herbicide Framework \\nDocument\\nThe draft framework document titled, Draft \\nHerbicide Strategy Framework to Reduce \\nExposure of Federally Listed Endangered \\nand Threatened Species and Designated \\nCritical Habitats from the Use of Conventional \\nAgricultural Herbicides is 97 pages long and \\nincludes a discussion of both the proposed \\nscope of the Herbicide Strategy and the \\nproposed decision framework to determine \\nthe level of mitigation needed for a particular \\nconventional agricultural herbicide. The draft \\nframework document also includes examples \\nof how the proposed herbicide mitigation \\nwould apply to some of the herbicides for \\nwhich EPA has conducted case studies as well \\nas EPA's proposed implementation plan.\\nSome of the accompanying documents are \\nquite lengthy. The Herbicide Strategy Case \\nStudy Summary and Process is 666 pages! \\nCoincidence on the number? Im not sure. I \\nhavent made it through it all yet. The primary \\nthing I gathered from perusing through \\nthe spreadsheet files was that managing \\nthese complexities must be a nightmare. \\nThe document, Application of EPAs Draft \\nHerbicide Strategy Framework Through \\nScenarios that Represent Crop Production \\nSystems is only 17 pages long and includes \\npossible scenarios. Examples 1 and 2 would \\nbe particularly fitting for Illinois corn and \\nsoybean producers. These are shared to \\nhelp producers better understand how these \\nmitigation practices may be used. \\nIn its ESA Workplan and ESA Workplan \\nUpdate, EPA outlined this and other ESA \\ninitiatives to develop early mitigations \\nthat provide listed species with practical \\nprotections from pesticides. The Strategy \\ncomplements those other initiatives, such \\nas targeted mitigations for listed species \\nparticularly vulnerable to pesticides and \\nInterim Ecological Mitigations that EPA \\nhas begun incorporating under the Federal \\nInsecticide, Fungicide, and Rodenticide Act. \\nThe draft framework describes how EPA would \\napply the mitigations in the Strategy compared \\nto mitigations in the other initiatives. \\nWhat can you do? Submit \\ncomments! Learn more!\\nThe draft herbicide framework and \\naccompanying documents are available in \\ndocket EPA-HQ-OPP-2023-0365 for public \\ncomment for 60 days. Comments are due \\nSeptember 22, 2023. Agricultural pesticide \\nusers are encouraged to learn about EPAs\", 'url': 'https://extension.illinois.edu/sites/default/files/2023-08/IPR%20Volume%2036%20Issue%204%20July%20August%20SECURE.pdf'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': 3, 'readable_filename': 'extension.pdf', 's3_path': 'courses/cropwizard/extension.pdf', 'text': 'Illinois Pesticide Review \\nVolume 36, No. 4, July/August \\n3\\nIts important to consider that one should \\ntake the test in the language he or she is \\nmost comfortable with. If someone has \\nbeen studying the material in English, dont \\nbe surprised if they opt to take the exam in \\nEnglish too. \\nIn the end, it all comes down to good \\ncommunication between you and your \\nemployees. 
It could be that they dont know \\nyet which learning method would work best \\nfor them and theyll need to try a few things. \\nTheyll appreciate you taking the time to ask \\nthem and work with them to help ensure their \\nsuccess.\\nMichelle Wiesbrook \\nEPA Releases Draft \\nHerbicide Strategy, Public \\nComment Period Open \\nStrategy aims to increase \\nefficiencies while supporting \\nfarmers, herbicide users with \\ncontinued use of important \\npesticide tools \\nThe U.S. Environmental Protection Agency \\n(EPA) released the draft Herbicide Strategy \\nfor public comment, a major milestone in the \\nAgencys work to protect federally endangered \\nand threatened (listed) species from conven-\\ntional agricultural herbicides. The Strategy \\ndescribes proposed early mitigations for more \\nthan 900 listed species and designated criti-\\ncal habitats to reduce potential impacts from \\nthe agricultural use of these herbicides while \\nhelping to ensure the continued availability of \\nthese important pesticide tools.\\nEnsuring safe use of herbicides is an \\nimportant part of EPAs mission to protect \\nthe environment, said Deputy Assistant \\nAdministrator for Pesticide Programs for \\nthe Office of Chemical Safety and Pollution \\nPrevention Jake Li. This strategy reflects one \\nof our biggest steps to support farmers and \\nother herbicide users with tools for managing \\nweeds, while accelerating EPAs ability to \\nprotect many endangered species that live near \\nagricultural areas.\\nThe Strategy is part of EPAs ongoing efforts \\nto develop a multichemical, multispecies \\napproach toward meeting its obligations \\nunder the Endangered Species Act (ESA). \\nEPAs traditional chemical-by-chemical, \\nspecies-by-species approach to meeting these \\nobligations is slow and costly. As a result, EPA \\nhas completed its ESA obligations for less than \\n5% of its actions, creating legal vulnerabilities \\nfor the Agency, increased litigation, and \\nuncertainty for farmers and other pesticide \\nusers about their continued ability to use many \\npesticides. The Strategy which is primarily \\ndesigned to provide early mitigations that \\nminimize impacts to over 900 listed species \\nis one of EPAs most significant proposals to \\nhelp overcome these challenges.\\nEPA focused the Strategy on agricultural crop \\nuses in the lower 48 states because hundreds \\nof millions of pounds of herbicides (and plant \\ngrowth regulators) are applied each year, \\nwhich is substantially more than for non-\\nagricultural uses of herbicides and for other \\npesticide classes (e.g., insecticides, fungicides). \\nAdditionally, hundreds of listed species in \\nthe lower 48 states live in habitats adjacent to \\nagricultural areas. The proposed mitigations \\nin the Strategy would address the most \\ncommon ways that conventional agricultural \\nherbicides might impact these listed \\nspecies. More specifically, EPA developed \\npotential mitigation options for conventional \\nagricultural herbicides to reduce pesticide \\ntransport via spray drift and runoff/erosion \\nthat could result in exposure to listed plants \\nand listed animals that depend on plants.\\nEPA expects that the Strategy will increase \\nthe efficiency of future ESA consultations \\non herbicides with the U.S. 
Fish and Wildlife \\nService (FWS), which has authority over most \\nlisted species that could benefit from the', 'url': 'https://extension.illinois.edu/sites/default/files/2023-08/IPR%20Volume%2036%20Issue%204%20July%20August%20SECURE.pdf'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'News', 's3_path': 'courses/cropwizard/News.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nAddress health challenges with holistic solutions\\n\\n \\n URBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nDo artificial roosts help bats? Illinois experts say more research needed\\n\\n \\n URBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 29, 2023\\n\\nButterflies can eat to live, live to eat in a balanced garden\\n\\n \\n URBANA, Ill. — A favorite thing about visiting gardens in the summer is catching sight of a butterfly enjoying nectar from a brightly colored zinnia or a monarch caterpillar munching on a milkweed leaf. When designing a butterfly garden, expand and balance plant selection to provide more than...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 24, 2023\\n\\nField Day event plans to keep beef cattle producers up on trends for their herds\\n\\n \\n URBANA, Ill. — Beef cattle producers will gain insights and stay up to date on current research from cow/calf patterns to alternative forages and more at the Orr Beef Research Center's Field Day on September 6.\\xa0The meeting will be held at the John Wood Community College Ag Center located west of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 23, 2023\\n\\nBuild drought-tolerant gardens this fall for next summer’s blooms\\n\\n \\n URBANA, Ill. — Many Illinois gardens are entering the fall stressed from the lack of summer rains combined with scorching hot temperatures. These conditions easily stress some plants; however, many plants quickly adapt to hot, dry conditions. Drought-tolerant plants are not only tough and...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 23, 2023\\n\\nIllinois Extension exhibits research, programs, innovation at 2023 Farm Progress Show\\n\\n \\n DECATUR, Ill. — The Farm Progress Show returns to Decatur, Aug. 29-31, and\\xa0University of Illinois Extension will be on-site in the College of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nBuild privacy with plants for secret gardens\\n\\n \\n URBANA, Ill.\\xa0— Plants serve a lot of purposes in the landscape. One of which is to add some privacy. Screening plants can help define and give purpose to a space. 
Homeowners may wish to screen a particular area or transparency in the landscape, creating interest in what lies beyond.\\xa0\\n\\n...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nIllinois Extension investing in the future of farming across the state\\n\\n \\n URBANA, Ill. — Helping Illinois farmers grow thriving crops and livestock has always been at the heart of University of Illinois Extension’s mission. Using feedback received from farmers and other agricultural stakeholders through a 2022 survey,...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nExtraordinary 4-H volunteers honored\\n\\n \\n SPRINGFIELD, Ill\", 'url': 'https://extension.illinois.edu/global/news-releases'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'Extension Funded Research Projects', 's3_path': 'courses/cropwizard/Extension_Funded_Research_Projects.html', 'text': '. Today’s most pressing issues are related to climate change. An understanding of how human activity impacts the environment helps us make sense of how a zoonotic transfer of a virus to humans causes a global pandemic, and how rising temperatures increase the frequency and intensity of wildfires and storms. Environmental Education (EE) connects people to their environments, instilling values and knowledge that leads to better environmental stewardship of local environments and natural resources. There are several well-established EE programs offered throughout Cook County by Illinois\\xa0Extension and the Forest Preserve of Cook County (FPCC). Missing from these opportunities are programs available to middle school-aged children, the age best suited for EE experiences because their environmental sensitivities are still developing and early experiences in nature have been shown to have impacts throughout adulthood (Boom, 2017). This proposal seeks to develop a program, Illinois Inquiry Adventures in Nature (IIAN), for middle school children and their families, suitable for small groups during the pandemic\\xa0and expanding in scope to include classrooms when safe. A series of four seasonal activities\\xa0and teacher workshops\\xa0will be created to bring groups to their local green spaces, including FPCC sites. Groups will engage in open-ended investigations based on their own observations and questions, complete activities at home\\xa0and enact local community conservation projects. Research will be conducted to examine how individuals’ connections to nature and environmental stewardship change over the course of their participation. This program fills a local need in Cook County, creating a continuum of opportunities across ages, and will be made available to all residents in Illinois, and nationwide, encouraging the next generation of environmental leaders.\\n\\n\\nAssessing the Needs and Connecting Young & Beginning Farmers with Extension Resources in Northern Illinois\\nAwarded to: Illinois Extension in the College of ACES\\nPrincipal Investigator: Joseph Malual\\nCo-Investigators:\\nNikki Keltner, Extension program coordinator, Illinois Extension\\nGrant McCarty, Extension educator, Illinois Extension\\nHope Michelson, assistant professor,\\xa0Department of Agricultural & Consumer Economics\\nPROJECT SUMMARY\\nMore and more young people are engaging in small-scale farming, with many focusing on specialty crops and sustainable agricultural production. 
Despite this trend, entry into farming, which is a complex business, is challenging. Beginning farmers face serious obstacles in accessing critical assets, including startup capital to acquire land, farm equipment\\xa0and agricultural technical knowledge needed to develop a\\xa0successful agricultural practice and profitable business. The situation is complicated by lack of adequate research to understand the unique challenges facing this generation of farmers. In Illinois, there is limited research to understand how people new to farming navigate access to critical resources. This research project aims to provide a comprehensive assessment of the needs and opportunities facing young and beginning\\xa0farmers in northern Illinois. We will identify and map farms owned by young and beginning farmers, examine their experiences and strategies used to leverage critical startup assets, including farmland and equipment, financial capital\\xa0and agricultural technical assistance, as well as strategies for marketing agricultural products. This project will build relations and connect this new audience with Extension resources, which can help\\xa0beginning farmers develop the knowledge and skills necessary for solving critical problems. Through interdisciplinary collaboration between Extension educators and specialists with faculty at the University of Illinois at Urbana-Champaign, this research will generate useful knowledge that can help beginning farmers, businesses\\xa0and communities make informed decisions and plan for future support of those new to farming. The\\xa0knowledge and practices discovered and identified through this project will be shared with Extension across the state. Extension educators can build on this knowledge to plan and deliver educational programming that empowers farmers to develop financially viable and sustainable farms. Those successful endeavors will, in turn, help to revitalize their rural communities.\\n\\n\\nNew Immigrant Foodways\\nAwarded to: Department of History in the College of Liberal Arts and Sciences\\nPrincipal Investigator: Teresa Barnes\\nCo-Investigators:\\nGisela Sin, director, Center for Latin American and Caribbean Studies\\nMargarita Teran-Garcia, Extension specialist, Illinois Extension\\nPROJECT SUMMARY\\nThis project will leverage new and existing research with immigrant communities about challenges and strategies in adapting home foodways to American food systems to create short instructional videos related to nutrition and cooking. The project addresses a complex issue at the intersection of three critical areas of Extension’s mission: food, health\\xa0and environment. 
It addresses the public need of new immigrant families to access information and expertise and develop sustainable strategies when faced with the bewildering array of often unhealthy food options in the USA', 'url': 'https://extension.illinois.edu/global/extension-funded-research-projects'}]}\n" - ] - } - ], - "source": [ - "print(course_df[0]['messages'][1])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [], - "source": [ - "# user email is in DF, outside of convo" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Mapping Conversations to Nomic" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22\n", - "22\n" - ] - } - ], - "source": [ - "user_queries = []\n", - "metadata = []\n", - "i = 1\n", - "\n", - "# log conversation instead of individual messages\n", - "for index, row in df.iterrows():\n", - " user_email = row['user_email']\n", - " convo = row['convo']\n", - " messages = convo['messages']\n", - " first_message = messages[0]['content']\n", - " user_queries.append(first_message)\n", - " # create metadata for multi-turn conversation\n", - " conversation = \"\"\n", - " for message in messages:\n", - " # string of role: content, role: content, ...\n", - " conversation += \"\\n>>> \" + message['role'] + \": \" + message['content'] + \"\\n\"\n", - " # add to metadata\n", - " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message}\n", - " metadata.append(metadata_row)\n", - " i += 1\n", - "\n", - "print(len(user_queries))\n", - "print(len(metadata))" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'course': 'cropwizard', 'conversation': '\\n>>> user: What type of pest is most harmful to corn crops in Illinois and how can I prevent it?\\n\\n>>> assistant: The most harmful pest to corn crops in Illinois is the corn rootworm. Corn rootworms, specifically the western corn rootworm (Diabrotica virgifera virgifera), are the most impactful species in Illinois and the North Central U.S. These pests can cause significant damage to corn plants by feeding on the roots, leading to reduced plant vigor and yield.\\n\\nTo prevent corn rootworm infestations, several measures can be taken:\\n\\n1. Crop rotation: Implementing a crop rotation strategy by alternating corn with non-host crops, such as soybeans, can help reduce corn rootworm populations. The larvae are not capable of surviving on soybean or other broadleaf plants, so rotating crops disrupts their life cycle.\\n\\n2. Utilize Bt hybrids: Planting corn hybrids that have been genetically modified to express Bacillus thuringiensis (Bt) proteins targeting corn rootworm can provide effective control. These Bt hybrids produce toxins that are toxic to the pests, reducing their feeding and damage to corn plants.\\n\\n3. Insecticide treatments: Seed-applied insecticides containing neonicotinoid or diamide insecticides can be used to protect corn seedlings from early-season pests, including corn rootworms. Soil-applied insecticides can also be applied at planting to control corn rootworms and other early-season pests.\\n\\n4. 
Monitoring and scouting: Regular monitoring of corn fields for signs of corn rootworm damage, such as root feeding and lodging, is essential. Scouting helps determine if pest populations have reached economic thresholds that require action, such as insecticide application.\\n\\n5. Integrated Pest Management (IPM): Implementing an IPM approach involves combining multiple pest management strategies, including cultural practices, biological control, and chemical control, to minimize the impact of corn rootworms and other pests. This holistic approach aims to reduce reliance on chemical treatments and promote sustainable pest management.\\n\\nIt is important to consult with local agricultural extension services, seed dealers, or agricultural experts for specific recommendations tailored to your region and field conditions to effectively manage corn rootworm infestations.\\n', 'conversation_id': '3e5d4861-b128-4c64-96ac-87c74f3217e5', 'id': 2, 'user_email': 'avd6@illinois.edu', 'first_query': 'What type of pest is most harmful to corn crops in Illinois and how can I prevent it?'}\n" - ] - } - ], - "source": [ - "print(metadata[1])" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(22, 1536)\n" - ] - } - ], - "source": [ - "# initialize langchain OpenAI embeddings model\n", - "embeddings_model = OpenAIEmbeddings()\n", - "metadata = pd.DataFrame(metadata)\n", - "embeddings = embeddings_model.embed_documents(user_queries)\n", - "embeddings = np.array(embeddings)\n", - "print(embeddings.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 14:53:44.900 | INFO | nomic.project:_create_project:779 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", - "2023-09-07 14:53:45.794 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-09-07 14:53:45.798 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-09-07 14:53:46.743 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-09-07 14:53:46.744 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-09-07 14:53:47.568 | INFO | nomic.project:create_index:1111 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n", - "2023-09-07 14:53:47.570 | INFO | nomic.atlas:map_embeddings:139 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 14:53:48.872 | INFO | nomic.project:create_index:1111 - Created map `cropwizard_convo_index_2` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71\n" - ] - }, - { - "data": { - "text/html": [ - "Atlas Projection cropwizard_convo_index_2. Status Topic Modeling. 
view online" - ], - "text/plain": [ - "cropwizard_convo_index_2: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# create an Atlas project\n", - "project_name = \"Conversation Map for \" + course\n", - "index_name = course + \"_convo_index_2\"\n", - "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='first_query',\n", - " name=project_name,\n", - " colorable_fields=['conversation_id', 'first_query'])\n", - "print(project.maps)\n", - "\n", - "project.create_index(index_name, build_topic_model=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Mapping Query-Response Pairs to Nomic" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "user queries: 1\n", - "metadata 1\n", - "------------------------\n" - ] - } - ], - "source": [ - "user_queries = []\n", - "metadata = []\n", - "i = 1\n", - "for convo in course_df:\n", - " messages = convo['messages']\n", - " print(len(messages))\n", - " pair_check = 0\n", - " for message in messages:\n", - " if message['role'] == 'user' and pair_check == 0:\n", - " query = message['content']\n", - " metadata_row = {'course': course, 'query': message['content']}\n", - " #print(\"metadata row: \", metadata_row)\n", - " pair_check += 1\n", - " if message['role'] == 'assistant' and pair_check == 1:\n", - " metadata_row['response'] = message['content']\n", - " metadata_row['id'] = i \n", - " #print(\"response metadata row: \", metadata_row)\n", - " i += 1\n", - " pair_check += 1\n", - " if pair_check == 2:\n", - " # one conversation pair is complete\n", - " user_queries.append(query)\n", - " metadata.append(metadata_row)\n", - " pair_check = 0\n", - "\n", - " \n", - "print(\"user queries: \", len(user_queries))\n", - "print(\"metadata\", len(metadata))\n", - "print(\"------------------------\")\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(590, 1536)\n" - ] - } - ], - "source": [ - "# initialize langchain OpenAI embeddings model\n", - "embeddings_model = OpenAIEmbeddings()\n", - "metadata = pd.DataFrame(metadata)\n", - "embeddings = embeddings_model.embed_documents(user_queries)\n", - "embeddings = np.array(embeddings)\n", - "print(embeddings.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-31 19:55:40.276 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE408FA23` in organization `dabholkar.asmita`\n", - "2023-08-31 19:55:41.466 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-31 19:55:41.491 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:02, 2.43s/it]\n", - "2023-08-31 19:55:43.932 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-31 19:55:43.932 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-31 19:55:45.475 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE408FA23` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n", - "2023-08-31 19:55:45.480 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-31 19:55:47.813 | INFO | nomic.project:create_index:1111 - Created map `ECE408FA23_qr_index` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1\n" - ] - }, - { - "data": { - "text/html": [ - "Atlas Projection ECE408FA23_qr_index. Status Topic Modeling. view online" - ], - "text/plain": [ - "ECE408FA23_qr_index: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# create an Atlas project\n", - "project_name = \"Query-Response Map for \" + course\n", - "index_name = course + \"_qr_index\"\n", - "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='query',\n", - " name=project_name,\n", - " colorable_fields=['query'])\n", - "print(project.maps)\n", - "\n", - "project.create_index(index_name, build_topic_model=True)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1571\n" - ] - } - ], - "source": [ - "# cell for all course map creation\n", - "\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", - "data = response.data\n", - "print(len(data))" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "126" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(data)\n", - "course_names = df['course_name'].unique()\n", - "len(course_names)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: badm_550_ashley\n", - "(51, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:29.701 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm_550_ashley` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:31.242 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:31.255 
| WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.03it/s]\n", - "2023-08-30 15:26:32.239 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:32.241 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:33.498 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm_550_ashley` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n", - "2023-08-30 15:26:33.500 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:35.272 | INFO | nomic.project:create_index:1111 - Created map `badm_550_ashley_qr_index` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/2b997f7c-0084-4db7-8e9a-76eeb62d715b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ece120\n", - "(298, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:42.765 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:43.831 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:43.850 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:01, 1.20s/it]\n", - "2023-08-30 15:26:45.059 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:45.063 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:46.221 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n", - "2023-08-30 15:26:46.230 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:48.040 | INFO | nomic.project:create_index:1111 - Created map `ece120_qr_index` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/61277149-b12d-4b59-8bcd-e9dd29fc58a4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: badm-567-v3\n", - "(27, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:52.367 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm-567-v3` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:53.227 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:53.236 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-08-30 15:26:54.177 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:54.185 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:55.379 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm-567-v3` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n", - "2023-08-30 15:26:55.379 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:57.012 | INFO | nomic.project:create_index:1111 - Created map `badm-567-v3_qr_index` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/e6c9798b-c154-43e7-917e-dd5cb71f116f\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: new-weather\n", - "(98, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:02.087 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for new-weather` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:03.117 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:03.125 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-08-30 15:27:04.071 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:04.071 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:05.459 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for new-weather` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n", - "2023-08-30 15:27:05.461 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:07.001 | INFO | nomic.project:create_index:1111 - Created map `new-weather_qr_index` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/ff717c79-50cd-468b-9fcc-b391c8c167df\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: gies-online-mba-v2\n", - "(52, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:10.946 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for gies-online-mba-v2` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:11.862 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:11.868 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.33it/s]\n", - "2023-08-30 15:27:12.630 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:12.634 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:13.627 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for gies-online-mba-v2` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n", - "2023-08-30 15:27:13.627 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:15.546 | INFO | nomic.project:create_index:1111 - Created map `gies-online-mba-v2_qr_index` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/db13ea7a-f93d-4f97-b922-c51216d3d6e9\n", - "2023-08-30 15:27:15,670:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. 
Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:15,673:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:20,003:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:20,003:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: frontend\n", - "(24, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:28.373 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for frontend` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:29.396 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:29.405 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.11it/s]\n", - "2023-08-30 15:27:30.325 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:30.325 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:31.539 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for frontend` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n", - "2023-08-30 15:27:31.542 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:33.028 | INFO | nomic.project:create_index:1111 - Created map `frontend_qr_index` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/006903b0-bb82-4432-9975-ff7c9ca80af9\n", - "2023-08-30 15:27:33,166:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:33,166:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:37,279:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:37,281:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:41,477:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:41,481:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ECE220FA23\n", - "(193, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:50.988 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE220FA23` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:51.867 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:51.878 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.01s/it]\n", - "2023-08-30 15:27:52.904 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:52.908 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:53.929 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE220FA23` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n", - "2023-08-30 15:27:53.929 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:55.645 | INFO | nomic.project:create_index:1111 - Created map `ECE220FA23_qr_index` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/d8163c57-a2e8-41ca-90fc-043c8a9469b3\n", - "2023-08-30 15:27:55,758:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:55,759:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:59,841:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:59,841:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: Snowmass\n", - "(23, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:08.067 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Snowmass` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:09.006 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:09.014 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.20it/s]\n", - "2023-08-30 15:28:09.854 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:09.858 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:10.994 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Snowmass` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n", - "2023-08-30 15:28:10.994 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:12.968 | INFO | nomic.project:create_index:1111 - Created map `Snowmass_qr_index` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/4fdea35b-cda2-434e-afd1-e46e01430a97\n", - "2023-08-30 15:28:13,066:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:13,068:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:17,200:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:17,200:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:21,297:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:21,297:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NPRE247\n", - "(54, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:29.951 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NPRE247` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:31.043 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:31.051 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.64s/it]\n", - "2023-08-30 15:28:32.709 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:32.714 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:33.787 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NPRE247` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n", - "2023-08-30 15:28:33.790 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:35.347 | INFO | nomic.project:create_index:1111 - Created map `NPRE247_qr_index` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/03471048-46aa-473c-b599-0bc812c679c0\n", - "2023-08-30 15:28:35,479:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:35,484:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:39,590:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:39,594:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: your-awesome-course\n", - "(30, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:50.102 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for your-awesome-course` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:51.013 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:51.022 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.10it/s]\n", - "2023-08-30 15:28:51.943 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:51.945 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:52.904 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for your-awesome-course` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n", - "2023-08-30 15:28:52.907 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:54.405 | INFO | nomic.project:create_index:1111 - Created map `your-awesome-course_qr_index` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/e58f20e3-fa19-4c4c-8764-a185e0691c85\n", - "2023-08-30 15:28:54,549:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:54,549:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:58,646:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:58,653:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: pract\n", - "(44, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:07.007 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for pract` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:08.243 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:08.251 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.11s/it]\n", - "2023-08-30 15:29:09.368 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:09.368 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:10.392 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for pract` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n", - "2023-08-30 15:29:10.392 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:12.031 | INFO | nomic.project:create_index:1111 - Created map `pract_qr_index` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/b15570eb-4db4-4b6f-9b4d-e80309d2dcb3\n", - "2023-08-30 15:29:12,113:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:12,115:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:16,201:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:16,209:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:20,282:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:20,285:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ece120FL22\n", - "(53, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:28.994 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120FL22` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:29.838 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:29.846 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.18it/s]\n", - "2023-08-30 15:29:30.708 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:30.710 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:31.828 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120FL22` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n", - "2023-08-30 15:29:31.828 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:33.354 | INFO | nomic.project:create_index:1111 - Created map `ece120FL22_qr_index` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/eb4e553d-ecd3-4b11-9d75-468108ab08e2\n", - "2023-08-30 15:29:33,458:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:33,458:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:37,544:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:37,545:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:41,634:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:41,642:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: Law794-TransactionalDraftingAlam\n", - "(21, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:49.618 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Law794-TransactionalDraftingAlam` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:50.718 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:50.731 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.11s/it]\n", - "2023-08-30 15:29:51.849 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:51.851 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:53.034 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Law794-TransactionalDraftingAlam` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n", - "2023-08-30 15:29:53.034 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:54.605 | INFO | nomic.project:create_index:1111 - Created map `Law794-TransactionalDraftingAlam_qr_index` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/9792fd4e-2196-4e39-bded-cc2bfd42abbf\n", - "2023-08-30 15:29:54,728:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. 
Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:54,731:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:58,804:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:58,804:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NCSA\n", - "(84, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:07.528 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSA` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:08.422 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:08.431 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.09it/s]\n", - "2023-08-30 15:30:09.361 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:09.361 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:10.325 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSA` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n", - "2023-08-30 15:30:10.325 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:11.917 | INFO | nomic.project:create_index:1111 - Created map `NCSA_qr_index` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/2ce836ec-557b-4037-9ebd-d3e8982c0926\n", - "2023-08-30 15:30:12,004:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:12,004:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:16,092:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:16,092:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:20,157:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:20,164:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NCSADelta\n", - "(22, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:28.362 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSADelta` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:29.318 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:29.326 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.10it/s]\n", - "2023-08-30 15:30:30.246 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:30.251 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:31.253 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSADelta` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n", - "2023-08-30 15:30:31.254 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:33.104 | INFO | nomic.project:create_index:1111 - Created map `NCSADelta_qr_index` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/b6e64fef-a829-435f-89b5-ed1a44c05202\n", - "2023-08-30 15:30:33,214:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:33,214:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:37,289:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:37,290:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:41,376:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:41,382:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NuclGPT-v1\n", - "(25, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:49.297 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NuclGPT-v1` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:50.216 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:50.222 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.23it/s]\n", - "2023-08-30 15:30:51.043 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:51.043 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:52.360 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NuclGPT-v1` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n", - "2023-08-30 15:30:52.360 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:53.900 | INFO | nomic.project:create_index:1111 - Created map `NuclGPT-v1_qr_index` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/441b7ff6-00ef-47f3-98a9-e45d327a1414\n" - ] - } - ], - "source": [ - "low_volume_courses = []\n", - "high_volume_courses = []\n", - "for course in course_names:\n", - " if course is None or course == 'ece408':\n", - " continue\n", - " \n", - " user_queries = []\n", - " metadata = []\n", - " i = 1\n", - " course_df = df[df['course_name'] == course]['convo']\n", - " for convo in course_df: # iterate through all conversations in a course\n", - " messages = convo['messages']\n", - "\n", - " # form query-response pairs out of the messages\n", - " pair_check = 0\n", - " for message in messages:\n", - " if message['role'] == 'user' and pair_check == 0:\n", - " query = message['content']\n", - " metadata_row = {'course': course, 'query': message['content']}\n", - " #print(\"metadata row: \", metadata_row)\n", - " pair_check += 1\n", - " \n", - " if message['role'] == 'assistant' and pair_check == 1:\n", - " metadata_row['response'] = message['content']\n", - " metadata_row['id'] = i \n", - " #print(\"response metadata row: \", metadata_row)\n", - " i += 1\n", - " pair_check += 1\n", - "\n", - " if pair_check == 2:\n", - " # one conversation pair is complete\n", - " user_queries.append(query)\n", - " metadata.append(metadata_row)\n", - " pair_check = 0\n", - " # after iterating every conversation in a course, create the map\n", - " if len(user_queries) < 20:\n", - " low_volume_courses.append(course)\n", - " continue\n", - "\n", - " if len(user_queries) > 500:\n", - " high_volume_courses.append(course)\n", - " continue\n", - " \n", - " metadata = pd.DataFrame(metadata)\n", - " embeddings = embeddings_model.embed_documents(user_queries)\n", - " embeddings = np.array(embeddings)\n", - " print(\"course name: \", course)\n", - " print(embeddings.shape)\n", - "\n", - " # create an Atlas project\n", - " project_name = \"Query-Response Map for \" + course\n", - " index_name = course + \"_qr_index\"\n", - " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='query',\n", - " name=project_name,\n", - " colorable_fields=['query'])\n", - " print(project.maps)\n", - "\n", - " project.create_index(index_name, build_topic_model=True)\n", - "\n", - " \n" - ] - }, - { - "cell_type": 
"code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "low volume courses: ['test-video-ingest', 'badm-567', 'test-video-ingest-28', 'pdeploy999', 'badm-350-summer', 'previewtesting1', 'localtest2', 'your-favorite-url', 'mantine', 'test-video-ingest-2', 'badm_567_v2', 'erpnext', 'mip', 'farmdoc_test_kastan-v1', 'personalstatement', 'hrc', 'csv', 'star_nox', 'badm_567', 'SPaRCEd', 'localdemo8', 'badm_567_thumbnails', 'chatbot', 'erp', 'extreme', 'rohan_atree', 'zotero-extreme', 'test-video-ingest-20', 'gies-online-mba2', 'gies-online-mba', 'careerassistant', 'weather', 'lillian-wang-blog', 'local-test5', 'demo-for-vyriad', 'ag-gpt-beta', 'rtest', 'previewdeploy', 'r2test', 'personal-statement', 'rohan_excel', 'langchain-python', 'langchain', 'ncsa-live-demo', 'rohan_atree_individual', 'meta11-test', 'HealthyLivingGuide', 'rohan', 'babreu', 'test-video-ingest-31', 'p', 'test-video-ingest-17', 'summary', 'test-video-ingest-3', 'test-video-ingest-27', 'lillian-wang-blog-2', 'python-magic', 'ansible2', 'ece408fa23', 'farmdoc_test_josh_v2', 'local-test3', 'automata', 'SpaceFlorida-GT', 'GBSI-GT', 'newnew_ncsa', 'canvas', 'gbsi-gt', 'meditation-tutorial', 'profit', 'ansible', 'langchain-docs', 'testing_url_metadata_josh', 'test-india-biodiversity', 'vyriad', 'irc-workplans', 'kastanasdfasdfasdf', 'BADM-567-GT', 'mdt', 'vercel', 'gies-graduate-hub', 'test-video-ingest-12', 'test-video-ingest-13', 'Gies-graduate-hub', 'test_new_supa_scrape', 'doe-ascr-2023', 'arize', 'final-meta-test', 'preview-meta-test', 'gies-online-mba-V3', 'FoF-Drawdown-from-INVPEIV-5-24-23', 'FIN574-GT', 'test-video-ingest-30', 'test', 'NCSA-v2', 'conversational', 'clowder-docs', 'DA', 'test-video-ingest-21', 'test-video-ingest-25', 'test-ingest-10', 'eric-test-course', 'farmdoc-test', 'test-video-ingest-22', 'Academic-paper', 'starwars', 'AdamDemo']\n", - "high volume courses: ['gpt4', 'ECE408FA23']\n" - ] - } - ], - "source": [ - "print(\"low volume courses: \", low_volume_courses)\n", - "print(\"high volume courses: \", high_volume_courses)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From c22399025e54a18ed2378a00448b454fa4fd816a Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 13:47:40 -0500 Subject: [PATCH 33/61] changed method to POST --- ai_ta_backend/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index a456828a..645b3732 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -395,7 +395,7 @@ def nomic_map(): response.headers.add('Access-Control-Allow-Origin', '*') return response -@app.route('/onResponseCompletion', methods=['GET']) +@app.route('/onResponseCompletion', methods=['POST']) def logToNomic(): course_name: str = request.args.get('course_name', default='', type=str) conversation: str = request.args.get('conversation', default='', type=str) From a53a5447ba8c816574edf4973fb93b5c12dcb10f Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 13:57:23 -0500 Subject: [PATCH 34/61] added print statements on endpoint --- 
ai_ta_backend/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 645b3732..2a056787 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -401,6 +401,9 @@ def logToNomic(): conversation: str = request.args.get('conversation', default='', type=str) print("In /onResponseCompletion") + print("course_name: ", course_name) + print("conversation: ", conversation) + if course_name == '' or conversation == '': # proper web error "400 Bad request" abort( From be3d5810a715965d6316bd739ed802e9dc6ef042 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 14:08:06 -0500 Subject: [PATCH 35/61] added get_json() --- ai_ta_backend/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 2a056787..0dc1e496 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -404,6 +404,8 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) + print(request.get_json()) + if course_name == '' or conversation == '': # proper web error "400 Bad request" abort( From 61068f897a79e7456de4ee7c88bc642708ad4083 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 14:52:57 -0500 Subject: [PATCH 36/61] added get_json() --- ai_ta_backend/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 0dc1e496..5e05684a 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -404,7 +404,7 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) - print(request.get_json()) + print("print json: ", request.get_json()) if course_name == '' or conversation == '': # proper web error "400 Bad request" From dc28ca97d98b4cf87845b97d9085c02c6100f2c6 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 14:59:56 -0500 Subject: [PATCH 37/61] investigate data --- ai_ta_backend/main.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 5e05684a..6f60d388 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -405,6 +405,15 @@ def logToNomic(): print("conversation: ", conversation) print("print json: ", request.get_json()) + data = request.get_json() + print(len(data)) + print(type(data)) + + course_name = data['course_name'] + conversation = data['conversation'] + + print("course_name: ", course_name) + print("conversation: ", conversation) if course_name == '' or conversation == '': # proper web error "400 Bad request" From f60e8764f8e0298bdc4dd4c2a35aa25e7263660b Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 15:08:41 -0500 Subject: [PATCH 38/61] investigate data --- ai_ta_backend/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 6f60d388..081d5e4c 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -423,10 +423,10 @@ def logToNomic(): f"Missing one or more required parameters: 'course_name' and 'conversation' must be provided. Course name: `{course_name}`, Conversation: `{conversation}`" ) - conversation_json = json.loads(conversation) + #conversation_json = json.loads(conversation) # background execution of tasks!! 
- response = executor.submit(log_convo_to_nomic, course_name, conversation_json) + response = executor.submit(log_convo_to_nomic, course_name, data) response = jsonify(response) response.headers.add('Access-Control-Allow-Origin', '*') return response From b8cc4710cbf001f59076d7228e486cd30ca01315 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 15:19:06 -0500 Subject: [PATCH 39/61] removed jsonify(response) --- ai_ta_backend/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 081d5e4c..41f9ce5a 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -427,9 +427,10 @@ def logToNomic(): # background execution of tasks!! response = executor.submit(log_convo_to_nomic, course_name, data) - response = jsonify(response) - response.headers.add('Access-Control-Allow-Origin', '*') - return response + #response = jsonify(response) + #response.headers.add('Access-Control-Allow-Origin', '*') + print(response) + return "response" if __name__ == '__main__': From 7b046d6557fa1fbf6982e3aab2b9e200a04bd486 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 15:41:11 -0500 Subject: [PATCH 40/61] added print statements in create_map() --- ai_ta_backend/main.py | 6 +++--- ai_ta_backend/nomic_logging.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 41f9ce5a..1f2ee238 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -404,7 +404,7 @@ def logToNomic(): print("course_name: ", course_name) print("conversation: ", conversation) - print("print json: ", request.get_json()) + # print("print json: ", request.get_json()) data = request.get_json() print(len(data)) print(type(data)) @@ -412,8 +412,8 @@ def logToNomic(): course_name = data['course_name'] conversation = data['conversation'] - print("course_name: ", course_name) - print("conversation: ", conversation) + # print("course_name: ", course_name) + # print("conversation: ", conversation) if course_name == '' or conversation == '': # proper web error "400 Bad request" diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 99893b3d..6afbdaec 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -26,7 +26,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: user_email = conversation['conversation']['user_email'] conversation_id = conversation['conversation']['id'] - print("conversation: ", conversation) + #print("conversation: ", conversation) # we have to upload whole conversations # check what the fetched data looks like - pandas df or pyarrow table @@ -142,6 +142,7 @@ def create_nomic_map(course_name: str, log_data: list): 2. appends current embeddings and metadata to it 2. 
creates map if there are at least 20 queries """ + print("in create_nomic_map()") # initialize supabase supabase_client = supabase.create_client( # type: ignore supabase_url=os.getenv('SUPABASE_URL'), # type: ignore From 1f67a10251dabc4cee1b8f8ab59c30c8e7088bf0 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 15:47:53 -0500 Subject: [PATCH 41/61] added print statements in create_map() --- ai_ta_backend/nomic_logging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 6afbdaec..972ff6bf 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -202,7 +202,7 @@ def create_nomic_map(course_name: str, log_data: list): "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content']} metadata.append(metadata_row) - print(len(metadata)) + print("length of metadata: ", len(metadata)) metadata = pd.DataFrame(metadata) embeddings_model = OpenAIEmbeddings() # type: ignore @@ -211,6 +211,7 @@ def create_nomic_map(course_name: str, log_data: list): # create Atlas project project_name = NOMIC_MAP_NAME_PREFIX + course_name index_name = course_name + "_convo_index" + print("project_name: ", project_name) project = atlas.map_embeddings(embeddings=np.array(embeddings), data=metadata, # type: ignore -- this is actually the correc type, the function signature from Nomic is incomplete id_field='id', build_topic_model=True, topic_label_field='first_query', name=project_name, colorable_fields=['conversation_id', 'first_query']) From 2ca6d79b75b8b68a459e424904ebea3da6df4efc Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 15:57:17 -0500 Subject: [PATCH 42/61] added print statements in create_map() --- ai_ta_backend/nomic_logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 972ff6bf..07271e57 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -95,6 +95,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: except Exception as e: # if project doesn't exist, create it + print(e) result = create_nomic_map(course_name, conversation) if result is None: print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e) From 530379d9a87b923be999aa262e5bf65f7f437455 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 16:16:53 -0500 Subject: [PATCH 43/61] added more print statements in create_map() --- ai_ta_backend/nomic_logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 07271e57..d5ca3c70 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -217,6 +217,7 @@ def create_nomic_map(course_name: str, log_data: list): id_field='id', build_topic_model=True, topic_label_field='first_query', name=project_name, colorable_fields=['conversation_id', 'first_query']) project.create_index(index_name, build_topic_model=True) + print("project: ", project) return f"Successfully created Nomic map for {course_name}" if __name__ == '__main__': From aeb6151e29882d0291ffd93f050b4b5e86197399 Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 16:44:00 -0500 Subject: [PATCH 44/61] added emoji to metadata --- ai_ta_backend/nomic_logging.py | 29 +- ai_ta_backend/nomic_map_creation.ipynb | 1556 ++++++++++++++++++++++++ 2 files changed, 1580 insertions(+), 5 deletions(-) create mode 100644 
ai_ta_backend/nomic_map_creation.ipynb diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index d5ca3c70..7da46e89 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -21,6 +21,8 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: 3. Keep current logic for map doesn't exist - update metadata """ print("in log_convo_to_nomic()") + + print("conversation: ", conversation) messages = conversation['conversation']['messages'] user_email = conversation['conversation']['user_email'] @@ -64,7 +66,12 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: # select the last 2 messages and append new convo to prev convo messages_to_be_logged = messages[-2:] for message in messages_to_be_logged: - prev_convo += "\n>>> " + message['role'] + ": " + message['content'] + "\n" + if message['role'] == 'user': + emoji = "🙋" + else: + emoji = "🤖" + + prev_convo += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" # update metadata metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, @@ -79,7 +86,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: user_queries.append(first_message) for message in messages: - conversation_string += "\n>>> " + message['role'] + ": " + message['content'] + "\n" + conversation_string += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, "id": last_id+1, "user_email": user_email, "first_query": first_message}] @@ -177,15 +184,23 @@ def create_nomic_map(course_name: str, log_data: list): # create metadata for multi-turn conversation conversation = "" + if message['role'] == 'user': + emoji = "🙋" + else: + emoji = "🤖" for message in messages: # string of role: content, role: content, ... 
- conversation += "\n>>> " + message['role'] + ": " + message['content'] + "\n" + conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" # append current chat to previous chat if convo already exists if convo['id'] == log_conversation_id: conversation_exists = True + if m['role'] == 'user': + emoji = "🙋" + else: + emoji = "🤖" for m in log_messages: - conversation += "\n>>> " + m['role'] + ": " + m['content'] + "\n" + conversation += "\n>>> " + emoji + m['role'] + ": " + m['content'] + "\n" # add to metadata metadata_row = {"course": row['course_name'], "conversation": conversation, "conversation_id": convo['id'], @@ -198,7 +213,11 @@ def create_nomic_map(course_name: str, log_data: list): user_queries.append(log_messages[0]['content']) conversation = "" for message in log_messages: - conversation += "\n>>> " + message['role'] + ": " + message['content'] + "\n" + if message['role'] == 'user': + emoji = "🙋" + else: + emoji = "🤖" + conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" metadata_row = {"course": course_name, "conversation": conversation, "conversation_id": log_conversation_id, "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content']} metadata.append(metadata_row) diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb new file mode 100644 index 00000000..23924157 --- /dev/null +++ b/ai_ta_backend/nomic_map_creation.ipynb @@ -0,0 +1,1556 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 11:57:29,274:INFO - Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", + "2023-09-07 11:57:29,274:INFO - NumExpr defaulting to 8 threads.\n" + ] + } + ], + "source": [ + "# import required libraries\n", + "\n", + "import os\n", + "import supabase\n", + "from nomic import atlas\n", + "from dotenv import load_dotenv\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "import numpy as np\n", + "import time\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# loading environment variables\n", + "\n", + "env_path = \"../.env\"\n", + "load_dotenv(dotenv_path=env_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize supabase client\n", + "\n", + "url = os.environ.get(\"SUPABASE_URL\")\n", + "key = os.environ.get(\"SUPABASE_API_KEY\")\n", + "\n", + "supabase_client = supabase.create_client(url, key)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atconvoconvo_idcourse_nameuser_email
052002023-09-07T17:03:47.705812+00:00{'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...abd2e044-fbff-455e-8c60-755cc7635182cropwizardavd6@illinois.edu
152012023-09-07T17:05:25.863567+00:00{'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...3e5d4861-b128-4c64-96ac-87c74f3217e5cropwizardavd6@illinois.edu
252162023-09-07T17:18:32.197379+00:00{'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...43ee631a-cb58-43f5-b2af-a5b91b7585cdcropwizardavd6@illinois.edu
352122023-09-07T17:16:34.267931+00:00{'id': '0129ea46-207f-47e3-be90-da143857000f',...0129ea46-207f-47e3-be90-da143857000fcropwizardavd6@illinois.edu
452172023-09-07T17:19:00.681823+00:00{'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...c6b4e4d8-4de7-4387-b4e9-411084dffea6cropwizardavd6@illinois.edu
552232023-09-07T17:22:38.970643+00:00{'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',...b5500763-7e7b-4b23-9031-cc320a51ccbfcropwizardavd6@illinois.edu
652272023-09-07T17:24:10.362647+00:00{'id': 'd410955f-4398-4869-b395-e6b659cc2d06',...d410955f-4398-4869-b395-e6b659cc2d06cropwizardavd6@illinois.edu
752092023-09-07T17:14:43.518617+00:00{'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',...0ecd2c05-772a-42aa-b29a-0a892bd0e9abcropwizardavd6@illinois.edu
852222023-09-07T17:21:29.223343+00:00{'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',...c82056a0-2d67-4ce8-82e3-86a30f1f6dc0cropwizardavd6@illinois.edu
952242023-09-07T17:22:54.856839+00:00{'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',...2316bbd7-61f3-44aa-a79e-bb42bd688c47cropwizardavd6@illinois.edu
1052262023-09-07T17:23:27.644745+00:00{'id': '66abfe85-bb04-456e-8709-89f9aafe5508',...66abfe85-bb04-456e-8709-89f9aafe5508cropwizardavd6@illinois.edu
1152282023-09-07T17:24:41.32465+00:00{'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',...175ad6b2-3bf2-4889-b2de-a18961ee8ecbcropwizardavd6@illinois.edu
1252322023-09-07T17:30:05.770146+00:00{'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',...f9859e36-bf76-40ab-9413-91ef6663dbd6cropwizardavd6@illinois.edu
1352332023-09-07T17:30:52.749867+00:00{'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',...bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2cropwizardavd6@illinois.edu
1452342023-09-07T17:31:19.801611+00:00{'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',...ecaf3228-78f3-49f7-b46d-3a5c3d5b62fdcropwizardavd6@illinois.edu
1552372023-09-07T17:36:14.68431+00:00{'id': 'edead825-12df-417c-af40-059e83067c69',...edead825-12df-417c-af40-059e83067c69cropwizardavd6@illinois.edu
1652382023-09-07T17:36:42.984907+00:00{'id': 'bc44d229-327a-452d-a386-8868216a1bd2',...bc44d229-327a-452d-a386-8868216a1bd2cropwizardavd6@illinois.edu
1752412023-09-07T17:37:22.134543+00:00{'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',...ff7a1c27-e126-49db-be79-6deaefcffec3cropwizardavd6@illinois.edu
1853042023-09-07T19:45:21.73541+00:00{'id': '6226b153-356a-408c-9483-49ef5808538c',...6226b153-356a-408c-9483-49ef5808538ccropwizardavd6@illinois.edu
1953052023-09-07T19:46:03.626639+00:00{'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',...e9edae6b-b7e1-46a8-b5e8-6215890a2a01cropwizardavd6@illinois.edu
2053062023-09-07T19:46:36.076704+00:00{'id': 'b2116035-da7b-4136-878d-66a10098a756',...b2116035-da7b-4136-878d-66a10098a756cropwizardavd6@illinois.edu
2151952023-09-06T23:43:38.201481+00:00{'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',...543ee10e-faf0-47a8-bb1c-c040aec44ed1cropwizarddabholkar.asmita@gmail.com
\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 5200 2023-09-07T17:03:47.705812+00:00 \n", + "1 5201 2023-09-07T17:05:25.863567+00:00 \n", + "2 5216 2023-09-07T17:18:32.197379+00:00 \n", + "3 5212 2023-09-07T17:16:34.267931+00:00 \n", + "4 5217 2023-09-07T17:19:00.681823+00:00 \n", + "5 5223 2023-09-07T17:22:38.970643+00:00 \n", + "6 5227 2023-09-07T17:24:10.362647+00:00 \n", + "7 5209 2023-09-07T17:14:43.518617+00:00 \n", + "8 5222 2023-09-07T17:21:29.223343+00:00 \n", + "9 5224 2023-09-07T17:22:54.856839+00:00 \n", + "10 5226 2023-09-07T17:23:27.644745+00:00 \n", + "11 5228 2023-09-07T17:24:41.32465+00:00 \n", + "12 5232 2023-09-07T17:30:05.770146+00:00 \n", + "13 5233 2023-09-07T17:30:52.749867+00:00 \n", + "14 5234 2023-09-07T17:31:19.801611+00:00 \n", + "15 5237 2023-09-07T17:36:14.68431+00:00 \n", + "16 5238 2023-09-07T17:36:42.984907+00:00 \n", + "17 5241 2023-09-07T17:37:22.134543+00:00 \n", + "18 5304 2023-09-07T19:45:21.73541+00:00 \n", + "19 5305 2023-09-07T19:46:03.626639+00:00 \n", + "20 5306 2023-09-07T19:46:36.076704+00:00 \n", + "21 5195 2023-09-06T23:43:38.201481+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',... \n", + "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',... \n", + "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',... \n", + "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',... \n", + "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',... \n", + "5 {'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',... \n", + "6 {'id': 'd410955f-4398-4869-b395-e6b659cc2d06',... \n", + "7 {'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',... \n", + "8 {'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',... \n", + "9 {'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',... \n", + "10 {'id': '66abfe85-bb04-456e-8709-89f9aafe5508',... \n", + "11 {'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',... \n", + "12 {'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',... \n", + "13 {'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',... \n", + "14 {'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',... \n", + "15 {'id': 'edead825-12df-417c-af40-059e83067c69',... \n", + "16 {'id': 'bc44d229-327a-452d-a386-8868216a1bd2',... \n", + "17 {'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',... \n", + "18 {'id': '6226b153-356a-408c-9483-49ef5808538c',... \n", + "19 {'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',... \n", + "20 {'id': 'b2116035-da7b-4136-878d-66a10098a756',... \n", + "21 {'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',... 
\n", + "\n", + " convo_id course_name \\\n", + "0 abd2e044-fbff-455e-8c60-755cc7635182 cropwizard \n", + "1 3e5d4861-b128-4c64-96ac-87c74f3217e5 cropwizard \n", + "2 43ee631a-cb58-43f5-b2af-a5b91b7585cd cropwizard \n", + "3 0129ea46-207f-47e3-be90-da143857000f cropwizard \n", + "4 c6b4e4d8-4de7-4387-b4e9-411084dffea6 cropwizard \n", + "5 b5500763-7e7b-4b23-9031-cc320a51ccbf cropwizard \n", + "6 d410955f-4398-4869-b395-e6b659cc2d06 cropwizard \n", + "7 0ecd2c05-772a-42aa-b29a-0a892bd0e9ab cropwizard \n", + "8 c82056a0-2d67-4ce8-82e3-86a30f1f6dc0 cropwizard \n", + "9 2316bbd7-61f3-44aa-a79e-bb42bd688c47 cropwizard \n", + "10 66abfe85-bb04-456e-8709-89f9aafe5508 cropwizard \n", + "11 175ad6b2-3bf2-4889-b2de-a18961ee8ecb cropwizard \n", + "12 f9859e36-bf76-40ab-9413-91ef6663dbd6 cropwizard \n", + "13 bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2 cropwizard \n", + "14 ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd cropwizard \n", + "15 edead825-12df-417c-af40-059e83067c69 cropwizard \n", + "16 bc44d229-327a-452d-a386-8868216a1bd2 cropwizard \n", + "17 ff7a1c27-e126-49db-be79-6deaefcffec3 cropwizard \n", + "18 6226b153-356a-408c-9483-49ef5808538c cropwizard \n", + "19 e9edae6b-b7e1-46a8-b5e8-6215890a2a01 cropwizard \n", + "20 b2116035-da7b-4136-878d-66a10098a756 cropwizard \n", + "21 543ee10e-faf0-47a8-bb1c-c040aec44ed1 cropwizard \n", + "\n", + " user_email \n", + "0 avd6@illinois.edu \n", + "1 avd6@illinois.edu \n", + "2 avd6@illinois.edu \n", + "3 avd6@illinois.edu \n", + "4 avd6@illinois.edu \n", + "5 avd6@illinois.edu \n", + "6 avd6@illinois.edu \n", + "7 avd6@illinois.edu \n", + "8 avd6@illinois.edu \n", + "9 avd6@illinois.edu \n", + "10 avd6@illinois.edu \n", + "11 avd6@illinois.edu \n", + "12 avd6@illinois.edu \n", + "13 avd6@illinois.edu \n", + "14 avd6@illinois.edu \n", + "15 avd6@illinois.edu \n", + "16 avd6@illinois.edu \n", + "17 avd6@illinois.edu \n", + "18 avd6@illinois.edu \n", + "19 avd6@illinois.edu \n", + "20 avd6@illinois.edu \n", + "21 dabholkar.asmita@gmail.com " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# query data for one course for testing\n", + "course = 'cropwizard'\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq(\"course_name\", course).execute()\n", + "data = response.data\n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...\n", + "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...\n", + "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...\n", + "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',...\n", + "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...\n", + "Name: convo, dtype: object" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "course_df = df[df['course_name'] == course]['convo']\n", + "course_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'role': 'assistant', 'content': \"The U.S. Environmental Protection Agency (EPA)'s mission is to protect human health and the environment. As part of this mission, the EPA is involved in efforts such as developing strategies to protect endangered and threatened species from potential impacts of agricultural practices, including the use of herbicides. 
For instance, the EPA has released a draft Herbicide Strategy for public comment, aimed at proposing early mitigations for more than 900 listed species and designated critical habitats to reduce potential impacts from the agricultural use of herbicides(1^,2^,3^,4^).\\n\\n1. University of Illinois Extension\\n2. EPA releases draft herbicide strategy\\n3. EPA releases draft herbicide strategy\\n4. extension.pdf, page: 3\", 'contexts': [{'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'University of Illinois Extension', 's3_path': 'courses/cropwizard/University_of_Illinois_Extension.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental infrastructure…\\n\\n\\n \\nAddress health challenges with holistic solutions\\nSeptember 1, 2023\\n\\nURBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed the Autumn Health Picks 2023 webinar series. This series is part of the Community Seminar Series, and it provides an opportunity for…\\n\\n\\n \\nDo artificial roosts help bats? Illinois experts say more research needed\\nSeptember 1, 2023\\n\\nURBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of Illinois Urbana-Champaign\\xa0suggests the structures…\\n\\n\\nMore news\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\nUpcoming Events\\n\\n\\n\\n\\n \\n\\nRead Before You Sign: Renting & Leasing \\n\\n\\nSeptember 6, 2023\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nTechnology & Soil Health Field Day\\n\\n\\nSeptember 6, 2023\\n\\n\\nCounty\\n\\nHenry\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nPollinator Plants to Span the Season\\n\\n\\nSeptember 6, 2023\\n\\n\\nCounty\\n\\nMacoupin\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nOrr Beef Research Center Field Day\\n\\n\\nSeptember 6, 2023\\n\\n\\n\\n\\n\\n\\nMore Events\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSubtitle\\nOpens the Door to New Opportunities\\n\\n\\n\\n\\nTitle\\nLearn @ Illinois Extension\\n\\n\\n\\n\\nBody\\nThere is so much you want to do, but the challenges of time, distance, and cost create barriers to achieving those goals. You need a program that's flexible to your schedule.\\xa0Learn @ Illinois Extension\\xa0helps remove those challenge by offering\\xa0flexible online learning programs that meet your personal interests and continuing education requirements. We provide learning on your terms so you can be who you were meant to be.\\xa0\\n\\n\\n\\nOnline Courses\\n\\n\\n\\n\\n\\n \\n\\n\\nLatest Podcast\\n\\n\\n\\n\\nGood Growing\\n\\n\\nGardenbite: Three tips for a healthier lawn | #GoodGrowingThis week on the Good Growing podcast Chris shares a Gardenbite of when retired horticulture educator Richard Hentschel visited the show in 2021 to talk about fall lawn care. 
During the show, Richard spoke about three things we could all do to reduce our lawn inputs.\\xa0Want to see or...\\n\\n\\n Your browser does not support iframes, but you can visit \\n\\n\\n\\n\\n\\nMore Podcasts\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\nDig Deeper\\n\\n\\nTopics we address\\n\\n4-H Youth Development\\nBeef Cattle\\nClimate\\nCommunity Gardens\\nCommunity Planning\\nCottage Food\\nCrops\\nDisasters\\nEconomic Development\\nEnergy\\nEnvironmental Quality\\nFamily\\nFinances\\nFirewood\\nFlowers\\nFood\\nForestry\\nHealth\\nHemp\\nHerbs\\nHome Vegetable Gardening\\nIllinois Grasses\\nInsects\\nInvasives\\nLivestock\\nLocal Food Systems and Small Farms\\nLocal Government Education\\nMental Health\\nMushrooms\\nNatural Resources\\nPlant Problems\\nPlants\\nRainfall Management\\nSoil\\nSpecialty Crops\\nVaccines\\nWeather\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nShare with us\\n\\n\\n \\n\\nBody\\n\\n\\n\\n\\xa0\\n\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\n\\nView this profile on Instagram\\n\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\n\\n\\xa0\\n\\xa0\\n\\xa0\\n\\n\\n\\n\\xa0\\n\\xa0\\n\\nIllinois Extension (@ilextension) • Instagram photos and videos\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nBody\\n\\xa0\\n\\nUniversity of Illinois Extension\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nVolunteer with Extension\\nLearn Something New\\nRead a Blog\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIllinois Extension\\n\\n 101 Mumford Hall (MC-710)\\n \\n1301 W\", 'url': 'https://extension.illinois.edu/'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': '. The draft framework describes how EPA would apply the mitigations in the Strategy compared to mitigations in the other initiatives.\\nWhat can you do? Submit comments! Learn more!\\nThe draft herbicide framework and accompanying documents are available in docket EPA-HQ-OPP-2023-0365 for public comment for 60 days.\\xa0Comments are due September 22, 2023. Agricultural pesticide users are encouraged to learn about EPA’s plan and to start thinking about how these mitigation measures could apply to herbicide use in their operation. While extensive recordkeeping is not currently required for the mitigation factors described in the strategy, it is highly recommended that users begin thinking about how to incorporate these new elements into their current record systems. If you are applying according to label directions, proper records can only assist your defense should you need it. To help guide you, watch for shared comments from professional organizations such as the Weed Science Society of America (WSSA). In April, a WSSA press release linked their comments to EPA and encouraged growers to act now to understand the impact of ESA’s new compliance initiatives. One good suggestion they offered to growers is to learn how to use EPA’s Bulletins Live! Two which is where important application instructions will be found.\\nEPA’s Office of Pesticide Programs will present a webinar on this draft herbicide Strategy on August 10th at Noon Central Time. EPA plans to walk through the framework and take questions from grower groups and other stakeholders. Register today. 
Questions may be submitted in advance of the webinar by emailing sm.opmp.pesticides@usda.gov.\\nTo learn more about EPA’s comprehensive ESA workplan Check out our article, “Change Coming to How EPA Protects Endangered Species from Pesticides – Feedback Needed” in the November/December 2022 issue of this newsletter. Proposed mitigation measures are discussed in more general terms in this comprehensive workplan. Please note that the comment period discussed there has ended.\\nVisit EPA’s website to learn more about how EPA’s pesticide program is protecting endangered species.\\nAdapted slightly from an EPA press release, “EPA Releases Draft Strategy to Better Protect Endangered Species from Herbicide Use” and related EPA documents. \\nABOUT THE AUTHOR: Michelle Wiesbrook\\xa0provides subject matter expertise and training in pesticide safety with an emphasis on horticultural weed science. She serves as the Illinois Pesticide Review newsletter editor, collecting and organizing material; and co-coordinates social media information for the PSEP program and ensures its timely publication.\\n\\nPesticide News\\n\\n\\n\\n\\nKeywords\\n\\nPesticide\\nHerbicide\\nInsecticide\\nFungicide\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nRelated Content\\n\\nUnintended herbicide injury on trees: A growing concernAugust 28, 2023\\n\\nTips to help employees succeedAugust 2, 2023\\n\\nParaquat certification valid 3 years: Are you due for training?August 2, 2023\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nIllinois Extension\\n\\n 101 Mumford Hall (MC-710)\\n \\n1301 W. Gregory Dr.\\n Urbana,\\n IL\\n 61801\\n \\nEmail: extension@illinois.edu\\n\\n\\n\\nInstagram\\nFacebook\\nTwitter\\nYouTube\\nLinkedIn\\n\\n\\n\\nCollege of Agricultural, Consumer & Environmental Sciences\\n\\n\\n\\n\\n\\nDig Deeper\\n\\n\\nTake an Online Course\\n\\n\\nRead a Blog\\n\\n\\nRead a Newsletter\\n\\n\\nListen to a Podcast\\n\\n\\nWatch a Video\\n\\n\\nBuy a Publication\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nExtension Network\\n\\n\\nEat.Move.Save.\\n\\n\\nIllinois 4-H\\n\\n\\nIllini Science Policy Program\\n\\n\\nIllinois Indiana Sea Grant\\n\\n\\nIllinois Master Gardeners\\n\\n\\nIllinois Master Naturalists\\n\\n\\nIllinois Nutrition Education Programs\\n\\n\\nPesticide Safety Education Program\\n\\n\\nResearch Centers\\n\\n\\nSafe Electricity\\n\\n\\nU of I Plant Clinic\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nAdditional links\\n\\nAbout Cookies\\nPrivacy Policy\\n© 2023 University of Illinois Board of Trustees\\nEEO\\nAccessibility\\nmyExtension\\nLogin', 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': \". 
The Strategy — which is primarily designed to provide early mitigations that minimize impacts to over 900 listed species — is one of EPA’s most significant proposals to help overcome these challenges.\\nEPA focused the Strategy on agricultural crop uses in the lower 48 states because hundreds of millions of pounds of herbicides (and plant growth regulators) are applied each year, which is substantially more than for non-agricultural uses of herbicides and for other pesticide classes (e.g., insecticides, fungicides). Additionally, hundreds of listed species in the lower 48 states live in habitats adjacent to agricultural areas. The proposed mitigations in the Strategy would address the most common ways that conventional agricultural herbicides might impact these listed species. More specifically, EPA developed potential mitigation options for conventional agricultural herbicides to reduce pesticide transport via spray drift and runoff/erosion that could result in exposure to listed plants and listed animals that depend on plants.\\nEPA expects that the Strategy will increase the efficiency of future ESA consultations on herbicides with the U.S. Fish and Wildlife Service (FWS), which has authority over most listed species that could benefit from the proposed mitigations. Under the Strategy, EPA proposes to identify and begin mitigating for potential impacts even before EPA completes ESA consultations. These early mitigations should expedite EPA’s ability to fully comply with the ESA by reducing impacts to listed species before EPA conducts most of its ESA analysis. Adopting mitigations earlier will also allow EPA and FWS to use their resources more efficiently in ESA consultations.\\nThe Strategy’s proposed mitigations to reduce spray drift, runoff, and erosion and thereby reduce the potential exposure reflect practices that can be readily implemented by growers and identified by pesticide applicators and that provide flexibility for growers to select the mitigations that work best for them. The Strategy also gives credit to landowners who are already implementing certain measures to reduce pesticide runoff. For example, existing vegetated ditches and water retention ponds will qualify for credits that reduce the need for additional mitigation. Similarly, the Strategy would require less mitigation on flat lands, which are less prone to runoff, and in many western states, which typically experience less rain to carry pesticides off fields. The Strategy also describes how the Agency could add other mitigation practices to the menu of mitigation options in the future, particularly to incorporate emerging technology or new information on the effectiveness of specific practices.\\nDraft Herbicide Framework Document\\nThe draft framework document titled, “Draft Herbicide Strategy Framework to Reduce Exposure of Federally Listed Endangered and Threatened Species and Designated Critical Habitats from the Use of Conventional Agricultural Herbicides” is 97 pages long and includes a discussion of both the proposed scope of the Herbicide Strategy and the proposed decision framework to determine the level of mitigation needed for a particular conventional agricultural herbicide. The draft framework document also includes examples of how the proposed herbicide mitigation would apply to some of the herbicides for which EPA has conducted case studies as well as EPA's proposed implementation plan.\\nSome of the accompanying documents are quite lengthy. 
The “Herbicide Strategy Case Study Summary and Process” is 666 pages!\\xa0 Coincidence on the number? I’m not sure. I haven’t made it through it all yet. The primary thing I gathered from perusing through the spreadsheet files was that managing these complexities must be a nightmare. The document, “Application of EPA’s Draft Herbicide Strategy Framework Through Scenarios that Represent Crop Production Systems” is only 17 pages long and includes possible scenarios. Examples 1 and 2 would be particularly fitting for Illinois corn and soybean producers. These are shared to help producers better understand how these mitigation practices may be used.\\nIn its ESA Workplan and ESA Workplan Update, EPA outlined this and other ESA initiatives to develop early mitigations that provide listed species with practical protections from pesticides. The Strategy complements those other initiatives, such as targeted mitigations for listed species particularly vulnerable to pesticides and Interim Ecological Mitigations that EPA has begun incorporating under the Federal Insecticide, Fungicide, and Rodenticide Act. The draft framework describes how EPA would apply the mitigations in the Strategy compared to mitigations in the other initiatives.\\nWhat can you do? Submit comments! Learn more!\\nThe draft herbicide framework and accompanying documents are available in docket EPA-HQ-OPP-2023-0365 for public comment for 60 days.\\xa0Comments are due September 22, 2023. Agricultural pesticide users are encouraged to learn about EPA’s plan and to start thinking about how these mitigation measures could apply to herbicide use in their operation. While extensive recordkeeping is not currently required for the mitigation factors described in the strategy, it is highly recommended that users begin thinking about how to incorporate these new elements into their current record systems\", 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'EPA releases draft herbicide strategy', 's3_path': 'courses/cropwizard/EPA_releases_draft_herbicide_strategy.html', 'text': 'EPA releases draft herbicide strategy; public comment period open | Illinois Extension | UIUC\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n Skip to main content\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\nCollege of Agricultural, Consumer & Environmental Sciences\\n\\nIllinois Extension\\n\\n\\n\\n\\n\\nGive\\nVolunteer\\nCareers\\n\\n\\n\\n\\nTopics\\n\\n\\nAll Topics\\n\\n\\nCottage Food\\n\\n\\nFood\\n\\n\\nForestry\\n\\n\\nLocal Government Education\\n\\n\\nPlants\\n\\n\\nRainfall Management\\n\\n\\nSoil\\n\\n\\nVaccines\\n\\n\\nVegetable Gardening\\n\\n\\n\\n\\nLearn\\n\\n\\nOnline Courses\\n\\n\\nBlogs\\n\\n\\nNewsletters\\n\\n\\nPodcasts\\n\\n\\nVideos\\n\\n\\nPublications\\n\\n\\nSummer Resources\\n\\n\\n\\n\\nEvents\\n\\n\\nStatewide Webinars\\n\\n\\n\\n\\nNews\\n\\n\\nConnect\\n\\n\\nContact Staff\\n\\n\\nFind an Office\\n\\n\\nSocial Media\\n\\n\\nAdministration and Educator Teams\\n\\n\\nCommunications and Information Technology\\n\\n\\nIllini Science Policy Program\\n\\n\\nIllinois Indiana Sea Grant\\n\\n\\nMaster Gardeners\\n\\n\\nMaster Naturalists\\n\\n\\nPlant Clinic\\n\\n\\nResearch and Education Centers\\n\\n\\nSea Grant\\n\\n\\nEnergy Education Council\\n\\n\\nHome and Community Education\\n\\n\\nPlanning, 
Reporting, and Evaluation\\n\\n\\n\\n\\nImpact\\n\\n\\n2024 Extension Collaboration Grants\\n\\n\\nEconomic and Functional Impact\\n\\n\\nOur Impact in Agriculture and AgriBusiness\\n\\n\\nSNAP-Education Impact\\n\\n\\nExtension Funded Research Projects\\n\\n\\nOur Impact in Agriculture and Natural Resources\\n\\n\\nOur Impact in Community & Economic Development\\n\\n\\nOur Impact in Family and Consumer Sciences\\n\\n\\nOur Impact in Integrated Health Disparities\\n\\n\\n\\n\\nAbout\\n\\n\\nStrategic Planning\\n\\n\\nExtension Councils\\n\\n\\nCareers\\n\\n\\nProfessional Associations\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBreadcrumb\\n\\n\\nHome\\n\\n\\nBlogs\\n\\n\\nPesticide News\\n\\n\\n EPA releases draft herbicide strategy; public comment period open \\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPesticide News\\n\\n\\nEPA releases draft herbicide strategy; public comment period open \\n\\n\\n\\n\\n\\n\\nAugust 2, 2023\\n\\n\\n\\nMichelle Wiesbrook\\n\\n\\n \\n\\nStrategy aims to increase efficiencies while supporting farmers, herbicide users with continued use of important pesticide tools. \\xa0\\nThe U.S. Environmental Protection Agency (EPA) released the draft Herbicide Strategy for public comment, a major milestone in the Agency’s work to protect federally endangered and threatened (listed) species from conventional agricultural herbicides. The Strategy describes proposed early mitigations for more than 900 listed species and designated critical habitats to reduce potential impacts from the agricultural use of these herbicides while helping to ensure the continued availability of these important pesticide tools.\\n“Ensuring safe use of herbicides is an important part of EPA’s mission to protect the environment,” said Deputy Assistant Administrator for Pesticide Programs for the Office of Chemical Safety and Pollution Prevention Jake Li. “This strategy reflects one of our biggest steps to support farmers and other herbicide users with tools for managing weeds, while accelerating EPA’s ability to protect many endangered species that live near agricultural areas.”\\nThe Strategy is part of EPA’s ongoing efforts to develop a multichemical, multispecies approach toward meeting its obligations under the Endangered Species Act (ESA). EPA’s traditional chemical-by-chemical, species-by-species approach to meeting these obligations is slow and costly.\\xa0 As a result, EPA has completed its ESA obligations for less than 5% of its actions, creating legal vulnerabilities for the Agency, increased litigation, and uncertainty for farmers and other pesticide users about their continued ability to use many pesticides. The Strategy — which is primarily designed to provide early mitigations that minimize impacts to over 900 listed species — is one of EPA’s most significant proposals to help overcome these challenges.\\nEPA focused the Strategy on agricultural crop uses in the lower 48 states because hundreds of millions of pounds of herbicides (and plant growth regulators) are applied each year, which is substantially more than for non-agricultural uses of herbicides and for other pesticide classes (e.g., insecticides, fungicides). Additionally, hundreds of listed species in the lower 48 states live in habitats adjacent to agricultural areas. 
The proposed mitigations in the Strategy would address the most common ways that conventional agricultural herbicides might impact these listed species', 'url': 'https://extension.illinois.edu/blogs/pesticide-news/2023-08-02-epa-releases-draft-herbicide-strategy-public-comment-period-open'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'News', 's3_path': 'courses/cropwizard/News.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental infrastructure…\\n\\n\\n \\nAddress health challenges with holistic solutions\\nSeptember 1, 2023\\n\\nURBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed the Autumn Health Picks 2023 webinar series. This series is part of the Community Seminar Series, and it provides an opportunity for…\\n\\n\\n \\nDo artificial roosts help bats? Illinois experts say more research needed\\nSeptember 1, 2023\\n\\nURBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of Illinois Urbana-Champaign\\xa0suggests the structures…\\n\\n\\nMore news\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\nBlock Reference\\n\\nNews\\n\\n\\n\\nBy topic\\n- Any -4-H4-H alumni4-H FoundationAccessibilityAdministrationAgingAgricultural safetyAgricultureAnimal scienceAnnie's ProjectAnnualsAutismBeefBeef CattleBeekeepingBeesBeneficial InsectsBirdsBrain healthBulbsBullyingBusiness developmentBusiness retention and expansionButterflyCampingCaregivingCensusCertified Crop AdvisorCertified Livestock ManagerChild nutritionChronic diseasesCitizen ScienceCivic engagementClimateCollege ReadinessCommercial agricultureCommercial Vegetable ProductionCommunicationCommunity developmentCommunity gardenCommunity healthCommunity planningCommunity resiliencyCompostingConservationConsumer economicsCornCover cropsCreditCrop diseaseCropsDairy CattleDebt managementDementia Alzheimer’s diseaseDiabetesDicambaDisaster preparednessDiversity Equity InclusionDowntown developmentDrainageDronesEarly childhoodEconomic developmentEDEN Ready BusinessEFNEPElder careEmergency foodEnergyEnergy conservationEnergy efficiencyEntomologyEntrepreneurshipEnvironmentEstate planningExpensesFacultyFamily lifeFarm business managementFarm safetyFarmers marketsFinancial ExploitationFinancial planningFinancial wellnessFlowersFood accessFood PreservationFood safetyFood sanitationForestryFruitsFungicideGardeningGrassesHayHealthHealth CareHealthy cookingHealthy eatingHempHerbicideHerbsHolidaysHome OwnershipHorticultureHouseplantsIdentity TheftInclusionINEPInformation TechnologyInsect PestsInsecticideInsects and pestsInsuranceIntegrated Health DisparitiesIntegrated pest managementInvasive speciesInvestingLandscape architectureLandscape designLawn careLeadershipLeadership developmentLife skillsLivestockLocal foods and small farmsLocal governmentManaging stressManure managementMarketingMaster GardenersMaster NaturalistMeeting ManagementMental healthMindfulnessMoney MentorsMyPINative plantsNavigating 
differenceNutritionNutrition educationObesity preventionOrnamentalOutdoor SkillsParentingPasturePerennialsPesticidePesticide LabelPhysical ActivityPlant ClinicPlant diseasePlant health carePollinator HabitatPondsPoultryPoverty simulationPrivate/Commercial ApplicatorProfessional Development CEU CPDUPSEP trainingReal ColorsRecyclingRelationshipsResilienceRoboticsRosesSafetyShooting sportsShrubsSmall farmsSmart MeterSNAP-EdSocial-emotional healthSoilSoybeansSpecialty CropsSpendingState 4-H OfficeSTEMSubstance UseSustainable agricultureSwineTaxesTeam buildingTeenagersTime managementTrauma informed Adverse Childhood ExperiencesTree fruitsTreesTurfUrban AgricultureUrban gardeningVegetable gardeningVegetablesVolunteersWaterWeatherWeedsWellnessWheatWhole grainsWildlifeWorkforce developmentWorkplace wellnessYouth and MoneyYouth development\\n\\n\\nSearch\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 6, 2023\\n\\nIllinois Extension selected to establish environmental assistance center to help Illinois communities\\n\\n \\n URBANA, Ill. — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nAddress health challenges with holistic solutions\\n\\n \\n URBANA, Ill\", 'url': 'https://extension.illinois.edu/global/news-releases'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': 4, 'readable_filename': 'extension.pdf', 's3_path': 'courses/cropwizard/extension.pdf', 'text': \"Illinois Pesticide Review \\nVolume 36, No. 4, July/August \\n4\\nproposed mitigations. Under the Strategy, EPA \\nproposes to identify and begin mitigating for \\npotential impacts even before EPA completes \\nESA consultations. These early mitigations \\nshould expedite EPAs ability to fully comply \\nwith the ESA by reducing impacts to listed \\nspecies before EPA conducts most of its ESA \\nanalysis. Adopting mitigations earlier will \\nalso allow EPA and FWS to use their resources \\nmore efficiently in ESA consultations.\\nThe Strategys proposed mitigations to reduce \\nspray drift, runoff, and erosion and thereby \\nreduce the potential exposure reflect practices \\nthat can be readily implemented by growers \\nand identified by pesticide applicators and \\nthat provide flexibility for growers to select \\nthe mitigations that work best for them. The \\nStrategy also gives credit to landowners who \\nare already implementing certain measures to \\nreduce pesticide runoff. For example, existing \\nvegetated ditches and water retention ponds \\nwill qualify for credits that reduce the need for \\nadditional mitigation. Similarly, the Strategy \\nwould require less mitigation on flat lands, \\nwhich are less prone to runoff, and in many \\nwestern states, which typically experience \\nless rain to carry pesticides off fields. 
The \\nStrategy also describes how the Agency could \\nadd other mitigation practices to the menu of \\nmitigation options in the future, particularly \\nto incorporate emerging technology or new \\ninformation on the effectiveness of specific \\npractices.\\nDraft Herbicide Framework \\nDocument\\nThe draft framework document titled, Draft \\nHerbicide Strategy Framework to Reduce \\nExposure of Federally Listed Endangered \\nand Threatened Species and Designated \\nCritical Habitats from the Use of Conventional \\nAgricultural Herbicides is 97 pages long and \\nincludes a discussion of both the proposed \\nscope of the Herbicide Strategy and the \\nproposed decision framework to determine \\nthe level of mitigation needed for a particular \\nconventional agricultural herbicide. The draft \\nframework document also includes examples \\nof how the proposed herbicide mitigation \\nwould apply to some of the herbicides for \\nwhich EPA has conducted case studies as well \\nas EPA's proposed implementation plan.\\nSome of the accompanying documents are \\nquite lengthy. The Herbicide Strategy Case \\nStudy Summary and Process is 666 pages! \\nCoincidence on the number? Im not sure. I \\nhavent made it through it all yet. The primary \\nthing I gathered from perusing through \\nthe spreadsheet files was that managing \\nthese complexities must be a nightmare. \\nThe document, Application of EPAs Draft \\nHerbicide Strategy Framework Through \\nScenarios that Represent Crop Production \\nSystems is only 17 pages long and includes \\npossible scenarios. Examples 1 and 2 would \\nbe particularly fitting for Illinois corn and \\nsoybean producers. These are shared to \\nhelp producers better understand how these \\nmitigation practices may be used. \\nIn its ESA Workplan and ESA Workplan \\nUpdate, EPA outlined this and other ESA \\ninitiatives to develop early mitigations \\nthat provide listed species with practical \\nprotections from pesticides. The Strategy \\ncomplements those other initiatives, such \\nas targeted mitigations for listed species \\nparticularly vulnerable to pesticides and \\nInterim Ecological Mitigations that EPA \\nhas begun incorporating under the Federal \\nInsecticide, Fungicide, and Rodenticide Act. \\nThe draft framework describes how EPA would \\napply the mitigations in the Strategy compared \\nto mitigations in the other initiatives. \\nWhat can you do? Submit \\ncomments! Learn more!\\nThe draft herbicide framework and \\naccompanying documents are available in \\ndocket EPA-HQ-OPP-2023-0365 for public \\ncomment for 60 days. Comments are due \\nSeptember 22, 2023. Agricultural pesticide \\nusers are encouraged to learn about EPAs\", 'url': 'https://extension.illinois.edu/sites/default/files/2023-08/IPR%20Volume%2036%20Issue%204%20July%20August%20SECURE.pdf'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': 3, 'readable_filename': 'extension.pdf', 's3_path': 'courses/cropwizard/extension.pdf', 'text': 'Illinois Pesticide Review \\nVolume 36, No. 4, July/August \\n3\\nIts important to consider that one should \\ntake the test in the language he or she is \\nmost comfortable with. If someone has \\nbeen studying the material in English, dont \\nbe surprised if they opt to take the exam in \\nEnglish too. \\nIn the end, it all comes down to good \\ncommunication between you and your \\nemployees. 
It could be that they dont know \\nyet which learning method would work best \\nfor them and theyll need to try a few things. \\nTheyll appreciate you taking the time to ask \\nthem and work with them to help ensure their \\nsuccess.\\nMichelle Wiesbrook \\nEPA Releases Draft \\nHerbicide Strategy, Public \\nComment Period Open \\nStrategy aims to increase \\nefficiencies while supporting \\nfarmers, herbicide users with \\ncontinued use of important \\npesticide tools \\nThe U.S. Environmental Protection Agency \\n(EPA) released the draft Herbicide Strategy \\nfor public comment, a major milestone in the \\nAgencys work to protect federally endangered \\nand threatened (listed) species from conven-\\ntional agricultural herbicides. The Strategy \\ndescribes proposed early mitigations for more \\nthan 900 listed species and designated criti-\\ncal habitats to reduce potential impacts from \\nthe agricultural use of these herbicides while \\nhelping to ensure the continued availability of \\nthese important pesticide tools.\\nEnsuring safe use of herbicides is an \\nimportant part of EPAs mission to protect \\nthe environment, said Deputy Assistant \\nAdministrator for Pesticide Programs for \\nthe Office of Chemical Safety and Pollution \\nPrevention Jake Li. This strategy reflects one \\nof our biggest steps to support farmers and \\nother herbicide users with tools for managing \\nweeds, while accelerating EPAs ability to \\nprotect many endangered species that live near \\nagricultural areas.\\nThe Strategy is part of EPAs ongoing efforts \\nto develop a multichemical, multispecies \\napproach toward meeting its obligations \\nunder the Endangered Species Act (ESA). \\nEPAs traditional chemical-by-chemical, \\nspecies-by-species approach to meeting these \\nobligations is slow and costly. As a result, EPA \\nhas completed its ESA obligations for less than \\n5% of its actions, creating legal vulnerabilities \\nfor the Agency, increased litigation, and \\nuncertainty for farmers and other pesticide \\nusers about their continued ability to use many \\npesticides. The Strategy which is primarily \\ndesigned to provide early mitigations that \\nminimize impacts to over 900 listed species \\nis one of EPAs most significant proposals to \\nhelp overcome these challenges.\\nEPA focused the Strategy on agricultural crop \\nuses in the lower 48 states because hundreds \\nof millions of pounds of herbicides (and plant \\ngrowth regulators) are applied each year, \\nwhich is substantially more than for non-\\nagricultural uses of herbicides and for other \\npesticide classes (e.g., insecticides, fungicides). \\nAdditionally, hundreds of listed species in \\nthe lower 48 states live in habitats adjacent to \\nagricultural areas. The proposed mitigations \\nin the Strategy would address the most \\ncommon ways that conventional agricultural \\nherbicides might impact these listed \\nspecies. More specifically, EPA developed \\npotential mitigation options for conventional \\nagricultural herbicides to reduce pesticide \\ntransport via spray drift and runoff/erosion \\nthat could result in exposure to listed plants \\nand listed animals that depend on plants.\\nEPA expects that the Strategy will increase \\nthe efficiency of future ESA consultations \\non herbicides with the U.S. 
Fish and Wildlife \\nService (FWS), which has authority over most \\nlisted species that could benefit from the', 'url': 'https://extension.illinois.edu/sites/default/files/2023-08/IPR%20Volume%2036%20Issue%204%20July%20August%20SECURE.pdf'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'News', 's3_path': 'courses/cropwizard/News.html', 'text': \". — The U.S. Environmental Protection Agency, in collaboration with the U.S. Department of Energy, is funding the creation of Thriving Communities Technical Assistance Centers to help rural and underserved communities access state and federal funding for environmental...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nAddress health challenges with holistic solutions\\n\\n \\n URBANA, Ill. — The University of Illinois, along with the Interdisciplinary Health Sciences Institute, and in collaboration with Illinois Extension, has developed...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nSeptember 1, 2023\\n\\nDo artificial roosts help bats? Illinois experts say more research needed\\n\\n \\n URBANA, Ill.\\xa0— Artificial roosts for bats come in many forms — bat boxes, condos, bark mimics, clay roosts, and cinder block structures, to name a few — but a new conservation practice and policy article from researchers at the\\xa0University of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 29, 2023\\n\\nButterflies can eat to live, live to eat in a balanced garden\\n\\n \\n URBANA, Ill. — A favorite thing about visiting gardens in the summer is catching sight of a butterfly enjoying nectar from a brightly colored zinnia or a monarch caterpillar munching on a milkweed leaf. When designing a butterfly garden, expand and balance plant selection to provide more than...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 24, 2023\\n\\nField Day event plans to keep beef cattle producers up on trends for their herds\\n\\n \\n URBANA, Ill. — Beef cattle producers will gain insights and stay up to date on current research from cow/calf patterns to alternative forages and more at the Orr Beef Research Center's Field Day on September 6.\\xa0The meeting will be held at the John Wood Community College Ag Center located west of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 23, 2023\\n\\nBuild drought-tolerant gardens this fall for next summer’s blooms\\n\\n \\n URBANA, Ill. — Many Illinois gardens are entering the fall stressed from the lack of summer rains combined with scorching hot temperatures. These conditions easily stress some plants; however, many plants quickly adapt to hot, dry conditions. Drought-tolerant plants are not only tough and...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 23, 2023\\n\\nIllinois Extension exhibits research, programs, innovation at 2023 Farm Progress Show\\n\\n \\n DECATUR, Ill. — The Farm Progress Show returns to Decatur, Aug. 29-31, and\\xa0University of Illinois Extension will be on-site in the College of...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nBuild privacy with plants for secret gardens\\n\\n \\n URBANA, Ill.\\xa0— Plants serve a lot of purposes in the landscape. One of which is to add some privacy. Screening plants can help define and give purpose to a space. 
Homeowners may wish to screen a particular area or transparency in the landscape, creating interest in what lies beyond.\\xa0\\n\\n...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nIllinois Extension investing in the future of farming across the state\\n\\n \\n URBANA, Ill. — Helping Illinois farmers grow thriving crops and livestock has always been at the heart of University of Illinois Extension’s mission. Using feedback received from farmers and other agricultural stakeholders through a 2022 survey,...\\n \\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n \\n\\n\\n\\nAugust 21, 2023\\n\\nExtraordinary 4-H volunteers honored\\n\\n \\n SPRINGFIELD, Ill\", 'url': 'https://extension.illinois.edu/global/news-releases'}, {'base_url': 'https://extension.illinois.edu/newsletters/illinois-pesticide-review-newsletter/julyaugust-2023', 'course_name ': 'cropwizard', 'pagenumber': '', 'readable_filename': 'Extension Funded Research Projects', 's3_path': 'courses/cropwizard/Extension_Funded_Research_Projects.html', 'text': '. Today’s most pressing issues are related to climate change. An understanding of how human activity impacts the environment helps us make sense of how a zoonotic transfer of a virus to humans causes a global pandemic, and how rising temperatures increase the frequency and intensity of wildfires and storms. Environmental Education (EE) connects people to their environments, instilling values and knowledge that leads to better environmental stewardship of local environments and natural resources. There are several well-established EE programs offered throughout Cook County by Illinois\\xa0Extension and the Forest Preserve of Cook County (FPCC). Missing from these opportunities are programs available to middle school-aged children, the age best suited for EE experiences because their environmental sensitivities are still developing and early experiences in nature have been shown to have impacts throughout adulthood (Boom, 2017). This proposal seeks to develop a program, Illinois Inquiry Adventures in Nature (IIAN), for middle school children and their families, suitable for small groups during the pandemic\\xa0and expanding in scope to include classrooms when safe. A series of four seasonal activities\\xa0and teacher workshops\\xa0will be created to bring groups to their local green spaces, including FPCC sites. Groups will engage in open-ended investigations based on their own observations and questions, complete activities at home\\xa0and enact local community conservation projects. Research will be conducted to examine how individuals’ connections to nature and environmental stewardship change over the course of their participation. This program fills a local need in Cook County, creating a continuum of opportunities across ages, and will be made available to all residents in Illinois, and nationwide, encouraging the next generation of environmental leaders.\\n\\n\\nAssessing the Needs and Connecting Young & Beginning Farmers with Extension Resources in Northern Illinois\\nAwarded to: Illinois Extension in the College of ACES\\nPrincipal Investigator: Joseph Malual\\nCo-Investigators:\\nNikki Keltner, Extension program coordinator, Illinois Extension\\nGrant McCarty, Extension educator, Illinois Extension\\nHope Michelson, assistant professor,\\xa0Department of Agricultural & Consumer Economics\\nPROJECT SUMMARY\\nMore and more young people are engaging in small-scale farming, with many focusing on specialty crops and sustainable agricultural production. 
Despite this trend, entry into farming, which is a complex business, is challenging. Beginning farmers face serious obstacles in accessing critical assets, including startup capital to acquire land, farm equipment\\xa0and agricultural technical knowledge needed to develop a\\xa0successful agricultural practice and profitable business. The situation is complicated by lack of adequate research to understand the unique challenges facing this generation of farmers. In Illinois, there is limited research to understand how people new to farming navigate access to critical resources. This research project aims to provide a comprehensive assessment of the needs and opportunities facing young and beginning\\xa0farmers in northern Illinois. We will identify and map farms owned by young and beginning farmers, examine their experiences and strategies used to leverage critical startup assets, including farmland and equipment, financial capital\\xa0and agricultural technical assistance, as well as strategies for marketing agricultural products. This project will build relations and connect this new audience with Extension resources, which can help\\xa0beginning farmers develop the knowledge and skills necessary for solving critical problems. Through interdisciplinary collaboration between Extension educators and specialists with faculty at the University of Illinois at Urbana-Champaign, this research will generate useful knowledge that can help beginning farmers, businesses\\xa0and communities make informed decisions and plan for future support of those new to farming. The\\xa0knowledge and practices discovered and identified through this project will be shared with Extension across the state. Extension educators can build on this knowledge to plan and deliver educational programming that empowers farmers to develop financially viable and sustainable farms. Those successful endeavors will, in turn, help to revitalize their rural communities.\\n\\n\\nNew Immigrant Foodways\\nAwarded to: Department of History in the College of Liberal Arts and Sciences\\nPrincipal Investigator: Teresa Barnes\\nCo-Investigators:\\nGisela Sin, director, Center for Latin American and Caribbean Studies\\nMargarita Teran-Garcia, Extension specialist, Illinois Extension\\nPROJECT SUMMARY\\nThis project will leverage new and existing research with immigrant communities about challenges and strategies in adapting home foodways to American food systems to create short instructional videos related to nutrition and cooking. The project addresses a complex issue at the intersection of three critical areas of Extension’s mission: food, health\\xa0and environment. 
It addresses the public need of new immigrant families to access information and expertise and develop sustainable strategies when faced with the bewildering array of often unhealthy food options in the USA', 'url': 'https://extension.illinois.edu/global/extension-funded-research-projects'}]}\n" + ] + } + ], + "source": [ + "print(course_df[0]['messages'][1])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# user email is in DF, outside of convo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Conversations to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "22\n", + "22\n" + ] + } + ], + "source": [ + "user_queries = []\n", + "metadata = []\n", + "i = 1\n", + "\n", + "# log conversation instead of individual messages\n", + "for index, row in df.iterrows():\n", + " user_email = row['user_email']\n", + " convo = row['convo']\n", + " messages = convo['messages']\n", + " first_message = messages[0]['content']\n", + " user_queries.append(first_message)\n", + " # create metadata for multi-turn conversation\n", + " conversation = \"\"\n", + " for message in messages:\n", + " # string of role: content, role: content, ...\n", + " conversation += \"\\n>>> \" + message['role'] + \": \" + message['content'] + \"\\n\"\n", + " # add to metadata\n", + " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message}\n", + " metadata.append(metadata_row)\n", + " i += 1\n", + "\n", + "print(len(user_queries))\n", + "print(len(metadata))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'course': 'cropwizard', 'conversation': '\\n>>> user: What type of pest is most harmful to corn crops in Illinois and how can I prevent it?\\n\\n>>> assistant: The most harmful pest to corn crops in Illinois is the corn rootworm. Corn rootworms, specifically the western corn rootworm (Diabrotica virgifera virgifera), are the most impactful species in Illinois and the North Central U.S. These pests can cause significant damage to corn plants by feeding on the roots, leading to reduced plant vigor and yield.\\n\\nTo prevent corn rootworm infestations, several measures can be taken:\\n\\n1. Crop rotation: Implementing a crop rotation strategy by alternating corn with non-host crops, such as soybeans, can help reduce corn rootworm populations. The larvae are not capable of surviving on soybean or other broadleaf plants, so rotating crops disrupts their life cycle.\\n\\n2. Utilize Bt hybrids: Planting corn hybrids that have been genetically modified to express Bacillus thuringiensis (Bt) proteins targeting corn rootworm can provide effective control. These Bt hybrids produce toxins that are toxic to the pests, reducing their feeding and damage to corn plants.\\n\\n3. Insecticide treatments: Seed-applied insecticides containing neonicotinoid or diamide insecticides can be used to protect corn seedlings from early-season pests, including corn rootworms. Soil-applied insecticides can also be applied at planting to control corn rootworms and other early-season pests.\\n\\n4. 
Monitoring and scouting: Regular monitoring of corn fields for signs of corn rootworm damage, such as root feeding and lodging, is essential. Scouting helps determine if pest populations have reached economic thresholds that require action, such as insecticide application.\\n\\n5. Integrated Pest Management (IPM): Implementing an IPM approach involves combining multiple pest management strategies, including cultural practices, biological control, and chemical control, to minimize the impact of corn rootworms and other pests. This holistic approach aims to reduce reliance on chemical treatments and promote sustainable pest management.\\n\\nIt is important to consult with local agricultural extension services, seed dealers, or agricultural experts for specific recommendations tailored to your region and field conditions to effectively manage corn rootworm infestations.\\n', 'conversation_id': '3e5d4861-b128-4c64-96ac-87c74f3217e5', 'id': 2, 'user_email': 'avd6@illinois.edu', 'first_query': 'What type of pest is most harmful to corn crops in Illinois and how can I prevent it?'}\n" + ] + } + ], + "source": [ + "print(metadata[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(22, 1536)\n" + ] + } + ], + "source": [ + "# initialize langchain OpenAI embeddings model\n", + "embeddings_model = OpenAIEmbeddings()\n", + "metadata = pd.DataFrame(metadata)\n", + "embeddings = embeddings_model.embed_documents(user_queries)\n", + "embeddings = np.array(embeddings)\n", + "print(embeddings.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 14:53:44.900 | INFO | nomic.project:_create_project:779 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", + "2023-09-07 14:53:45.794 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-09-07 14:53:45.798 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-09-07 14:53:46.743 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-09-07 14:53:46.744 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-09-07 14:53:47.568 | INFO | nomic.project:create_index:1111 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n", + "2023-09-07 14:53:47.570 | INFO | nomic.atlas:map_embeddings:139 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 14:53:48.872 | INFO | nomic.project:create_index:1111 - Created map `cropwizard_convo_index_2` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71\n" + ] + }, + { + "data": { + "text/html": [ + "Atlas Projection cropwizard_convo_index_2. Status Topic Modeling. 
view online" + ], + "text/plain": [ + "cropwizard_convo_index_2: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an Atlas project\n", + "project_name = \"Conversation Map for \" + course\n", + "index_name = course + \"_convo_index_2\"\n", + "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='first_query',\n", + " name=project_name,\n", + " colorable_fields=['conversation_id', 'first_query'])\n", + "print(project.maps)\n", + "\n", + "project.create_index(index_name, build_topic_model=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Query-Response Pairs to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "user queries: 1\n", + "metadata 1\n", + "------------------------\n" + ] + } + ], + "source": [ + "user_queries = []\n", + "metadata = []\n", + "i = 1\n", + "for convo in course_df:\n", + " messages = convo['messages']\n", + " print(len(messages))\n", + " pair_check = 0\n", + " for message in messages:\n", + " if message['role'] == 'user' and pair_check == 0:\n", + " query = message['content']\n", + " metadata_row = {'course': course, 'query': message['content']}\n", + " #print(\"metadata row: \", metadata_row)\n", + " pair_check += 1\n", + " if message['role'] == 'assistant' and pair_check == 1:\n", + " metadata_row['response'] = message['content']\n", + " metadata_row['id'] = i \n", + " #print(\"response metadata row: \", metadata_row)\n", + " i += 1\n", + " pair_check += 1\n", + " if pair_check == 2:\n", + " # one conversation pair is complete\n", + " user_queries.append(query)\n", + " metadata.append(metadata_row)\n", + " pair_check = 0\n", + "\n", + " \n", + "print(\"user queries: \", len(user_queries))\n", + "print(\"metadata\", len(metadata))\n", + "print(\"------------------------\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(590, 1536)\n" + ] + } + ], + "source": [ + "# initialize langchain OpenAI embeddings model\n", + "embeddings_model = OpenAIEmbeddings()\n", + "metadata = pd.DataFrame(metadata)\n", + "embeddings = embeddings_model.embed_documents(user_queries)\n", + "embeddings = np.array(embeddings)\n", + "print(embeddings.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-31 19:55:40.276 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE408FA23` in organization `dabholkar.asmita`\n", + "2023-08-31 19:55:41.466 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-31 19:55:41.491 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:02, 2.43s/it]\n", + "2023-08-31 19:55:43.932 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-31 19:55:43.932 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-31 19:55:45.475 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE408FA23` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n", + "2023-08-31 19:55:45.480 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-31 19:55:47.813 | INFO | nomic.project:create_index:1111 - Created map `ECE408FA23_qr_index` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1\n" + ] + }, + { + "data": { + "text/html": [ + "Atlas Projection ECE408FA23_qr_index. Status Topic Modeling. view online" + ], + "text/plain": [ + "ECE408FA23_qr_index: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an Atlas project\n", + "project_name = \"Query-Response Map for \" + course\n", + "index_name = course + \"_qr_index\"\n", + "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='query',\n", + " name=project_name,\n", + " colorable_fields=['query'])\n", + "print(project.maps)\n", + "\n", + "project.create_index(index_name, build_topic_model=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1571\n" + ] + } + ], + "source": [ + "# cell for all course map creation\n", + "\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", + "data = response.data\n", + "print(len(data))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "126" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data)\n", + "course_names = df['course_name'].unique()\n", + "len(course_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: badm_550_ashley\n", + "(51, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:29.701 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm_550_ashley` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:31.242 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:31.255 
| WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.03it/s]\n", + "2023-08-30 15:26:32.239 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:32.241 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:33.498 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm_550_ashley` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n", + "2023-08-30 15:26:33.500 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:35.272 | INFO | nomic.project:create_index:1111 - Created map `badm_550_ashley_qr_index` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/2b997f7c-0084-4db7-8e9a-76eeb62d715b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ece120\n", + "(298, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:42.765 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:43.831 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:43.850 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:01, 1.20s/it]\n", + "2023-08-30 15:26:45.059 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:45.063 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:46.221 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n", + "2023-08-30 15:26:46.230 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:48.040 | INFO | nomic.project:create_index:1111 - Created map `ece120_qr_index` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/61277149-b12d-4b59-8bcd-e9dd29fc58a4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: badm-567-v3\n", + "(27, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:52.367 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm-567-v3` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:53.227 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:53.236 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-08-30 15:26:54.177 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:54.185 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:55.379 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm-567-v3` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n", + "2023-08-30 15:26:55.379 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:57.012 | INFO | nomic.project:create_index:1111 - Created map `badm-567-v3_qr_index` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/e6c9798b-c154-43e7-917e-dd5cb71f116f\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: new-weather\n", + "(98, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:02.087 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for new-weather` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:03.117 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:03.125 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-08-30 15:27:04.071 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:04.071 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:05.459 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for new-weather` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n", + "2023-08-30 15:27:05.461 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:07.001 | INFO | nomic.project:create_index:1111 - Created map `new-weather_qr_index` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/ff717c79-50cd-468b-9fcc-b391c8c167df\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: gies-online-mba-v2\n", + "(52, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:10.946 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for gies-online-mba-v2` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:11.862 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:11.868 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.33it/s]\n", + "2023-08-30 15:27:12.630 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:12.634 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:13.627 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for gies-online-mba-v2` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n", + "2023-08-30 15:27:13.627 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:15.546 | INFO | nomic.project:create_index:1111 - Created map `gies-online-mba-v2_qr_index` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/db13ea7a-f93d-4f97-b922-c51216d3d6e9\n", + "2023-08-30 15:27:15,670:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. 
Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:15,673:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:20,003:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:20,003:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: frontend\n", + "(24, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:28.373 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for frontend` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:29.396 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:29.405 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.11it/s]\n", + "2023-08-30 15:27:30.325 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:30.325 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:31.539 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for frontend` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n", + "2023-08-30 15:27:31.542 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:33.028 | INFO | nomic.project:create_index:1111 - Created map `frontend_qr_index` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/006903b0-bb82-4432-9975-ff7c9ca80af9\n", + "2023-08-30 15:27:33,166:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:33,166:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:37,279:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:37,281:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:41,477:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:41,481:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ECE220FA23\n", + "(193, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:50.988 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE220FA23` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:51.867 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:51.878 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.01s/it]\n", + "2023-08-30 15:27:52.904 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:52.908 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:53.929 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE220FA23` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n", + "2023-08-30 15:27:53.929 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:55.645 | INFO | nomic.project:create_index:1111 - Created map `ECE220FA23_qr_index` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/d8163c57-a2e8-41ca-90fc-043c8a9469b3\n", + "2023-08-30 15:27:55,758:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:55,759:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:59,841:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:59,841:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: Snowmass\n", + "(23, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:08.067 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Snowmass` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:09.006 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:09.014 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.20it/s]\n", + "2023-08-30 15:28:09.854 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:09.858 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:10.994 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Snowmass` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n", + "2023-08-30 15:28:10.994 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:12.968 | INFO | nomic.project:create_index:1111 - Created map `Snowmass_qr_index` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/4fdea35b-cda2-434e-afd1-e46e01430a97\n", + "2023-08-30 15:28:13,066:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:13,068:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:17,200:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:17,200:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:21,297:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:21,297:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NPRE247\n", + "(54, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:29.951 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NPRE247` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:31.043 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:31.051 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.64s/it]\n", + "2023-08-30 15:28:32.709 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:32.714 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:33.787 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NPRE247` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n", + "2023-08-30 15:28:33.790 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:35.347 | INFO | nomic.project:create_index:1111 - Created map `NPRE247_qr_index` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/03471048-46aa-473c-b599-0bc812c679c0\n", + "2023-08-30 15:28:35,479:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:35,484:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:39,590:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:39,594:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: your-awesome-course\n", + "(30, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:50.102 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for your-awesome-course` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:51.013 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:51.022 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.10it/s]\n", + "2023-08-30 15:28:51.943 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:51.945 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:52.904 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for your-awesome-course` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n", + "2023-08-30 15:28:52.907 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:54.405 | INFO | nomic.project:create_index:1111 - Created map `your-awesome-course_qr_index` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/e58f20e3-fa19-4c4c-8764-a185e0691c85\n", + "2023-08-30 15:28:54,549:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:54,549:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:58,646:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:58,653:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: pract\n", + "(44, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:07.007 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for pract` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:08.243 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:08.251 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.11s/it]\n", + "2023-08-30 15:29:09.368 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:09.368 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:10.392 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for pract` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n", + "2023-08-30 15:29:10.392 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:12.031 | INFO | nomic.project:create_index:1111 - Created map `pract_qr_index` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/b15570eb-4db4-4b6f-9b4d-e80309d2dcb3\n", + "2023-08-30 15:29:12,113:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:12,115:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:16,201:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:16,209:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:20,282:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:20,285:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ece120FL22\n", + "(53, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:28.994 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120FL22` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:29.838 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:29.846 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.18it/s]\n", + "2023-08-30 15:29:30.708 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:30.710 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:31.828 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120FL22` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n", + "2023-08-30 15:29:31.828 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:33.354 | INFO | nomic.project:create_index:1111 - Created map `ece120FL22_qr_index` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/eb4e553d-ecd3-4b11-9d75-468108ab08e2\n", + "2023-08-30 15:29:33,458:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:33,458:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:37,544:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:37,545:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:41,634:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:41,642:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: Law794-TransactionalDraftingAlam\n", + "(21, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:49.618 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Law794-TransactionalDraftingAlam` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:50.718 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:50.731 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.11s/it]\n", + "2023-08-30 15:29:51.849 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:51.851 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:53.034 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Law794-TransactionalDraftingAlam` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n", + "2023-08-30 15:29:53.034 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:54.605 | INFO | nomic.project:create_index:1111 - Created map `Law794-TransactionalDraftingAlam_qr_index` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/9792fd4e-2196-4e39-bded-cc2bfd42abbf\n", + "2023-08-30 15:29:54,728:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. 
Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:54,731:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:58,804:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:58,804:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NCSA\n", + "(84, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:07.528 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSA` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:08.422 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:08.431 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.09it/s]\n", + "2023-08-30 15:30:09.361 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:09.361 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:10.325 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSA` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n", + "2023-08-30 15:30:10.325 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:11.917 | INFO | nomic.project:create_index:1111 - Created map `NCSA_qr_index` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/2ce836ec-557b-4037-9ebd-d3e8982c0926\n", + "2023-08-30 15:30:12,004:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:12,004:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:16,092:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:16,092:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:20,157:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:20,164:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NCSADelta\n", + "(22, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:28.362 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSADelta` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:29.318 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:29.326 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.10it/s]\n", + "2023-08-30 15:30:30.246 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:30.251 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:31.253 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSADelta` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n", + "2023-08-30 15:30:31.254 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:33.104 | INFO | nomic.project:create_index:1111 - Created map `NCSADelta_qr_index` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/b6e64fef-a829-435f-89b5-ed1a44c05202\n", + "2023-08-30 15:30:33,214:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:33,214:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:37,289:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:37,290:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:41,376:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:41,382:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NuclGPT-v1\n", + "(25, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:49.297 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NuclGPT-v1` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:50.216 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:50.222 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.23it/s]\n", + "2023-08-30 15:30:51.043 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:51.043 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:52.360 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NuclGPT-v1` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n", + "2023-08-30 15:30:52.360 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:53.900 | INFO | nomic.project:create_index:1111 - Created map `NuclGPT-v1_qr_index` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/441b7ff6-00ef-47f3-98a9-e45d327a1414\n" + ] + } + ], + "source": [ + "low_volume_courses = []\n", + "high_volume_courses = []\n", + "for course in course_names:\n", + " if course is None or course == 'ece408':\n", + " continue\n", + " \n", + " user_queries = []\n", + " metadata = []\n", + " i = 1\n", + " course_df = df[df['course_name'] == course]['convo']\n", + " for convo in course_df: # iterate through all conversations in a course\n", + " messages = convo['messages']\n", + "\n", + " # form query-response pairs out of the messages\n", + " pair_check = 0\n", + " for message in messages:\n", + " if message['role'] == 'user' and pair_check == 0:\n", + " query = message['content']\n", + " metadata_row = {'course': course, 'query': message['content']}\n", + " #print(\"metadata row: \", metadata_row)\n", + " pair_check += 1\n", + " \n", + " if message['role'] == 'assistant' and pair_check == 1:\n", + " metadata_row['response'] = message['content']\n", + " metadata_row['id'] = i \n", + " #print(\"response metadata row: \", metadata_row)\n", + " i += 1\n", + " pair_check += 1\n", + "\n", + " if pair_check == 2:\n", + " # one conversation pair is complete\n", + " user_queries.append(query)\n", + " metadata.append(metadata_row)\n", + " pair_check = 0\n", + " # after iterating every conversation in a course, create the map\n", + " if len(user_queries) < 20:\n", + " low_volume_courses.append(course)\n", + " continue\n", + "\n", + " if len(user_queries) > 500:\n", + " high_volume_courses.append(course)\n", + " continue\n", + " \n", + " metadata = pd.DataFrame(metadata)\n", + " embeddings = embeddings_model.embed_documents(user_queries)\n", + " embeddings = np.array(embeddings)\n", + " print(\"course name: \", course)\n", + " print(embeddings.shape)\n", + "\n", + " # create an Atlas project\n", + " project_name = \"Query-Response Map for \" + course\n", + " index_name = course + \"_qr_index\"\n", + " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='query',\n", + " name=project_name,\n", + " colorable_fields=['query'])\n", + " print(project.maps)\n", + "\n", + " project.create_index(index_name, build_topic_model=True)\n", + "\n", + " \n" + ] + }, + { + "cell_type": 
"code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "low volume courses: ['test-video-ingest', 'badm-567', 'test-video-ingest-28', 'pdeploy999', 'badm-350-summer', 'previewtesting1', 'localtest2', 'your-favorite-url', 'mantine', 'test-video-ingest-2', 'badm_567_v2', 'erpnext', 'mip', 'farmdoc_test_kastan-v1', 'personalstatement', 'hrc', 'csv', 'star_nox', 'badm_567', 'SPaRCEd', 'localdemo8', 'badm_567_thumbnails', 'chatbot', 'erp', 'extreme', 'rohan_atree', 'zotero-extreme', 'test-video-ingest-20', 'gies-online-mba2', 'gies-online-mba', 'careerassistant', 'weather', 'lillian-wang-blog', 'local-test5', 'demo-for-vyriad', 'ag-gpt-beta', 'rtest', 'previewdeploy', 'r2test', 'personal-statement', 'rohan_excel', 'langchain-python', 'langchain', 'ncsa-live-demo', 'rohan_atree_individual', 'meta11-test', 'HealthyLivingGuide', 'rohan', 'babreu', 'test-video-ingest-31', 'p', 'test-video-ingest-17', 'summary', 'test-video-ingest-3', 'test-video-ingest-27', 'lillian-wang-blog-2', 'python-magic', 'ansible2', 'ece408fa23', 'farmdoc_test_josh_v2', 'local-test3', 'automata', 'SpaceFlorida-GT', 'GBSI-GT', 'newnew_ncsa', 'canvas', 'gbsi-gt', 'meditation-tutorial', 'profit', 'ansible', 'langchain-docs', 'testing_url_metadata_josh', 'test-india-biodiversity', 'vyriad', 'irc-workplans', 'kastanasdfasdfasdf', 'BADM-567-GT', 'mdt', 'vercel', 'gies-graduate-hub', 'test-video-ingest-12', 'test-video-ingest-13', 'Gies-graduate-hub', 'test_new_supa_scrape', 'doe-ascr-2023', 'arize', 'final-meta-test', 'preview-meta-test', 'gies-online-mba-V3', 'FoF-Drawdown-from-INVPEIV-5-24-23', 'FIN574-GT', 'test-video-ingest-30', 'test', 'NCSA-v2', 'conversational', 'clowder-docs', 'DA', 'test-video-ingest-21', 'test-video-ingest-25', 'test-ingest-10', 'eric-test-course', 'farmdoc-test', 'test-video-ingest-22', 'Academic-paper', 'starwars', 'AdamDemo']\n", + "high volume courses: ['gpt4', 'ECE408FA23']\n" + ] + } + ], + "source": [ + "print(\"low volume courses: \", low_volume_courses)\n", + "print(\"high volume courses: \", high_volume_courses)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 7cc4077b697454ef415b03cd51dcdc97d16513ec Mon Sep 17 00:00:00 2001 From: star-nox Date: Mon, 11 Sep 2023 16:49:14 -0500 Subject: [PATCH 45/61] handled emoji error --- ai_ta_backend/nomic_logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 7da46e89..4c9f3677 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -37,6 +37,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: project_name = NOMIC_MAP_NAME_PREFIX + course_name start_time = time.monotonic() + emoji = "" try: # fetch project metadata and embbeddings From 4f38670a7509ac04423a9dde90aef2d4c45a22b8 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 12 Sep 2023 15:19:40 -0500 Subject: [PATCH 46/61] added missing emoji --- .env.template | 68 +- .gitignore | 334 +++---- ai_ta_backend/extreme_context_stuffing.py | 1082 ++++++++++----------- ai_ta_backend/nomic_logging.py | 4 + 
ai_ta_backend/nomic_map_creation.ipynb | 1031 +++++++++++++------- ai_ta_backend/utils_tokenization.py | 270 ++--- ai_ta_backend/web_scrape.py | 934 +++++++++--------- 7 files changed, 2021 insertions(+), 1702 deletions(-) diff --git a/.env.template b/.env.template index ba04c704..5c5520de 100644 --- a/.env.template +++ b/.env.template @@ -1,34 +1,34 @@ -# Supabase SQL -SUPABASE_URL= -SUPABASE_API_KEY= -SUPABASE_READ_ONLY= -SUPABASE_JWT_SECRET= - -MATERIALS_SUPABASE_TABLE=uiuc_chatbot -NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE=documents - -# QDRANT -QDRANT_COLLECTION_NAME=uiuc-chatbot -DEV_QDRANT_COLLECTION_NAME=dev -QDRANT_URL= -QDRANT_API_KEY= - -REFACTORED_MATERIALS_SUPABASE_TABLE= - -# AWS -S3_BUCKET_NAME=uiuc-chatbot -AWS_ACCESS_KEY_ID= -AWS_SECRET_ACCESS_KEY= - -OPENAI_API_KEY= - -NOMIC_API_KEY= -LINTRULE_SECRET= - -# Github Agent -GITHUB_APP_ID= -GITHUB_APP_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY----- - ------END RSA PRIVATE KEY-----" - -NUMEXPR_MAX_THREADS=2 +# Supabase SQL +SUPABASE_URL= +SUPABASE_API_KEY= +SUPABASE_READ_ONLY= +SUPABASE_JWT_SECRET= + +MATERIALS_SUPABASE_TABLE=uiuc_chatbot +NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE=documents + +# QDRANT +QDRANT_COLLECTION_NAME=uiuc-chatbot +DEV_QDRANT_COLLECTION_NAME=dev +QDRANT_URL= +QDRANT_API_KEY= + +REFACTORED_MATERIALS_SUPABASE_TABLE= + +# AWS +S3_BUCKET_NAME=uiuc-chatbot +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= + +OPENAI_API_KEY= + +NOMIC_API_KEY= +LINTRULE_SECRET= + +# Github Agent +GITHUB_APP_ID= +GITHUB_APP_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY----- + +-----END RSA PRIVATE KEY-----" + +NUMEXPR_MAX_THREADS=2 diff --git a/.gitignore b/.gitignore index 70babf88..3db8ad0c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,167 +1,167 @@ -# don't sync coursera docs -coursera-dl/ -*parsed.json -wandb - -# don't expose env files -dummy.ipynb -.env -# Created by https://www.toptal.com/developers/gitignore/api/python -# Edit at https://www.toptal.com/developers/gitignore?templates=python - -### Python ### -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coveage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -pytestdebug.log - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ -doc/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pythonenv* - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# profiling data -.prof - -# Virtualenv -# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ -.Python -[Bb]in -[Ii]nclude -[Ll]ib -[Ll]ib64 -[Ll]ocal -[Ss]cripts -pyvenv.cfg -.venv -pip-selfcheck.json - - -# End of https://www.toptal.com/developers/gitignore/api/python -.aider* +# don't sync coursera docs +coursera-dl/ +*parsed.json +wandb + +# don't expose env files +dummy.ipynb +.env +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coveage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +pytestdebug.log + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +doc/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pythonenv* + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# profiling data +.prof + +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +.Python +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +.venv +pip-selfcheck.json + + +# End of https://www.toptal.com/developers/gitignore/api/python +.aider* diff --git a/ai_ta_backend/extreme_context_stuffing.py b/ai_ta_backend/extreme_context_stuffing.py index ed133a6a..03b56e86 100644 --- a/ai_ta_backend/extreme_context_stuffing.py +++ b/ai_ta_backend/extreme_context_stuffing.py @@ -1,541 +1,541 @@ -""" -API REQUEST PARALLEL PROCESSOR - -Using the OpenAI API to process lots of text quickly takes some care. -If you trickle in a million API requests one by one, they'll take days to complete. -If you flood a million API requests in parallel, they'll exceed the rate limits and fail with errors. -To maximize throughput, parallel requests need to be throttled to stay under rate limits. - -This script parallelizes requests to the OpenAI API while throttling to stay under rate limits. - -Features: -- Streams requests from file, to avoid running out of memory for giant jobs -- Makes requests concurrently, to maximize throughput -- Throttles request and token usage, to stay under rate limits -- Retries failed requests up to {max_attempts} times, to avoid missing data -- Logs errors, to diagnose problems with requests - -Example command to call script: -``` -python examples/api_request_parallel_processor.py \ - --requests_filepath examples/data/example_requests_to_parallel_process.jsonl \ - --save_filepath examples/data/example_requests_to_parallel_process_results.jsonl \ - --request_url https://api.openai.com/v1/embeddings \ - --max_requests_per_minute 1500 \ - --max_tokens_per_minute 6250000 \ - --token_encoding_name cl100k_base \ - --max_attempts 5 \ - --logging_level 20 -``` - -Inputs: -- requests_filepath : str - - path to the file containing the requests to be processed - - file should be a jsonl file, where each line is a json object with API parameters and an optional metadata field - - e.g., {"model": "text-embedding-ada-002", "input": "embed me", "metadata": {"row_id": 1}} - - as with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically) - - an example file is provided at examples/data/example_requests_to_parallel_process.jsonl - - the code to generate the example file is appended to the bottom of this script -- save_filepath : str, optional - - path to the file where the results will be saved - - file will be a jsonl file, where each line is an array with the original request plus the API response - - e.g., [{"model": "text-embedding-ada-002", "input": "embed me"}, {...}] - - if omitted, results will be saved to {requests_filename}_results.jsonl -- request_url : str, optional - - URL of the API endpoint to call - - if omitted, will default to "https://api.openai.com/v1/embeddings" -- api_key : str, optional - - API key to use - - if omitted, the script will attempt to read it from an environment variable 
{os.getenv("OPENAI_API_KEY")} -- max_requests_per_minute : float, optional - - target number of requests to make per minute (will make less if limited by tokens) - - leave headroom by setting this to 50% or 75% of your limit - - if requests are limiting you, try batching multiple embeddings or completions into one request - - if omitted, will default to 1,500 -- max_tokens_per_minute : float, optional - - target number of tokens to use per minute (will use less if limited by requests) - - leave headroom by setting this to 50% or 75% of your limit - - if omitted, will default to 125,000 -- token_encoding_name : str, optional - - name of the token encoding used, as defined in the `tiktoken` package - - if omitted, will default to "cl100k_base" (used by `text-embedding-ada-002`) -- max_attempts : int, optional - - number of times to retry a failed request before giving up - - if omitted, will default to 5 -- logging_level : int, optional - - level of logging to use; higher numbers will log fewer messages - - 40 = ERROR; will log only when requests fail after all retries - - 30 = WARNING; will log when requests his rate limits or other errors - - 20 = INFO; will log when requests start and the status at finish - - 10 = DEBUG; will log various things as the loop runs to see when they occur - - if omitted, will default to 20 (INFO). - -The script is structured as follows: - - Imports - - Define main() - - Initialize things - - In main loop: - - Get next request if one is not already waiting for capacity - - Update available token & request capacity - - If enough capacity available, call API - - The loop pauses if a rate limit error is hit - - The loop breaks when no tasks remain - - Define dataclasses - - StatusTracker (stores script metadata counters; only one instance is created) - - APIRequest (stores API inputs, outputs, metadata; one method to call API) - - Define functions - - api_endpoint_from_url (extracts API endpoint from request URL) - - append_to_jsonl (writes to results file) - - num_tokens_consumed_from_request (bigger function to infer token usage from request) - - task_id_generator_function (yields 1, 2, 3, ...) 
- - Run main() -""" - -# import argparse -# import subprocess -# import tempfile -# from langchain.llms import OpenAI -import asyncio -import json -import logging -import os -import re -import time -from dataclasses import ( # for storing API inputs, outputs, and metadata - dataclass, field) -from typing import Any, List - -import aiohttp # for making API calls concurrently -import tiktoken # for counting tokens -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.vectorstores import Qdrant -from qdrant_client import QdrantClient, models - - -class OpenAIAPIProcessor: - - def __init__(self, input_prompts_list, request_url, api_key, max_requests_per_minute, max_tokens_per_minute, token_encoding_name, - max_attempts, logging_level): - self.request_url = request_url - self.api_key = api_key - self.max_requests_per_minute = max_requests_per_minute - self.max_tokens_per_minute = max_tokens_per_minute - self.token_encoding_name = token_encoding_name - self.max_attempts = max_attempts - self.logging_level = logging_level - self.input_prompts_list: List[dict] = input_prompts_list - self.results = [] - self.cleaned_results: List[str] = [] - - async def process_api_requests_from_file(self): - """Processes API requests in parallel, throttling to stay under rate limits.""" - # constants - seconds_to_pause_after_rate_limit_error = 15 - seconds_to_sleep_each_loop = 0.001 # 1 ms limits max throughput to 1,000 requests per second - - # initialize logging - logging.basicConfig(level=self.logging_level) - logging.debug(f"Logging initialized at level {self.logging_level}") - - # infer API endpoint and construct request header - api_endpoint = api_endpoint_from_url(self.request_url) - request_header = {"Authorization": f"Bearer {self.api_key}"} - - # initialize trackers - queue_of_requests_to_retry = asyncio.Queue() - task_id_generator = task_id_generator_function() # generates integer IDs of 1, 2, 3, ... - status_tracker = StatusTracker() # single instance to track a collection of variables - next_request = None # variable to hold the next request to call - - # initialize available capacity counts - available_request_capacity = self.max_requests_per_minute - available_token_capacity = self.max_tokens_per_minute - last_update_time = time.time() - - # initialize flags - file_not_finished = True # after file is empty, we'll skip reading it - logging.debug(f"Initialization complete.") - - requests = self.input_prompts_list.__iter__() - - logging.debug(f"File opened. 
Entering main loop") - - task_list = [] - - while True: - # get next request (if one is not already waiting for capacity) - if next_request is None: - if not queue_of_requests_to_retry.empty(): - next_request = queue_of_requests_to_retry.get_nowait() - logging.debug(f"Retrying request {next_request.task_id}: {next_request}") - elif file_not_finished: - try: - # get new request - # request_json = json.loads(next(requests)) - request_json = next(requests) - - next_request = APIRequest(task_id=next(task_id_generator), - request_json=request_json, - token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint, - self.token_encoding_name), - attempts_left=self.max_attempts, - metadata=request_json.pop("metadata", None)) - status_tracker.num_tasks_started += 1 - status_tracker.num_tasks_in_progress += 1 - logging.debug(f"Reading request {next_request.task_id}: {next_request}") - except StopIteration: - # if file runs out, set flag to stop reading it - logging.debug("Read file exhausted") - file_not_finished = False - - # update available capacity - current_time = time.time() - seconds_since_update = current_time - last_update_time - available_request_capacity = min( - available_request_capacity + self.max_requests_per_minute * seconds_since_update / 60.0, - self.max_requests_per_minute, - ) - available_token_capacity = min( - available_token_capacity + self.max_tokens_per_minute * seconds_since_update / 60.0, - self.max_tokens_per_minute, - ) - last_update_time = current_time - - # if enough capacity available, call API - if next_request: - next_request_tokens = next_request.token_consumption - if (available_request_capacity >= 1 and available_token_capacity >= next_request_tokens): - # update counters - available_request_capacity -= 1 - available_token_capacity -= next_request_tokens - next_request.attempts_left -= 1 - - # call API - # TODO: NOT SURE RESPONSE WILL WORK HERE - task = asyncio.create_task( - next_request.call_api( - request_url=self.request_url, - request_header=request_header, - retry_queue=queue_of_requests_to_retry, - status_tracker=status_tracker, - )) - task_list.append(task) - next_request = None # reset next_request to empty - - # print("status_tracker.num_tasks_in_progress", status_tracker.num_tasks_in_progress) - # one_task_result = task.result() - # print("one_task_result", one_task_result) - - # if all tasks are finished, break - if status_tracker.num_tasks_in_progress == 0: - break - - # main loop sleeps briefly so concurrent tasks can run - await asyncio.sleep(seconds_to_sleep_each_loop) - - # if a rate limit error was hit recently, pause to cool down - seconds_since_rate_limit_error = (time.time() - status_tracker.time_of_last_rate_limit_error) - if seconds_since_rate_limit_error < seconds_to_pause_after_rate_limit_error: - remaining_seconds_to_pause = (seconds_to_pause_after_rate_limit_error - seconds_since_rate_limit_error) - await asyncio.sleep(remaining_seconds_to_pause) - # ^e.g., if pause is 15 seconds and final limit was hit 5 seconds ago - logging.warn( - f"Pausing to cool down until {time.ctime(status_tracker.time_of_last_rate_limit_error + seconds_to_pause_after_rate_limit_error)}" - ) - - # after finishing, log final status - logging.info(f"""Parallel processing complete. 
About to return.""") - if status_tracker.num_tasks_failed > 0: - logging.warning(f"{status_tracker.num_tasks_failed} / {status_tracker.num_tasks_started} requests failed.") - if status_tracker.num_rate_limit_errors > 0: - logging.warning(f"{status_tracker.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate.") - - # asyncio wait for task_list - await asyncio.wait(task_list) - - for task in task_list: - openai_completion = task.result() - self.results.append(openai_completion) - - self.cleaned_results: List[str] = extract_context_from_results(self.results) - - -def extract_context_from_results(results: List[Any]) -> List[str]: - assistant_contents = [] - total_prompt_tokens = 0 - total_completion_tokens = 0 - - for element in results: - if element is not None: - for item in element: - if 'choices' in item: - for choice in item['choices']: - if choice['message']['role'] == 'assistant': - assistant_contents.append(choice['message']['content']) - total_prompt_tokens += item['usage']['prompt_tokens'] - total_completion_tokens += item['usage']['completion_tokens'] - # Note: I don't think the prompt_tokens or completion_tokens is working quite right... - - return assistant_contents - - -# dataclasses - - -@dataclass -class StatusTracker: - """Stores metadata about the script's progress. Only one instance is created.""" - - num_tasks_started: int = 0 - num_tasks_in_progress: int = 0 # script ends when this reaches 0 - num_tasks_succeeded: int = 0 - num_tasks_failed: int = 0 - num_rate_limit_errors: int = 0 - num_api_errors: int = 0 # excluding rate limit errors, counted above - num_other_errors: int = 0 - time_of_last_rate_limit_error: float = 0 # used to cool off after hitting rate limits - - -@dataclass -class APIRequest: - """Stores an API request's inputs, outputs, and other metadata. Contains a method to make an API call.""" - - task_id: int - request_json: dict - token_consumption: int - attempts_left: int - metadata: dict - result: list = field(default_factory=list) - - async def call_api( - self, - request_url: str, - request_header: dict, - retry_queue: asyncio.Queue, - status_tracker: StatusTracker, - ): - """Calls the OpenAI API and saves results.""" - # logging.info(f"Starting request #{self.task_id}") - error = None - try: - async with aiohttp.ClientSession() as session: - async with session.post(url=request_url, headers=request_header, json=self.request_json) as response: - response = await response.json() - if "error" in response: - logging.warning(f"Request {self.task_id} failed with error {response['error']}") - status_tracker.num_api_errors += 1 - error = response - if "Rate limit" in response["error"].get("message", ""): - status_tracker.time_of_last_rate_limit_error = time.time() - status_tracker.num_rate_limit_errors += 1 - status_tracker.num_api_errors -= 1 # rate limit errors are counted separately - - except Exception as e: # catching naked exceptions is bad practice, but in this case we'll log & save them - logging.warning(f"Request {self.task_id} failed with Exception {e}") - status_tracker.num_other_errors += 1 - error = e - if error: - self.result.append(error) - if self.attempts_left: - retry_queue.put_nowait(self) - else: - logging.error(f"Request {self.request_json} failed after all attempts. 
Saving errors: {self.result}") - data = ([self.request_json, [str(e) for e in self.result], self.metadata] - if self.metadata else [self.request_json, [str(e) for e in self.result]]) - #append_to_jsonl(data, save_filepath) - status_tracker.num_tasks_in_progress -= 1 - status_tracker.num_tasks_failed += 1 - return data - else: - data = ([self.request_json, response, self.metadata] if self.metadata else [self.request_json, response]) # type: ignore - #append_to_jsonl(data, save_filepath) - status_tracker.num_tasks_in_progress -= 1 - status_tracker.num_tasks_succeeded += 1 - # logging.debug(f"Request {self.task_id} saved to {save_filepath}") - - return data - - -# functions - - -def api_endpoint_from_url(request_url: str): - """Extract the API endpoint from the request URL.""" - match = re.search('^https://[^/]+/v\\d+/(.+)$', request_url) - return match[1] # type: ignore - - -def append_to_jsonl(data, filename: str) -> None: - """Append a json payload to the end of a jsonl file.""" - json_string = json.dumps(data) - with open(filename, "a") as f: - f.write(json_string + "\n") - - -def num_tokens_consumed_from_request( - request_json: dict, - api_endpoint: str, - token_encoding_name: str, -): - """Count the number of tokens in the request. Only supports completion and embedding requests.""" - encoding = tiktoken.get_encoding(token_encoding_name) - # if completions request, tokens = prompt + n * max_tokens - if api_endpoint.endswith("completions"): - max_tokens = request_json.get("max_tokens", 15) - n = request_json.get("n", 1) - completion_tokens = n * max_tokens - - # chat completions - if api_endpoint.startswith("chat/"): - num_tokens = 0 - for message in request_json["messages"]: - num_tokens += 4 # every message follows {role/name}\n{content}\n - for key, value in message.items(): - num_tokens += len(encoding.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens -= 1 # role is always required and always 1 token - num_tokens += 2 # every reply is primed with assistant - return num_tokens + completion_tokens - # normal completions - else: - prompt = request_json["prompt"] - if isinstance(prompt, str): # single prompt - prompt_tokens = len(encoding.encode(prompt)) - num_tokens = prompt_tokens + completion_tokens - return num_tokens - elif isinstance(prompt, list): # multiple prompts - prompt_tokens = sum([len(encoding.encode(p)) for p in prompt]) - num_tokens = prompt_tokens + completion_tokens * len(prompt) - return num_tokens - else: - raise TypeError('Expecting either string or list of strings for "prompt" field in completion request') - # if embeddings request, tokens = input tokens - elif api_endpoint == "embeddings": - input = request_json["input"] - if isinstance(input, str): # single input - num_tokens = len(encoding.encode(input)) - return num_tokens - elif isinstance(input, list): # multiple inputs - num_tokens = sum([len(encoding.encode(i)) for i in input]) - return num_tokens - else: - raise TypeError('Expecting either string or list of strings for "inputs" field in embedding request') - # more logic needed to support other API calls (e.g., edits, inserts, DALL-E) - else: - raise NotImplementedError(f'API endpoint "{api_endpoint}" not implemented in this script') - - -def task_id_generator_function(): - """Generate integers 0, 1, 2, and so on.""" - task_id = 0 - while True: - yield task_id - task_id += 1 - -if __name__ == '__main__': - pass - -# run script -# if __name__ == "__main__": -# qdrant_client = QdrantClient( -# 
url=os.getenv('QDRANT_URL'), -# api_key=os.getenv('QDRANT_API_KEY'), -# ) -# vectorstore = Qdrant( -# client=qdrant_client, -# collection_name=os.getenv('QDRANT_COLLECTION_NAME'), # type: ignore -# embeddings=OpenAIEmbeddings()) # type: ignore - -# user_question = "What is the significance of Six Sigma?" -# k = 4 -# fetch_k = 200 -# found_docs = vectorstore.max_marginal_relevance_search(user_question, k=k, fetch_k=200) - -# requests = [] -# for i, doc in enumerate(found_docs): -# dictionary = { -# "model": "gpt-3.5-turbo-0613", # 4k context -# "messages": [{ -# "role": "system", -# "content": "You are a factual summarizer of partial documents. Stick to the facts (including partial info when necessary to avoid making up potentially incorrect details), and say I don't know when necessary." -# }, { -# "role": -# "user", -# "content": -# f"What is a comprehensive summary of the given text, based on the question:\n{doc.page_content}\nQuestion: {user_question}\nThe summary should cover all the key points only relevant to the question, while also condensing the information into a concise and easy-to-understand format. Please ensure that the summary includes relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. Feel free to include references, sentence fragments, keywords, or anything that could help someone learn about it, only as it relates to the given question. The length of the summary should be as short as possible, without losing relevant information.\n" -# }], -# "n": 1, -# "max_tokens": 500, -# "metadata": doc.metadata -# } -# requests.append(dictionary) - -# oai = OpenAIAPIProcessor( -# input_prompts_list=requests, -# request_url='https://api.openai.com/v1/chat/completions', -# api_key=os.getenv("OPENAI_API_KEY"), -# max_requests_per_minute=1500, -# max_tokens_per_minute=90000, -# token_encoding_name='cl100k_base', -# max_attempts=5, -# logging_level=20, -# ) -# # run script -# asyncio.run(oai.process_api_requests_from_file()) - -# assistant_contents = [] -# total_prompt_tokens = 0 -# total_completion_tokens = 0 - -# print("Results, end of main: ", oai.results) -# print("-"*50) - -# # jsonObject = json.loads(oai.results) -# for element in oai.results: -# for item in element: -# if 'choices' in item: -# for choice in item['choices']: -# if choice['message']['role'] == 'assistant': -# assistant_contents.append(choice['message']['content']) -# total_prompt_tokens += item['usage']['prompt_tokens'] -# total_completion_tokens += item['usage']['completion_tokens'] - -# print("Assistant Contents:", assistant_contents) -# print("Total Prompt Tokens:", total_prompt_tokens) -# print("Total Completion Tokens:", total_completion_tokens) -# turbo_total_cost = (total_prompt_tokens * 0.0015) + (total_completion_tokens * 0.002) -# print("Total cost (3.5-turbo):", (total_prompt_tokens * 0.0015), " + Completions: ", (total_completion_tokens * 0.002), " = ", turbo_total_cost) - -# gpt4_total_cost = (total_prompt_tokens * 0.03) + (total_completion_tokens * 0.06) -# print("Hypothetical cost for GPT-4:", (total_prompt_tokens * 0.03), " + Completions: ", (total_completion_tokens * 0.06), " = ", gpt4_total_cost) -# print("GPT-4 cost premium: ", (gpt4_total_cost / turbo_total_cost), "x") - ''' - Pricing: - GPT4: - * $0.03 prompt - * $0.06 completions - 3.5-turbo: - * $0.0015 prompt - * $0.002 completions - ''' -""" -APPENDIX - -The example requests file at openai-cookbook/examples/data/example_requests_to_parallel_process.jsonl contains 10,000 
requests to text-embedding-ada-002. - -It was generated with the following code: - -```python -import json - -filename = "data/example_requests_to_parallel_process.jsonl" -n_requests = 10_000 -jobs = [{"model": "text-embedding-ada-002", "input": str(x) + "\n"} for x in range(n_requests)] -with open(filename, "w") as f: - for job in jobs: - json_string = json.dumps(job) - f.write(json_string + "\n") -``` - -As with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically). -""" +""" +API REQUEST PARALLEL PROCESSOR + +Using the OpenAI API to process lots of text quickly takes some care. +If you trickle in a million API requests one by one, they'll take days to complete. +If you flood a million API requests in parallel, they'll exceed the rate limits and fail with errors. +To maximize throughput, parallel requests need to be throttled to stay under rate limits. + +This script parallelizes requests to the OpenAI API while throttling to stay under rate limits. + +Features: +- Streams requests from file, to avoid running out of memory for giant jobs +- Makes requests concurrently, to maximize throughput +- Throttles request and token usage, to stay under rate limits +- Retries failed requests up to {max_attempts} times, to avoid missing data +- Logs errors, to diagnose problems with requests + +Example command to call script: +``` +python examples/api_request_parallel_processor.py \ + --requests_filepath examples/data/example_requests_to_parallel_process.jsonl \ + --save_filepath examples/data/example_requests_to_parallel_process_results.jsonl \ + --request_url https://api.openai.com/v1/embeddings \ + --max_requests_per_minute 1500 \ + --max_tokens_per_minute 6250000 \ + --token_encoding_name cl100k_base \ + --max_attempts 5 \ + --logging_level 20 +``` + +Inputs: +- requests_filepath : str + - path to the file containing the requests to be processed + - file should be a jsonl file, where each line is a json object with API parameters and an optional metadata field + - e.g., {"model": "text-embedding-ada-002", "input": "embed me", "metadata": {"row_id": 1}} + - as with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically) + - an example file is provided at examples/data/example_requests_to_parallel_process.jsonl + - the code to generate the example file is appended to the bottom of this script +- save_filepath : str, optional + - path to the file where the results will be saved + - file will be a jsonl file, where each line is an array with the original request plus the API response + - e.g., [{"model": "text-embedding-ada-002", "input": "embed me"}, {...}] + - if omitted, results will be saved to {requests_filename}_results.jsonl +- request_url : str, optional + - URL of the API endpoint to call + - if omitted, will default to "https://api.openai.com/v1/embeddings" +- api_key : str, optional + - API key to use + - if omitted, the script will attempt to read it from an environment variable {os.getenv("OPENAI_API_KEY")} +- max_requests_per_minute : float, optional + - target number of requests to make per minute (will make less if limited by tokens) + - leave headroom by setting this to 50% or 75% of your limit + - if requests are limiting you, try batching multiple embeddings or completions into one request + - if omitted, will default to 1,500 +- max_tokens_per_minute : float, optional + - target number of tokens to use per minute (will use less if limited by requests) + - leave 
headroom by setting this to 50% or 75% of your limit + - if omitted, will default to 125,000 +- token_encoding_name : str, optional + - name of the token encoding used, as defined in the `tiktoken` package + - if omitted, will default to "cl100k_base" (used by `text-embedding-ada-002`) +- max_attempts : int, optional + - number of times to retry a failed request before giving up + - if omitted, will default to 5 +- logging_level : int, optional + - level of logging to use; higher numbers will log fewer messages + - 40 = ERROR; will log only when requests fail after all retries + - 30 = WARNING; will log when requests his rate limits or other errors + - 20 = INFO; will log when requests start and the status at finish + - 10 = DEBUG; will log various things as the loop runs to see when they occur + - if omitted, will default to 20 (INFO). + +The script is structured as follows: + - Imports + - Define main() + - Initialize things + - In main loop: + - Get next request if one is not already waiting for capacity + - Update available token & request capacity + - If enough capacity available, call API + - The loop pauses if a rate limit error is hit + - The loop breaks when no tasks remain + - Define dataclasses + - StatusTracker (stores script metadata counters; only one instance is created) + - APIRequest (stores API inputs, outputs, metadata; one method to call API) + - Define functions + - api_endpoint_from_url (extracts API endpoint from request URL) + - append_to_jsonl (writes to results file) + - num_tokens_consumed_from_request (bigger function to infer token usage from request) + - task_id_generator_function (yields 1, 2, 3, ...) + - Run main() +""" + +# import argparse +# import subprocess +# import tempfile +# from langchain.llms import OpenAI +import asyncio +import json +import logging +import os +import re +import time +from dataclasses import ( # for storing API inputs, outputs, and metadata + dataclass, field) +from typing import Any, List + +import aiohttp # for making API calls concurrently +import tiktoken # for counting tokens +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.vectorstores import Qdrant +from qdrant_client import QdrantClient, models + + +class OpenAIAPIProcessor: + + def __init__(self, input_prompts_list, request_url, api_key, max_requests_per_minute, max_tokens_per_minute, token_encoding_name, + max_attempts, logging_level): + self.request_url = request_url + self.api_key = api_key + self.max_requests_per_minute = max_requests_per_minute + self.max_tokens_per_minute = max_tokens_per_minute + self.token_encoding_name = token_encoding_name + self.max_attempts = max_attempts + self.logging_level = logging_level + self.input_prompts_list: List[dict] = input_prompts_list + self.results = [] + self.cleaned_results: List[str] = [] + + async def process_api_requests_from_file(self): + """Processes API requests in parallel, throttling to stay under rate limits.""" + # constants + seconds_to_pause_after_rate_limit_error = 15 + seconds_to_sleep_each_loop = 0.001 # 1 ms limits max throughput to 1,000 requests per second + + # initialize logging + logging.basicConfig(level=self.logging_level) + logging.debug(f"Logging initialized at level {self.logging_level}") + + # infer API endpoint and construct request header + api_endpoint = api_endpoint_from_url(self.request_url) + request_header = {"Authorization": f"Bearer {self.api_key}"} + + # initialize trackers + queue_of_requests_to_retry = asyncio.Queue() + task_id_generator = 
task_id_generator_function() # generates integer IDs of 1, 2, 3, ... + status_tracker = StatusTracker() # single instance to track a collection of variables + next_request = None # variable to hold the next request to call + + # initialize available capacity counts + available_request_capacity = self.max_requests_per_minute + available_token_capacity = self.max_tokens_per_minute + last_update_time = time.time() + + # initialize flags + file_not_finished = True # after file is empty, we'll skip reading it + logging.debug(f"Initialization complete.") + + requests = self.input_prompts_list.__iter__() + + logging.debug(f"File opened. Entering main loop") + + task_list = [] + + while True: + # get next request (if one is not already waiting for capacity) + if next_request is None: + if not queue_of_requests_to_retry.empty(): + next_request = queue_of_requests_to_retry.get_nowait() + logging.debug(f"Retrying request {next_request.task_id}: {next_request}") + elif file_not_finished: + try: + # get new request + # request_json = json.loads(next(requests)) + request_json = next(requests) + + next_request = APIRequest(task_id=next(task_id_generator), + request_json=request_json, + token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint, + self.token_encoding_name), + attempts_left=self.max_attempts, + metadata=request_json.pop("metadata", None)) + status_tracker.num_tasks_started += 1 + status_tracker.num_tasks_in_progress += 1 + logging.debug(f"Reading request {next_request.task_id}: {next_request}") + except StopIteration: + # if file runs out, set flag to stop reading it + logging.debug("Read file exhausted") + file_not_finished = False + + # update available capacity + current_time = time.time() + seconds_since_update = current_time - last_update_time + available_request_capacity = min( + available_request_capacity + self.max_requests_per_minute * seconds_since_update / 60.0, + self.max_requests_per_minute, + ) + available_token_capacity = min( + available_token_capacity + self.max_tokens_per_minute * seconds_since_update / 60.0, + self.max_tokens_per_minute, + ) + last_update_time = current_time + + # if enough capacity available, call API + if next_request: + next_request_tokens = next_request.token_consumption + if (available_request_capacity >= 1 and available_token_capacity >= next_request_tokens): + # update counters + available_request_capacity -= 1 + available_token_capacity -= next_request_tokens + next_request.attempts_left -= 1 + + # call API + # TODO: NOT SURE RESPONSE WILL WORK HERE + task = asyncio.create_task( + next_request.call_api( + request_url=self.request_url, + request_header=request_header, + retry_queue=queue_of_requests_to_retry, + status_tracker=status_tracker, + )) + task_list.append(task) + next_request = None # reset next_request to empty + + # print("status_tracker.num_tasks_in_progress", status_tracker.num_tasks_in_progress) + # one_task_result = task.result() + # print("one_task_result", one_task_result) + + # if all tasks are finished, break + if status_tracker.num_tasks_in_progress == 0: + break + + # main loop sleeps briefly so concurrent tasks can run + await asyncio.sleep(seconds_to_sleep_each_loop) + + # if a rate limit error was hit recently, pause to cool down + seconds_since_rate_limit_error = (time.time() - status_tracker.time_of_last_rate_limit_error) + if seconds_since_rate_limit_error < seconds_to_pause_after_rate_limit_error: + remaining_seconds_to_pause = (seconds_to_pause_after_rate_limit_error - 
seconds_since_rate_limit_error) + await asyncio.sleep(remaining_seconds_to_pause) + # ^e.g., if pause is 15 seconds and final limit was hit 5 seconds ago + logging.warn( + f"Pausing to cool down until {time.ctime(status_tracker.time_of_last_rate_limit_error + seconds_to_pause_after_rate_limit_error)}" + ) + + # after finishing, log final status + logging.info(f"""Parallel processing complete. About to return.""") + if status_tracker.num_tasks_failed > 0: + logging.warning(f"{status_tracker.num_tasks_failed} / {status_tracker.num_tasks_started} requests failed.") + if status_tracker.num_rate_limit_errors > 0: + logging.warning(f"{status_tracker.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate.") + + # asyncio wait for task_list + await asyncio.wait(task_list) + + for task in task_list: + openai_completion = task.result() + self.results.append(openai_completion) + + self.cleaned_results: List[str] = extract_context_from_results(self.results) + + +def extract_context_from_results(results: List[Any]) -> List[str]: + assistant_contents = [] + total_prompt_tokens = 0 + total_completion_tokens = 0 + + for element in results: + if element is not None: + for item in element: + if 'choices' in item: + for choice in item['choices']: + if choice['message']['role'] == 'assistant': + assistant_contents.append(choice['message']['content']) + total_prompt_tokens += item['usage']['prompt_tokens'] + total_completion_tokens += item['usage']['completion_tokens'] + # Note: I don't think the prompt_tokens or completion_tokens is working quite right... + + return assistant_contents + + +# dataclasses + + +@dataclass +class StatusTracker: + """Stores metadata about the script's progress. Only one instance is created.""" + + num_tasks_started: int = 0 + num_tasks_in_progress: int = 0 # script ends when this reaches 0 + num_tasks_succeeded: int = 0 + num_tasks_failed: int = 0 + num_rate_limit_errors: int = 0 + num_api_errors: int = 0 # excluding rate limit errors, counted above + num_other_errors: int = 0 + time_of_last_rate_limit_error: float = 0 # used to cool off after hitting rate limits + + +@dataclass +class APIRequest: + """Stores an API request's inputs, outputs, and other metadata. 
Contains a method to make an API call.""" + + task_id: int + request_json: dict + token_consumption: int + attempts_left: int + metadata: dict + result: list = field(default_factory=list) + + async def call_api( + self, + request_url: str, + request_header: dict, + retry_queue: asyncio.Queue, + status_tracker: StatusTracker, + ): + """Calls the OpenAI API and saves results.""" + # logging.info(f"Starting request #{self.task_id}") + error = None + try: + async with aiohttp.ClientSession() as session: + async with session.post(url=request_url, headers=request_header, json=self.request_json) as response: + response = await response.json() + if "error" in response: + logging.warning(f"Request {self.task_id} failed with error {response['error']}") + status_tracker.num_api_errors += 1 + error = response + if "Rate limit" in response["error"].get("message", ""): + status_tracker.time_of_last_rate_limit_error = time.time() + status_tracker.num_rate_limit_errors += 1 + status_tracker.num_api_errors -= 1 # rate limit errors are counted separately + + except Exception as e: # catching naked exceptions is bad practice, but in this case we'll log & save them + logging.warning(f"Request {self.task_id} failed with Exception {e}") + status_tracker.num_other_errors += 1 + error = e + if error: + self.result.append(error) + if self.attempts_left: + retry_queue.put_nowait(self) + else: + logging.error(f"Request {self.request_json} failed after all attempts. Saving errors: {self.result}") + data = ([self.request_json, [str(e) for e in self.result], self.metadata] + if self.metadata else [self.request_json, [str(e) for e in self.result]]) + #append_to_jsonl(data, save_filepath) + status_tracker.num_tasks_in_progress -= 1 + status_tracker.num_tasks_failed += 1 + return data + else: + data = ([self.request_json, response, self.metadata] if self.metadata else [self.request_json, response]) # type: ignore + #append_to_jsonl(data, save_filepath) + status_tracker.num_tasks_in_progress -= 1 + status_tracker.num_tasks_succeeded += 1 + # logging.debug(f"Request {self.task_id} saved to {save_filepath}") + + return data + + +# functions + + +def api_endpoint_from_url(request_url: str): + """Extract the API endpoint from the request URL.""" + match = re.search('^https://[^/]+/v\\d+/(.+)$', request_url) + return match[1] # type: ignore + + +def append_to_jsonl(data, filename: str) -> None: + """Append a json payload to the end of a jsonl file.""" + json_string = json.dumps(data) + with open(filename, "a") as f: + f.write(json_string + "\n") + + +def num_tokens_consumed_from_request( + request_json: dict, + api_endpoint: str, + token_encoding_name: str, +): + """Count the number of tokens in the request. 
Only supports completion and embedding requests.""" + encoding = tiktoken.get_encoding(token_encoding_name) + # if completions request, tokens = prompt + n * max_tokens + if api_endpoint.endswith("completions"): + max_tokens = request_json.get("max_tokens", 15) + n = request_json.get("n", 1) + completion_tokens = n * max_tokens + + # chat completions + if api_endpoint.startswith("chat/"): + num_tokens = 0 + for message in request_json["messages"]: + num_tokens += 4 # every message follows {role/name}\n{content}\n + for key, value in message.items(): + num_tokens += len(encoding.encode(value)) + if key == "name": # if there's a name, the role is omitted + num_tokens -= 1 # role is always required and always 1 token + num_tokens += 2 # every reply is primed with assistant + return num_tokens + completion_tokens + # normal completions + else: + prompt = request_json["prompt"] + if isinstance(prompt, str): # single prompt + prompt_tokens = len(encoding.encode(prompt)) + num_tokens = prompt_tokens + completion_tokens + return num_tokens + elif isinstance(prompt, list): # multiple prompts + prompt_tokens = sum([len(encoding.encode(p)) for p in prompt]) + num_tokens = prompt_tokens + completion_tokens * len(prompt) + return num_tokens + else: + raise TypeError('Expecting either string or list of strings for "prompt" field in completion request') + # if embeddings request, tokens = input tokens + elif api_endpoint == "embeddings": + input = request_json["input"] + if isinstance(input, str): # single input + num_tokens = len(encoding.encode(input)) + return num_tokens + elif isinstance(input, list): # multiple inputs + num_tokens = sum([len(encoding.encode(i)) for i in input]) + return num_tokens + else: + raise TypeError('Expecting either string or list of strings for "inputs" field in embedding request') + # more logic needed to support other API calls (e.g., edits, inserts, DALL-E) + else: + raise NotImplementedError(f'API endpoint "{api_endpoint}" not implemented in this script') + + +def task_id_generator_function(): + """Generate integers 0, 1, 2, and so on.""" + task_id = 0 + while True: + yield task_id + task_id += 1 + +if __name__ == '__main__': + pass + +# run script +# if __name__ == "__main__": +# qdrant_client = QdrantClient( +# url=os.getenv('QDRANT_URL'), +# api_key=os.getenv('QDRANT_API_KEY'), +# ) +# vectorstore = Qdrant( +# client=qdrant_client, +# collection_name=os.getenv('QDRANT_COLLECTION_NAME'), # type: ignore +# embeddings=OpenAIEmbeddings()) # type: ignore + +# user_question = "What is the significance of Six Sigma?" +# k = 4 +# fetch_k = 200 +# found_docs = vectorstore.max_marginal_relevance_search(user_question, k=k, fetch_k=200) + +# requests = [] +# for i, doc in enumerate(found_docs): +# dictionary = { +# "model": "gpt-3.5-turbo-0613", # 4k context +# "messages": [{ +# "role": "system", +# "content": "You are a factual summarizer of partial documents. Stick to the facts (including partial info when necessary to avoid making up potentially incorrect details), and say I don't know when necessary." +# }, { +# "role": +# "user", +# "content": +# f"What is a comprehensive summary of the given text, based on the question:\n{doc.page_content}\nQuestion: {user_question}\nThe summary should cover all the key points only relevant to the question, while also condensing the information into a concise and easy-to-understand format. 
Please ensure that the summary includes relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. Feel free to include references, sentence fragments, keywords, or anything that could help someone learn about it, only as it relates to the given question. The length of the summary should be as short as possible, without losing relevant information.\n" +# }], +# "n": 1, +# "max_tokens": 500, +# "metadata": doc.metadata +# } +# requests.append(dictionary) + +# oai = OpenAIAPIProcessor( +# input_prompts_list=requests, +# request_url='https://api.openai.com/v1/chat/completions', +# api_key=os.getenv("OPENAI_API_KEY"), +# max_requests_per_minute=1500, +# max_tokens_per_minute=90000, +# token_encoding_name='cl100k_base', +# max_attempts=5, +# logging_level=20, +# ) +# # run script +# asyncio.run(oai.process_api_requests_from_file()) + +# assistant_contents = [] +# total_prompt_tokens = 0 +# total_completion_tokens = 0 + +# print("Results, end of main: ", oai.results) +# print("-"*50) + +# # jsonObject = json.loads(oai.results) +# for element in oai.results: +# for item in element: +# if 'choices' in item: +# for choice in item['choices']: +# if choice['message']['role'] == 'assistant': +# assistant_contents.append(choice['message']['content']) +# total_prompt_tokens += item['usage']['prompt_tokens'] +# total_completion_tokens += item['usage']['completion_tokens'] + +# print("Assistant Contents:", assistant_contents) +# print("Total Prompt Tokens:", total_prompt_tokens) +# print("Total Completion Tokens:", total_completion_tokens) +# turbo_total_cost = (total_prompt_tokens * 0.0015) + (total_completion_tokens * 0.002) +# print("Total cost (3.5-turbo):", (total_prompt_tokens * 0.0015), " + Completions: ", (total_completion_tokens * 0.002), " = ", turbo_total_cost) + +# gpt4_total_cost = (total_prompt_tokens * 0.03) + (total_completion_tokens * 0.06) +# print("Hypothetical cost for GPT-4:", (total_prompt_tokens * 0.03), " + Completions: ", (total_completion_tokens * 0.06), " = ", gpt4_total_cost) +# print("GPT-4 cost premium: ", (gpt4_total_cost / turbo_total_cost), "x") + ''' + Pricing: + GPT4: + * $0.03 prompt + * $0.06 completions + 3.5-turbo: + * $0.0015 prompt + * $0.002 completions + ''' +""" +APPENDIX + +The example requests file at openai-cookbook/examples/data/example_requests_to_parallel_process.jsonl contains 10,000 requests to text-embedding-ada-002. + +It was generated with the following code: + +```python +import json + +filename = "data/example_requests_to_parallel_process.jsonl" +n_requests = 10_000 +jobs = [{"model": "text-embedding-ada-002", "input": str(x) + "\n"} for x in range(n_requests)] +with open(filename, "w") as f: + for job in jobs: + json_string = json.dumps(job) + f.write(json_string + "\n") +``` + +As with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically). 
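+
+A minimal sketch (assuming the same file layout) of reading such a jsonl file back in, e.g. to spot-check the generated requests; json.loads reverses the escaping that json.dumps applied:
+
+```python
+import json
+
+filename = "data/example_requests_to_parallel_process.jsonl"
+
+# each line of a jsonl file is one standalone JSON object
+with open(filename) as f:
+    jobs = [json.loads(line) for line in f]
+
+print(f"loaded {len(jobs)} requests; first input: {jobs[0]['input']!r}")
+```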
+""" diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 4c9f3677..8e5d179f 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -87,6 +87,10 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: user_queries.append(first_message) for message in messages: + if message['role'] == 'user': + emoji = "🙋" + else: + emoji = "🤖" conversation_string += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb index 23924157..aeecd4bd 100644 --- a/ai_ta_backend/nomic_map_creation.ipynb +++ b/ai_ta_backend/nomic_map_creation.ipynb @@ -2,18 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 11:57:29,274:INFO - Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n", - "2023-09-07 11:57:29,274:INFO - NumExpr defaulting to 8 threads.\n" - ] - } - ], + "outputs": [], "source": [ "# import required libraries\n", "\n", @@ -29,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -38,7 +29,7 @@ "True" ] }, - "execution_count": 2, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -52,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -90,324 +81,85 @@ " \n", " \n", " \n", - " id\n", - " created_at\n", - " convo\n", - " convo_id\n", " course_name\n", - " user_email\n", " \n", " \n", " \n", " \n", " 0\n", - " 5200\n", - " 2023-09-07T17:03:47.705812+00:00\n", - " {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...\n", - " abd2e044-fbff-455e-8c60-755cc7635182\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " gpt4\n", " \n", " \n", " 1\n", - " 5201\n", - " 2023-09-07T17:05:25.863567+00:00\n", - " {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...\n", - " 3e5d4861-b128-4c64-96ac-87c74f3217e5\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " gpt4\n", " \n", " \n", " 2\n", - " 5216\n", - " 2023-09-07T17:18:32.197379+00:00\n", - " {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...\n", - " 43ee631a-cb58-43f5-b2af-a5b91b7585cd\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " gpt4\n", " \n", " \n", " 3\n", - " 5212\n", - " 2023-09-07T17:16:34.267931+00:00\n", - " {'id': '0129ea46-207f-47e3-be90-da143857000f',...\n", - " 0129ea46-207f-47e3-be90-da143857000f\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " gpt4\n", " \n", " \n", " 4\n", - " 5217\n", - " 2023-09-07T17:19:00.681823+00:00\n", - " {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...\n", - " c6b4e4d8-4de7-4387-b4e9-411084dffea6\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 5\n", - " 5223\n", - " 2023-09-07T17:22:38.970643+00:00\n", - " {'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',...\n", - " b5500763-7e7b-4b23-9031-cc320a51ccbf\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 6\n", - " 5227\n", - " 2023-09-07T17:24:10.362647+00:00\n", - " {'id': 'd410955f-4398-4869-b395-e6b659cc2d06',...\n", - " d410955f-4398-4869-b395-e6b659cc2d06\n", - " 
cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 7\n", - " 5209\n", - " 2023-09-07T17:14:43.518617+00:00\n", - " {'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',...\n", - " 0ecd2c05-772a-42aa-b29a-0a892bd0e9ab\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 8\n", - " 5222\n", - " 2023-09-07T17:21:29.223343+00:00\n", - " {'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',...\n", - " c82056a0-2d67-4ce8-82e3-86a30f1f6dc0\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 9\n", - " 5224\n", - " 2023-09-07T17:22:54.856839+00:00\n", - " {'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',...\n", - " 2316bbd7-61f3-44aa-a79e-bb42bd688c47\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 10\n", - " 5226\n", - " 2023-09-07T17:23:27.644745+00:00\n", - " {'id': '66abfe85-bb04-456e-8709-89f9aafe5508',...\n", - " 66abfe85-bb04-456e-8709-89f9aafe5508\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 11\n", - " 5228\n", - " 2023-09-07T17:24:41.32465+00:00\n", - " {'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',...\n", - " 175ad6b2-3bf2-4889-b2de-a18961ee8ecb\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 12\n", - " 5232\n", - " 2023-09-07T17:30:05.770146+00:00\n", - " {'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',...\n", - " f9859e36-bf76-40ab-9413-91ef6663dbd6\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 13\n", - " 5233\n", - " 2023-09-07T17:30:52.749867+00:00\n", - " {'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',...\n", - " bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 14\n", - " 5234\n", - " 2023-09-07T17:31:19.801611+00:00\n", - " {'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',...\n", - " ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd\n", - " cropwizard\n", - " avd6@illinois.edu\n", - " \n", - " \n", - " 15\n", - " 5237\n", - " 2023-09-07T17:36:14.68431+00:00\n", - " {'id': 'edead825-12df-417c-af40-059e83067c69',...\n", - " edead825-12df-417c-af40-059e83067c69\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " gpt4\n", " \n", " \n", - " 16\n", - " 5238\n", - " 2023-09-07T17:36:42.984907+00:00\n", - " {'id': 'bc44d229-327a-452d-a386-8868216a1bd2',...\n", - " bc44d229-327a-452d-a386-8868216a1bd2\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " ...\n", + " ...\n", " \n", " \n", - " 17\n", - " 5241\n", - " 2023-09-07T17:37:22.134543+00:00\n", - " {'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',...\n", - " ff7a1c27-e126-49db-be79-6deaefcffec3\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " 1789\n", + " FIN574-GT\n", " \n", " \n", - " 18\n", - " 5304\n", - " 2023-09-07T19:45:21.73541+00:00\n", - " {'id': '6226b153-356a-408c-9483-49ef5808538c',...\n", - " 6226b153-356a-408c-9483-49ef5808538c\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " 1790\n", + " NCSA\n", " \n", " \n", - " 19\n", - " 5305\n", - " 2023-09-07T19:46:03.626639+00:00\n", - " {'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',...\n", - " e9edae6b-b7e1-46a8-b5e8-6215890a2a01\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " 1791\n", + " gpt4\n", " \n", " \n", - " 20\n", - " 5306\n", - " 2023-09-07T19:46:36.076704+00:00\n", - " {'id': 'b2116035-da7b-4136-878d-66a10098a756',...\n", - " b2116035-da7b-4136-878d-66a10098a756\n", - " cropwizard\n", - " avd6@illinois.edu\n", + " 1792\n", + " NCSA\n", " \n", " \n", - " 21\n", - " 5195\n", - " 2023-09-06T23:43:38.201481+00:00\n", - " {'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',...\n", - " 
543ee10e-faf0-47a8-bb1c-c040aec44ed1\n", - " cropwizard\n", - " dabholkar.asmita@gmail.com\n", + " 1793\n", + " NCSA\n", " \n", " \n", "\n", + "

1794 rows × 1 columns

\n", "" ], "text/plain": [ - " id created_at \\\n", - "0 5200 2023-09-07T17:03:47.705812+00:00 \n", - "1 5201 2023-09-07T17:05:25.863567+00:00 \n", - "2 5216 2023-09-07T17:18:32.197379+00:00 \n", - "3 5212 2023-09-07T17:16:34.267931+00:00 \n", - "4 5217 2023-09-07T17:19:00.681823+00:00 \n", - "5 5223 2023-09-07T17:22:38.970643+00:00 \n", - "6 5227 2023-09-07T17:24:10.362647+00:00 \n", - "7 5209 2023-09-07T17:14:43.518617+00:00 \n", - "8 5222 2023-09-07T17:21:29.223343+00:00 \n", - "9 5224 2023-09-07T17:22:54.856839+00:00 \n", - "10 5226 2023-09-07T17:23:27.644745+00:00 \n", - "11 5228 2023-09-07T17:24:41.32465+00:00 \n", - "12 5232 2023-09-07T17:30:05.770146+00:00 \n", - "13 5233 2023-09-07T17:30:52.749867+00:00 \n", - "14 5234 2023-09-07T17:31:19.801611+00:00 \n", - "15 5237 2023-09-07T17:36:14.68431+00:00 \n", - "16 5238 2023-09-07T17:36:42.984907+00:00 \n", - "17 5241 2023-09-07T17:37:22.134543+00:00 \n", - "18 5304 2023-09-07T19:45:21.73541+00:00 \n", - "19 5305 2023-09-07T19:46:03.626639+00:00 \n", - "20 5306 2023-09-07T19:46:36.076704+00:00 \n", - "21 5195 2023-09-06T23:43:38.201481+00:00 \n", - "\n", - " convo \\\n", - "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',... \n", - "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',... \n", - "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',... \n", - "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',... \n", - "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',... \n", - "5 {'id': 'b5500763-7e7b-4b23-9031-cc320a51ccbf',... \n", - "6 {'id': 'd410955f-4398-4869-b395-e6b659cc2d06',... \n", - "7 {'id': '0ecd2c05-772a-42aa-b29a-0a892bd0e9ab',... \n", - "8 {'id': 'c82056a0-2d67-4ce8-82e3-86a30f1f6dc0',... \n", - "9 {'id': '2316bbd7-61f3-44aa-a79e-bb42bd688c47',... \n", - "10 {'id': '66abfe85-bb04-456e-8709-89f9aafe5508',... \n", - "11 {'id': '175ad6b2-3bf2-4889-b2de-a18961ee8ecb',... \n", - "12 {'id': 'f9859e36-bf76-40ab-9413-91ef6663dbd6',... \n", - "13 {'id': 'bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2',... \n", - "14 {'id': 'ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd',... \n", - "15 {'id': 'edead825-12df-417c-af40-059e83067c69',... \n", - "16 {'id': 'bc44d229-327a-452d-a386-8868216a1bd2',... \n", - "17 {'id': 'ff7a1c27-e126-49db-be79-6deaefcffec3',... \n", - "18 {'id': '6226b153-356a-408c-9483-49ef5808538c',... \n", - "19 {'id': 'e9edae6b-b7e1-46a8-b5e8-6215890a2a01',... \n", - "20 {'id': 'b2116035-da7b-4136-878d-66a10098a756',... \n", - "21 {'id': '543ee10e-faf0-47a8-bb1c-c040aec44ed1',... \n", + " course_name\n", + "0 gpt4\n", + "1 gpt4\n", + "2 gpt4\n", + "3 gpt4\n", + "4 gpt4\n", + "... 
...\n", + "1789 FIN574-GT\n", + "1790 NCSA\n", + "1791 gpt4\n", + "1792 NCSA\n", + "1793 NCSA\n", "\n", - " convo_id course_name \\\n", - "0 abd2e044-fbff-455e-8c60-755cc7635182 cropwizard \n", - "1 3e5d4861-b128-4c64-96ac-87c74f3217e5 cropwizard \n", - "2 43ee631a-cb58-43f5-b2af-a5b91b7585cd cropwizard \n", - "3 0129ea46-207f-47e3-be90-da143857000f cropwizard \n", - "4 c6b4e4d8-4de7-4387-b4e9-411084dffea6 cropwizard \n", - "5 b5500763-7e7b-4b23-9031-cc320a51ccbf cropwizard \n", - "6 d410955f-4398-4869-b395-e6b659cc2d06 cropwizard \n", - "7 0ecd2c05-772a-42aa-b29a-0a892bd0e9ab cropwizard \n", - "8 c82056a0-2d67-4ce8-82e3-86a30f1f6dc0 cropwizard \n", - "9 2316bbd7-61f3-44aa-a79e-bb42bd688c47 cropwizard \n", - "10 66abfe85-bb04-456e-8709-89f9aafe5508 cropwizard \n", - "11 175ad6b2-3bf2-4889-b2de-a18961ee8ecb cropwizard \n", - "12 f9859e36-bf76-40ab-9413-91ef6663dbd6 cropwizard \n", - "13 bab32d0b-8e2b-4eaa-a46e-069be0d0c3a2 cropwizard \n", - "14 ecaf3228-78f3-49f7-b46d-3a5c3d5b62fd cropwizard \n", - "15 edead825-12df-417c-af40-059e83067c69 cropwizard \n", - "16 bc44d229-327a-452d-a386-8868216a1bd2 cropwizard \n", - "17 ff7a1c27-e126-49db-be79-6deaefcffec3 cropwizard \n", - "18 6226b153-356a-408c-9483-49ef5808538c cropwizard \n", - "19 e9edae6b-b7e1-46a8-b5e8-6215890a2a01 cropwizard \n", - "20 b2116035-da7b-4136-878d-66a10098a756 cropwizard \n", - "21 543ee10e-faf0-47a8-bb1c-c040aec44ed1 cropwizard \n", - "\n", - " user_email \n", - "0 avd6@illinois.edu \n", - "1 avd6@illinois.edu \n", - "2 avd6@illinois.edu \n", - "3 avd6@illinois.edu \n", - "4 avd6@illinois.edu \n", - "5 avd6@illinois.edu \n", - "6 avd6@illinois.edu \n", - "7 avd6@illinois.edu \n", - "8 avd6@illinois.edu \n", - "9 avd6@illinois.edu \n", - "10 avd6@illinois.edu \n", - "11 avd6@illinois.edu \n", - "12 avd6@illinois.edu \n", - "13 avd6@illinois.edu \n", - "14 avd6@illinois.edu \n", - "15 avd6@illinois.edu \n", - "16 avd6@illinois.edu \n", - "17 avd6@illinois.edu \n", - "18 avd6@illinois.edu \n", - "19 avd6@illinois.edu \n", - "20 avd6@illinois.edu \n", - "21 dabholkar.asmita@gmail.com " + "[1794 rows x 1 columns]" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# query data for one course for testing\n", - "course = 'cropwizard'\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq(\"course_name\", course).execute()\n", + "course = 'ece120'\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"course_name\", count='exact').execute()\n", "data = response.data\n", "df = pd.DataFrame(data)\n", "df" @@ -415,45 +167,11 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 {'id': 'abd2e044-fbff-455e-8c60-755cc7635182',...\n", - "1 {'id': '3e5d4861-b128-4c64-96ac-87c74f3217e5',...\n", - "2 {'id': '43ee631a-cb58-43f5-b2af-a5b91b7585cd',...\n", - "3 {'id': '0129ea46-207f-47e3-be90-da143857000f',...\n", - "4 {'id': 'c6b4e4d8-4de7-4387-b4e9-411084dffea6',...\n", - "Name: convo, dtype: object" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "course_df = df[df['course_name'] == course]['convo']\n", - "course_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'role': 'assistant', 'content': \"The U.S. 
It addresses the public need of new immigrant families to access information and expertise and develop sustainable strategies when faced with the bewildering array of often unhealthy food options in the USA', 'url': 'https://extension.illinois.edu/global/extension-funded-research-projects'}]}\n" - ] - } - ], + "outputs": [], "source": [ - "print(course_df[0]['messages'][1])" + "course_list = df['course_name'].unique()\n" ] }, { @@ -461,9 +179,7 @@ "execution_count": 32, "metadata": {}, "outputs": [], - "source": [ - "# user email is in DF, outside of convo" - ] + "source": [] }, { "cell_type": "markdown", @@ -481,36 +197,538 @@ "name": "stdout", "output_type": "stream", "text": [ - "22\n", - "22\n" + "gpt4\n", + "623\n", + "badm_550_ashley\n", + "17\n", + "None\n", + "0\n", + "ece120\n", + "154\n", + "test-video-ingest\n", + "13\n", + "badm-567-v3\n", + "15\n", + "badm-567\n", + "3\n", + "new-weather\n", + "65\n", + "gies-online-mba-v2\n", + "course_name: gies-online-mba-v2\n", + "error: The read operation timed out\n", + "frontend\n", + "8\n", + "test-video-ingest-28\n", + "2\n", + "ECE220FA23\n", + "74\n", + "ECE408FA23\n", + "259\n", + "pdeploy999\n", + "2\n", + "badm-350-summer\n", + "5\n", + "previewtesting1\n", + "1\n", + "localtest2\n", + "2\n", + "your-favorite-url\n", + "1\n", + "mantine\n", + "6\n", + "ece408\n", + "27\n", + "27\n", + "27\n", + "(27, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:42:49.002 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for ece408` in organization `dabholkar.asmita`\n", + "2023-09-12 15:42:50.721 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 15:42:50.721 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 20 null values for field user_email with string 'null'. This behavior will change in a future version.\n", + "2023-09-12 15:42:50.721 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:02, 2.16s/it]\n", + "2023-09-12 15:42:52.900 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 15:42:52.908 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 15:42:56.033 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for ece408` in project `Conversation Map for ece408`: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b\n", + "2023-09-12 15:42:56.046 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for ece408: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for ece408: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:43:00.951 | INFO | nomic.project:create_index:1132 - Created map `ece408_convo_index` in project `Conversation Map for ece408`: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/49bd2ab9-db8a-45ab-b399-5039c7b7e736\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test-video-ingest-2\n", + "2\n", + "Snowmass\n", + "19\n", + "badm_567_v2\n", + "11\n", + "erpnext\n", + "1\n", + "mip\n", + "1\n", + "farmdoc_test_kastan-v1\n", + "9\n", + "personalstatement\n", + "2\n", + "test-canvas\n", + "4\n", + "hrc\n", + "3\n", + "csv\n", + "4\n", + "star_nox\n", + "3\n", + "badm_567\n", + "3\n", + "SPaRCEd\n", + "2\n", + "NPRE247\n", + "13\n", + "localdemo8\n", + "2\n", + "badm_567_thumbnails\n", + "2\n", + "your-awesome-course\n", + "course_name: your-awesome-course\n", + "error: The read operation timed out\n", + "chatbot\n", + "3\n", + "erp\n", + "2\n", + "extreme\n", + "3\n", + "rohan_atree\n", + "4\n", + "zotero-extreme\n", + "9\n", + "pract\n", + "18\n", + "test-video-ingest-20\n", + "3\n", + "gies-online-mba2\n", + "2\n", + "gies-online-mba\n", + "3\n", + "ece120FL22\n", + "15\n", + "careerassistant\n", + "7\n", + "weather\n", + "4\n", + "lillian-wang-blog\n", + "2\n", + "local-test5\n", + "4\n", + "demo-for-vyriad\n", + "6\n", + "ag-gpt-beta\n", + "5\n", + "rohan_atree_demo\n", + "2\n", + "cropwizard\n", + "25\n", + "25\n", + "25\n", + "(25, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:43:49.933 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", + "2023-09-12 15:43:50.980 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 15:43:50.980 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:03, 3.94s/it]\n", + "2023-09-12 15:43:54.938 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 15:43:54.953 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 15:43:58.534 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c\n", + "2023-09-12 15:43:58.534 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:44:00.949 | INFO | nomic.project:create_index:1132 - Created map `cropwizard_convo_index` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/5eb008c1-5a10-4f20-ab7d-c42a238e1595\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rtest\n", + "1\n", + "previewdeploy\n", + "3\n", + "r2test\n", + "1\n", + "Law794-TransactionalDraftingAlam\n", + "4\n", + "personal-statement\n", + "2\n", + "rohan_excel\n", + "1\n", + "langchain-python\n", + "1\n", + "langchain\n", + "4\n", + "ncsa-live-demo\n", + "1\n", + "rohan_atree_individual\n", + "2\n", + "meta11-test\n", + "14\n", + "ceesd-mirgecom\n", + "2\n", + "NCSADelta\n", + "10\n", + "HealthyLivingGuide\n", + "3\n", + "rohan\n", + "2\n", + "NCSA\n", + "40\n", + "40\n", + "40\n", + "(40, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:44:35.409 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for NCSA` in organization `dabholkar.asmita`\n", + "2023-09-12 15:44:36.768 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 15:44:36.778 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 10 null values for field user_email with string 'null'. This behavior will change in a future version.\n", + "2023-09-12 15:44:36.783 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:01, 1.99s/it]\n", + "2023-09-12 15:44:38.783 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 15:44:38.784 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 15:44:40.137 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for NCSA` in project `Conversation Map for NCSA`: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447\n", + "2023-09-12 15:44:40.146 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for NCSA: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for NCSA: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:44:45.686 | INFO | nomic.project:create_index:1132 - Created map `NCSA_convo_index` in project `Conversation Map for NCSA`: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/331ba551-f6b4-4c79-a31c-4cc5390bfac7\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "FIN574-GT\n", + "24\n", + "24\n", + "24\n", + "(24, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:45:00.655 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for FIN574-GT` in organization `dabholkar.asmita`\n", + "2023-09-12 15:45:04.369 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 15:45:04.385 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:06, 6.08s/it]\n", + "2023-09-12 15:45:10.475 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 15:45:10.475 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 15:45:13.721 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for FIN574-GT` in project `Conversation Map for FIN574-GT`: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3\n", + "2023-09-12 15:45:13.723 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for FIN574-GT: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for FIN574-GT: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:45:19.164 | INFO | nomic.project:create_index:1132 - Created map `FIN574-GT_convo_index` in project `Conversation Map for FIN574-GT`: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/092d7d2c-b792-4304-ae04-d8f09ffbba5d\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "babreu\n", + "1\n", + "test-video-ingest-31\n", + "1\n", + "p\n", + "1\n", + "new_test_course\n", + "1\n", + "cropwizard-beta\n", + "21\n", + "21\n", + "21\n", + "(21, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:45:30.549 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", + "2023-09-12 15:45:32.134 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 15:45:32.150 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. This behavior will change in a future version.\n", + "2023-09-12 15:45:32.150 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.08it/s]\n", + "2023-09-12 15:45:33.088 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 15:45:33.092 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 15:45:34.335 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd\n", + "2023-09-12 15:45:34.335 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 15:45:37.865 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/20a567c6-056b-49b3-a421-f0e49f348cda\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "maxlindsey\n", + "1\n", + "Gies-graduate-hub\n", + "4\n", + "test-video-ingest-17\n", + "2\n", + "summary\n", + "10\n", + "test-video-ingest-3\n", + "8\n", + "test-video-ingest-27\n", + "2\n", + "lillian-wang-blog-2\n", + "1\n", + "python-magic\n", + "1\n", + "ansible2\n", + "1\n", + "ece408fa23\n", + "7\n", + "farmdoc_test_josh_v2\n", + "1\n", + "local-test3\n", + "1\n", + "automata\n", + "1\n", + "SpaceFlorida-GT\n", + "5\n", + "GBSI-GT\n", + "4\n", + "test-video-ingest-21\n", + "8\n", + "newnew_ncsa\n", + "1\n", + "canvas\n", + "1\n", + "gbsi-gt\n", + "3\n", + "meditation-tutorial\n", + "1\n", + "profit\n", + "1\n", + "ansible\n", + "8\n", + "langchain-docs\n", + "9\n", + "testing_url_metadata_josh\n", + "1\n", + "test-india-biodiversity\n", + "1\n", + "vyriad\n", + "10\n", + "irc-workplans\n", + "1\n", + "kastanasdfasdfasdf\n", + "1\n", + "testing_refactor\n", + "2\n", + "BADM-567-GT\n", + "3\n", + "mdt\n", + "1\n", + "vercel\n", + "1\n", + "gies-graduate-hub\n", + "course_name: gies-graduate-hub\n", + "error: The read operation timed out\n", + "test-video-ingest-12\n", + "3\n", + "NuclGPT-v1\n", + "2\n", + "test-video-ingest-13\n", + "1\n", + "test_new_supa_scrape\n", + "1\n", + "doe-ascr-2023\n", + "1\n", + "arize\n", + "2\n", + "final-meta-test\n", + "1\n", + "preview-meta-test\n", + "1\n", + "gies-online-mba-V3\n", + "2\n", + "FoF-Drawdown-from-INVPEIV-5-24-23\n", + "1\n", + "test-video-ingest-30\n", + "1\n", + "test\n", + "1\n", + "NCSA-v2\n", + "3\n", + "conversational\n", + "1\n", + "clowder-docs\n", + "5\n", + "DA\n", + "1\n", + "test-video-ingest-25\n", + "1\n", + "test-ingest-10\n", + "1\n", + "eric-test-course\n", + "1\n", + "farmdoc-test\n", + "1\n", + "test-video-ingest-22\n", + "2\n", + "Academic-paper\n", + "1\n", + "starwars\n", + "1\n", + "AdamDemo\n", + "1\n", + "OpenCI-ACCESS\n", + "1\n", + "clockkit-github\n", + "1\n" ] } ], "source": [ - "user_queries = []\n", - "metadata = []\n", - "i = 1\n", + "embeddings_model = OpenAIEmbeddings()\n", "\n", - "# log conversation instead of individual messages\n", - "for index, row in df.iterrows():\n", - " user_email = row['user_email']\n", - " convo = row['convo']\n", - " 
messages = convo['messages']\n", - " first_message = messages[0]['content']\n", - " user_queries.append(first_message)\n", - " # create metadata for multi-turn conversation\n", - " conversation = \"\"\n", - " for message in messages:\n", - " # string of role: content, role: content, ...\n", - " conversation += \"\\n>>> \" + message['role'] + \": \" + message['content'] + \"\\n\"\n", - " # add to metadata\n", - " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message}\n", - " metadata.append(metadata_row)\n", - " i += 1\n", + "for course in course_list:\n", + " print(course)\n", + " try:\n", + " response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", + " data = response.data\n", + " course_df = pd.DataFrame(data)\n", + " print(len(course_df))\n", + "\n", + " if len(course_df) < 20 or course in ['gpt4', 'badm_550_ashley', 'ece120', 'new-weather', 'ECE220FA23', 'ECE408FA23']:\n", + " continue\n", + " else:\n", + " \n", + " user_queries = []\n", + " metadata = []\n", + " i = 1\n", "\n", - "print(len(user_queries))\n", - "print(len(metadata))" + " # log conversation instead of individual messages\n", + " for index, row in course_df.iterrows():\n", + " user_email = row['user_email']\n", + " convo = row['convo']\n", + " messages = convo['messages']\n", + " first_message = messages[0]['content']\n", + " user_queries.append(first_message)\n", + " # create metadata for multi-turn conversation\n", + " conversation = \"\"\n", + " for message in messages:\n", + " # string of role: content, role: content, ...\n", + " if message['role'] == 'user':\n", + " emoji = \"🙋\"\n", + " else:\n", + " emoji = \"🤖\"\n", + " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", + " # add to metadata\n", + " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message}\n", + " metadata.append(metadata_row)\n", + " i += 1\n", + "\n", + " print(len(user_queries))\n", + " print(len(metadata))\n", + "\n", + " metadata = pd.DataFrame(metadata)\n", + " embeddings = embeddings_model.embed_documents(user_queries)\n", + " embeddings = np.array(embeddings)\n", + " print(embeddings.shape)\n", + "\n", + " # create an Atlas project\n", + " project_name = \"Conversation Map for \" + course\n", + " index_name = course + \"_convo_index\"\n", + " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='first_query',\n", + " name=project_name,\n", + " colorable_fields=['conversation_id', 'first_query'])\n", + " print(project.maps)\n", + "\n", + " project.create_index(index_name, build_topic_model=True)\n", + " except Exception as e:\n", + " print(\"course_name:\", course)\n", + " print(\"error: \", e)\n", + " " ] }, { @@ -602,7 +820,7 @@ "source": [ "# create an Atlas project\n", "project_name = \"Conversation Map for \" + course\n", - "index_name = course + \"_convo_index_2\"\n", + "index_name = course + \"_convo_index\"\n", "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", " data=metadata,\n", " id_field='id',\n", @@ -615,6 +833,68 @@ "project.create_index(index_name, build_topic_model=True)" ] }, + { + "cell_type": "code", + "execution_count": 
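The Nomic upload warnings captured in the stderr logs above ("Replacing N null values for field user_email with string 'null'" and "id_field is not a string. Converting to string from int32") both trace back to the metadata rows built in this loop: `id` is an int and `user_email` can be None. Below is a minimal sketch of normalizing the rows before calling `atlas.map_embeddings` so neither warning fires; it assumes `metadata` is the list of dicts built above and is not part of the committed notebook.

# Sketch only (assumption: the two Nomic warnings are purely about int ids and null emails).
for m in metadata:
    m["id"] = str(m["id"])            # pass the id as a string so Nomic does not convert it
    if not m.get("user_email"):
        m["user_email"] = "unknown"   # avoid the null -> 'null' replacement on upload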
15, + "metadata": {}, + "outputs": [ + { + "ename": "ReadTimeout", + "evalue": "The read operation timed out", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1259\u001b[0m, in \u001b[0;36mSSLSocket.recv\u001b[1;34m(self, buflen, flags)\u001b[0m\n\u001b[0;32m 1256\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1257\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[0;32m 1258\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[1;32m-> 1259\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(buflen)\n\u001b[0;32m 1260\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1132\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[1;34m(self, len, buffer)\u001b[0m\n\u001b[0;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1132\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m)\n\u001b[0;32m 1133\u001b[0m \u001b[39mexcept\u001b[39;00m SSLError \u001b[39mas\u001b[39;00m x:\n", + "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:60\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 60\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE-786\u001b[39;00m\n", + "File 
\u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection.py:90\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 88\u001b[0m \u001b[39mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[1;32m---> 90\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connection\u001b[39m.\u001b[39;49mhandle_request(request)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:112\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_response_closed()\n\u001b[1;32m--> 112\u001b[0m \u001b[39mraise\u001b[39;00m exc\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:91\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 84\u001b[0m 
\u001b[39m\"\u001b[39m\u001b[39mhttp11.receive_response_headers\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 85\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 86\u001b[0m (\n\u001b[0;32m 87\u001b[0m http_version,\n\u001b[0;32m 88\u001b[0m status,\n\u001b[0;32m 89\u001b[0m reason_phrase,\n\u001b[0;32m 90\u001b[0m headers,\n\u001b[1;32m---> 91\u001b[0m ) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_receive_response_headers(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 92\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m (\n\u001b[0;32m 93\u001b[0m http_version,\n\u001b[0;32m 94\u001b[0m status,\n\u001b[0;32m 95\u001b[0m reason_phrase,\n\u001b[0;32m 96\u001b[0m headers,\n\u001b[0;32m 97\u001b[0m )\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:155\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_response_headers\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 155\u001b[0m event \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_receive_event(timeout\u001b[39m=\u001b[39;49mtimeout)\n\u001b[0;32m 156\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(event, h11\u001b[39m.\u001b[39mResponse):\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:191\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_event\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 190\u001b[0m \u001b[39mif\u001b[39;00m event \u001b[39mis\u001b[39;00m h11\u001b[39m.\u001b[39mNEED_DATA:\n\u001b[1;32m--> 191\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_network_stream\u001b[39m.\u001b[39;49mread(\n\u001b[0;32m 192\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mREAD_NUM_BYTES, timeout\u001b[39m=\u001b[39;49mtimeout\n\u001b[0;32m 193\u001b[0m )\n\u001b[0;32m 195\u001b[0m \u001b[39m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[0;32m 196\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 197\u001b[0m \u001b[39m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 201\u001b[0m \u001b[39m# perspective. 
Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[0;32m 202\u001b[0m \u001b[39m# it as a ConnectError.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 25\u001b[0m exc_map: ExceptionMapping \u001b[39m=\u001b[39m {socket\u001b[39m.\u001b[39mtimeout: ReadTimeout, \u001b[39mOSError\u001b[39;00m: ReadError}\n\u001b[1;32m---> 26\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:14\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(exc, from_exc):\n\u001b[1;32m---> 14\u001b[0m \u001b[39mraise\u001b[39;00m to_exc(exc)\n\u001b[0;32m 15\u001b[0m \u001b[39mraise\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mf:\\MSIM\\ML_Projects\\ai-ta-backend\\ai_ta_backend\\nomic_map_creation.ipynb Cell 13\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m response \u001b[39m=\u001b[39m supabase_client\u001b[39m.\u001b[39;49mtable(\u001b[39m\"\u001b[39;49m\u001b[39mllm-convo-monitor\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mselect(\u001b[39m\"\u001b[39;49m\u001b[39m*\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mexecute()\n\u001b[0;32m 2\u001b[0m data \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mdata\n\u001b[0;32m 3\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(data)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\postgrest\\_sync\\request_builder.py:55\u001b[0m, in \u001b[0;36mSyncQueryRequestBuilder.execute\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mexecute\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m APIResponse:\n\u001b[0;32m 44\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Execute the 
query.\u001b[39;00m\n\u001b[0;32m 45\u001b[0m \n\u001b[0;32m 46\u001b[0m \u001b[39m .. tip::\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[39m :class:`APIError` If the API raised an error.\u001b[39;00m\n\u001b[0;32m 54\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 55\u001b[0m r \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[0;32m 56\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mhttp_method,\n\u001b[0;32m 57\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpath,\n\u001b[0;32m 58\u001b[0m json\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mjson,\n\u001b[0;32m 59\u001b[0m params\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mparams,\n\u001b[0;32m 60\u001b[0m headers\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mheaders,\n\u001b[0;32m 61\u001b[0m )\n\u001b[0;32m 62\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 63\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 64\u001b[0m \u001b[39m200\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m r\u001b[39m.\u001b[39mstatus_code \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m \u001b[39m299\u001b[39m\n\u001b[0;32m 65\u001b[0m ): \u001b[39m# Response.ok from JS (https://developer.mozilla.org/en-US/docs/Web/API/Response/ok)\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:821\u001b[0m, in \u001b[0;36mClient.request\u001b[1;34m(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)\u001b[0m\n\u001b[0;32m 806\u001b[0m warnings\u001b[39m.\u001b[39mwarn(message, \u001b[39mDeprecationWarning\u001b[39;00m)\n\u001b[0;32m 808\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbuild_request(\n\u001b[0;32m 809\u001b[0m method\u001b[39m=\u001b[39mmethod,\n\u001b[0;32m 810\u001b[0m url\u001b[39m=\u001b[39murl,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 819\u001b[0m extensions\u001b[39m=\u001b[39mextensions,\n\u001b[0;32m 820\u001b[0m )\n\u001b[1;32m--> 821\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(request, auth\u001b[39m=\u001b[39;49mauth, follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:908\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 900\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 901\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 902\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 903\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 904\u001b[0m )\n\u001b[0;32m 906\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 908\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 909\u001b[0m request,\n\u001b[0;32m 910\u001b[0m auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 911\u001b[0m 
follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 912\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 913\u001b[0m )\n\u001b[0;32m 914\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 915\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:936\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 933\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 935\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 936\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 937\u001b[0m request,\n\u001b[0;32m 938\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 939\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 940\u001b[0m )\n\u001b[0;32m 941\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 942\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:973\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 970\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 971\u001b[0m hook(request)\n\u001b[1;32m--> 973\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 974\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 975\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:1009\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 1004\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 1005\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1006\u001b[0m )\n\u001b[0;32m 1008\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1009\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1011\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1013\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 203\u001b[0m 
\u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" + ] + } + ], + "source": [ + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", + "data = response.data\n", + "df = pd.DataFrame(data)\n", + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -765,14 +1045,49 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 6, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "1571\n" + "ename": "ReadTimeout", + "evalue": "The read operation timed out", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", + "File 
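Both ReadTimeout tracebacks in this notebook come from pulling the whole `llm-convo-monitor` table with a single `select("*").execute()` call, which can exceed the HTTP client's read timeout once the table grows. A minimal sketch of paging the fetch instead is shown below; it assumes the table has an integer `id` primary key, and the page size of 500 is an arbitrary choice rather than something taken from the notebook.

import pandas as pd

def fetch_all_conversations(supabase_client, page_size=500):
    # Keyset pagination: repeatedly fetch the next `page_size` rows ordered by id,
    # so each request stays small and finishes well inside the read timeout.
    rows, last_id = [], 0
    while True:
        resp = (supabase_client.table("llm-convo-monitor")
                .select("*")
                .gt("id", last_id)
                .order("id")
                .limit(page_size)
                .execute())
        batch = resp.data
        if not batch:
            break
        rows.extend(batch)
        last_id = batch[-1]["id"]
    return pd.DataFrame(rows)

# df = fetch_all_conversations(supabase_client)

Smaller requests also make it possible to retry a single failed page instead of restarting the whole export.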
\u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1259\u001b[0m, in \u001b[0;36mSSLSocket.recv\u001b[1;34m(self, buflen, flags)\u001b[0m\n\u001b[0;32m 1256\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1257\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[0;32m 1258\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[1;32m-> 1259\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(buflen)\n\u001b[0;32m 1260\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1132\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[1;34m(self, len, buffer)\u001b[0m\n\u001b[0;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1132\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m)\n\u001b[0;32m 1133\u001b[0m \u001b[39mexcept\u001b[39;00m SSLError \u001b[39mas\u001b[39;00m x:\n", + "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:60\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 60\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE-786\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m 
resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection.py:90\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 88\u001b[0m \u001b[39mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[1;32m---> 90\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connection\u001b[39m.\u001b[39;49mhandle_request(request)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:112\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_response_closed()\n\u001b[1;32m--> 112\u001b[0m \u001b[39mraise\u001b[39;00m exc\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:91\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 84\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mhttp11.receive_response_headers\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 85\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 86\u001b[0m (\n\u001b[0;32m 87\u001b[0m http_version,\n\u001b[0;32m 88\u001b[0m status,\n\u001b[0;32m 89\u001b[0m reason_phrase,\n\u001b[0;32m 90\u001b[0m headers,\n\u001b[1;32m---> 91\u001b[0m ) \u001b[39m=\u001b[39m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_receive_response_headers(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 92\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m (\n\u001b[0;32m 93\u001b[0m http_version,\n\u001b[0;32m 94\u001b[0m status,\n\u001b[0;32m 95\u001b[0m reason_phrase,\n\u001b[0;32m 96\u001b[0m headers,\n\u001b[0;32m 97\u001b[0m )\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:155\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_response_headers\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 155\u001b[0m event \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_receive_event(timeout\u001b[39m=\u001b[39;49mtimeout)\n\u001b[0;32m 156\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(event, h11\u001b[39m.\u001b[39mResponse):\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:191\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_event\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 190\u001b[0m \u001b[39mif\u001b[39;00m event \u001b[39mis\u001b[39;00m h11\u001b[39m.\u001b[39mNEED_DATA:\n\u001b[1;32m--> 191\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_network_stream\u001b[39m.\u001b[39;49mread(\n\u001b[0;32m 192\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mREAD_NUM_BYTES, timeout\u001b[39m=\u001b[39;49mtimeout\n\u001b[0;32m 193\u001b[0m )\n\u001b[0;32m 195\u001b[0m \u001b[39m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[0;32m 196\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 197\u001b[0m \u001b[39m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 201\u001b[0m \u001b[39m# perspective. 
Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[0;32m 202\u001b[0m \u001b[39m# it as a ConnectError.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 25\u001b[0m exc_map: ExceptionMapping \u001b[39m=\u001b[39m {socket\u001b[39m.\u001b[39mtimeout: ReadTimeout, \u001b[39mOSError\u001b[39;00m: ReadError}\n\u001b[1;32m---> 26\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:14\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(exc, from_exc):\n\u001b[1;32m---> 14\u001b[0m \u001b[39mraise\u001b[39;00m to_exc(exc)\n\u001b[0;32m 15\u001b[0m \u001b[39mraise\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mf:\\MSIM\\ML_Projects\\ai-ta-backend\\ai_ta_backend\\nomic_map_creation.ipynb Cell 19\u001b[0m line \u001b[0;36m3\n\u001b[0;32m 1\u001b[0m \u001b[39m# cell for all course map creation\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m response \u001b[39m=\u001b[39m supabase_client\u001b[39m.\u001b[39;49mtable(\u001b[39m\"\u001b[39;49m\u001b[39mllm-convo-monitor\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mselect(\u001b[39m\"\u001b[39;49m\u001b[39m*\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mexecute()\n\u001b[0;32m 4\u001b[0m data \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mdata\n\u001b[0;32m 5\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mlen\u001b[39m(data))\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\postgrest\\_sync\\request_builder.py:55\u001b[0m, in \u001b[0;36mSyncQueryRequestBuilder.execute\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mexecute\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m 
APIResponse:\n\u001b[0;32m 44\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Execute the query.\u001b[39;00m\n\u001b[0;32m 45\u001b[0m \n\u001b[0;32m 46\u001b[0m \u001b[39m .. tip::\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[39m :class:`APIError` If the API raised an error.\u001b[39;00m\n\u001b[0;32m 54\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 55\u001b[0m r \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[0;32m 56\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mhttp_method,\n\u001b[0;32m 57\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpath,\n\u001b[0;32m 58\u001b[0m json\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mjson,\n\u001b[0;32m 59\u001b[0m params\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mparams,\n\u001b[0;32m 60\u001b[0m headers\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mheaders,\n\u001b[0;32m 61\u001b[0m )\n\u001b[0;32m 62\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 63\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 64\u001b[0m \u001b[39m200\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m r\u001b[39m.\u001b[39mstatus_code \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m \u001b[39m299\u001b[39m\n\u001b[0;32m 65\u001b[0m ): \u001b[39m# Response.ok from JS (https://developer.mozilla.org/en-US/docs/Web/API/Response/ok)\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:821\u001b[0m, in \u001b[0;36mClient.request\u001b[1;34m(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)\u001b[0m\n\u001b[0;32m 806\u001b[0m warnings\u001b[39m.\u001b[39mwarn(message, \u001b[39mDeprecationWarning\u001b[39;00m)\n\u001b[0;32m 808\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbuild_request(\n\u001b[0;32m 809\u001b[0m method\u001b[39m=\u001b[39mmethod,\n\u001b[0;32m 810\u001b[0m url\u001b[39m=\u001b[39murl,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 819\u001b[0m extensions\u001b[39m=\u001b[39mextensions,\n\u001b[0;32m 820\u001b[0m )\n\u001b[1;32m--> 821\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(request, auth\u001b[39m=\u001b[39;49mauth, follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:908\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 900\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 901\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 902\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 903\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 904\u001b[0m )\n\u001b[0;32m 906\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 908\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 909\u001b[0m request,\n\u001b[0;32m 910\u001b[0m 
auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 911\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 912\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 913\u001b[0m )\n\u001b[0;32m 914\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 915\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:936\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 933\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 935\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 936\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 937\u001b[0m request,\n\u001b[0;32m 938\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 939\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 940\u001b[0m )\n\u001b[0;32m 941\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 942\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:973\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 970\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 971\u001b[0m hook(request)\n\u001b[1;32m--> 973\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 974\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 975\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:1009\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 1004\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 1005\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1006\u001b[0m )\n\u001b[0;32m 1008\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1009\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1011\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1013\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, 
request)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" ] } ], diff --git a/ai_ta_backend/utils_tokenization.py b/ai_ta_backend/utils_tokenization.py index 096e2bb6..5b000e5f 100644 --- a/ai_ta_backend/utils_tokenization.py +++ b/ai_ta_backend/utils_tokenization.py @@ -1,136 +1,136 @@ -import json -import os -from typing import Any, List - -import supabase -import tiktoken - - -def count_tokens_and_cost(prompt: str, completion: str = '', openai_model_name: str = "gpt-3.5-turbo"): # -> tuple[int, float] | tuple[int, float, int, float]: - """ - Returns the number of tokens in a text string. - - Only the first parameter is required, a string of text to measure. The completion and model name are optional. 
- - num_tokens, prompt_cost = count_tokens_and_cost(prompt="hello there") - num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost = count_tokens_and_cost(prompt="hello there", completion="how are you?") - - Args: - prompt (str): _description_ - completion (str, optional): _description_. Defaults to ''. - openai_model_name (str, optional): _description_. Defaults to "gpt-3.5-turbo". - - Returns: - tuple[int, float] | tuple[int, float, int, float]: Returns the number of tokens consumed and the cost. The total cost you'll be billed is the sum of each individual cost (prompt_cost + completion_cost) - """ - # encoding = tiktoken.encoding_for_model(openai_model_name) - openai_model_name = openai_model_name.lower() - encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # I think they all use the same encoding - prompt_cost = 0 - completion_cost = 0 - - prompt_token_cost = 0 - completion_token_cost = 0 - - if openai_model_name.startswith("gpt-3.5-turbo"): - if "16k" in openai_model_name: - prompt_token_cost: float = 0.003 / 1_000 - completion_token_cost: float = 0.004 / 1_000 - else: - # 3.5-turbo regular (4k context) - prompt_token_cost: float = 0.0015 / 1_000 - completion_token_cost: float = 0.002 / 1_000 - - elif openai_model_name.startswith("gpt-4"): - if "32k" in openai_model_name: - prompt_token_cost = 0.06 / 1_000 - completion_token_cost = 0.12 / 1_000 - else: - # gpt-4 regular (8k context) - prompt_token_cost = 0.03 / 1_000 - completion_token_cost = 0.06 / 1_000 - elif openai_model_name.startswith("text-embedding-ada-002"): - prompt_token_cost = 0.0001 / 1_000 - completion_token_cost = 0.0001 / 1_000 - else: - # no idea of cost - print(f"NO IDEA OF COST, pricing not supported for model model: `{openai_model_name}`") - prompt_token_cost = 0 - completion_token_cost = 0 - - if completion == '': - num_tokens_prompt: int = len(encoding.encode(prompt)) - prompt_cost = float(prompt_token_cost * num_tokens_prompt) - return num_tokens_prompt, prompt_cost - elif prompt == '': - num_tokens_completion: int = len(encoding.encode(completion)) - completion_cost = float(completion_token_cost * num_tokens_completion) - return num_tokens_completion, completion_cost - else: - num_tokens_prompt: int = len(encoding.encode(prompt)) - num_tokens_completion: int = len(encoding.encode(completion)) - prompt_cost = float(prompt_token_cost * num_tokens_prompt) - completion_cost = float(completion_token_cost * num_tokens_completion) - return num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost - -# from dotenv import load_dotenv - -# load_dotenv() - -def analyze_conversations(supabase_client: Any = None): - - if supabase_client is None: - supabase_client = supabase.create_client( # type: ignore - supabase_url=os.getenv('SUPABASE_URL'), # type: ignore - supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore - # Get all conversations - response = supabase_client.table('llm-convo-monitor').select('convo').execute() - # print("total entries", response.data.count) - - total_convos = 0 - total_messages = 0 - total_prompt_cost = 0 - total_completion_cost = 0 - - # Iterate through all conversations - # for convo in response['data']: - for convo in response.data: - total_convos += 1 - # print(convo) - # prase json from convo - # parse json into dict - # print(type(convo)) - # convo = json.loads(convo) - convo = convo['convo'] - messages = convo['messages'] - model_name = convo['model']['name'] - - # Iterate through all messages in each conversation - for message in messages: - 
total_messages += 1 - role = message['role'] - content = message['content'] - - # If the message is from the user, it's a prompt - # TODO: Fix these - # WARNING: Fix these error messages... they are the sign of a logic bug. - if role == 'user': - num_tokens, cost = count_tokens_and_cost(prompt=content, openai_model_name=model_name) - total_prompt_cost += cost - print(f'User Prompt: {content}, Tokens: {num_tokens}, cost: {cost}') - - # If the message is from the assistant, it's a completion - elif role == 'assistant': - num_tokens_completion, cost_completion = count_tokens_and_cost(prompt='', completion=content, openai_model_name=model_name) - total_completion_cost += cost_completion - print(f'Assistant Completion: {content}\nTokens: {num_tokens_completion}, cost: {cost_completion}') - return total_convos, total_messages, total_prompt_cost, total_completion_cost - -if __name__ == '__main__': - pass - -# if __name__ == '__main__': -# print('starting main') -# total_convos, total_messages, total_prompt_cost, total_completion_cost = analyze_conversations() -# print(f'total_convos: {total_convos}, total_messages: {total_messages}') +import json +import os +from typing import Any, List + +import supabase +import tiktoken + + +def count_tokens_and_cost(prompt: str, completion: str = '', openai_model_name: str = "gpt-3.5-turbo"): # -> tuple[int, float] | tuple[int, float, int, float]: + """ + Returns the number of tokens in a text string. + + Only the first parameter is required, a string of text to measure. The completion and model name are optional. + + num_tokens, prompt_cost = count_tokens_and_cost(prompt="hello there") + num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost = count_tokens_and_cost(prompt="hello there", completion="how are you?") + + Args: + prompt (str): _description_ + completion (str, optional): _description_. Defaults to ''. + openai_model_name (str, optional): _description_. Defaults to "gpt-3.5-turbo". + + Returns: + tuple[int, float] | tuple[int, float, int, float]: Returns the number of tokens consumed and the cost. 
The total cost you'll be billed is the sum of each individual cost (prompt_cost + completion_cost) + """ + # encoding = tiktoken.encoding_for_model(openai_model_name) + openai_model_name = openai_model_name.lower() + encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # I think they all use the same encoding + prompt_cost = 0 + completion_cost = 0 + + prompt_token_cost = 0 + completion_token_cost = 0 + + if openai_model_name.startswith("gpt-3.5-turbo"): + if "16k" in openai_model_name: + prompt_token_cost: float = 0.003 / 1_000 + completion_token_cost: float = 0.004 / 1_000 + else: + # 3.5-turbo regular (4k context) + prompt_token_cost: float = 0.0015 / 1_000 + completion_token_cost: float = 0.002 / 1_000 + + elif openai_model_name.startswith("gpt-4"): + if "32k" in openai_model_name: + prompt_token_cost = 0.06 / 1_000 + completion_token_cost = 0.12 / 1_000 + else: + # gpt-4 regular (8k context) + prompt_token_cost = 0.03 / 1_000 + completion_token_cost = 0.06 / 1_000 + elif openai_model_name.startswith("text-embedding-ada-002"): + prompt_token_cost = 0.0001 / 1_000 + completion_token_cost = 0.0001 / 1_000 + else: + # no idea of cost + print(f"NO IDEA OF COST, pricing not supported for model model: `{openai_model_name}`") + prompt_token_cost = 0 + completion_token_cost = 0 + + if completion == '': + num_tokens_prompt: int = len(encoding.encode(prompt)) + prompt_cost = float(prompt_token_cost * num_tokens_prompt) + return num_tokens_prompt, prompt_cost + elif prompt == '': + num_tokens_completion: int = len(encoding.encode(completion)) + completion_cost = float(completion_token_cost * num_tokens_completion) + return num_tokens_completion, completion_cost + else: + num_tokens_prompt: int = len(encoding.encode(prompt)) + num_tokens_completion: int = len(encoding.encode(completion)) + prompt_cost = float(prompt_token_cost * num_tokens_prompt) + completion_cost = float(completion_token_cost * num_tokens_completion) + return num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost + +# from dotenv import load_dotenv + +# load_dotenv() + +def analyze_conversations(supabase_client: Any = None): + + if supabase_client is None: + supabase_client = supabase.create_client( # type: ignore + supabase_url=os.getenv('SUPABASE_URL'), # type: ignore + supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore + # Get all conversations + response = supabase_client.table('llm-convo-monitor').select('convo').execute() + # print("total entries", response.data.count) + + total_convos = 0 + total_messages = 0 + total_prompt_cost = 0 + total_completion_cost = 0 + + # Iterate through all conversations + # for convo in response['data']: + for convo in response.data: + total_convos += 1 + # print(convo) + # prase json from convo + # parse json into dict + # print(type(convo)) + # convo = json.loads(convo) + convo = convo['convo'] + messages = convo['messages'] + model_name = convo['model']['name'] + + # Iterate through all messages in each conversation + for message in messages: + total_messages += 1 + role = message['role'] + content = message['content'] + + # If the message is from the user, it's a prompt + # TODO: Fix these + # WARNING: Fix these error messages... they are the sign of a logic bug. 
+ if role == 'user': + num_tokens, cost = count_tokens_and_cost(prompt=content, openai_model_name=model_name) + total_prompt_cost += cost + print(f'User Prompt: {content}, Tokens: {num_tokens}, cost: {cost}') + + # If the message is from the assistant, it's a completion + elif role == 'assistant': + num_tokens_completion, cost_completion = count_tokens_and_cost(prompt='', completion=content, openai_model_name=model_name) + total_completion_cost += cost_completion + print(f'Assistant Completion: {content}\nTokens: {num_tokens_completion}, cost: {cost_completion}') + return total_convos, total_messages, total_prompt_cost, total_completion_cost + +if __name__ == '__main__': + pass + +# if __name__ == '__main__': +# print('starting main') +# total_convos, total_messages, total_prompt_cost, total_completion_cost = analyze_conversations() +# print(f'total_convos: {total_convos}, total_messages: {total_messages}') # print(f'total_prompt_cost: {total_prompt_cost}, total_completion_cost: {total_completion_cost}') \ No newline at end of file diff --git a/ai_ta_backend/web_scrape.py b/ai_ta_backend/web_scrape.py index 36158db9..f77d695a 100644 --- a/ai_ta_backend/web_scrape.py +++ b/ai_ta_backend/web_scrape.py @@ -1,467 +1,467 @@ -import os -import re -import shutil -import time -from tempfile import NamedTemporaryFile -from zipfile import ZipFile - -import boto3 # type: ignore -import requests -from bs4 import BeautifulSoup - -import supabase - -from ai_ta_backend.aws import upload_data_files_to_s3 -from ai_ta_backend.vector_database import Ingest -import mimetypes - -def get_file_extension(filename): - match = re.search(r'\.([a-zA-Z0-9]+)$', filename) - valid_filetypes = list(mimetypes.types_map.keys()) - valid_filetypes = valid_filetypes + ['.html', '.py', '.vtt', '.pdf', '.txt', '.srt', '.docx', '.ppt', '.pptx'] - if match: - filetype = "." + match.group(1) - if filetype in valid_filetypes: - return filetype - else: - return '.html' - else: - return '.html' - -def valid_url(url): - '''Returns the URL and it's content if it's good, otherwise returns false. 
Prints the status code.''' - try: - response = requests.get(url, allow_redirects=True, timeout=20) - - redirect_loop_counter = 0 - while response.status_code == 301: - # Check for permanent redirect - if redirect_loop_counter > 3: - print("❌ Redirect loop (on 301 error) exceeded redirect limit of:", redirect_loop_counter, "❌") - return False - redirect_url = response.headers['Location'] - response = requests.head(redirect_url) - redirect_loop_counter += 1 - if response.status_code == 200: - filetype = get_file_extension(response.url) - print("file extension:", filetype) - if filetype == '.html': - content = BeautifulSoup(response.content, "html.parser") - if " len(urls): - max_urls = max_urls - len(urls) - elif max_urls < len(urls): - urls = urls[:max_urls] - max_urls = 0 - else: - max_urls = 0 - # We grab content out of these urls - - for url in urls: - if base_url_on: - if url.startswith(site): - url, s, filetype = valid_url(url) - if url: - print("Scraped:", url) - url_contents.append((url, s, filetype)) - else: - _invalid_urls.append(url) - else: - pass - else: - url, s, filetype = valid_url(url) - if url: - print("Scraped:", url) - url_contents.append((url, s, filetype)) - else: - _invalid_urls.append(url) - print("existing urls", _existing_urls) - url_contents = remove_duplicates(url_contents, _existing_urls) - max_urls = max_urls - len(url_contents) - print(max_urls, "urls left") - - # recursively go through crawler until we reach the max amount of urls. - for url in url_contents: - if url[0] not in _invalid_urls: - if max_urls > 0: - if _depth < max_depth: - temp_data = crawler(url[0], max_urls, max_depth, timeout, _invalid_urls, _depth, url[1], url[2]) - print("existing urls", _existing_urls) - temp_data = remove_duplicates(temp_data, _existing_urls) - max_urls = max_urls - len(temp_data) - print(max_urls, "urls left") - url_contents.extend(temp_data) - url_contents = remove_duplicates(url_contents, _existing_urls) - else: - print("Depth exceeded:", _depth+1, "out of", max_depth) - break - else: - break - else: - pass - - if _depth == 0: - if len(url_contents) < amount: - print("Max URLS not reached, returning all urls found:", len(url_contents), "out of", amount) - elif len(url_contents) == amount: - print("Max URLS reached:", len(url_contents), "out of", amount) - else: - print("Exceeded Max URLS, found:", len(url_contents), "out of", amount) - print(len(url_contents), "urls found") - - # Free up memory - # del url_contents[:] - # del urls[:] - # if _invalid_urls is not None: - # del _invalid_urls[:] - # if _existing_urls is not None: - # del _existing_urls[:] - # gc.collect() - - return url_contents - -def main_crawler(url:str, course_name:str, max_urls:int=100, max_depth:int=3, timeout:int=1, stay_on_baseurl:bool=False): - """ - Crawl a site and scrape its content and PDFs, then upload the data to S3 and ingest it. - - Args: - url (str): The URL of the site to crawl. - course_name (str): The name of the course to associate with the crawled data. - max_urls (int, optional): The maximum number of URLs to crawl. Defaults to 100. - max_depth (int, optional): The maximum depth of URLs to crawl. Defaults to 3. - timeout (int, optional): The number of seconds to wait between requests. Defaults to 1. 
- - Returns: - None - """ - print("\n") - max_urls = int(max_urls) - max_depth = int(max_depth) - timeout = int(timeout) - stay_on_baseurl = bool(stay_on_baseurl) - if stay_on_baseurl: - stay_on_baseurl = base_url(url) - print(stay_on_baseurl) - - ingester = Ingest() - s3_client = boto3.client( - 's3', - aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'), - aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'), - ) - - # Check for GitHub repository coming soon - if url.startswith("https://github.com/"): - print("Begin Ingesting GitHub page") - results = ingester.ingest_github(url, course_name) - print("Finished ingesting GitHub page") - del ingester - return results - else: - try: - print("Gathering existing urls from Supabase") - supabase_client = supabase.create_client( # type: ignore - supabase_url=os.getenv('SUPABASE_URL'), # type: ignore - supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore - urls = supabase_client.table(os.getenv('NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE')).select('course_name, url, contexts').eq('course_name', course_name).execute() - del supabase_client - if urls.data == []: - existing_urls = None - else: - existing_urls = [] - for thing in urls.data: - whole = '' - for t in thing['contexts']: - whole += t['text'] - existing_urls.append((thing['url'], whole)) - print("Finished gathering existing urls from Supabase") - except Exception as e: - print("Error:", e) - print("Could not gather existing urls from Supabase") - existing_urls = None - - print("Begin Ingesting Web page") - data = crawler(url=url, max_urls=max_urls, max_depth=max_depth, timeout=timeout, base_url_on=stay_on_baseurl, _existing_urls=existing_urls) - - # Clean some keys for a proper file name - # todo: have a default title - # titles = [value[1][1].title.string for value in data] - - titles = [] - for value in data: - try: - titles.append(value[1].title.string) - except AttributeError as e: - # if no title - try: - placeholder_title = re.findall(pattern=r'[a-zA-Z0-9.]*[a-z]', string=value[0])[1] - except Exception as e: - placeholder_title = "Title Not Found" - titles.append(placeholder_title) - print(f"URL is missing a title, using this title instead: {placeholder_title}") - - try: - clean = [re.match(r"[a-zA-Z0-9\s]*", title).group(0) for title in titles] # type: ignore - except Exception as e: - print("Error:", e) - clean = titles - print("title names after regex before cleaning", clean) - path_name = [] - counter = 0 - for value in clean: - value = value.strip() if value else "" - # value = value.strip() - value = value.replace(" ", "_") - if value == "403_Forbidden": - print("Found Forbidden Key, deleting data") - del data[counter] - counter -= 1 - else: - path_name.append(value) - counter += 1 - print("Cleaned title names", path_name) - - # Upload each html to S3 - print("Uploading files to S3") - paths = [] - counter = 0 - try: - for i, key in enumerate(data): - with NamedTemporaryFile(suffix=key[2]) as temp_file: - if key[1] != "" or key[1] != None: - if key[2] == ".html": - print("Writing", key[2] ,"to temp file") - temp_file.write(key[1].encode('utf-8')) - else: - print("Writing", key[2] ,"to temp file") - temp_file.write(key[1]) - temp_file.seek(0) - s3_upload_path = "courses/"+ course_name + "/" + path_name[i] + key[2] - paths.append(s3_upload_path) - with open(temp_file.name, 'rb') as f: - print("Uploading", key[2] ,"to S3") - s3_client.upload_fileobj(f, os.getenv('S3_BUCKET_NAME'), s3_upload_path) - ingester.bulk_ingest(s3_upload_path, course_name=course_name, url=key[0], 
base_url=url) - counter += 1 - else: - print("No", key[2] ,"to upload", key[1]) - except Exception as e: - print("Error in upload:", e) - finally: - del ingester - - print(f"Successfully uploaded files to s3: {counter}") - print("Finished /web-scrape") - -# Download an MIT course using its url -def mit_course_download(url:str, course_name:str, local_dir:str): - ingester = Ingest() - base = "https://ocw.mit.edu" - if url.endswith("download"): - pass - else: - url = url + "download" - - r = requests.get(url) - soup = BeautifulSoup(r.text,"html.parser") - - zip = '' - for ref in soup.find_all("a"): - if ref.attrs['href'].endswith("zip"): - zip = ref.attrs['href'] - - site = zip - print('site', site) - r = requests.get(url=site, stream=True) - - zip_file = local_dir + ".zip" - - try: - with open(zip_file, 'wb') as fd: - for chunk in r.iter_content(chunk_size=128): - fd.write(chunk) - print("course downloaded!") - except Exception as e: - print("Error:", e, site) - - with ZipFile(zip_file, 'r') as zObject: - zObject.extractall( - path=local_dir) - - shutil.move(local_dir+"/"+"robots.txt", local_dir+"/static_resources") - s3_paths = upload_data_files_to_s3(course_name, local_dir+"/static_resources") - success_fail = ingester.bulk_ingest(s3_paths, course_name) # type: ignore - - shutil.move(zip_file, local_dir) - shutil.rmtree(local_dir) - del ingester - print("Finished Ingest") - return success_fail - -if __name__ == '__main__': - pass +import os +import re +import shutil +import time +from tempfile import NamedTemporaryFile +from zipfile import ZipFile + +import boto3 # type: ignore +import requests +from bs4 import BeautifulSoup + +import supabase + +from ai_ta_backend.aws import upload_data_files_to_s3 +from ai_ta_backend.vector_database import Ingest +import mimetypes + +def get_file_extension(filename): + match = re.search(r'\.([a-zA-Z0-9]+)$', filename) + valid_filetypes = list(mimetypes.types_map.keys()) + valid_filetypes = valid_filetypes + ['.html', '.py', '.vtt', '.pdf', '.txt', '.srt', '.docx', '.ppt', '.pptx'] + if match: + filetype = "." + match.group(1) + if filetype in valid_filetypes: + return filetype + else: + return '.html' + else: + return '.html' + +def valid_url(url): + '''Returns the URL and it's content if it's good, otherwise returns false. 
Prints the status code.''' + try: + response = requests.get(url, allow_redirects=True, timeout=20) + + redirect_loop_counter = 0 + while response.status_code == 301: + # Check for permanent redirect + if redirect_loop_counter > 3: + print("❌ Redirect loop (on 301 error) exceeded redirect limit of:", redirect_loop_counter, "❌") + return False + redirect_url = response.headers['Location'] + response = requests.head(redirect_url) + redirect_loop_counter += 1 + if response.status_code == 200: + filetype = get_file_extension(response.url) + print("file extension:", filetype) + if filetype == '.html': + content = BeautifulSoup(response.content, "html.parser") + if " len(urls): + max_urls = max_urls - len(urls) + elif max_urls < len(urls): + urls = urls[:max_urls] + max_urls = 0 + else: + max_urls = 0 + # We grab content out of these urls + + for url in urls: + if base_url_on: + if url.startswith(site): + url, s, filetype = valid_url(url) + if url: + print("Scraped:", url) + url_contents.append((url, s, filetype)) + else: + _invalid_urls.append(url) + else: + pass + else: + url, s, filetype = valid_url(url) + if url: + print("Scraped:", url) + url_contents.append((url, s, filetype)) + else: + _invalid_urls.append(url) + print("existing urls", _existing_urls) + url_contents = remove_duplicates(url_contents, _existing_urls) + max_urls = max_urls - len(url_contents) + print(max_urls, "urls left") + + # recursively go through crawler until we reach the max amount of urls. + for url in url_contents: + if url[0] not in _invalid_urls: + if max_urls > 0: + if _depth < max_depth: + temp_data = crawler(url[0], max_urls, max_depth, timeout, _invalid_urls, _depth, url[1], url[2]) + print("existing urls", _existing_urls) + temp_data = remove_duplicates(temp_data, _existing_urls) + max_urls = max_urls - len(temp_data) + print(max_urls, "urls left") + url_contents.extend(temp_data) + url_contents = remove_duplicates(url_contents, _existing_urls) + else: + print("Depth exceeded:", _depth+1, "out of", max_depth) + break + else: + break + else: + pass + + if _depth == 0: + if len(url_contents) < amount: + print("Max URLS not reached, returning all urls found:", len(url_contents), "out of", amount) + elif len(url_contents) == amount: + print("Max URLS reached:", len(url_contents), "out of", amount) + else: + print("Exceeded Max URLS, found:", len(url_contents), "out of", amount) + print(len(url_contents), "urls found") + + # Free up memory + # del url_contents[:] + # del urls[:] + # if _invalid_urls is not None: + # del _invalid_urls[:] + # if _existing_urls is not None: + # del _existing_urls[:] + # gc.collect() + + return url_contents + +def main_crawler(url:str, course_name:str, max_urls:int=100, max_depth:int=3, timeout:int=1, stay_on_baseurl:bool=False): + """ + Crawl a site and scrape its content and PDFs, then upload the data to S3 and ingest it. + + Args: + url (str): The URL of the site to crawl. + course_name (str): The name of the course to associate with the crawled data. + max_urls (int, optional): The maximum number of URLs to crawl. Defaults to 100. + max_depth (int, optional): The maximum depth of URLs to crawl. Defaults to 3. + timeout (int, optional): The number of seconds to wait between requests. Defaults to 1. 
+ + Returns: + None + """ + print("\n") + max_urls = int(max_urls) + max_depth = int(max_depth) + timeout = int(timeout) + stay_on_baseurl = bool(stay_on_baseurl) + if stay_on_baseurl: + stay_on_baseurl = base_url(url) + print(stay_on_baseurl) + + ingester = Ingest() + s3_client = boto3.client( + 's3', + aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'), + aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'), + ) + + # Check for GitHub repository coming soon + if url.startswith("https://github.com/"): + print("Begin Ingesting GitHub page") + results = ingester.ingest_github(url, course_name) + print("Finished ingesting GitHub page") + del ingester + return results + else: + try: + print("Gathering existing urls from Supabase") + supabase_client = supabase.create_client( # type: ignore + supabase_url=os.getenv('SUPABASE_URL'), # type: ignore + supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore + urls = supabase_client.table(os.getenv('NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE')).select('course_name, url, contexts').eq('course_name', course_name).execute() + del supabase_client + if urls.data == []: + existing_urls = None + else: + existing_urls = [] + for thing in urls.data: + whole = '' + for t in thing['contexts']: + whole += t['text'] + existing_urls.append((thing['url'], whole)) + print("Finished gathering existing urls from Supabase") + except Exception as e: + print("Error:", e) + print("Could not gather existing urls from Supabase") + existing_urls = None + + print("Begin Ingesting Web page") + data = crawler(url=url, max_urls=max_urls, max_depth=max_depth, timeout=timeout, base_url_on=stay_on_baseurl, _existing_urls=existing_urls) + + # Clean some keys for a proper file name + # todo: have a default title + # titles = [value[1][1].title.string for value in data] + + titles = [] + for value in data: + try: + titles.append(value[1].title.string) + except AttributeError as e: + # if no title + try: + placeholder_title = re.findall(pattern=r'[a-zA-Z0-9.]*[a-z]', string=value[0])[1] + except Exception as e: + placeholder_title = "Title Not Found" + titles.append(placeholder_title) + print(f"URL is missing a title, using this title instead: {placeholder_title}") + + try: + clean = [re.match(r"[a-zA-Z0-9\s]*", title).group(0) for title in titles] # type: ignore + except Exception as e: + print("Error:", e) + clean = titles + print("title names after regex before cleaning", clean) + path_name = [] + counter = 0 + for value in clean: + value = value.strip() if value else "" + # value = value.strip() + value = value.replace(" ", "_") + if value == "403_Forbidden": + print("Found Forbidden Key, deleting data") + del data[counter] + counter -= 1 + else: + path_name.append(value) + counter += 1 + print("Cleaned title names", path_name) + + # Upload each html to S3 + print("Uploading files to S3") + paths = [] + counter = 0 + try: + for i, key in enumerate(data): + with NamedTemporaryFile(suffix=key[2]) as temp_file: + if key[1] != "" or key[1] != None: + if key[2] == ".html": + print("Writing", key[2] ,"to temp file") + temp_file.write(key[1].encode('utf-8')) + else: + print("Writing", key[2] ,"to temp file") + temp_file.write(key[1]) + temp_file.seek(0) + s3_upload_path = "courses/"+ course_name + "/" + path_name[i] + key[2] + paths.append(s3_upload_path) + with open(temp_file.name, 'rb') as f: + print("Uploading", key[2] ,"to S3") + s3_client.upload_fileobj(f, os.getenv('S3_BUCKET_NAME'), s3_upload_path) + ingester.bulk_ingest(s3_upload_path, course_name=course_name, url=key[0], 
base_url=url) + counter += 1 + else: + print("No", key[2] ,"to upload", key[1]) + except Exception as e: + print("Error in upload:", e) + finally: + del ingester + + print(f"Successfully uploaded files to s3: {counter}") + print("Finished /web-scrape") + +# Download an MIT course using its url +def mit_course_download(url:str, course_name:str, local_dir:str): + ingester = Ingest() + base = "https://ocw.mit.edu" + if url.endswith("download"): + pass + else: + url = url + "download" + + r = requests.get(url) + soup = BeautifulSoup(r.text,"html.parser") + + zip = '' + for ref in soup.find_all("a"): + if ref.attrs['href'].endswith("zip"): + zip = ref.attrs['href'] + + site = zip + print('site', site) + r = requests.get(url=site, stream=True) + + zip_file = local_dir + ".zip" + + try: + with open(zip_file, 'wb') as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + print("course downloaded!") + except Exception as e: + print("Error:", e, site) + + with ZipFile(zip_file, 'r') as zObject: + zObject.extractall( + path=local_dir) + + shutil.move(local_dir+"/"+"robots.txt", local_dir+"/static_resources") + s3_paths = upload_data_files_to_s3(course_name, local_dir+"/static_resources") + success_fail = ingester.bulk_ingest(s3_paths, course_name) # type: ignore + + shutil.move(zip_file, local_dir) + shutil.rmtree(local_dir) + del ingester + print("Finished Ingest") + return success_fail + +if __name__ == '__main__': + pass From 0e1f6af9fbae32a219e94bd89e0846ea5c21a52d Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 12 Sep 2023 16:09:14 -0500 Subject: [PATCH 47/61] replace int id with time.time() --- ai_ta_backend/nomic_logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 8e5d179f..6f74a455 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -76,7 +76,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: # update metadata metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message}] + "id": time.time(), "user_email": user_email, "first_query": first_message}] else: print("conversation_id does not exist") @@ -94,7 +94,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: conversation_string += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message}] + "id": time.time(), "user_email": user_email, "first_query": first_message}] # create embeddings embeddings_model = OpenAIEmbeddings() From ad581c8ac8d3f70d0b7bff41b341f4659c987b0f Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 12 Sep 2023 16:23:28 -0500 Subject: [PATCH 48/61] handled ID type errors --- ai_ta_backend/nomic_logging.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 6f74a455..f643f5b7 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -44,7 +44,8 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: project = AtlasProject(name=project_name, add_datums_if_exists=True) map_metadata_df = project.maps[1].data.df map_embeddings_df = project.maps[1].embeddings.latent - last_id = int(map_metadata_df['id'].values[-1]) + 
map_metadata_df['id'] = map_metadata_df['id'].astype(int) + last_id = map_metadata_df['id'].max() print("last_id: ", last_id) if conversation_id in map_metadata_df.values: @@ -76,7 +77,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: # update metadata metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, - "id": time.time(), "user_email": user_email, "first_query": first_message}] + "id": last_id+1, "user_email": user_email, "first_query": first_message}] else: print("conversation_id does not exist") @@ -94,7 +95,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: conversation_string += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, - "id": time.time(), "user_email": user_email, "first_query": first_message}] + "id": last_id+1, "user_email": user_email, "first_query": first_message}] # create embeddings embeddings_model = OpenAIEmbeddings() From 34ba4ddf5ca7572d2f29c36ed7b0adcb7841f520 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 12 Sep 2023 22:46:27 -0500 Subject: [PATCH 49/61] added timestamps to metadata --- ai_ta_backend/nomic_logging.py | 56 +- ai_ta_backend/nomic_map_creation.ipynb | 1006 +++++++++++++----------- 2 files changed, 567 insertions(+), 495 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index f643f5b7..9e816302 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -5,6 +5,7 @@ from langchain.embeddings import OpenAIEmbeddings import numpy as np import time +import datetime import pandas as pd import supabase @@ -57,6 +58,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] + prev_created_at = prev_data['created_at'].values[0] # delete that convo data point from Nomic project.delete_data([prev_id]) @@ -69,15 +71,21 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: messages_to_be_logged = messages[-2:] for message in messages_to_be_logged: if message['role'] == 'user': - emoji = "🙋" + emoji = "🙋 " else: - emoji = "🤖" + emoji = "🤖 " prev_convo += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" + # modified timestamp + current_time = time.time() + dt_object = datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + # update metadata metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message}] + "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": prev_created_at, + "modified_at": current_timestamp}] else: print("conversation_id does not exist") @@ -89,13 +97,19 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: for message in messages: if message['role'] == 'user': - emoji = "🙋" + emoji = "🙋 " else: - emoji = "🤖" + emoji = "🤖 " conversation_string += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" + # modified timestamp + current_time = time.time() + dt_object = datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + metadata = [{"course": course_name, "conversation": conversation_string, 
"conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message}] + "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": current_timestamp, + "modified_at": current_timestamp}] # create embeddings embeddings_model = OpenAIEmbeddings() @@ -183,6 +197,7 @@ def create_nomic_map(course_name: str, log_data: list): for index, row in df.iterrows(): user_email = row['user_email'] + created_at = row['created_at'] convo = row['convo'] messages = convo['messages'] first_message = messages[0]['content'] @@ -191,9 +206,9 @@ def create_nomic_map(course_name: str, log_data: list): # create metadata for multi-turn conversation conversation = "" if message['role'] == 'user': - emoji = "🙋" + emoji = "🙋 " else: - emoji = "🤖" + emoji = "🤖 " for message in messages: # string of role: content, role: content, ... conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" @@ -202,15 +217,21 @@ def create_nomic_map(course_name: str, log_data: list): if convo['id'] == log_conversation_id: conversation_exists = True if m['role'] == 'user': - emoji = "🙋" + emoji = "🙋 " else: - emoji = "🤖" + emoji = "🤖 " for m in log_messages: conversation += "\n>>> " + emoji + m['role'] + ": " + m['content'] + "\n" + # adding timestamp + current_time = time.time() + dt_object = datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + # add to metadata metadata_row = {"course": row['course_name'], "conversation": conversation, "conversation_id": convo['id'], - "id": i, "user_email": user_email, "first_query": first_message} + "id": i, "user_email": user_email, "first_query": first_message, "created_at": created_at, + "modified_at": current_timestamp} metadata.append(metadata_row) i += 1 @@ -220,12 +241,19 @@ def create_nomic_map(course_name: str, log_data: list): conversation = "" for message in log_messages: if message['role'] == 'user': - emoji = "🙋" + emoji = "🙋 " else: - emoji = "🤖" + emoji = "🤖 " conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" + + # adding timestamp + current_time = time.time() + dt_object = datetime.datetime.fromtimestamp(current_time) + current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + metadata_row = {"course": course_name, "conversation": conversation, "conversation_id": log_conversation_id, - "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content']} + "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content'], "created_at": current_timestamp, + "modified_at": current_timestamp} metadata.append(metadata_row) print("length of metadata: ", len(metadata)) diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb index aeecd4bd..c11b80db 100644 --- a/ai_ta_backend/nomic_map_creation.ipynb +++ b/ai_ta_backend/nomic_map_creation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -29,7 +29,7 @@ "True" ] }, - "execution_count": 7, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ }, { "cell_type": "code", - 
"execution_count": 21, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -81,85 +81,310 @@ " \n", " \n", " \n", + " id\n", + " created_at\n", + " convo\n", + " convo_id\n", " course_name\n", + " user_email\n", " \n", " \n", " \n", " \n", " 0\n", - " gpt4\n", + " 5193\n", + " 2023-09-06T23:21:36.639848+00:00\n", + " {'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',...\n", + " d6e83182-56d8-4151-b84e-a09dd84b8836\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", " 1\n", - " gpt4\n", + " 5192\n", + " 2023-09-06T23:04:50.982857+00:00\n", + " {'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',...\n", + " 1390b226-2115-4078-a594-ed4bad0fb4e0\n", + " cropwizard-beta\n", + " kastanday@live.com\n", " \n", " \n", " 2\n", - " gpt4\n", + " 5174\n", + " 2023-09-06T22:22:44.107536+00:00\n", + " {'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',...\n", + " fca0cf0b-6bac-4cf6-bd4d-d3501827c947\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", " 3\n", - " gpt4\n", + " 5184\n", + " 2023-09-06T23:01:06.796384+00:00\n", + " {'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',...\n", + " 1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", " 4\n", - " gpt4\n", + " 5182\n", + " 2023-09-06T22:58:21.66316+00:00\n", + " {'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',...\n", + " 0c9d9873-2c52-4b12-90ec-d4a495cbf4e0\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", + " \n", + " \n", + " 5\n", + " 5194\n", + " 2023-09-06T23:22:06.786717+00:00\n", + " {'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',...\n", + " d51bbdd8-c5c8-4e5b-a003-556a8ac74726\n", + " cropwizard-beta\n", + " avd6@illinois.edu\n", + " \n", + " \n", + " 6\n", + " 5113\n", + " 2023-09-05T21:34:17.944623+00:00\n", + " {'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',...\n", + " 2a39551b-7b6c-4ba0-aa77-ffc9b1316718\n", + " cropwizard-beta\n", + " hetarth2@illinois.edu\n", + " \n", + " \n", + " 7\n", + " 5333\n", + " 2023-09-07T22:25:52.858867+00:00\n", + " {'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',...\n", + " bd3c9174-4512-4ac6-ae05-8aafa4099215\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", + " \n", + " \n", + " 8\n", + " 5108\n", + " 2023-09-05T21:11:51.289178+00:00\n", + " {'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',...\n", + " a49941c4-2049-478b-b43e-9c574cadbd05\n", + " cropwizard-beta\n", + " vadve@illinois.edu\n", + " \n", + " \n", + " 9\n", + " 5104\n", + " 2023-09-05T20:38:27.687893+00:00\n", + " {'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',...\n", + " 1be7956c-d5c8-4b1f-a62f-145421a2e7f7\n", + " cropwizard-beta\n", + " kastanday@live.com\n", + " \n", + " \n", + " 10\n", + " 5103\n", + " 2023-09-05T20:11:51.810222+00:00\n", + " {'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',...\n", + " 20e6e160-6fd3-4e0a-82b7-98457f2ff1c3\n", + " cropwizard-beta\n", + " kastanday@live.com\n", + " \n", + " \n", + " 11\n", + " 5098\n", + " 2023-09-05T19:17:36.190976+00:00\n", + " {'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',...\n", + " fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", + " \n", + " \n", + " 12\n", + " 5042\n", + " 2023-09-02T14:43:02.160608+00:00\n", + " {'id': '8df8a436-adbf-441a-92ec-f6d926aee789',...\n", + " 8df8a436-adbf-441a-92ec-f6d926aee789\n", + " cropwizard-beta\n", + " None\n", " \n", " \n", - " ...\n", - " ...\n", + " 13\n", + " 5009\n", + " 2023-09-01T18:26:57.628766+00:00\n", + " {'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',...\n", + " 
b401031c-5540-43fc-baff-7c6db90ff3ae\n", + " cropwizard-beta\n", + " rohan13@illinois.edu\n", " \n", " \n", - " 1789\n", - " FIN574-GT\n", + " 14\n", + " 5006\n", + " 2023-09-01T18:06:33.350981+00:00\n", + " {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...\n", + " c42e70a8-03c0-4c9e-ac7f-ee718b4ac968\n", + " cropwizard-beta\n", + " hetarth2@illinois.edu\n", " \n", " \n", - " 1790\n", - " NCSA\n", + " 15\n", + " 5045\n", + " 2023-09-03T20:07:34.895841+00:00\n", + " {'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',...\n", + " cbecc7f4-a94c-49d6-ae1a-e42f20136676\n", + " cropwizard-beta\n", + " None\n", " \n", " \n", - " 1791\n", - " gpt4\n", + " 16\n", + " 5014\n", + " 2023-09-01T18:36:04.202716+00:00\n", + " {'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',...\n", + " 3303ec4f-84d5-4eec-a4b3-af020e62b79d\n", + " cropwizard-beta\n", + " vadve@illinois.edu\n", " \n", " \n", - " 1792\n", - " NCSA\n", + " 17\n", + " 5013\n", + " 2023-09-01T18:35:32.12103+00:00\n", + " {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...\n", + " fc91f681-54e3-4df5-8fd4-952d02b8feba\n", + " cropwizard-beta\n", + " rohan13@illinois.edu\n", " \n", " \n", - " 1793\n", - " NCSA\n", + " 18\n", + " 5011\n", + " 2023-09-01T18:28:24.588312+00:00\n", + " {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...\n", + " 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3\n", + " cropwizard-beta\n", + " rohan13@illinois.edu\n", + " \n", + " \n", + " 19\n", + " 5007\n", + " 2023-09-01T18:15:08.636935+00:00\n", + " {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...\n", + " d7191543-c517-4007-a2fb-ae28660ef37c\n", + " cropwizard-beta\n", + " None\n", + " \n", + " \n", + " 20\n", + " 5004\n", + " 2023-09-01T16:28:23.202471+00:00\n", + " {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...\n", + " edabed51-e581-48f7-865c-89a4b1d9d120\n", + " cropwizard-beta\n", + " dabholkar.asmita@gmail.com\n", " \n", " \n", "\n", - "

1794 rows × 1 columns

\n", "" ], "text/plain": [ - " course_name\n", - "0 gpt4\n", - "1 gpt4\n", - "2 gpt4\n", - "3 gpt4\n", - "4 gpt4\n", - "... ...\n", - "1789 FIN574-GT\n", - "1790 NCSA\n", - "1791 gpt4\n", - "1792 NCSA\n", - "1793 NCSA\n", + " id created_at \\\n", + "0 5193 2023-09-06T23:21:36.639848+00:00 \n", + "1 5192 2023-09-06T23:04:50.982857+00:00 \n", + "2 5174 2023-09-06T22:22:44.107536+00:00 \n", + "3 5184 2023-09-06T23:01:06.796384+00:00 \n", + "4 5182 2023-09-06T22:58:21.66316+00:00 \n", + "5 5194 2023-09-06T23:22:06.786717+00:00 \n", + "6 5113 2023-09-05T21:34:17.944623+00:00 \n", + "7 5333 2023-09-07T22:25:52.858867+00:00 \n", + "8 5108 2023-09-05T21:11:51.289178+00:00 \n", + "9 5104 2023-09-05T20:38:27.687893+00:00 \n", + "10 5103 2023-09-05T20:11:51.810222+00:00 \n", + "11 5098 2023-09-05T19:17:36.190976+00:00 \n", + "12 5042 2023-09-02T14:43:02.160608+00:00 \n", + "13 5009 2023-09-01T18:26:57.628766+00:00 \n", + "14 5006 2023-09-01T18:06:33.350981+00:00 \n", + "15 5045 2023-09-03T20:07:34.895841+00:00 \n", + "16 5014 2023-09-01T18:36:04.202716+00:00 \n", + "17 5013 2023-09-01T18:35:32.12103+00:00 \n", + "18 5011 2023-09-01T18:28:24.588312+00:00 \n", + "19 5007 2023-09-01T18:15:08.636935+00:00 \n", + "20 5004 2023-09-01T16:28:23.202471+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',... \n", + "1 {'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',... \n", + "2 {'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',... \n", + "3 {'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',... \n", + "4 {'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',... \n", + "5 {'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',... \n", + "6 {'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',... \n", + "7 {'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',... \n", + "8 {'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',... \n", + "9 {'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',... \n", + "10 {'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',... \n", + "11 {'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',... \n", + "12 {'id': '8df8a436-adbf-441a-92ec-f6d926aee789',... \n", + "13 {'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',... \n", + "14 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", + "15 {'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',... \n", + "16 {'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',... \n", + "17 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", + "18 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", + "19 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", + "20 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... 
\n", "\n", - "[1794 rows x 1 columns]" + " convo_id course_name \\\n", + "0 d6e83182-56d8-4151-b84e-a09dd84b8836 cropwizard-beta \n", + "1 1390b226-2115-4078-a594-ed4bad0fb4e0 cropwizard-beta \n", + "2 fca0cf0b-6bac-4cf6-bd4d-d3501827c947 cropwizard-beta \n", + "3 1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e cropwizard-beta \n", + "4 0c9d9873-2c52-4b12-90ec-d4a495cbf4e0 cropwizard-beta \n", + "5 d51bbdd8-c5c8-4e5b-a003-556a8ac74726 cropwizard-beta \n", + "6 2a39551b-7b6c-4ba0-aa77-ffc9b1316718 cropwizard-beta \n", + "7 bd3c9174-4512-4ac6-ae05-8aafa4099215 cropwizard-beta \n", + "8 a49941c4-2049-478b-b43e-9c574cadbd05 cropwizard-beta \n", + "9 1be7956c-d5c8-4b1f-a62f-145421a2e7f7 cropwizard-beta \n", + "10 20e6e160-6fd3-4e0a-82b7-98457f2ff1c3 cropwizard-beta \n", + "11 fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c cropwizard-beta \n", + "12 8df8a436-adbf-441a-92ec-f6d926aee789 cropwizard-beta \n", + "13 b401031c-5540-43fc-baff-7c6db90ff3ae cropwizard-beta \n", + "14 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", + "15 cbecc7f4-a94c-49d6-ae1a-e42f20136676 cropwizard-beta \n", + "16 3303ec4f-84d5-4eec-a4b3-af020e62b79d cropwizard-beta \n", + "17 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", + "18 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", + "19 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", + "20 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", + "\n", + " user_email \n", + "0 dabholkar.asmita@gmail.com \n", + "1 kastanday@live.com \n", + "2 dabholkar.asmita@gmail.com \n", + "3 dabholkar.asmita@gmail.com \n", + "4 dabholkar.asmita@gmail.com \n", + "5 avd6@illinois.edu \n", + "6 hetarth2@illinois.edu \n", + "7 dabholkar.asmita@gmail.com \n", + "8 vadve@illinois.edu \n", + "9 kastanday@live.com \n", + "10 kastanday@live.com \n", + "11 dabholkar.asmita@gmail.com \n", + "12 None \n", + "13 rohan13@illinois.edu \n", + "14 hetarth2@illinois.edu \n", + "15 None \n", + "16 vadve@illinois.edu \n", + "17 rohan13@illinois.edu \n", + "18 rohan13@illinois.edu \n", + "19 None \n", + "20 dabholkar.asmita@gmail.com " ] }, - "execution_count": 21, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# query data for one course for testing\n", - "course = 'ece120'\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"course_name\", count='exact').execute()\n", + "course = 'cropwizard-beta'\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", "data = response.data\n", "df = pd.DataFrame(data)\n", "df" @@ -167,340 +392,276 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id int64\n", + "created_at object\n", + "convo object\n", + "convo_id object\n", + "course_name object\n", + "user_email object\n", + "dtype: object\n", + "id int64\n", + "created_at datetime64[ns, UTC]\n", + "convo object\n", + "convo_id object\n", + "course_name object\n", + "user_email object\n", + "dtype: object\n" + ] + } + ], "source": [ - "course_list = df['course_name'].unique()\n" + "\n" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atconvoconvo_idcourse_nameuser_email
050042023-09-01 16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
150062023-09-01 18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
250072023-09-01 18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
350112023-09-01 18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
450132023-09-01 18:35:32.121030+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 5004 2023-09-01 16:28:23.202471+00:00 \n", + "1 5006 2023-09-01 18:06:33.350981+00:00 \n", + "2 5007 2023-09-01 18:15:08.636935+00:00 \n", + "3 5011 2023-09-01 18:28:24.588312+00:00 \n", + "4 5013 2023-09-01 18:35:32.121030+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... \n", + "1 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", + "2 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", + "3 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", + "4 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", + "\n", + " convo_id course_name \\\n", + "0 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", + "1 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", + "2 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", + "3 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", + "4 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", + "\n", + " user_email \n", + "0 dabholkar.asmita@gmail.com \n", + "1 hetarth2@illinois.edu \n", + "2 None \n", + "3 rohan13@illinois.edu \n", + "4 rohan13@illinois.edu " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 10, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2023-09-01 16:28:23.202471+0000', tz='UTC')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Mapping Conversations to Nomic" + "df.iloc[0]['created_at']" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "course_list = df['course_name'].unique()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "gpt4\n", - "623\n", - "badm_550_ashley\n", - "17\n", - "None\n", - "0\n", - "ece120\n", - "154\n", - "test-video-ingest\n", - "13\n", - "badm-567-v3\n", - "15\n", - "badm-567\n", - "3\n", - "new-weather\n", - "65\n", - "gies-online-mba-v2\n", - "course_name: gies-online-mba-v2\n", - "error: The read operation timed out\n", - "frontend\n", - "8\n", - "test-video-ingest-28\n", - "2\n", - "ECE220FA23\n", - "74\n", - "ECE408FA23\n", - "259\n", - "pdeploy999\n", - "2\n", - "badm-350-summer\n", - "5\n", - "previewtesting1\n", - "1\n", - "localtest2\n", - "2\n", - "your-favorite-url\n", - "1\n", - "mantine\n", - "6\n", - "ece408\n", - "27\n", - "27\n", - "27\n", - "(27, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:42:49.002 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for ece408` in organization `dabholkar.asmita`\n", - "2023-09-12 15:42:50.721 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:42:50.721 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 20 null values for field user_email with string 'null'. This behavior will change in a future version.\n", - "2023-09-12 15:42:50.721 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:02, 2.16s/it]\n", - "2023-09-12 15:42:52.900 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:42:52.908 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:42:56.033 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for ece408` in project `Conversation Map for ece408`: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b\n", - "2023-09-12 15:42:56.046 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for ece408: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for ece408: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/bc754afd-83fb-43cb-99db-e2bd26f1f40b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:43:00.951 | INFO | nomic.project:create_index:1132 - Created map `ece408_convo_index` in project `Conversation Map for ece408`: https://atlas.nomic.ai/map/df8e3337-396a-443d-a6f5-8240c66024ac/49bd2ab9-db8a-45ab-b399-5039c7b7e736\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "test-video-ingest-2\n", - "2\n", - "Snowmass\n", - "19\n", - "badm_567_v2\n", - "11\n", - "erpnext\n", - "1\n", - "mip\n", - "1\n", - "farmdoc_test_kastan-v1\n", - "9\n", - "personalstatement\n", - "2\n", - "test-canvas\n", - "4\n", - "hrc\n", - "3\n", - "csv\n", - "4\n", - "star_nox\n", - "3\n", - "badm_567\n", - "3\n", - "SPaRCEd\n", - "2\n", - "NPRE247\n", - "13\n", - "localdemo8\n", - "2\n", - "badm_567_thumbnails\n", - "2\n", - "your-awesome-course\n", - "course_name: your-awesome-course\n", - "error: The read operation timed out\n", - "chatbot\n", - "3\n", - "erp\n", - "2\n", - "extreme\n", - "3\n", - "rohan_atree\n", - "4\n", - "zotero-extreme\n", - "9\n", - "pract\n", - "18\n", - "test-video-ingest-20\n", - "3\n", - "gies-online-mba2\n", - "2\n", - "gies-online-mba\n", - "3\n", - "ece120FL22\n", - "15\n", - "careerassistant\n", - "7\n", - "weather\n", - "4\n", - "lillian-wang-blog\n", - "2\n", - "local-test5\n", - "4\n", - "demo-for-vyriad\n", - "6\n", - "ag-gpt-beta\n", - "5\n", - "rohan_atree_demo\n", - "2\n", - "cropwizard\n", - "25\n", - "25\n", - "25\n", - "(25, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:43:49.933 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", - "2023-09-12 15:43:50.980 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:43:50.980 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:03, 3.94s/it]\n", - "2023-09-12 15:43:54.938 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:43:54.953 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:43:58.534 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c\n", - "2023-09-12 15:43:58.534 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/53b8076a-7f80-455f-abea-2cf84bc1912c]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:44:00.949 | INFO | nomic.project:create_index:1132 - Created map `cropwizard_convo_index` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/c9b13bcc-d0cb-40a6-80c6-3e98b1bf0bda/5eb008c1-5a10-4f20-ab7d-c42a238e1595\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "rtest\n", - "1\n", - "previewdeploy\n", - "3\n", - "r2test\n", - "1\n", - "Law794-TransactionalDraftingAlam\n", - "4\n", - "personal-statement\n", - "2\n", - "rohan_excel\n", - "1\n", - "langchain-python\n", - "1\n", - "langchain\n", - "4\n", - "ncsa-live-demo\n", - "1\n", - "rohan_atree_individual\n", - "2\n", - "meta11-test\n", - "14\n", - "ceesd-mirgecom\n", - "2\n", - "NCSADelta\n", - "10\n", - "HealthyLivingGuide\n", - "3\n", - "rohan\n", - "2\n", - "NCSA\n", - "40\n", - "40\n", - "40\n", - "(40, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:44:35.409 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for NCSA` in organization `dabholkar.asmita`\n", - "2023-09-12 15:44:36.768 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:44:36.778 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 10 null values for field user_email with string 'null'. This behavior will change in a future version.\n", - "2023-09-12 15:44:36.783 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:01, 1.99s/it]\n", - "2023-09-12 15:44:38.783 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:44:38.784 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:44:40.137 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for NCSA` in project `Conversation Map for NCSA`: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447\n", - "2023-09-12 15:44:40.146 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for NCSA: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for NCSA: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/7b2238ae-7eb9-407a-ac60-6d5a8fd1f447]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:44:45.686 | INFO | nomic.project:create_index:1132 - Created map `NCSA_convo_index` in project `Conversation Map for NCSA`: https://atlas.nomic.ai/map/d2aef24e-2ea4-4712-87c0-804da0ab96b0/331ba551-f6b4-4c79-a31c-4cc5390bfac7\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "FIN574-GT\n", - "24\n", - "24\n", - "24\n", - "(24, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:45:00.655 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for FIN574-GT` in organization `dabholkar.asmita`\n", - "2023-09-12 15:45:04.369 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:45:04.385 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:06, 6.08s/it]\n", - "2023-09-12 15:45:10.475 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:45:10.475 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:45:13.721 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for FIN574-GT` in project `Conversation Map for FIN574-GT`: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3\n", - "2023-09-12 15:45:13.723 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for FIN574-GT: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3\n" + "21\n", + "21\n" ] - }, + } + ], + "source": [ + "import datetime\n", + "\n", + "metadata = []\n", + "user_queries = []\n", + "i = 1\n", + "\n", + "# log conversation instead of individual messages\n", + "for index, row in df.iterrows():\n", + " user_email = row['user_email']\n", + " created_at = row['created_at']\n", + "\n", + " current_time = time.time()\n", + " dt_object = datetime.datetime.fromtimestamp(current_time)\n", + " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", + "\n", + "\n", + "\n", + " convo = row['convo']\n", + " messages = convo['messages']\n", + " first_message = messages[0]['content']\n", + " \n", + " user_queries.append(first_message)\n", + " # create metadata for multi-turn conversation\n", + " conversation = \"\"\n", + " for message in messages:\n", + " # string of role: content, role: content, ...\n", + " if message['role'] == 'user':\n", + " emoji = \"🙋\"\n", + " else:\n", + " emoji = \"🤖\"\n", + " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", + " # add to metadata\n", + " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", + " \"modified_at\": formatted_timestamp}\n", + " metadata.append(metadata_row)\n", + " i += 1\n", + "\n", + "print(len(user_queries))\n", + "print(len(metadata))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[Conversation Map for FIN574-GT: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/149f6eab-f636-4754-8117-2da7f030c5b3]\n" + "{'course': 'cropwizard-beta', 'conversation': \"\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n>>> 🤖assistant: Weed management in soybean fields involves a combination of cultural, mechanical, and chemical strategies. Here are some recommended practices:\\n\\n1. **Crop Rotation**: Rotating crops can help reduce the presence of certain weed species that are more prevalent in soybean fields. \\n\\n2. **Cover Crops**: Planting cover crops can suppress weed growth by competing for sunlight, water, and nutrients. \\n\\n3. **Tillage**: Tillage can be used to disrupt weed growth. However, it should be used judiciously to prevent soil erosion.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop compete better against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergent and post-emergent herbicides. Pre-emergent herbicides are applied before the weeds germinate, while post-emergent herbicides are applied after the weeds have sprouted. 
\\n\\n6. **Herbicide Rotation**: Rotate herbicides with different modes of action to prevent weeds from developing resistance.\\n\\n7. **Scouting and Monitoring**: Regularly scout and monitor your fields to identify weed problems early and respond quickly.\\n\\n8. **Clean Equipment**: Clean your farming equipment to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. It's also essential to consider the specific types of weeds you're dealing with, as different species may require different control methods.\\n\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n\\n\\n\\n>>> 🤖assistant: Managing weeds in soybean fields requires a comprehensive and integrated approach. Here are some best practices:\\n\\n1. **Crop Rotation**: This practice can help break the life cycle of certain weeds. By alternating the types of crops in each field, you can naturally reduce the presence of weeds that thrive in soybean fields.\\n\\n2. **Cover Crops**: Planting cover crops in the off-season can suppress weed growth by outcompeting them for resources such as sunlight, water, and nutrients.\\n\\n3. **Tillage**: Tillage can help control weeds by physically removing or burying them. However, excessive tillage can lead to soil erosion, so it should be used judiciously.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop establish itself and compete more effectively against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergence and post-emergence herbicides to control weeds. Pre-emergence herbicides are used before the weeds germinate, while post-emergence herbicides are used after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: To prevent herbicide resistance, rotate the types of herbicides used, ensuring they have different modes of action.\\n\\n7. **Regular Scouting**: Regularly inspect your fields to identify weed problems early. This allows for timely intervention and can prevent the spread of weeds.\\n\\n8. **Clean Equipment**: Ensure that your farming equipment is cleaned regularly to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. 
Tailor your approach based on the specific types of weeds prevalent in your field, as different species may require different control strategies.\\n\", 'conversation_id': 'd6e83182-56d8-4151-b84e-a09dd84b8836', 'id': 1, 'user_email': 'dabholkar.asmita@gmail.com', 'first_query': 'What is the best approach to prevent weeds in my soybean field?', 'created_at': '2023-09-06T23:21:36.639848+00:00', 'modified_at': '2023-09-12 18:17:49.784979+00:00'}\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 15:45:19.164 | INFO | nomic.project:create_index:1132 - Created map `FIN574-GT_convo_index` in project `Conversation Map for FIN574-GT`: https://atlas.nomic.ai/map/d83f5440-9ef1-45ed-a2e5-c3229398b0e8/092d7d2c-b792-4304-ae04-d8f09ffbba5d\n" - ] - }, + } + ], + "source": [ + "print(metadata[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Conversations to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "babreu\n", - "1\n", - "test-video-ingest-31\n", - "1\n", - "p\n", - "1\n", - "new_test_course\n", - "1\n", "cropwizard-beta\n", "21\n", "21\n", @@ -512,159 +673,35 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-12 15:45:30.549 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", - "2023-09-12 15:45:32.134 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 15:45:32.150 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. This behavior will change in a future version.\n", - "2023-09-12 15:45:32.150 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.08it/s]\n", - "2023-09-12 15:45:33.088 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 15:45:33.092 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 15:45:34.335 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd\n", - "2023-09-12 15:45:34.335 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd\n" + "2023-09-12 22:36:00.418 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", + "2023-09-12 22:36:01.434 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 22:36:01.454 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. This behavior will change in a future version.\n", + "2023-09-12 22:36:01.459 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.11it/s]\n", + "2023-09-12 22:36:02.373 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 22:36:02.374 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 22:36:03.570 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n", + "2023-09-12 22:36:03.571 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/d3a66bb5-0ab9-4e9c-9fe5-10aa840ce9bd]\n" + "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-09-12 15:45:37.865 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/44b31bc3-726e-4930-9584-616bbcb2d5d3/20a567c6-056b-49b3-a421-f0e49f348cda\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "maxlindsey\n", - "1\n", - "Gies-graduate-hub\n", - "4\n", - "test-video-ingest-17\n", - "2\n", - "summary\n", - "10\n", - "test-video-ingest-3\n", - "8\n", - "test-video-ingest-27\n", - "2\n", - "lillian-wang-blog-2\n", - "1\n", - "python-magic\n", - "1\n", - "ansible2\n", - "1\n", - "ece408fa23\n", - "7\n", - "farmdoc_test_josh_v2\n", - "1\n", - "local-test3\n", - "1\n", - "automata\n", - "1\n", - "SpaceFlorida-GT\n", - "5\n", - "GBSI-GT\n", - "4\n", - "test-video-ingest-21\n", - "8\n", - "newnew_ncsa\n", - "1\n", - "canvas\n", - "1\n", - "gbsi-gt\n", - "3\n", - "meditation-tutorial\n", - "1\n", - "profit\n", - "1\n", - "ansible\n", - "8\n", - "langchain-docs\n", - "9\n", - "testing_url_metadata_josh\n", - "1\n", - "test-india-biodiversity\n", - "1\n", - "vyriad\n", - "10\n", - "irc-workplans\n", - "1\n", - "kastanasdfasdfasdf\n", - "1\n", - "testing_refactor\n", - "2\n", - "BADM-567-GT\n", - "3\n", - "mdt\n", - "1\n", - "vercel\n", - "1\n", - "gies-graduate-hub\n", - "course_name: gies-graduate-hub\n", - "error: The read operation timed out\n", - "test-video-ingest-12\n", - "3\n", - "NuclGPT-v1\n", - "2\n", - "test-video-ingest-13\n", - "1\n", - "test_new_supa_scrape\n", - "1\n", - "doe-ascr-2023\n", - "1\n", - "arize\n", - "2\n", - "final-meta-test\n", - "1\n", - "preview-meta-test\n", - "1\n", - "gies-online-mba-V3\n", - "2\n", - "FoF-Drawdown-from-INVPEIV-5-24-23\n", - "1\n", - "test-video-ingest-30\n", - "1\n", - "test\n", - "1\n", - "NCSA-v2\n", - "3\n", - "conversational\n", - "1\n", - "clowder-docs\n", - "5\n", - "DA\n", - "1\n", - "test-video-ingest-25\n", - "1\n", - "test-ingest-10\n", - "1\n", - "eric-test-course\n", - "1\n", - "farmdoc-test\n", - "1\n", - "test-video-ingest-22\n", - "2\n", - "Academic-paper\n", - "1\n", - "starwars\n", - "1\n", - "AdamDemo\n", - "1\n", - "OpenCI-ACCESS\n", - "1\n", - "clockkit-github\n", - "1\n" + "2023-09-12 22:36:05.071 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: 
https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/9507bda2-b846-4e48-90d8-684a40fb3220\n" ] } ], "source": [ "embeddings_model = OpenAIEmbeddings()\n", + "course_list = ['cropwizard-beta']\n", "\n", "for course in course_list:\n", " print(course)\n", @@ -674,7 +711,7 @@ " course_df = pd.DataFrame(data)\n", " print(len(course_df))\n", "\n", - " if len(course_df) < 20 or course in ['gpt4', 'badm_550_ashley', 'ece120', 'new-weather', 'ECE220FA23', 'ECE408FA23']:\n", + " if len(course_df) < 20:\n", " continue\n", " else:\n", " \n", @@ -685,9 +722,15 @@ " # log conversation instead of individual messages\n", " for index, row in course_df.iterrows():\n", " user_email = row['user_email']\n", + " created_at = row['created_at']\n", " convo = row['convo']\n", " messages = convo['messages']\n", " first_message = messages[0]['content']\n", + "\n", + " current_time = time.time()\n", + " dt_object = datetime.datetime.fromtimestamp(current_time)\n", + " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", + " \n", " user_queries.append(first_message)\n", " # create metadata for multi-turn conversation\n", " conversation = \"\"\n", @@ -700,7 +743,8 @@ " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", " # add to metadata\n", " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message}\n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", + " \"modified_at\": formatted_timestamp}\n", " metadata.append(metadata_row)\n", " i += 1\n", "\n", From aee14ddcc4d58fa0f3d0b0dc855f331b0947e229 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 12 Sep 2023 22:47:20 -0500 Subject: [PATCH 50/61] removed test files --- ai_ta_backend/nomic_map_creation.ipynb | 1915 ------------------------ 1 file changed, 1915 deletions(-) delete mode 100644 ai_ta_backend/nomic_map_creation.ipynb diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb deleted file mode 100644 index c11b80db..00000000 --- a/ai_ta_backend/nomic_map_creation.ipynb +++ /dev/null @@ -1,1915 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# import required libraries\n", - "\n", - "import os\n", - "import supabase\n", - "from nomic import atlas\n", - "from dotenv import load_dotenv\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "import numpy as np\n", - "import time\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# loading environment variables\n", - "\n", - "env_path = \"../.env\"\n", - "load_dotenv(dotenv_path=env_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# initialize supabase client\n", - "\n", - "url = os.environ.get(\"SUPABASE_URL\")\n", - "key = os.environ.get(\"SUPABASE_API_KEY\")\n", - "\n", - "supabase_client = supabase.create_client(url, key)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atconvoconvo_idcourse_nameuser_email
051932023-09-06T23:21:36.639848+00:00{'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',...d6e83182-56d8-4151-b84e-a09dd84b8836cropwizard-betadabholkar.asmita@gmail.com
151922023-09-06T23:04:50.982857+00:00{'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',...1390b226-2115-4078-a594-ed4bad0fb4e0cropwizard-betakastanday@live.com
251742023-09-06T22:22:44.107536+00:00{'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',...fca0cf0b-6bac-4cf6-bd4d-d3501827c947cropwizard-betadabholkar.asmita@gmail.com
351842023-09-06T23:01:06.796384+00:00{'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',...1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0ecropwizard-betadabholkar.asmita@gmail.com
451822023-09-06T22:58:21.66316+00:00{'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',...0c9d9873-2c52-4b12-90ec-d4a495cbf4e0cropwizard-betadabholkar.asmita@gmail.com
551942023-09-06T23:22:06.786717+00:00{'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',...d51bbdd8-c5c8-4e5b-a003-556a8ac74726cropwizard-betaavd6@illinois.edu
651132023-09-05T21:34:17.944623+00:00{'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',...2a39551b-7b6c-4ba0-aa77-ffc9b1316718cropwizard-betahetarth2@illinois.edu
753332023-09-07T22:25:52.858867+00:00{'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',...bd3c9174-4512-4ac6-ae05-8aafa4099215cropwizard-betadabholkar.asmita@gmail.com
851082023-09-05T21:11:51.289178+00:00{'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',...a49941c4-2049-478b-b43e-9c574cadbd05cropwizard-betavadve@illinois.edu
951042023-09-05T20:38:27.687893+00:00{'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',...1be7956c-d5c8-4b1f-a62f-145421a2e7f7cropwizard-betakastanday@live.com
1051032023-09-05T20:11:51.810222+00:00{'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',...20e6e160-6fd3-4e0a-82b7-98457f2ff1c3cropwizard-betakastanday@live.com
1150982023-09-05T19:17:36.190976+00:00{'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',...fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1ccropwizard-betadabholkar.asmita@gmail.com
1250422023-09-02T14:43:02.160608+00:00{'id': '8df8a436-adbf-441a-92ec-f6d926aee789',...8df8a436-adbf-441a-92ec-f6d926aee789cropwizard-betaNone
1350092023-09-01T18:26:57.628766+00:00{'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',...b401031c-5540-43fc-baff-7c6db90ff3aecropwizard-betarohan13@illinois.edu
1450062023-09-01T18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
1550452023-09-03T20:07:34.895841+00:00{'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',...cbecc7f4-a94c-49d6-ae1a-e42f20136676cropwizard-betaNone
1650142023-09-01T18:36:04.202716+00:00{'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',...3303ec4f-84d5-4eec-a4b3-af020e62b79dcropwizard-betavadve@illinois.edu
1750132023-09-01T18:35:32.12103+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
1850112023-09-01T18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
1950072023-09-01T18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
2050042023-09-01T16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
\n", - "
" - ], - "text/plain": [ - " id created_at \\\n", - "0 5193 2023-09-06T23:21:36.639848+00:00 \n", - "1 5192 2023-09-06T23:04:50.982857+00:00 \n", - "2 5174 2023-09-06T22:22:44.107536+00:00 \n", - "3 5184 2023-09-06T23:01:06.796384+00:00 \n", - "4 5182 2023-09-06T22:58:21.66316+00:00 \n", - "5 5194 2023-09-06T23:22:06.786717+00:00 \n", - "6 5113 2023-09-05T21:34:17.944623+00:00 \n", - "7 5333 2023-09-07T22:25:52.858867+00:00 \n", - "8 5108 2023-09-05T21:11:51.289178+00:00 \n", - "9 5104 2023-09-05T20:38:27.687893+00:00 \n", - "10 5103 2023-09-05T20:11:51.810222+00:00 \n", - "11 5098 2023-09-05T19:17:36.190976+00:00 \n", - "12 5042 2023-09-02T14:43:02.160608+00:00 \n", - "13 5009 2023-09-01T18:26:57.628766+00:00 \n", - "14 5006 2023-09-01T18:06:33.350981+00:00 \n", - "15 5045 2023-09-03T20:07:34.895841+00:00 \n", - "16 5014 2023-09-01T18:36:04.202716+00:00 \n", - "17 5013 2023-09-01T18:35:32.12103+00:00 \n", - "18 5011 2023-09-01T18:28:24.588312+00:00 \n", - "19 5007 2023-09-01T18:15:08.636935+00:00 \n", - "20 5004 2023-09-01T16:28:23.202471+00:00 \n", - "\n", - " convo \\\n", - "0 {'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',... \n", - "1 {'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',... \n", - "2 {'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',... \n", - "3 {'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',... \n", - "4 {'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',... \n", - "5 {'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',... \n", - "6 {'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',... \n", - "7 {'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',... \n", - "8 {'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',... \n", - "9 {'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',... \n", - "10 {'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',... \n", - "11 {'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',... \n", - "12 {'id': '8df8a436-adbf-441a-92ec-f6d926aee789',... \n", - "13 {'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',... \n", - "14 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", - "15 {'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',... \n", - "16 {'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',... \n", - "17 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", - "18 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", - "19 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", - "20 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... 
\n", - "\n", - " convo_id course_name \\\n", - "0 d6e83182-56d8-4151-b84e-a09dd84b8836 cropwizard-beta \n", - "1 1390b226-2115-4078-a594-ed4bad0fb4e0 cropwizard-beta \n", - "2 fca0cf0b-6bac-4cf6-bd4d-d3501827c947 cropwizard-beta \n", - "3 1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e cropwizard-beta \n", - "4 0c9d9873-2c52-4b12-90ec-d4a495cbf4e0 cropwizard-beta \n", - "5 d51bbdd8-c5c8-4e5b-a003-556a8ac74726 cropwizard-beta \n", - "6 2a39551b-7b6c-4ba0-aa77-ffc9b1316718 cropwizard-beta \n", - "7 bd3c9174-4512-4ac6-ae05-8aafa4099215 cropwizard-beta \n", - "8 a49941c4-2049-478b-b43e-9c574cadbd05 cropwizard-beta \n", - "9 1be7956c-d5c8-4b1f-a62f-145421a2e7f7 cropwizard-beta \n", - "10 20e6e160-6fd3-4e0a-82b7-98457f2ff1c3 cropwizard-beta \n", - "11 fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c cropwizard-beta \n", - "12 8df8a436-adbf-441a-92ec-f6d926aee789 cropwizard-beta \n", - "13 b401031c-5540-43fc-baff-7c6db90ff3ae cropwizard-beta \n", - "14 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", - "15 cbecc7f4-a94c-49d6-ae1a-e42f20136676 cropwizard-beta \n", - "16 3303ec4f-84d5-4eec-a4b3-af020e62b79d cropwizard-beta \n", - "17 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", - "18 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", - "19 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", - "20 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", - "\n", - " user_email \n", - "0 dabholkar.asmita@gmail.com \n", - "1 kastanday@live.com \n", - "2 dabholkar.asmita@gmail.com \n", - "3 dabholkar.asmita@gmail.com \n", - "4 dabholkar.asmita@gmail.com \n", - "5 avd6@illinois.edu \n", - "6 hetarth2@illinois.edu \n", - "7 dabholkar.asmita@gmail.com \n", - "8 vadve@illinois.edu \n", - "9 kastanday@live.com \n", - "10 kastanday@live.com \n", - "11 dabholkar.asmita@gmail.com \n", - "12 None \n", - "13 rohan13@illinois.edu \n", - "14 hetarth2@illinois.edu \n", - "15 None \n", - "16 vadve@illinois.edu \n", - "17 rohan13@illinois.edu \n", - "18 rohan13@illinois.edu \n", - "19 None \n", - "20 dabholkar.asmita@gmail.com " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "course = 'cropwizard-beta'\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", - "data = response.data\n", - "df = pd.DataFrame(data)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id int64\n", - "created_at object\n", - "convo object\n", - "convo_id object\n", - "course_name object\n", - "user_email object\n", - "dtype: object\n", - "id int64\n", - "created_at datetime64[ns, UTC]\n", - "convo object\n", - "convo_id object\n", - "course_name object\n", - "user_email object\n", - "dtype: object\n" - ] - } - ], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atconvoconvo_idcourse_nameuser_email
050042023-09-01 16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
150062023-09-01 18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
250072023-09-01 18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
350112023-09-01 18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
450132023-09-01 18:35:32.121030+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
\n", - "
" - ], - "text/plain": [ - " id created_at \\\n", - "0 5004 2023-09-01 16:28:23.202471+00:00 \n", - "1 5006 2023-09-01 18:06:33.350981+00:00 \n", - "2 5007 2023-09-01 18:15:08.636935+00:00 \n", - "3 5011 2023-09-01 18:28:24.588312+00:00 \n", - "4 5013 2023-09-01 18:35:32.121030+00:00 \n", - "\n", - " convo \\\n", - "0 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... \n", - "1 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", - "2 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", - "3 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", - "4 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", - "\n", - " convo_id course_name \\\n", - "0 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", - "1 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", - "2 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", - "3 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", - "4 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", - "\n", - " user_email \n", - "0 dabholkar.asmita@gmail.com \n", - "1 hetarth2@illinois.edu \n", - "2 None \n", - "3 rohan13@illinois.edu \n", - "4 rohan13@illinois.edu " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Timestamp('2023-09-01 16:28:23.202471+0000', tz='UTC')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.iloc[0]['created_at']" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "course_list = df['course_name'].unique()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21\n", - "21\n" - ] - } - ], - "source": [ - "import datetime\n", - "\n", - "metadata = []\n", - "user_queries = []\n", - "i = 1\n", - "\n", - "# log conversation instead of individual messages\n", - "for index, row in df.iterrows():\n", - " user_email = row['user_email']\n", - " created_at = row['created_at']\n", - "\n", - " current_time = time.time()\n", - " dt_object = datetime.datetime.fromtimestamp(current_time)\n", - " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", - "\n", - "\n", - "\n", - " convo = row['convo']\n", - " messages = convo['messages']\n", - " first_message = messages[0]['content']\n", - " \n", - " user_queries.append(first_message)\n", - " # create metadata for multi-turn conversation\n", - " conversation = \"\"\n", - " for message in messages:\n", - " # string of role: content, role: content, ...\n", - " if message['role'] == 'user':\n", - " emoji = \"🙋\"\n", - " else:\n", - " emoji = \"🤖\"\n", - " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", - " # add to metadata\n", - " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", - " \"modified_at\": formatted_timestamp}\n", - " metadata.append(metadata_row)\n", - " i += 1\n", - "\n", - "print(len(user_queries))\n", - "print(len(metadata))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"{'course': 'cropwizard-beta', 'conversation': \"\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n>>> 🤖assistant: Weed management in soybean fields involves a combination of cultural, mechanical, and chemical strategies. Here are some recommended practices:\\n\\n1. **Crop Rotation**: Rotating crops can help reduce the presence of certain weed species that are more prevalent in soybean fields. \\n\\n2. **Cover Crops**: Planting cover crops can suppress weed growth by competing for sunlight, water, and nutrients. \\n\\n3. **Tillage**: Tillage can be used to disrupt weed growth. However, it should be used judiciously to prevent soil erosion.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop compete better against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergent and post-emergent herbicides. Pre-emergent herbicides are applied before the weeds germinate, while post-emergent herbicides are applied after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: Rotate herbicides with different modes of action to prevent weeds from developing resistance.\\n\\n7. **Scouting and Monitoring**: Regularly scout and monitor your fields to identify weed problems early and respond quickly.\\n\\n8. **Clean Equipment**: Clean your farming equipment to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. It's also essential to consider the specific types of weeds you're dealing with, as different species may require different control methods.\\n\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n\\n\\n\\n>>> 🤖assistant: Managing weeds in soybean fields requires a comprehensive and integrated approach. Here are some best practices:\\n\\n1. **Crop Rotation**: This practice can help break the life cycle of certain weeds. By alternating the types of crops in each field, you can naturally reduce the presence of weeds that thrive in soybean fields.\\n\\n2. **Cover Crops**: Planting cover crops in the off-season can suppress weed growth by outcompeting them for resources such as sunlight, water, and nutrients.\\n\\n3. **Tillage**: Tillage can help control weeds by physically removing or burying them. However, excessive tillage can lead to soil erosion, so it should be used judiciously.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop establish itself and compete more effectively against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergence and post-emergence herbicides to control weeds. Pre-emergence herbicides are used before the weeds germinate, while post-emergence herbicides are used after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: To prevent herbicide resistance, rotate the types of herbicides used, ensuring they have different modes of action.\\n\\n7. **Regular Scouting**: Regularly inspect your fields to identify weed problems early. This allows for timely intervention and can prevent the spread of weeds.\\n\\n8. **Clean Equipment**: Ensure that your farming equipment is cleaned regularly to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. 
Tailor your approach based on the specific types of weeds prevalent in your field, as different species may require different control strategies.\\n\", 'conversation_id': 'd6e83182-56d8-4151-b84e-a09dd84b8836', 'id': 1, 'user_email': 'dabholkar.asmita@gmail.com', 'first_query': 'What is the best approach to prevent weeds in my soybean field?', 'created_at': '2023-09-06T23:21:36.639848+00:00', 'modified_at': '2023-09-12 18:17:49.784979+00:00'}\n" - ] - } - ], - "source": [ - "print(metadata[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Mapping Conversations to Nomic" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cropwizard-beta\n", - "21\n", - "21\n", - "21\n", - "(21, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 22:36:00.418 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", - "2023-09-12 22:36:01.434 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 22:36:01.454 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. This behavior will change in a future version.\n", - "2023-09-12 22:36:01.459 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.11it/s]\n", - "2023-09-12 22:36:02.373 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 22:36:02.374 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 22:36:03.570 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n", - "2023-09-12 22:36:03.571 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 22:36:05.071 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/9507bda2-b846-4e48-90d8-684a40fb3220\n" - ] - } - ], - "source": [ - "embeddings_model = OpenAIEmbeddings()\n", - "course_list = ['cropwizard-beta']\n", - "\n", - "for course in course_list:\n", - " print(course)\n", - " try:\n", - " response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", - " data = response.data\n", - " course_df = pd.DataFrame(data)\n", - " print(len(course_df))\n", - "\n", - " if len(course_df) < 20:\n", - " continue\n", - " else:\n", - " \n", - " user_queries = []\n", - " metadata = []\n", - " i = 1\n", - "\n", - " # log conversation instead of individual messages\n", - " for index, row in course_df.iterrows():\n", - " user_email = row['user_email']\n", - " created_at = row['created_at']\n", - " 
convo = row['convo']\n", - " messages = convo['messages']\n", - " first_message = messages[0]['content']\n", - "\n", - " current_time = time.time()\n", - " dt_object = datetime.datetime.fromtimestamp(current_time)\n", - " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", - " \n", - " user_queries.append(first_message)\n", - " # create metadata for multi-turn conversation\n", - " conversation = \"\"\n", - " for message in messages:\n", - " # string of role: content, role: content, ...\n", - " if message['role'] == 'user':\n", - " emoji = \"🙋\"\n", - " else:\n", - " emoji = \"🤖\"\n", - " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", - " # add to metadata\n", - " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", - " \"modified_at\": formatted_timestamp}\n", - " metadata.append(metadata_row)\n", - " i += 1\n", - "\n", - " print(len(user_queries))\n", - " print(len(metadata))\n", - "\n", - " metadata = pd.DataFrame(metadata)\n", - " embeddings = embeddings_model.embed_documents(user_queries)\n", - " embeddings = np.array(embeddings)\n", - " print(embeddings.shape)\n", - "\n", - " # create an Atlas project\n", - " project_name = \"Conversation Map for \" + course\n", - " index_name = course + \"_convo_index\"\n", - " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='first_query',\n", - " name=project_name,\n", - " colorable_fields=['conversation_id', 'first_query'])\n", - " print(project.maps)\n", - "\n", - " project.create_index(index_name, build_topic_model=True)\n", - " except Exception as e:\n", - " print(\"course_name:\", course)\n", - " print(\"error: \", e)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'course': 'cropwizard', 'conversation': '\\n>>> user: What type of pest is most harmful to corn crops in Illinois and how can I prevent it?\\n\\n>>> assistant: The most harmful pest to corn crops in Illinois is the corn rootworm. Corn rootworms, specifically the western corn rootworm (Diabrotica virgifera virgifera), are the most impactful species in Illinois and the North Central U.S. These pests can cause significant damage to corn plants by feeding on the roots, leading to reduced plant vigor and yield.\\n\\nTo prevent corn rootworm infestations, several measures can be taken:\\n\\n1. Crop rotation: Implementing a crop rotation strategy by alternating corn with non-host crops, such as soybeans, can help reduce corn rootworm populations. The larvae are not capable of surviving on soybean or other broadleaf plants, so rotating crops disrupts their life cycle.\\n\\n2. Utilize Bt hybrids: Planting corn hybrids that have been genetically modified to express Bacillus thuringiensis (Bt) proteins targeting corn rootworm can provide effective control. These Bt hybrids produce toxins that are toxic to the pests, reducing their feeding and damage to corn plants.\\n\\n3. Insecticide treatments: Seed-applied insecticides containing neonicotinoid or diamide insecticides can be used to protect corn seedlings from early-season pests, including corn rootworms. 
Soil-applied insecticides can also be applied at planting to control corn rootworms and other early-season pests.\\n\\n4. Monitoring and scouting: Regular monitoring of corn fields for signs of corn rootworm damage, such as root feeding and lodging, is essential. Scouting helps determine if pest populations have reached economic thresholds that require action, such as insecticide application.\\n\\n5. Integrated Pest Management (IPM): Implementing an IPM approach involves combining multiple pest management strategies, including cultural practices, biological control, and chemical control, to minimize the impact of corn rootworms and other pests. This holistic approach aims to reduce reliance on chemical treatments and promote sustainable pest management.\\n\\nIt is important to consult with local agricultural extension services, seed dealers, or agricultural experts for specific recommendations tailored to your region and field conditions to effectively manage corn rootworm infestations.\\n', 'conversation_id': '3e5d4861-b128-4c64-96ac-87c74f3217e5', 'id': 2, 'user_email': 'avd6@illinois.edu', 'first_query': 'What type of pest is most harmful to corn crops in Illinois and how can I prevent it?'}\n" - ] - } - ], - "source": [ - "print(metadata[1])" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(22, 1536)\n" - ] - } - ], - "source": [ - "# initialize langchain OpenAI embeddings model\n", - "embeddings_model = OpenAIEmbeddings()\n", - "metadata = pd.DataFrame(metadata)\n", - "embeddings = embeddings_model.embed_documents(user_queries)\n", - "embeddings = np.array(embeddings)\n", - "print(embeddings.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 14:53:44.900 | INFO | nomic.project:_create_project:779 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", - "2023-09-07 14:53:45.794 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-09-07 14:53:45.798 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-09-07 14:53:46.743 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-09-07 14:53:46.744 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-09-07 14:53:47.568 | INFO | nomic.project:create_index:1111 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n", - "2023-09-07 14:53:47.570 | INFO | nomic.atlas:map_embeddings:139 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 14:53:48.872 | INFO | nomic.project:create_index:1111 - Created map `cropwizard_convo_index_2` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71\n" - ] - }, - { - "data": { - "text/html": [ - "Atlas Projection cropwizard_convo_index_2. Status Topic Modeling. view online" - ], - "text/plain": [ - "cropwizard_convo_index_2: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# create an Atlas project\n", - "project_name = \"Conversation Map for \" + course\n", - "index_name = course + \"_convo_index\"\n", - "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='first_query',\n", - " name=project_name,\n", - " colorable_fields=['conversation_id', 'first_query'])\n", - "print(project.maps)\n", - "\n", - "project.create_index(index_name, build_topic_model=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "ename": "ReadTimeout", - "evalue": "The read operation timed out", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n",
    - "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out",
    - "\nDuring handling of the above exception, another exception occurred:\n",
    - "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)",
    - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, 
SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", - "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" - ] - } - ], - "source": [ - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", - "data = response.data\n", - "df = pd.DataFrame(data)\n", - "len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Mapping Query-Response Pairs to Nomic" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "user queries: 1\n", - "metadata 1\n", - "------------------------\n" - ] - } - ], - "source": [ - "user_queries = []\n", - "metadata = []\n", - "i = 1\n", - "for convo in course_df:\n", - " messages = convo['messages']\n", - " print(len(messages))\n", - " pair_check = 0\n", - " for message in messages:\n", - " if message['role'] == 'user' and pair_check == 0:\n", - " query = message['content']\n", - " metadata_row = {'course': course, 'query': 
message['content']}\n", - " #print(\"metadata row: \", metadata_row)\n", - " pair_check += 1\n", - " if message['role'] == 'assistant' and pair_check == 1:\n", - " metadata_row['response'] = message['content']\n", - " metadata_row['id'] = i \n", - " #print(\"response metadata row: \", metadata_row)\n", - " i += 1\n", - " pair_check += 1\n", - " if pair_check == 2:\n", - " # one conversation pair is complete\n", - " user_queries.append(query)\n", - " metadata.append(metadata_row)\n", - " pair_check = 0\n", - "\n", - " \n", - "print(\"user queries: \", len(user_queries))\n", - "print(\"metadata\", len(metadata))\n", - "print(\"------------------------\")\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(590, 1536)\n" - ] - } - ], - "source": [ - "# initialize langchain OpenAI embeddings model\n", - "embeddings_model = OpenAIEmbeddings()\n", - "metadata = pd.DataFrame(metadata)\n", - "embeddings = embeddings_model.embed_documents(user_queries)\n", - "embeddings = np.array(embeddings)\n", - "print(embeddings.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-31 19:55:40.276 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE408FA23` in organization `dabholkar.asmita`\n", - "2023-08-31 19:55:41.466 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-31 19:55:41.491 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:02, 2.43s/it]\n", - "2023-08-31 19:55:43.932 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-31 19:55:43.932 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-31 19:55:45.475 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE408FA23` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n", - "2023-08-31 19:55:45.480 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-31 19:55:47.813 | INFO | nomic.project:create_index:1111 - Created map `ECE408FA23_qr_index` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1\n" - ] - }, - { - "data": { - "text/html": [ - "Atlas Projection ECE408FA23_qr_index. Status Topic Modeling. 
view online" - ], - "text/plain": [ - "ECE408FA23_qr_index: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# create an Atlas project\n", - "project_name = \"Query-Response Map for \" + course\n", - "index_name = course + \"_qr_index\"\n", - "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='query',\n", - " name=project_name,\n", - " colorable_fields=['query'])\n", - "print(project.maps)\n", - "\n", - "project.create_index(index_name, build_topic_model=True)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "ReadTimeout", - "evalue": "The read operation timed out", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1259\u001b[0m, in \u001b[0;36mSSLSocket.recv\u001b[1;34m(self, buflen, flags)\u001b[0m\n\u001b[0;32m 1256\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1257\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[0;32m 1258\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[1;32m-> 1259\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(buflen)\n\u001b[0;32m 1260\u001b[0m \u001b[39melse\u001b[39;00m:\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1132\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[1;34m(self, len, buffer)\u001b[0m\n\u001b[0;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1132\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m)\n\u001b[0;32m 1133\u001b[0m \u001b[39mexcept\u001b[39;00m SSLError \u001b[39mas\u001b[39;00m x:\n",
    - "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out",
    - "\nDuring handling of the above exception, another exception occurred:\n",
    - "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)",
    - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, 
request)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", - "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" - ] - } - ], - "source": [ - "# cell for all course map creation\n", - "\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", - "data = response.data\n", - "print(len(data))" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "126" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(data)\n", - "course_names = df['course_name'].unique()\n", - "len(course_names)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: badm_550_ashley\n", - "(51, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:29.701 | INFO | 
nomic.project:_create_project:779 - Creating project `Query-Response Map for badm_550_ashley` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:31.242 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:31.255 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.03it/s]\n", - "2023-08-30 15:26:32.239 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:32.241 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:33.498 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm_550_ashley` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n", - "2023-08-30 15:26:33.500 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:35.272 | INFO | nomic.project:create_index:1111 - Created map `badm_550_ashley_qr_index` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/2b997f7c-0084-4db7-8e9a-76eeb62d715b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ece120\n", - "(298, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:42.765 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:43.831 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:43.850 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:01, 1.20s/it]\n", - "2023-08-30 15:26:45.059 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:45.063 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:46.221 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n", - "2023-08-30 15:26:46.230 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:48.040 | INFO | nomic.project:create_index:1111 - Created map `ece120_qr_index` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/61277149-b12d-4b59-8bcd-e9dd29fc58a4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: badm-567-v3\n", - "(27, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:52.367 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm-567-v3` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:53.227 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:53.236 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-08-30 15:26:54.177 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:54.185 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:55.379 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm-567-v3` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n", - "2023-08-30 15:26:55.379 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:57.012 | INFO | nomic.project:create_index:1111 - Created map `badm-567-v3_qr_index` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/e6c9798b-c154-43e7-917e-dd5cb71f116f\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: new-weather\n", - "(98, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:02.087 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for new-weather` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:03.117 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:03.125 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-08-30 15:27:04.071 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:04.071 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:05.459 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for new-weather` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n", - "2023-08-30 15:27:05.461 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:07.001 | INFO | nomic.project:create_index:1111 - Created map `new-weather_qr_index` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/ff717c79-50cd-468b-9fcc-b391c8c167df\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: gies-online-mba-v2\n", - "(52, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:10.946 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for gies-online-mba-v2` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:11.862 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:11.868 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.33it/s]\n", - "2023-08-30 15:27:12.630 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:12.634 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:13.627 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for gies-online-mba-v2` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n", - "2023-08-30 15:27:13.627 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:15.546 | INFO | nomic.project:create_index:1111 - Created map `gies-online-mba-v2_qr_index` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/db13ea7a-f93d-4f97-b922-c51216d3d6e9\n", - "2023-08-30 15:27:15,670:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. 
Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:15,673:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:20,003:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:20,003:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: frontend\n", - "(24, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:28.373 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for frontend` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:29.396 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:29.405 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.11it/s]\n", - "2023-08-30 15:27:30.325 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:30.325 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:31.539 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for frontend` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n", - "2023-08-30 15:27:31.542 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:33.028 | INFO | nomic.project:create_index:1111 - Created map `frontend_qr_index` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/006903b0-bb82-4432-9975-ff7c9ca80af9\n", - "2023-08-30 15:27:33,166:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:33,166:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:37,279:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:37,281:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:41,477:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:41,481:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ECE220FA23\n", - "(193, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:50.988 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE220FA23` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:51.867 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:51.878 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.01s/it]\n", - "2023-08-30 15:27:52.904 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:52.908 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:53.929 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE220FA23` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n", - "2023-08-30 15:27:53.929 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:55.645 | INFO | nomic.project:create_index:1111 - Created map `ECE220FA23_qr_index` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/d8163c57-a2e8-41ca-90fc-043c8a9469b3\n", - "2023-08-30 15:27:55,758:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:55,759:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:59,841:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:59,841:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: Snowmass\n", - "(23, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:08.067 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Snowmass` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:09.006 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:09.014 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.20it/s]\n", - "2023-08-30 15:28:09.854 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:09.858 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:10.994 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Snowmass` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n", - "2023-08-30 15:28:10.994 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:12.968 | INFO | nomic.project:create_index:1111 - Created map `Snowmass_qr_index` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/4fdea35b-cda2-434e-afd1-e46e01430a97\n", - "2023-08-30 15:28:13,066:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:13,068:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:17,200:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:17,200:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:21,297:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:21,297:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NPRE247\n", - "(54, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:29.951 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NPRE247` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:31.043 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:31.051 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.64s/it]\n", - "2023-08-30 15:28:32.709 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:32.714 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:33.787 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NPRE247` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n", - "2023-08-30 15:28:33.790 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:35.347 | INFO | nomic.project:create_index:1111 - Created map `NPRE247_qr_index` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/03471048-46aa-473c-b599-0bc812c679c0\n", - "2023-08-30 15:28:35,479:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:35,484:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:39,590:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:39,594:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: your-awesome-course\n", - "(30, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:50.102 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for your-awesome-course` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:51.013 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:51.022 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.10it/s]\n", - "2023-08-30 15:28:51.943 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:51.945 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:52.904 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for your-awesome-course` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n", - "2023-08-30 15:28:52.907 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:54.405 | INFO | nomic.project:create_index:1111 - Created map `your-awesome-course_qr_index` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/e58f20e3-fa19-4c4c-8764-a185e0691c85\n", - "2023-08-30 15:28:54,549:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:54,549:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:58,646:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:58,653:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: pract\n", - "(44, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:07.007 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for pract` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:08.243 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:08.251 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.11s/it]\n", - "2023-08-30 15:29:09.368 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:09.368 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:10.392 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for pract` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n", - "2023-08-30 15:29:10.392 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:12.031 | INFO | nomic.project:create_index:1111 - Created map `pract_qr_index` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/b15570eb-4db4-4b6f-9b4d-e80309d2dcb3\n", - "2023-08-30 15:29:12,113:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:12,115:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:16,201:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:16,209:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:20,282:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:20,285:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ece120FL22\n", - "(53, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:28.994 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120FL22` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:29.838 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:29.846 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.18it/s]\n", - "2023-08-30 15:29:30.708 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:30.710 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:31.828 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120FL22` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n", - "2023-08-30 15:29:31.828 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:33.354 | INFO | nomic.project:create_index:1111 - Created map `ece120FL22_qr_index` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/eb4e553d-ecd3-4b11-9d75-468108ab08e2\n", - "2023-08-30 15:29:33,458:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:33,458:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:37,544:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:37,545:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:41,634:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:41,642:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: Law794-TransactionalDraftingAlam\n", - "(21, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:49.618 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Law794-TransactionalDraftingAlam` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:50.718 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:50.731 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.11s/it]\n", - "2023-08-30 15:29:51.849 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:51.851 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:53.034 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Law794-TransactionalDraftingAlam` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n", - "2023-08-30 15:29:53.034 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:54.605 | INFO | nomic.project:create_index:1111 - Created map `Law794-TransactionalDraftingAlam_qr_index` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/9792fd4e-2196-4e39-bded-cc2bfd42abbf\n", - "2023-08-30 15:29:54,728:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. 
Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:54,731:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:58,804:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:58,804:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NCSA\n", - "(84, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:07.528 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSA` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:08.422 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:08.431 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.09it/s]\n", - "2023-08-30 15:30:09.361 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:09.361 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:10.325 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSA` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n", - "2023-08-30 15:30:10.325 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:11.917 | INFO | nomic.project:create_index:1111 - Created map `NCSA_qr_index` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/2ce836ec-557b-4037-9ebd-d3e8982c0926\n", - "2023-08-30 15:30:12,004:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:12,004:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:16,092:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:16,092:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:20,157:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:20,164:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NCSADelta\n", - "(22, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:28.362 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSADelta` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:29.318 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:29.326 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.10it/s]\n", - "2023-08-30 15:30:30.246 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:30.251 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:31.253 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSADelta` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n", - "2023-08-30 15:30:31.254 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:33.104 | INFO | nomic.project:create_index:1111 - Created map `NCSADelta_qr_index` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/b6e64fef-a829-435f-89b5-ed1a44c05202\n", - "2023-08-30 15:30:33,214:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:33,214:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:37,289:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:37,290:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:41,376:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:41,382:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NuclGPT-v1\n", - "(25, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:49.297 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NuclGPT-v1` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:50.216 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:50.222 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.23it/s]\n", - "2023-08-30 15:30:51.043 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:51.043 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:52.360 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NuclGPT-v1` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n", - "2023-08-30 15:30:52.360 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:53.900 | INFO | nomic.project:create_index:1111 - Created map `NuclGPT-v1_qr_index` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/441b7ff6-00ef-47f3-98a9-e45d327a1414\n" - ] - } - ], - "source": [ - "low_volume_courses = []\n", - "high_volume_courses = []\n", - "for course in course_names:\n", - " if course is None or course == 'ece408':\n", - " continue\n", - " \n", - " user_queries = []\n", - " metadata = []\n", - " i = 1\n", - " course_df = df[df['course_name'] == course]['convo']\n", - " for convo in course_df: # iterate through all conversations in a course\n", - " messages = convo['messages']\n", - "\n", - " # form query-response pairs out of the messages\n", - " pair_check = 0\n", - " for message in messages:\n", - " if message['role'] == 'user' and pair_check == 0:\n", - " query = message['content']\n", - " metadata_row = {'course': course, 'query': message['content']}\n", - " #print(\"metadata row: \", metadata_row)\n", - " pair_check += 1\n", - " \n", - " if message['role'] == 'assistant' and pair_check == 1:\n", - " metadata_row['response'] = message['content']\n", - " metadata_row['id'] = i \n", - " #print(\"response metadata row: \", metadata_row)\n", - " i += 1\n", - " pair_check += 1\n", - "\n", - " if pair_check == 2:\n", - " # one conversation pair is complete\n", - " user_queries.append(query)\n", - " metadata.append(metadata_row)\n", - " pair_check = 0\n", - " # after iterating every conversation in a course, create the map\n", - " if len(user_queries) < 20:\n", - " low_volume_courses.append(course)\n", - " continue\n", - "\n", - " if len(user_queries) > 500:\n", - " high_volume_courses.append(course)\n", - " continue\n", - " \n", - " metadata = pd.DataFrame(metadata)\n", - " embeddings = embeddings_model.embed_documents(user_queries)\n", - " embeddings = np.array(embeddings)\n", - " print(\"course name: \", course)\n", - " print(embeddings.shape)\n", - "\n", - " # create an Atlas project\n", - " project_name = \"Query-Response Map for \" + course\n", - " index_name = course + \"_qr_index\"\n", - " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='query',\n", - " name=project_name,\n", - " colorable_fields=['query'])\n", - " print(project.maps)\n", - "\n", - " project.create_index(index_name, build_topic_model=True)\n", - "\n", - " \n" - ] - }, - { - "cell_type": 
"code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "low volume courses: ['test-video-ingest', 'badm-567', 'test-video-ingest-28', 'pdeploy999', 'badm-350-summer', 'previewtesting1', 'localtest2', 'your-favorite-url', 'mantine', 'test-video-ingest-2', 'badm_567_v2', 'erpnext', 'mip', 'farmdoc_test_kastan-v1', 'personalstatement', 'hrc', 'csv', 'star_nox', 'badm_567', 'SPaRCEd', 'localdemo8', 'badm_567_thumbnails', 'chatbot', 'erp', 'extreme', 'rohan_atree', 'zotero-extreme', 'test-video-ingest-20', 'gies-online-mba2', 'gies-online-mba', 'careerassistant', 'weather', 'lillian-wang-blog', 'local-test5', 'demo-for-vyriad', 'ag-gpt-beta', 'rtest', 'previewdeploy', 'r2test', 'personal-statement', 'rohan_excel', 'langchain-python', 'langchain', 'ncsa-live-demo', 'rohan_atree_individual', 'meta11-test', 'HealthyLivingGuide', 'rohan', 'babreu', 'test-video-ingest-31', 'p', 'test-video-ingest-17', 'summary', 'test-video-ingest-3', 'test-video-ingest-27', 'lillian-wang-blog-2', 'python-magic', 'ansible2', 'ece408fa23', 'farmdoc_test_josh_v2', 'local-test3', 'automata', 'SpaceFlorida-GT', 'GBSI-GT', 'newnew_ncsa', 'canvas', 'gbsi-gt', 'meditation-tutorial', 'profit', 'ansible', 'langchain-docs', 'testing_url_metadata_josh', 'test-india-biodiversity', 'vyriad', 'irc-workplans', 'kastanasdfasdfasdf', 'BADM-567-GT', 'mdt', 'vercel', 'gies-graduate-hub', 'test-video-ingest-12', 'test-video-ingest-13', 'Gies-graduate-hub', 'test_new_supa_scrape', 'doe-ascr-2023', 'arize', 'final-meta-test', 'preview-meta-test', 'gies-online-mba-V3', 'FoF-Drawdown-from-INVPEIV-5-24-23', 'FIN574-GT', 'test-video-ingest-30', 'test', 'NCSA-v2', 'conversational', 'clowder-docs', 'DA', 'test-video-ingest-21', 'test-video-ingest-25', 'test-ingest-10', 'eric-test-course', 'farmdoc-test', 'test-video-ingest-22', 'Academic-paper', 'starwars', 'AdamDemo']\n", - "high volume courses: ['gpt4', 'ECE408FA23']\n" - ] - } - ], - "source": [ - "print(\"low volume courses: \", low_volume_courses)\n", - "print(\"high volume courses: \", high_volume_courses)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From c32122514da9cf2caccdd37fa3338dfad003e82a Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 12 Sep 2023 22:59:44 -0500 Subject: [PATCH 51/61] testing for json error --- ai_ta_backend/main.py | 8 +- ai_ta_backend/nomic_logging.py | 3 + ai_ta_backend/nomic_map_creation.ipynb | 1915 ++++++++++++++++++++++++ 3 files changed, 1922 insertions(+), 4 deletions(-) create mode 100644 ai_ta_backend/nomic_map_creation.ipynb diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 1f2ee238..78fab519 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -427,10 +427,10 @@ def logToNomic(): # background execution of tasks!! 
response = executor.submit(log_convo_to_nomic, course_name, data) - #response = jsonify(response) - #response.headers.add('Access-Control-Allow-Origin', '*') - print(response) - return "response" + response = jsonify({'outcome': 'success'}) + response.headers.add('Access-Control-Allow-Origin', '*') + + return response if __name__ == '__main__': diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 9e816302..b92be51d 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -58,7 +58,10 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] + print("prev_id: ", prev_id) prev_created_at = prev_data['created_at'].values[0] + print("prev_created_at: ", prev_created_at) + print("before delete") # delete that convo data point from Nomic project.delete_data([prev_id]) diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb new file mode 100644 index 00000000..c11b80db --- /dev/null +++ b/ai_ta_backend/nomic_map_creation.ipynb @@ -0,0 +1,1915 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# import required libraries\n", + "\n", + "import os\n", + "import supabase\n", + "from nomic import atlas\n", + "from dotenv import load_dotenv\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "import numpy as np\n", + "import time\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# loading environment variables\n", + "\n", + "env_path = \"../.env\"\n", + "load_dotenv(dotenv_path=env_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# initialize supabase client\n", + "\n", + "url = os.environ.get(\"SUPABASE_URL\")\n", + "key = os.environ.get(\"SUPABASE_API_KEY\")\n", + "\n", + "supabase_client = supabase.create_client(url, key)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atconvoconvo_idcourse_nameuser_email
051932023-09-06T23:21:36.639848+00:00{'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',...d6e83182-56d8-4151-b84e-a09dd84b8836cropwizard-betadabholkar.asmita@gmail.com
151922023-09-06T23:04:50.982857+00:00{'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',...1390b226-2115-4078-a594-ed4bad0fb4e0cropwizard-betakastanday@live.com
251742023-09-06T22:22:44.107536+00:00{'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',...fca0cf0b-6bac-4cf6-bd4d-d3501827c947cropwizard-betadabholkar.asmita@gmail.com
351842023-09-06T23:01:06.796384+00:00{'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',...1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0ecropwizard-betadabholkar.asmita@gmail.com
451822023-09-06T22:58:21.66316+00:00{'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',...0c9d9873-2c52-4b12-90ec-d4a495cbf4e0cropwizard-betadabholkar.asmita@gmail.com
551942023-09-06T23:22:06.786717+00:00{'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',...d51bbdd8-c5c8-4e5b-a003-556a8ac74726cropwizard-betaavd6@illinois.edu
651132023-09-05T21:34:17.944623+00:00{'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',...2a39551b-7b6c-4ba0-aa77-ffc9b1316718cropwizard-betahetarth2@illinois.edu
753332023-09-07T22:25:52.858867+00:00{'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',...bd3c9174-4512-4ac6-ae05-8aafa4099215cropwizard-betadabholkar.asmita@gmail.com
851082023-09-05T21:11:51.289178+00:00{'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',...a49941c4-2049-478b-b43e-9c574cadbd05cropwizard-betavadve@illinois.edu
951042023-09-05T20:38:27.687893+00:00{'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',...1be7956c-d5c8-4b1f-a62f-145421a2e7f7cropwizard-betakastanday@live.com
1051032023-09-05T20:11:51.810222+00:00{'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',...20e6e160-6fd3-4e0a-82b7-98457f2ff1c3cropwizard-betakastanday@live.com
1150982023-09-05T19:17:36.190976+00:00{'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',...fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1ccropwizard-betadabholkar.asmita@gmail.com
1250422023-09-02T14:43:02.160608+00:00{'id': '8df8a436-adbf-441a-92ec-f6d926aee789',...8df8a436-adbf-441a-92ec-f6d926aee789cropwizard-betaNone
1350092023-09-01T18:26:57.628766+00:00{'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',...b401031c-5540-43fc-baff-7c6db90ff3aecropwizard-betarohan13@illinois.edu
1450062023-09-01T18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
1550452023-09-03T20:07:34.895841+00:00{'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',...cbecc7f4-a94c-49d6-ae1a-e42f20136676cropwizard-betaNone
1650142023-09-01T18:36:04.202716+00:00{'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',...3303ec4f-84d5-4eec-a4b3-af020e62b79dcropwizard-betavadve@illinois.edu
1750132023-09-01T18:35:32.12103+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
1850112023-09-01T18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
1950072023-09-01T18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
2050042023-09-01T16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 5193 2023-09-06T23:21:36.639848+00:00 \n", + "1 5192 2023-09-06T23:04:50.982857+00:00 \n", + "2 5174 2023-09-06T22:22:44.107536+00:00 \n", + "3 5184 2023-09-06T23:01:06.796384+00:00 \n", + "4 5182 2023-09-06T22:58:21.66316+00:00 \n", + "5 5194 2023-09-06T23:22:06.786717+00:00 \n", + "6 5113 2023-09-05T21:34:17.944623+00:00 \n", + "7 5333 2023-09-07T22:25:52.858867+00:00 \n", + "8 5108 2023-09-05T21:11:51.289178+00:00 \n", + "9 5104 2023-09-05T20:38:27.687893+00:00 \n", + "10 5103 2023-09-05T20:11:51.810222+00:00 \n", + "11 5098 2023-09-05T19:17:36.190976+00:00 \n", + "12 5042 2023-09-02T14:43:02.160608+00:00 \n", + "13 5009 2023-09-01T18:26:57.628766+00:00 \n", + "14 5006 2023-09-01T18:06:33.350981+00:00 \n", + "15 5045 2023-09-03T20:07:34.895841+00:00 \n", + "16 5014 2023-09-01T18:36:04.202716+00:00 \n", + "17 5013 2023-09-01T18:35:32.12103+00:00 \n", + "18 5011 2023-09-01T18:28:24.588312+00:00 \n", + "19 5007 2023-09-01T18:15:08.636935+00:00 \n", + "20 5004 2023-09-01T16:28:23.202471+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',... \n", + "1 {'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',... \n", + "2 {'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',... \n", + "3 {'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',... \n", + "4 {'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',... \n", + "5 {'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',... \n", + "6 {'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',... \n", + "7 {'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',... \n", + "8 {'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',... \n", + "9 {'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',... \n", + "10 {'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',... \n", + "11 {'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',... \n", + "12 {'id': '8df8a436-adbf-441a-92ec-f6d926aee789',... \n", + "13 {'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',... \n", + "14 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", + "15 {'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',... \n", + "16 {'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',... \n", + "17 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", + "18 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", + "19 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", + "20 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... 
\n", + "\n", + " convo_id course_name \\\n", + "0 d6e83182-56d8-4151-b84e-a09dd84b8836 cropwizard-beta \n", + "1 1390b226-2115-4078-a594-ed4bad0fb4e0 cropwizard-beta \n", + "2 fca0cf0b-6bac-4cf6-bd4d-d3501827c947 cropwizard-beta \n", + "3 1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e cropwizard-beta \n", + "4 0c9d9873-2c52-4b12-90ec-d4a495cbf4e0 cropwizard-beta \n", + "5 d51bbdd8-c5c8-4e5b-a003-556a8ac74726 cropwizard-beta \n", + "6 2a39551b-7b6c-4ba0-aa77-ffc9b1316718 cropwizard-beta \n", + "7 bd3c9174-4512-4ac6-ae05-8aafa4099215 cropwizard-beta \n", + "8 a49941c4-2049-478b-b43e-9c574cadbd05 cropwizard-beta \n", + "9 1be7956c-d5c8-4b1f-a62f-145421a2e7f7 cropwizard-beta \n", + "10 20e6e160-6fd3-4e0a-82b7-98457f2ff1c3 cropwizard-beta \n", + "11 fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c cropwizard-beta \n", + "12 8df8a436-adbf-441a-92ec-f6d926aee789 cropwizard-beta \n", + "13 b401031c-5540-43fc-baff-7c6db90ff3ae cropwizard-beta \n", + "14 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", + "15 cbecc7f4-a94c-49d6-ae1a-e42f20136676 cropwizard-beta \n", + "16 3303ec4f-84d5-4eec-a4b3-af020e62b79d cropwizard-beta \n", + "17 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", + "18 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", + "19 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", + "20 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", + "\n", + " user_email \n", + "0 dabholkar.asmita@gmail.com \n", + "1 kastanday@live.com \n", + "2 dabholkar.asmita@gmail.com \n", + "3 dabholkar.asmita@gmail.com \n", + "4 dabholkar.asmita@gmail.com \n", + "5 avd6@illinois.edu \n", + "6 hetarth2@illinois.edu \n", + "7 dabholkar.asmita@gmail.com \n", + "8 vadve@illinois.edu \n", + "9 kastanday@live.com \n", + "10 kastanday@live.com \n", + "11 dabholkar.asmita@gmail.com \n", + "12 None \n", + "13 rohan13@illinois.edu \n", + "14 hetarth2@illinois.edu \n", + "15 None \n", + "16 vadve@illinois.edu \n", + "17 rohan13@illinois.edu \n", + "18 rohan13@illinois.edu \n", + "19 None \n", + "20 dabholkar.asmita@gmail.com " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "course = 'cropwizard-beta'\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", + "data = response.data\n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id int64\n", + "created_at object\n", + "convo object\n", + "convo_id object\n", + "course_name object\n", + "user_email object\n", + "dtype: object\n", + "id int64\n", + "created_at datetime64[ns, UTC]\n", + "convo object\n", + "convo_id object\n", + "course_name object\n", + "user_email object\n", + "dtype: object\n" + ] + } + ], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreated_atconvoconvo_idcourse_nameuser_email
050042023-09-01 16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
150062023-09-01 18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
250072023-09-01 18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
350112023-09-01 18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
450132023-09-01 18:35:32.121030+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
\n", + "
" + ], + "text/plain": [ + " id created_at \\\n", + "0 5004 2023-09-01 16:28:23.202471+00:00 \n", + "1 5006 2023-09-01 18:06:33.350981+00:00 \n", + "2 5007 2023-09-01 18:15:08.636935+00:00 \n", + "3 5011 2023-09-01 18:28:24.588312+00:00 \n", + "4 5013 2023-09-01 18:35:32.121030+00:00 \n", + "\n", + " convo \\\n", + "0 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... \n", + "1 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", + "2 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", + "3 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", + "4 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", + "\n", + " convo_id course_name \\\n", + "0 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", + "1 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", + "2 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", + "3 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", + "4 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", + "\n", + " user_email \n", + "0 dabholkar.asmita@gmail.com \n", + "1 hetarth2@illinois.edu \n", + "2 None \n", + "3 rohan13@illinois.edu \n", + "4 rohan13@illinois.edu " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2023-09-01 16:28:23.202471+0000', tz='UTC')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0]['created_at']" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "course_list = df['course_name'].unique()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21\n", + "21\n" + ] + } + ], + "source": [ + "import datetime\n", + "\n", + "metadata = []\n", + "user_queries = []\n", + "i = 1\n", + "\n", + "# log conversation instead of individual messages\n", + "for index, row in df.iterrows():\n", + " user_email = row['user_email']\n", + " created_at = row['created_at']\n", + "\n", + " current_time = time.time()\n", + " dt_object = datetime.datetime.fromtimestamp(current_time)\n", + " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", + "\n", + "\n", + "\n", + " convo = row['convo']\n", + " messages = convo['messages']\n", + " first_message = messages[0]['content']\n", + " \n", + " user_queries.append(first_message)\n", + " # create metadata for multi-turn conversation\n", + " conversation = \"\"\n", + " for message in messages:\n", + " # string of role: content, role: content, ...\n", + " if message['role'] == 'user':\n", + " emoji = \"🙋\"\n", + " else:\n", + " emoji = \"🤖\"\n", + " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", + " # add to metadata\n", + " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", + " \"modified_at\": formatted_timestamp}\n", + " metadata.append(metadata_row)\n", + " i += 1\n", + "\n", + "print(len(user_queries))\n", + "print(len(metadata))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"{'course': 'cropwizard-beta', 'conversation': \"\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n>>> 🤖assistant: Weed management in soybean fields involves a combination of cultural, mechanical, and chemical strategies. Here are some recommended practices:\\n\\n1. **Crop Rotation**: Rotating crops can help reduce the presence of certain weed species that are more prevalent in soybean fields. \\n\\n2. **Cover Crops**: Planting cover crops can suppress weed growth by competing for sunlight, water, and nutrients. \\n\\n3. **Tillage**: Tillage can be used to disrupt weed growth. However, it should be used judiciously to prevent soil erosion.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop compete better against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergent and post-emergent herbicides. Pre-emergent herbicides are applied before the weeds germinate, while post-emergent herbicides are applied after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: Rotate herbicides with different modes of action to prevent weeds from developing resistance.\\n\\n7. **Scouting and Monitoring**: Regularly scout and monitor your fields to identify weed problems early and respond quickly.\\n\\n8. **Clean Equipment**: Clean your farming equipment to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. It's also essential to consider the specific types of weeds you're dealing with, as different species may require different control methods.\\n\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n\\n\\n\\n>>> 🤖assistant: Managing weeds in soybean fields requires a comprehensive and integrated approach. Here are some best practices:\\n\\n1. **Crop Rotation**: This practice can help break the life cycle of certain weeds. By alternating the types of crops in each field, you can naturally reduce the presence of weeds that thrive in soybean fields.\\n\\n2. **Cover Crops**: Planting cover crops in the off-season can suppress weed growth by outcompeting them for resources such as sunlight, water, and nutrients.\\n\\n3. **Tillage**: Tillage can help control weeds by physically removing or burying them. However, excessive tillage can lead to soil erosion, so it should be used judiciously.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop establish itself and compete more effectively against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergence and post-emergence herbicides to control weeds. Pre-emergence herbicides are used before the weeds germinate, while post-emergence herbicides are used after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: To prevent herbicide resistance, rotate the types of herbicides used, ensuring they have different modes of action.\\n\\n7. **Regular Scouting**: Regularly inspect your fields to identify weed problems early. This allows for timely intervention and can prevent the spread of weeds.\\n\\n8. **Clean Equipment**: Ensure that your farming equipment is cleaned regularly to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. 
Tailor your approach based on the specific types of weeds prevalent in your field, as different species may require different control strategies.\\n\", 'conversation_id': 'd6e83182-56d8-4151-b84e-a09dd84b8836', 'id': 1, 'user_email': 'dabholkar.asmita@gmail.com', 'first_query': 'What is the best approach to prevent weeds in my soybean field?', 'created_at': '2023-09-06T23:21:36.639848+00:00', 'modified_at': '2023-09-12 18:17:49.784979+00:00'}\n" + ] + } + ], + "source": [ + "print(metadata[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Conversations to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cropwizard-beta\n", + "21\n", + "21\n", + "21\n", + "(21, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 22:36:00.418 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", + "2023-09-12 22:36:01.434 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", + "2023-09-12 22:36:01.454 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. This behavior will change in a future version.\n", + "2023-09-12 22:36:01.459 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.11it/s]\n", + "2023-09-12 22:36:02.373 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", + "2023-09-12 22:36:02.374 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", + "2023-09-12 22:36:03.570 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n", + "2023-09-12 22:36:03.571 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-12 22:36:05.071 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/9507bda2-b846-4e48-90d8-684a40fb3220\n" + ] + } + ], + "source": [ + "embeddings_model = OpenAIEmbeddings()\n", + "course_list = ['cropwizard-beta']\n", + "\n", + "for course in course_list:\n", + " print(course)\n", + " try:\n", + " response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", + " data = response.data\n", + " course_df = pd.DataFrame(data)\n", + " print(len(course_df))\n", + "\n", + " if len(course_df) < 20:\n", + " continue\n", + " else:\n", + " \n", + " user_queries = []\n", + " metadata = []\n", + " i = 1\n", + "\n", + " # log conversation instead of individual messages\n", + " for index, row in course_df.iterrows():\n", + " user_email = row['user_email']\n", + " created_at = row['created_at']\n", + " 
convo = row['convo']\n", + " messages = convo['messages']\n", + " first_message = messages[0]['content']\n", + "\n", + " current_time = time.time()\n", + " dt_object = datetime.datetime.fromtimestamp(current_time)\n", + " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", + " \n", + " user_queries.append(first_message)\n", + " # create metadata for multi-turn conversation\n", + " conversation = \"\"\n", + " for message in messages:\n", + " # string of role: content, role: content, ...\n", + " if message['role'] == 'user':\n", + " emoji = \"🙋\"\n", + " else:\n", + " emoji = \"🤖\"\n", + " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", + " # add to metadata\n", + " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", + " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", + " \"modified_at\": formatted_timestamp}\n", + " metadata.append(metadata_row)\n", + " i += 1\n", + "\n", + " print(len(user_queries))\n", + " print(len(metadata))\n", + "\n", + " metadata = pd.DataFrame(metadata)\n", + " embeddings = embeddings_model.embed_documents(user_queries)\n", + " embeddings = np.array(embeddings)\n", + " print(embeddings.shape)\n", + "\n", + " # create an Atlas project\n", + " project_name = \"Conversation Map for \" + course\n", + " index_name = course + \"_convo_index\"\n", + " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='first_query',\n", + " name=project_name,\n", + " colorable_fields=['conversation_id', 'first_query'])\n", + " print(project.maps)\n", + "\n", + " project.create_index(index_name, build_topic_model=True)\n", + " except Exception as e:\n", + " print(\"course_name:\", course)\n", + " print(\"error: \", e)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'course': 'cropwizard', 'conversation': '\\n>>> user: What type of pest is most harmful to corn crops in Illinois and how can I prevent it?\\n\\n>>> assistant: The most harmful pest to corn crops in Illinois is the corn rootworm. Corn rootworms, specifically the western corn rootworm (Diabrotica virgifera virgifera), are the most impactful species in Illinois and the North Central U.S. These pests can cause significant damage to corn plants by feeding on the roots, leading to reduced plant vigor and yield.\\n\\nTo prevent corn rootworm infestations, several measures can be taken:\\n\\n1. Crop rotation: Implementing a crop rotation strategy by alternating corn with non-host crops, such as soybeans, can help reduce corn rootworm populations. The larvae are not capable of surviving on soybean or other broadleaf plants, so rotating crops disrupts their life cycle.\\n\\n2. Utilize Bt hybrids: Planting corn hybrids that have been genetically modified to express Bacillus thuringiensis (Bt) proteins targeting corn rootworm can provide effective control. These Bt hybrids produce toxins that are toxic to the pests, reducing their feeding and damage to corn plants.\\n\\n3. Insecticide treatments: Seed-applied insecticides containing neonicotinoid or diamide insecticides can be used to protect corn seedlings from early-season pests, including corn rootworms. 
Soil-applied insecticides can also be applied at planting to control corn rootworms and other early-season pests.\\n\\n4. Monitoring and scouting: Regular monitoring of corn fields for signs of corn rootworm damage, such as root feeding and lodging, is essential. Scouting helps determine if pest populations have reached economic thresholds that require action, such as insecticide application.\\n\\n5. Integrated Pest Management (IPM): Implementing an IPM approach involves combining multiple pest management strategies, including cultural practices, biological control, and chemical control, to minimize the impact of corn rootworms and other pests. This holistic approach aims to reduce reliance on chemical treatments and promote sustainable pest management.\\n\\nIt is important to consult with local agricultural extension services, seed dealers, or agricultural experts for specific recommendations tailored to your region and field conditions to effectively manage corn rootworm infestations.\\n', 'conversation_id': '3e5d4861-b128-4c64-96ac-87c74f3217e5', 'id': 2, 'user_email': 'avd6@illinois.edu', 'first_query': 'What type of pest is most harmful to corn crops in Illinois and how can I prevent it?'}\n" + ] + } + ], + "source": [ + "print(metadata[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(22, 1536)\n" + ] + } + ], + "source": [ + "# initialize langchain OpenAI embeddings model\n", + "embeddings_model = OpenAIEmbeddings()\n", + "metadata = pd.DataFrame(metadata)\n", + "embeddings = embeddings_model.embed_documents(user_queries)\n", + "embeddings = np.array(embeddings)\n", + "print(embeddings.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 14:53:44.900 | INFO | nomic.project:_create_project:779 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", + "2023-09-07 14:53:45.794 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-09-07 14:53:45.798 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-09-07 14:53:46.743 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-09-07 14:53:46.744 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-09-07 14:53:47.568 | INFO | nomic.project:create_index:1111 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n", + "2023-09-07 14:53:47.570 | INFO | nomic.atlas:map_embeddings:139 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-07 14:53:48.872 | INFO | nomic.project:create_index:1111 - Created map `cropwizard_convo_index_2` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71\n" + ] + }, + { + "data": { + "text/html": [ + "Atlas Projection cropwizard_convo_index_2. Status Topic Modeling. view online" + ], + "text/plain": [ + "cropwizard_convo_index_2: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an Atlas project\n", + "project_name = \"Conversation Map for \" + course\n", + "index_name = course + \"_convo_index\"\n", + "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='first_query',\n", + " name=project_name,\n", + " colorable_fields=['conversation_id', 'first_query'])\n", + "print(project.maps)\n", + "\n", + "project.create_index(index_name, build_topic_model=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "ename": "ReadTimeout", + "evalue": "The read operation timed out", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1259\u001b[0m, in \u001b[0;36mSSLSocket.recv\u001b[1;34m(self, buflen, flags)\u001b[0m\n\u001b[0;32m 1256\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1257\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[0;32m 1258\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[1;32m-> 1259\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(buflen)\n\u001b[0;32m 1260\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1132\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[1;34m(self, len, buffer)\u001b[0m\n\u001b[0;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1132\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m)\n\u001b[0;32m 1133\u001b[0m \u001b[39mexcept\u001b[39;00m SSLError \u001b[39mas\u001b[39;00m x:\n", + "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:60\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 60\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE-786\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File 
\u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection.py:90\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 88\u001b[0m \u001b[39mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[1;32m---> 90\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connection\u001b[39m.\u001b[39;49mhandle_request(request)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:112\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_response_closed()\n\u001b[1;32m--> 112\u001b[0m \u001b[39mraise\u001b[39;00m exc\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:91\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 84\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mhttp11.receive_response_headers\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 85\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 86\u001b[0m (\n\u001b[0;32m 87\u001b[0m http_version,\n\u001b[0;32m 88\u001b[0m status,\n\u001b[0;32m 89\u001b[0m reason_phrase,\n\u001b[0;32m 90\u001b[0m headers,\n\u001b[1;32m---> 91\u001b[0m ) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_receive_response_headers(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 92\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m (\n\u001b[0;32m 93\u001b[0m http_version,\n\u001b[0;32m 94\u001b[0m status,\n\u001b[0;32m 95\u001b[0m reason_phrase,\n\u001b[0;32m 96\u001b[0m headers,\n\u001b[0;32m 97\u001b[0m )\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:155\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_response_headers\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 155\u001b[0m event \u001b[39m=\u001b[39m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_receive_event(timeout\u001b[39m=\u001b[39;49mtimeout)\n\u001b[0;32m 156\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(event, h11\u001b[39m.\u001b[39mResponse):\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:191\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_event\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 190\u001b[0m \u001b[39mif\u001b[39;00m event \u001b[39mis\u001b[39;00m h11\u001b[39m.\u001b[39mNEED_DATA:\n\u001b[1;32m--> 191\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_network_stream\u001b[39m.\u001b[39;49mread(\n\u001b[0;32m 192\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mREAD_NUM_BYTES, timeout\u001b[39m=\u001b[39;49mtimeout\n\u001b[0;32m 193\u001b[0m )\n\u001b[0;32m 195\u001b[0m \u001b[39m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[0;32m 196\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 197\u001b[0m \u001b[39m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 201\u001b[0m \u001b[39m# perspective. Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[0;32m 202\u001b[0m \u001b[39m# it as a ConnectError.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 25\u001b[0m exc_map: ExceptionMapping \u001b[39m=\u001b[39m {socket\u001b[39m.\u001b[39mtimeout: ReadTimeout, \u001b[39mOSError\u001b[39;00m: ReadError}\n\u001b[1;32m---> 26\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). 
This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:14\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(exc, from_exc):\n\u001b[1;32m---> 14\u001b[0m \u001b[39mraise\u001b[39;00m to_exc(exc)\n\u001b[0;32m 15\u001b[0m \u001b[39mraise\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mf:\\MSIM\\ML_Projects\\ai-ta-backend\\ai_ta_backend\\nomic_map_creation.ipynb Cell 13\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m response \u001b[39m=\u001b[39m supabase_client\u001b[39m.\u001b[39;49mtable(\u001b[39m\"\u001b[39;49m\u001b[39mllm-convo-monitor\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mselect(\u001b[39m\"\u001b[39;49m\u001b[39m*\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mexecute()\n\u001b[0;32m 2\u001b[0m data \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mdata\n\u001b[0;32m 3\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(data)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\postgrest\\_sync\\request_builder.py:55\u001b[0m, in \u001b[0;36mSyncQueryRequestBuilder.execute\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mexecute\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m APIResponse:\n\u001b[0;32m 44\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Execute the query.\u001b[39;00m\n\u001b[0;32m 45\u001b[0m \n\u001b[0;32m 46\u001b[0m \u001b[39m .. 
tip::\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[39m :class:`APIError` If the API raised an error.\u001b[39;00m\n\u001b[0;32m 54\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 55\u001b[0m r \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[0;32m 56\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mhttp_method,\n\u001b[0;32m 57\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpath,\n\u001b[0;32m 58\u001b[0m json\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mjson,\n\u001b[0;32m 59\u001b[0m params\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mparams,\n\u001b[0;32m 60\u001b[0m headers\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mheaders,\n\u001b[0;32m 61\u001b[0m )\n\u001b[0;32m 62\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 63\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 64\u001b[0m \u001b[39m200\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m r\u001b[39m.\u001b[39mstatus_code \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m \u001b[39m299\u001b[39m\n\u001b[0;32m 65\u001b[0m ): \u001b[39m# Response.ok from JS (https://developer.mozilla.org/en-US/docs/Web/API/Response/ok)\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:821\u001b[0m, in \u001b[0;36mClient.request\u001b[1;34m(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)\u001b[0m\n\u001b[0;32m 806\u001b[0m warnings\u001b[39m.\u001b[39mwarn(message, \u001b[39mDeprecationWarning\u001b[39;00m)\n\u001b[0;32m 808\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbuild_request(\n\u001b[0;32m 809\u001b[0m method\u001b[39m=\u001b[39mmethod,\n\u001b[0;32m 810\u001b[0m url\u001b[39m=\u001b[39murl,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 819\u001b[0m extensions\u001b[39m=\u001b[39mextensions,\n\u001b[0;32m 820\u001b[0m )\n\u001b[1;32m--> 821\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(request, auth\u001b[39m=\u001b[39;49mauth, follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:908\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 900\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 901\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 902\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 903\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 904\u001b[0m )\n\u001b[0;32m 906\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 908\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 909\u001b[0m request,\n\u001b[0;32m 910\u001b[0m auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 911\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 912\u001b[0m 
history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 913\u001b[0m )\n\u001b[0;32m 914\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 915\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:936\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 933\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 935\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 936\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 937\u001b[0m request,\n\u001b[0;32m 938\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 939\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 940\u001b[0m )\n\u001b[0;32m 941\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 942\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:973\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 970\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 971\u001b[0m hook(request)\n\u001b[1;32m--> 973\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 974\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 975\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:1009\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 1004\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 1005\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1006\u001b[0m )\n\u001b[0;32m 1008\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1009\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1011\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1013\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, 
SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" + ] + } + ], + "source": [ + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", + "data = response.data\n", + "df = pd.DataFrame(data)\n", + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mapping Query-Response Pairs to Nomic" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "user queries: 1\n", + "metadata 1\n", + "------------------------\n" + ] + } + ], + "source": [ + "user_queries = []\n", + "metadata = []\n", + "i = 1\n", + "for convo in course_df:\n", + " messages = convo['messages']\n", + " print(len(messages))\n", + " pair_check = 0\n", + " for message in messages:\n", + " if message['role'] == 'user' and pair_check == 0:\n", + " query = message['content']\n", + " metadata_row = {'course': course, 'query': 
message['content']}\n", + " #print(\"metadata row: \", metadata_row)\n", + " pair_check += 1\n", + " if message['role'] == 'assistant' and pair_check == 1:\n", + " metadata_row['response'] = message['content']\n", + " metadata_row['id'] = i \n", + " #print(\"response metadata row: \", metadata_row)\n", + " i += 1\n", + " pair_check += 1\n", + " if pair_check == 2:\n", + " # one conversation pair is complete\n", + " user_queries.append(query)\n", + " metadata.append(metadata_row)\n", + " pair_check = 0\n", + "\n", + " \n", + "print(\"user queries: \", len(user_queries))\n", + "print(\"metadata\", len(metadata))\n", + "print(\"------------------------\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(590, 1536)\n" + ] + } + ], + "source": [ + "# initialize langchain OpenAI embeddings model\n", + "embeddings_model = OpenAIEmbeddings()\n", + "metadata = pd.DataFrame(metadata)\n", + "embeddings = embeddings_model.embed_documents(user_queries)\n", + "embeddings = np.array(embeddings)\n", + "print(embeddings.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-31 19:55:40.276 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE408FA23` in organization `dabholkar.asmita`\n", + "2023-08-31 19:55:41.466 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-31 19:55:41.491 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:02, 2.43s/it]\n", + "2023-08-31 19:55:43.932 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-31 19:55:43.932 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-31 19:55:45.475 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE408FA23` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n", + "2023-08-31 19:55:45.480 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-31 19:55:47.813 | INFO | nomic.project:create_index:1111 - Created map `ECE408FA23_qr_index` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1\n" + ] + }, + { + "data": { + "text/html": [ + "Atlas Projection ECE408FA23_qr_index. Status Topic Modeling. 
view online" + ], + "text/plain": [ + "ECE408FA23_qr_index: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create an Atlas project\n", + "project_name = \"Query-Response Map for \" + course\n", + "index_name = course + \"_qr_index\"\n", + "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='query',\n", + " name=project_name,\n", + " colorable_fields=['query'])\n", + "print(project.maps)\n", + "\n", + "project.create_index(index_name, build_topic_model=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "ReadTimeout", + "evalue": "The read operation timed out", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1259\u001b[0m, in \u001b[0;36mSSLSocket.recv\u001b[1;34m(self, buflen, flags)\u001b[0m\n\u001b[0;32m 1256\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1257\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[0;32m 1258\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[1;32m-> 1259\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(buflen)\n\u001b[0;32m 1260\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1132\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[1;34m(self, len, buffer)\u001b[0m\n\u001b[0;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1132\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m)\n\u001b[0;32m 1133\u001b[0m \u001b[39mexcept\u001b[39;00m SSLError \u001b[39mas\u001b[39;00m x:\n", + "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:60\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 59\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 60\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 61\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE-786\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:218\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[1;32m--> 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_pool\u001b[39m.\u001b[39;49mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:253\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 252\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mresponse_closed(status)\n\u001b[1;32m--> 253\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[0;32m 254\u001b[0m \u001b[39melse\u001b[39;00m:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection_pool.py:237\u001b[0m, in \u001b[0;36mConnectionPool.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 236\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 237\u001b[0m response \u001b[39m=\u001b[39m connection\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 238\u001b[0m \u001b[39mexcept\u001b[39;00m ConnectionNotAvailable:\n\u001b[0;32m 239\u001b[0m \u001b[39m# The ConnectionNotAvailable exception is a special case, that\u001b[39;00m\n\u001b[0;32m 240\u001b[0m \u001b[39m# indicates we need to retry the request on a new connection.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 244\u001b[0m \u001b[39m# might end up as an HTTP/2 connection, but which actually ends\u001b[39;00m\n\u001b[0;32m 245\u001b[0m \u001b[39m# up as HTTP/1.1.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\connection.py:90\u001b[0m, in \u001b[0;36mHTTPConnection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 88\u001b[0m \u001b[39mraise\u001b[39;00m ConnectionNotAvailable()\n\u001b[1;32m---> 90\u001b[0m 
\u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_connection\u001b[39m.\u001b[39;49mhandle_request(request)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:112\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 111\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_response_closed()\n\u001b[1;32m--> 112\u001b[0m \u001b[39mraise\u001b[39;00m exc\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:91\u001b[0m, in \u001b[0;36mHTTP11Connection.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[39mwith\u001b[39;00m Trace(\n\u001b[0;32m 84\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mhttp11.receive_response_headers\u001b[39m\u001b[39m\"\u001b[39m, request, kwargs\n\u001b[0;32m 85\u001b[0m ) \u001b[39mas\u001b[39;00m trace:\n\u001b[0;32m 86\u001b[0m (\n\u001b[0;32m 87\u001b[0m http_version,\n\u001b[0;32m 88\u001b[0m status,\n\u001b[0;32m 89\u001b[0m reason_phrase,\n\u001b[0;32m 90\u001b[0m headers,\n\u001b[1;32m---> 91\u001b[0m ) \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_receive_response_headers(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 92\u001b[0m trace\u001b[39m.\u001b[39mreturn_value \u001b[39m=\u001b[39m (\n\u001b[0;32m 93\u001b[0m http_version,\n\u001b[0;32m 94\u001b[0m status,\n\u001b[0;32m 95\u001b[0m reason_phrase,\n\u001b[0;32m 96\u001b[0m headers,\n\u001b[0;32m 97\u001b[0m )\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:155\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_response_headers\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 154\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 155\u001b[0m event \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_receive_event(timeout\u001b[39m=\u001b[39;49mtimeout)\n\u001b[0;32m 156\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(event, h11\u001b[39m.\u001b[39mResponse):\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_sync\\http11.py:191\u001b[0m, in \u001b[0;36mHTTP11Connection._receive_event\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 190\u001b[0m \u001b[39mif\u001b[39;00m event \u001b[39mis\u001b[39;00m h11\u001b[39m.\u001b[39mNEED_DATA:\n\u001b[1;32m--> 191\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_network_stream\u001b[39m.\u001b[39;49mread(\n\u001b[0;32m 192\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mREAD_NUM_BYTES, timeout\u001b[39m=\u001b[39;49mtimeout\n\u001b[0;32m 193\u001b[0m )\n\u001b[0;32m 195\u001b[0m \u001b[39m# If we feed this case through h11 we'll raise an exception like:\u001b[39;00m\n\u001b[0;32m 196\u001b[0m \u001b[39m#\u001b[39;00m\n\u001b[0;32m 197\u001b[0m \u001b[39m# httpcore.RemoteProtocolError: can't handle event type\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 201\u001b[0m \u001b[39m# perspective. 
Instead we handle this case distinctly and treat\u001b[39;00m\n\u001b[0;32m 202\u001b[0m \u001b[39m# it as a ConnectError.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:26\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 25\u001b[0m exc_map: ExceptionMapping \u001b[39m=\u001b[39m {socket\u001b[39m.\u001b[39mtimeout: ReadTimeout, \u001b[39mOSError\u001b[39;00m: ReadError}\n\u001b[1;32m---> 26\u001b[0m \u001b[39mwith\u001b[39;00m map_exceptions(exc_map):\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:14\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(exc, from_exc):\n\u001b[1;32m---> 14\u001b[0m \u001b[39mraise\u001b[39;00m to_exc(exc)\n\u001b[0;32m 15\u001b[0m \u001b[39mraise\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mf:\\MSIM\\ML_Projects\\ai-ta-backend\\ai_ta_backend\\nomic_map_creation.ipynb Cell 19\u001b[0m line \u001b[0;36m3\n\u001b[0;32m 1\u001b[0m \u001b[39m# cell for all course map creation\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m response \u001b[39m=\u001b[39m supabase_client\u001b[39m.\u001b[39;49mtable(\u001b[39m\"\u001b[39;49m\u001b[39mllm-convo-monitor\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mselect(\u001b[39m\"\u001b[39;49m\u001b[39m*\u001b[39;49m\u001b[39m\"\u001b[39;49m)\u001b[39m.\u001b[39;49mexecute()\n\u001b[0;32m 4\u001b[0m data \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39mdata\n\u001b[0;32m 5\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mlen\u001b[39m(data))\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\postgrest\\_sync\\request_builder.py:55\u001b[0m, in \u001b[0;36mSyncQueryRequestBuilder.execute\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 43\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mexecute\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m 
APIResponse:\n\u001b[0;32m 44\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Execute the query.\u001b[39;00m\n\u001b[0;32m 45\u001b[0m \n\u001b[0;32m 46\u001b[0m \u001b[39m .. tip::\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[39m :class:`APIError` If the API raised an error.\u001b[39;00m\n\u001b[0;32m 54\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m---> 55\u001b[0m r \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[0;32m 56\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mhttp_method,\n\u001b[0;32m 57\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpath,\n\u001b[0;32m 58\u001b[0m json\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mjson,\n\u001b[0;32m 59\u001b[0m params\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mparams,\n\u001b[0;32m 60\u001b[0m headers\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mheaders,\n\u001b[0;32m 61\u001b[0m )\n\u001b[0;32m 62\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 63\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[0;32m 64\u001b[0m \u001b[39m200\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m r\u001b[39m.\u001b[39mstatus_code \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m \u001b[39m299\u001b[39m\n\u001b[0;32m 65\u001b[0m ): \u001b[39m# Response.ok from JS (https://developer.mozilla.org/en-US/docs/Web/API/Response/ok)\u001b[39;00m\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:821\u001b[0m, in \u001b[0;36mClient.request\u001b[1;34m(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)\u001b[0m\n\u001b[0;32m 806\u001b[0m warnings\u001b[39m.\u001b[39mwarn(message, \u001b[39mDeprecationWarning\u001b[39;00m)\n\u001b[0;32m 808\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mbuild_request(\n\u001b[0;32m 809\u001b[0m method\u001b[39m=\u001b[39mmethod,\n\u001b[0;32m 810\u001b[0m url\u001b[39m=\u001b[39murl,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 819\u001b[0m extensions\u001b[39m=\u001b[39mextensions,\n\u001b[0;32m 820\u001b[0m )\n\u001b[1;32m--> 821\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(request, auth\u001b[39m=\u001b[39;49mauth, follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects)\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:908\u001b[0m, in \u001b[0;36mClient.send\u001b[1;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[0;32m 900\u001b[0m follow_redirects \u001b[39m=\u001b[39m (\n\u001b[0;32m 901\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfollow_redirects\n\u001b[0;32m 902\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(follow_redirects, UseClientDefault)\n\u001b[0;32m 903\u001b[0m \u001b[39melse\u001b[39;00m follow_redirects\n\u001b[0;32m 904\u001b[0m )\n\u001b[0;32m 906\u001b[0m auth \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_build_request_auth(request, auth)\n\u001b[1;32m--> 908\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_auth(\n\u001b[0;32m 909\u001b[0m request,\n\u001b[0;32m 910\u001b[0m 
auth\u001b[39m=\u001b[39;49mauth,\n\u001b[0;32m 911\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 912\u001b[0m history\u001b[39m=\u001b[39;49m[],\n\u001b[0;32m 913\u001b[0m )\n\u001b[0;32m 914\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 915\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m stream:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:936\u001b[0m, in \u001b[0;36mClient._send_handling_auth\u001b[1;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[0;32m 933\u001b[0m request \u001b[39m=\u001b[39m \u001b[39mnext\u001b[39m(auth_flow)\n\u001b[0;32m 935\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 936\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_handling_redirects(\n\u001b[0;32m 937\u001b[0m request,\n\u001b[0;32m 938\u001b[0m follow_redirects\u001b[39m=\u001b[39;49mfollow_redirects,\n\u001b[0;32m 939\u001b[0m history\u001b[39m=\u001b[39;49mhistory,\n\u001b[0;32m 940\u001b[0m )\n\u001b[0;32m 941\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 942\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:973\u001b[0m, in \u001b[0;36mClient._send_handling_redirects\u001b[1;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[0;32m 970\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mrequest\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[0;32m 971\u001b[0m hook(request)\n\u001b[1;32m--> 973\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_send_single_request(request)\n\u001b[0;32m 974\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 975\u001b[0m \u001b[39mfor\u001b[39;00m hook \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_event_hooks[\u001b[39m\"\u001b[39m\u001b[39mresponse\u001b[39m\u001b[39m\"\u001b[39m]:\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_client.py:1009\u001b[0m, in \u001b[0;36mClient._send_single_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 1004\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\n\u001b[0;32m 1005\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAttempted to send an async request with a sync Client instance.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 1006\u001b[0m )\n\u001b[0;32m 1008\u001b[0m \u001b[39mwith\u001b[39;00m request_context(request\u001b[39m=\u001b[39mrequest):\n\u001b[1;32m-> 1009\u001b[0m response \u001b[39m=\u001b[39m transport\u001b[39m.\u001b[39;49mhandle_request(request)\n\u001b[0;32m 1011\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(response\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 1013\u001b[0m response\u001b[39m.\u001b[39mrequest \u001b[39m=\u001b[39m request\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, 
request)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", + "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", + "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", + "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" + ] + } + ], + "source": [ + "# cell for all course map creation\n", + "\n", + "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", + "data = response.data\n", + "print(len(data))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "126" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data)\n", + "course_names = df['course_name'].unique()\n", + "len(course_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: badm_550_ashley\n", + "(51, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:29.701 | INFO | 
nomic.project:_create_project:779 - Creating project `Query-Response Map for badm_550_ashley` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:31.242 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:31.255 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.03it/s]\n", + "2023-08-30 15:26:32.239 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:32.241 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:33.498 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm_550_ashley` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n", + "2023-08-30 15:26:33.500 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:35.272 | INFO | nomic.project:create_index:1111 - Created map `badm_550_ashley_qr_index` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/2b997f7c-0084-4db7-8e9a-76eeb62d715b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ece120\n", + "(298, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:42.765 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:43.831 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:43.850 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:01, 1.20s/it]\n", + "2023-08-30 15:26:45.059 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:45.063 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:46.221 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n", + "2023-08-30 15:26:46.230 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:48.040 | INFO | nomic.project:create_index:1111 - Created map `ece120_qr_index` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/61277149-b12d-4b59-8bcd-e9dd29fc58a4\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: badm-567-v3\n", + "(27, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:52.367 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm-567-v3` in organization `dabholkar.asmita`\n", + "2023-08-30 15:26:53.227 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:26:53.236 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-08-30 15:26:54.177 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:26:54.185 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:26:55.379 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm-567-v3` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n", + "2023-08-30 15:26:55.379 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:26:57.012 | INFO | nomic.project:create_index:1111 - Created map `badm-567-v3_qr_index` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/e6c9798b-c154-43e7-917e-dd5cb71f116f\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: new-weather\n", + "(98, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:02.087 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for new-weather` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:03.117 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:03.125 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.07it/s]\n", + "2023-08-30 15:27:04.071 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:04.071 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:05.459 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for new-weather` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n", + "2023-08-30 15:27:05.461 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:07.001 | INFO | nomic.project:create_index:1111 - Created map `new-weather_qr_index` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/ff717c79-50cd-468b-9fcc-b391c8c167df\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: gies-online-mba-v2\n", + "(52, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:10.946 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for gies-online-mba-v2` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:11.862 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:11.868 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.33it/s]\n", + "2023-08-30 15:27:12.630 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:12.634 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:13.627 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for gies-online-mba-v2` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n", + "2023-08-30 15:27:13.627 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:15.546 | INFO | nomic.project:create_index:1111 - Created map `gies-online-mba-v2_qr_index` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/db13ea7a-f93d-4f97-b922-c51216d3d6e9\n", + "2023-08-30 15:27:15,670:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. 
Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:15,673:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:20,003:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:20,003:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: frontend\n", + "(24, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:28.373 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for frontend` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:29.396 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:29.405 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.11it/s]\n", + "2023-08-30 15:27:30.325 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:30.325 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:31.539 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for frontend` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n", + "2023-08-30 15:27:31.542 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:33.028 | INFO | nomic.project:create_index:1111 - Created map `frontend_qr_index` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/006903b0-bb82-4432-9975-ff7c9ca80af9\n", + "2023-08-30 15:27:33,166:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:33,166:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:37,279:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:37,281:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:41,477:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:41,481:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ECE220FA23\n", + "(193, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:50.988 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE220FA23` in organization `dabholkar.asmita`\n", + "2023-08-30 15:27:51.867 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:27:51.878 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.01s/it]\n", + "2023-08-30 15:27:52.904 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:27:52.908 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:27:53.929 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE220FA23` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n", + "2023-08-30 15:27:53.929 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:27:55.645 | INFO | nomic.project:create_index:1111 - Created map `ECE220FA23_qr_index` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/d8163c57-a2e8-41ca-90fc-043c8a9469b3\n", + "2023-08-30 15:27:55,758:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:55,759:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:27:59,841:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:27:59,841:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: Snowmass\n", + "(23, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:08.067 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Snowmass` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:09.006 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:09.014 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.20it/s]\n", + "2023-08-30 15:28:09.854 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:09.858 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:10.994 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Snowmass` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n", + "2023-08-30 15:28:10.994 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:12.968 | INFO | nomic.project:create_index:1111 - Created map `Snowmass_qr_index` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/4fdea35b-cda2-434e-afd1-e46e01430a97\n", + "2023-08-30 15:28:13,066:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:13,068:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:17,200:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:17,200:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:21,297:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:21,297:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NPRE247\n", + "(54, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:29.951 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NPRE247` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:31.043 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:31.051 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.64s/it]\n", + "2023-08-30 15:28:32.709 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:32.714 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:33.787 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NPRE247` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n", + "2023-08-30 15:28:33.790 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:35.347 | INFO | nomic.project:create_index:1111 - Created map `NPRE247_qr_index` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/03471048-46aa-473c-b599-0bc812c679c0\n", + "2023-08-30 15:28:35,479:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:35,484:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:39,590:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:39,594:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: your-awesome-course\n", + "(30, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:50.102 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for your-awesome-course` in organization `dabholkar.asmita`\n", + "2023-08-30 15:28:51.013 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:28:51.022 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.10it/s]\n", + "2023-08-30 15:28:51.943 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:28:51.945 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:28:52.904 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for your-awesome-course` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n", + "2023-08-30 15:28:52.907 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:28:54.405 | INFO | nomic.project:create_index:1111 - Created map `your-awesome-course_qr_index` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/e58f20e3-fa19-4c4c-8764-a185e0691c85\n", + "2023-08-30 15:28:54,549:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:54,549:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:28:58,646:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:28:58,653:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: pract\n", + "(44, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:07.007 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for pract` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:08.243 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:08.251 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.11s/it]\n", + "2023-08-30 15:29:09.368 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:09.368 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:10.392 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for pract` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n", + "2023-08-30 15:29:10.392 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:12.031 | INFO | nomic.project:create_index:1111 - Created map `pract_qr_index` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/b15570eb-4db4-4b6f-9b4d-e80309d2dcb3\n", + "2023-08-30 15:29:12,113:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:12,115:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:16,201:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:16,209:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:20,282:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:20,285:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: ece120FL22\n", + "(53, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:28.994 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120FL22` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:29.838 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:29.846 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.18it/s]\n", + "2023-08-30 15:29:30.708 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:30.710 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:31.828 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120FL22` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n", + "2023-08-30 15:29:31.828 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:33.354 | INFO | nomic.project:create_index:1111 - Created map `ece120FL22_qr_index` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/eb4e553d-ecd3-4b11-9d75-468108ab08e2\n", + "2023-08-30 15:29:33,458:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:33,458:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:37,544:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:37,545:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:41,634:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:41,642:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: Law794-TransactionalDraftingAlam\n", + "(21, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:49.618 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Law794-TransactionalDraftingAlam` in organization `dabholkar.asmita`\n", + "2023-08-30 15:29:50.718 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:29:50.731 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:01, 1.11s/it]\n", + "2023-08-30 15:29:51.849 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:29:51.851 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:29:53.034 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Law794-TransactionalDraftingAlam` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n", + "2023-08-30 15:29:53.034 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:29:54.605 | INFO | nomic.project:create_index:1111 - Created map `Law794-TransactionalDraftingAlam_qr_index` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/9792fd4e-2196-4e39-bded-cc2bfd42abbf\n", + "2023-08-30 15:29:54,728:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. 
Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:54,731:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:29:58,804:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:29:58,804:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NCSA\n", + "(84, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:07.528 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSA` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:08.422 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:08.431 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.09it/s]\n", + "2023-08-30 15:30:09.361 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:09.361 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:10.325 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSA` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n", + "2023-08-30 15:30:10.325 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:11.917 | INFO | nomic.project:create_index:1111 - Created map `NCSA_qr_index` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/2ce836ec-557b-4037-9ebd-d3e8982c0926\n", + "2023-08-30 15:30:12,004:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:12,004:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:16,092:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:16,092:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:20,157:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:20,164:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NCSADelta\n", + "(22, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:28.362 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSADelta` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:29.318 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:29.326 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", + "1it [00:00, 1.10it/s]\n", + "2023-08-30 15:30:30.246 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:30.251 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:31.253 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSADelta` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n", + "2023-08-30 15:30:31.254 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:33.104 | INFO | nomic.project:create_index:1111 - Created map `NCSADelta_qr_index` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/b6e64fef-a829-435f-89b5-ed1a44c05202\n", + "2023-08-30 15:30:33,214:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:33,214:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:37,289:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:37,290:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "2023-08-30 15:30:41,376:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", + "2023-08-30 15:30:41,382:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "course name: NuclGPT-v1\n", + "(25, 1536)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:49.297 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NuclGPT-v1` in organization `dabholkar.asmita`\n", + "2023-08-30 15:30:50.216 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", + "2023-08-30 15:30:50.222 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", + "1it [00:00, 1.23it/s]\n", + "2023-08-30 15:30:51.043 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", + "2023-08-30 15:30:51.043 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", + "2023-08-30 15:30:52.360 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NuclGPT-v1` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n", + "2023-08-30 15:30:52.360 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-08-30 15:30:53.900 | INFO | nomic.project:create_index:1111 - Created map `NuclGPT-v1_qr_index` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/441b7ff6-00ef-47f3-98a9-e45d327a1414\n" + ] + } + ], + "source": [ + "low_volume_courses = []\n", + "high_volume_courses = []\n", + "for course in course_names:\n", + " if course is None or course == 'ece408':\n", + " continue\n", + " \n", + " user_queries = []\n", + " metadata = []\n", + " i = 1\n", + " course_df = df[df['course_name'] == course]['convo']\n", + " for convo in course_df: # iterate through all conversations in a course\n", + " messages = convo['messages']\n", + "\n", + " # form query-response pairs out of the messages\n", + " pair_check = 0\n", + " for message in messages:\n", + " if message['role'] == 'user' and pair_check == 0:\n", + " query = message['content']\n", + " metadata_row = {'course': course, 'query': message['content']}\n", + " #print(\"metadata row: \", metadata_row)\n", + " pair_check += 1\n", + " \n", + " if message['role'] == 'assistant' and pair_check == 1:\n", + " metadata_row['response'] = message['content']\n", + " metadata_row['id'] = i \n", + " #print(\"response metadata row: \", metadata_row)\n", + " i += 1\n", + " pair_check += 1\n", + "\n", + " if pair_check == 2:\n", + " # one conversation pair is complete\n", + " user_queries.append(query)\n", + " metadata.append(metadata_row)\n", + " pair_check = 0\n", + " # after iterating every conversation in a course, create the map\n", + " if len(user_queries) < 20:\n", + " low_volume_courses.append(course)\n", + " continue\n", + "\n", + " if len(user_queries) > 500:\n", + " high_volume_courses.append(course)\n", + " continue\n", + " \n", + " metadata = pd.DataFrame(metadata)\n", + " embeddings = embeddings_model.embed_documents(user_queries)\n", + " embeddings = np.array(embeddings)\n", + " print(\"course name: \", course)\n", + " print(embeddings.shape)\n", + "\n", + " # create an Atlas project\n", + " project_name = \"Query-Response Map for \" + course\n", + " index_name = course + \"_qr_index\"\n", + " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", + " data=metadata,\n", + " id_field='id',\n", + " build_topic_model=True,\n", + " topic_label_field='query',\n", + " name=project_name,\n", + " colorable_fields=['query'])\n", + " print(project.maps)\n", + "\n", + " project.create_index(index_name, build_topic_model=True)\n", + "\n", + " \n" + ] + }, + { + "cell_type": 
"code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "low volume courses: ['test-video-ingest', 'badm-567', 'test-video-ingest-28', 'pdeploy999', 'badm-350-summer', 'previewtesting1', 'localtest2', 'your-favorite-url', 'mantine', 'test-video-ingest-2', 'badm_567_v2', 'erpnext', 'mip', 'farmdoc_test_kastan-v1', 'personalstatement', 'hrc', 'csv', 'star_nox', 'badm_567', 'SPaRCEd', 'localdemo8', 'badm_567_thumbnails', 'chatbot', 'erp', 'extreme', 'rohan_atree', 'zotero-extreme', 'test-video-ingest-20', 'gies-online-mba2', 'gies-online-mba', 'careerassistant', 'weather', 'lillian-wang-blog', 'local-test5', 'demo-for-vyriad', 'ag-gpt-beta', 'rtest', 'previewdeploy', 'r2test', 'personal-statement', 'rohan_excel', 'langchain-python', 'langchain', 'ncsa-live-demo', 'rohan_atree_individual', 'meta11-test', 'HealthyLivingGuide', 'rohan', 'babreu', 'test-video-ingest-31', 'p', 'test-video-ingest-17', 'summary', 'test-video-ingest-3', 'test-video-ingest-27', 'lillian-wang-blog-2', 'python-magic', 'ansible2', 'ece408fa23', 'farmdoc_test_josh_v2', 'local-test3', 'automata', 'SpaceFlorida-GT', 'GBSI-GT', 'newnew_ncsa', 'canvas', 'gbsi-gt', 'meditation-tutorial', 'profit', 'ansible', 'langchain-docs', 'testing_url_metadata_josh', 'test-india-biodiversity', 'vyriad', 'irc-workplans', 'kastanasdfasdfasdf', 'BADM-567-GT', 'mdt', 'vercel', 'gies-graduate-hub', 'test-video-ingest-12', 'test-video-ingest-13', 'Gies-graduate-hub', 'test_new_supa_scrape', 'doe-ascr-2023', 'arize', 'final-meta-test', 'preview-meta-test', 'gies-online-mba-V3', 'FoF-Drawdown-from-INVPEIV-5-24-23', 'FIN574-GT', 'test-video-ingest-30', 'test', 'NCSA-v2', 'conversational', 'clowder-docs', 'DA', 'test-video-ingest-21', 'test-video-ingest-25', 'test-ingest-10', 'eric-test-course', 'farmdoc-test', 'test-video-ingest-22', 'Academic-paper', 'starwars', 'AdamDemo']\n", + "high volume courses: ['gpt4', 'ECE408FA23']\n" + ] + } + ], + "source": [ + "print(\"low volume courses: \", low_volume_courses)\n", + "print(\"high volume courses: \", high_volume_courses)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From fb37eed3f3fff7e7556dcb3dd418db68e8194864 Mon Sep 17 00:00:00 2001 From: star-nox Date: Tue, 12 Sep 2023 23:10:05 -0500 Subject: [PATCH 52/61] testing for json error --- ai_ta_backend/nomic_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index b92be51d..02053bdc 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -64,7 +64,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: print("before delete") # delete that convo data point from Nomic - project.delete_data([prev_id]) + print(project.delete_data([prev_id])) # prep for new point first_message = prev_convo.split("\n")[1].split(": ")[1] From 90cdc5d49f8eb7b97fac042488df4b218beef0dc Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 14 Sep 2023 17:34:45 -0500 Subject: [PATCH 53/61] testing index errors --- ai_ta_backend/nomic_logging.py | 1 + 
ai_ta_backend/nomic_map_creation.ipynb | 1915 ------------------------ 2 files changed, 1 insertion(+), 1915 deletions(-) delete mode 100644 ai_ta_backend/nomic_map_creation.ipynb diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 02053bdc..a32f74f5 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -55,6 +55,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: # store that convo metadata locally prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id] prev_index = prev_data.index.values[0] + print("prev_index: ", prev_index) embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] diff --git a/ai_ta_backend/nomic_map_creation.ipynb b/ai_ta_backend/nomic_map_creation.ipynb deleted file mode 100644 index c11b80db..00000000 --- a/ai_ta_backend/nomic_map_creation.ipynb +++ /dev/null @@ -1,1915 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# import required libraries\n", - "\n", - "import os\n", - "import supabase\n", - "from nomic import atlas\n", - "from dotenv import load_dotenv\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "import numpy as np\n", - "import time\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# loading environment variables\n", - "\n", - "env_path = \"../.env\"\n", - "load_dotenv(dotenv_path=env_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# initialize supabase client\n", - "\n", - "url = os.environ.get(\"SUPABASE_URL\")\n", - "key = os.environ.get(\"SUPABASE_API_KEY\")\n", - "\n", - "supabase_client = supabase.create_client(url, key)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atconvoconvo_idcourse_nameuser_email
051932023-09-06T23:21:36.639848+00:00{'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',...d6e83182-56d8-4151-b84e-a09dd84b8836cropwizard-betadabholkar.asmita@gmail.com
151922023-09-06T23:04:50.982857+00:00{'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',...1390b226-2115-4078-a594-ed4bad0fb4e0cropwizard-betakastanday@live.com
251742023-09-06T22:22:44.107536+00:00{'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',...fca0cf0b-6bac-4cf6-bd4d-d3501827c947cropwizard-betadabholkar.asmita@gmail.com
351842023-09-06T23:01:06.796384+00:00{'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',...1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0ecropwizard-betadabholkar.asmita@gmail.com
451822023-09-06T22:58:21.66316+00:00{'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',...0c9d9873-2c52-4b12-90ec-d4a495cbf4e0cropwizard-betadabholkar.asmita@gmail.com
551942023-09-06T23:22:06.786717+00:00{'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',...d51bbdd8-c5c8-4e5b-a003-556a8ac74726cropwizard-betaavd6@illinois.edu
651132023-09-05T21:34:17.944623+00:00{'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',...2a39551b-7b6c-4ba0-aa77-ffc9b1316718cropwizard-betahetarth2@illinois.edu
753332023-09-07T22:25:52.858867+00:00{'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',...bd3c9174-4512-4ac6-ae05-8aafa4099215cropwizard-betadabholkar.asmita@gmail.com
851082023-09-05T21:11:51.289178+00:00{'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',...a49941c4-2049-478b-b43e-9c574cadbd05cropwizard-betavadve@illinois.edu
951042023-09-05T20:38:27.687893+00:00{'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',...1be7956c-d5c8-4b1f-a62f-145421a2e7f7cropwizard-betakastanday@live.com
1051032023-09-05T20:11:51.810222+00:00{'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',...20e6e160-6fd3-4e0a-82b7-98457f2ff1c3cropwizard-betakastanday@live.com
1150982023-09-05T19:17:36.190976+00:00{'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',...fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1ccropwizard-betadabholkar.asmita@gmail.com
1250422023-09-02T14:43:02.160608+00:00{'id': '8df8a436-adbf-441a-92ec-f6d926aee789',...8df8a436-adbf-441a-92ec-f6d926aee789cropwizard-betaNone
1350092023-09-01T18:26:57.628766+00:00{'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',...b401031c-5540-43fc-baff-7c6db90ff3aecropwizard-betarohan13@illinois.edu
1450062023-09-01T18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
1550452023-09-03T20:07:34.895841+00:00{'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',...cbecc7f4-a94c-49d6-ae1a-e42f20136676cropwizard-betaNone
1650142023-09-01T18:36:04.202716+00:00{'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',...3303ec4f-84d5-4eec-a4b3-af020e62b79dcropwizard-betavadve@illinois.edu
1750132023-09-01T18:35:32.12103+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
1850112023-09-01T18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
1950072023-09-01T18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
2050042023-09-01T16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
\n", - "
" - ], - "text/plain": [ - " id created_at \\\n", - "0 5193 2023-09-06T23:21:36.639848+00:00 \n", - "1 5192 2023-09-06T23:04:50.982857+00:00 \n", - "2 5174 2023-09-06T22:22:44.107536+00:00 \n", - "3 5184 2023-09-06T23:01:06.796384+00:00 \n", - "4 5182 2023-09-06T22:58:21.66316+00:00 \n", - "5 5194 2023-09-06T23:22:06.786717+00:00 \n", - "6 5113 2023-09-05T21:34:17.944623+00:00 \n", - "7 5333 2023-09-07T22:25:52.858867+00:00 \n", - "8 5108 2023-09-05T21:11:51.289178+00:00 \n", - "9 5104 2023-09-05T20:38:27.687893+00:00 \n", - "10 5103 2023-09-05T20:11:51.810222+00:00 \n", - "11 5098 2023-09-05T19:17:36.190976+00:00 \n", - "12 5042 2023-09-02T14:43:02.160608+00:00 \n", - "13 5009 2023-09-01T18:26:57.628766+00:00 \n", - "14 5006 2023-09-01T18:06:33.350981+00:00 \n", - "15 5045 2023-09-03T20:07:34.895841+00:00 \n", - "16 5014 2023-09-01T18:36:04.202716+00:00 \n", - "17 5013 2023-09-01T18:35:32.12103+00:00 \n", - "18 5011 2023-09-01T18:28:24.588312+00:00 \n", - "19 5007 2023-09-01T18:15:08.636935+00:00 \n", - "20 5004 2023-09-01T16:28:23.202471+00:00 \n", - "\n", - " convo \\\n", - "0 {'id': 'd6e83182-56d8-4151-b84e-a09dd84b8836',... \n", - "1 {'id': '1390b226-2115-4078-a594-ed4bad0fb4e0',... \n", - "2 {'id': 'fca0cf0b-6bac-4cf6-bd4d-d3501827c947',... \n", - "3 {'id': '1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e',... \n", - "4 {'id': '0c9d9873-2c52-4b12-90ec-d4a495cbf4e0',... \n", - "5 {'id': 'd51bbdd8-c5c8-4e5b-a003-556a8ac74726',... \n", - "6 {'id': '2a39551b-7b6c-4ba0-aa77-ffc9b1316718',... \n", - "7 {'id': 'bd3c9174-4512-4ac6-ae05-8aafa4099215',... \n", - "8 {'id': 'a49941c4-2049-478b-b43e-9c574cadbd05',... \n", - "9 {'id': '1be7956c-d5c8-4b1f-a62f-145421a2e7f7',... \n", - "10 {'id': '20e6e160-6fd3-4e0a-82b7-98457f2ff1c3',... \n", - "11 {'id': 'fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c',... \n", - "12 {'id': '8df8a436-adbf-441a-92ec-f6d926aee789',... \n", - "13 {'id': 'b401031c-5540-43fc-baff-7c6db90ff3ae',... \n", - "14 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", - "15 {'id': 'cbecc7f4-a94c-49d6-ae1a-e42f20136676',... \n", - "16 {'id': '3303ec4f-84d5-4eec-a4b3-af020e62b79d',... \n", - "17 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", - "18 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", - "19 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", - "20 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... 
\n", - "\n", - " convo_id course_name \\\n", - "0 d6e83182-56d8-4151-b84e-a09dd84b8836 cropwizard-beta \n", - "1 1390b226-2115-4078-a594-ed4bad0fb4e0 cropwizard-beta \n", - "2 fca0cf0b-6bac-4cf6-bd4d-d3501827c947 cropwizard-beta \n", - "3 1572f8a1-9ceb-42f7-b39f-fe95ec2b3c0e cropwizard-beta \n", - "4 0c9d9873-2c52-4b12-90ec-d4a495cbf4e0 cropwizard-beta \n", - "5 d51bbdd8-c5c8-4e5b-a003-556a8ac74726 cropwizard-beta \n", - "6 2a39551b-7b6c-4ba0-aa77-ffc9b1316718 cropwizard-beta \n", - "7 bd3c9174-4512-4ac6-ae05-8aafa4099215 cropwizard-beta \n", - "8 a49941c4-2049-478b-b43e-9c574cadbd05 cropwizard-beta \n", - "9 1be7956c-d5c8-4b1f-a62f-145421a2e7f7 cropwizard-beta \n", - "10 20e6e160-6fd3-4e0a-82b7-98457f2ff1c3 cropwizard-beta \n", - "11 fbd19bd0-373b-4a9c-b58c-a47cbd4e9e1c cropwizard-beta \n", - "12 8df8a436-adbf-441a-92ec-f6d926aee789 cropwizard-beta \n", - "13 b401031c-5540-43fc-baff-7c6db90ff3ae cropwizard-beta \n", - "14 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", - "15 cbecc7f4-a94c-49d6-ae1a-e42f20136676 cropwizard-beta \n", - "16 3303ec4f-84d5-4eec-a4b3-af020e62b79d cropwizard-beta \n", - "17 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", - "18 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", - "19 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", - "20 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", - "\n", - " user_email \n", - "0 dabholkar.asmita@gmail.com \n", - "1 kastanday@live.com \n", - "2 dabholkar.asmita@gmail.com \n", - "3 dabholkar.asmita@gmail.com \n", - "4 dabholkar.asmita@gmail.com \n", - "5 avd6@illinois.edu \n", - "6 hetarth2@illinois.edu \n", - "7 dabholkar.asmita@gmail.com \n", - "8 vadve@illinois.edu \n", - "9 kastanday@live.com \n", - "10 kastanday@live.com \n", - "11 dabholkar.asmita@gmail.com \n", - "12 None \n", - "13 rohan13@illinois.edu \n", - "14 hetarth2@illinois.edu \n", - "15 None \n", - "16 vadve@illinois.edu \n", - "17 rohan13@illinois.edu \n", - "18 rohan13@illinois.edu \n", - "19 None \n", - "20 dabholkar.asmita@gmail.com " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "course = 'cropwizard-beta'\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", - "data = response.data\n", - "df = pd.DataFrame(data)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id int64\n", - "created_at object\n", - "convo object\n", - "convo_id object\n", - "course_name object\n", - "user_email object\n", - "dtype: object\n", - "id int64\n", - "created_at datetime64[ns, UTC]\n", - "convo object\n", - "convo_id object\n", - "course_name object\n", - "user_email object\n", - "dtype: object\n" - ] - } - ], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idcreated_atconvoconvo_idcourse_nameuser_email
050042023-09-01 16:28:23.202471+00:00{'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',...edabed51-e581-48f7-865c-89a4b1d9d120cropwizard-betadabholkar.asmita@gmail.com
150062023-09-01 18:06:33.350981+00:00{'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',...c42e70a8-03c0-4c9e-ac7f-ee718b4ac968cropwizard-betahetarth2@illinois.edu
250072023-09-01 18:15:08.636935+00:00{'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',...d7191543-c517-4007-a2fb-ae28660ef37ccropwizard-betaNone
350112023-09-01 18:28:24.588312+00:00{'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',...5c2d89b3-3e41-4a8a-84be-f93c7b8127c3cropwizard-betarohan13@illinois.edu
450132023-09-01 18:35:32.121030+00:00{'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',...fc91f681-54e3-4df5-8fd4-952d02b8febacropwizard-betarohan13@illinois.edu
\n", - "
" - ], - "text/plain": [ - " id created_at \\\n", - "0 5004 2023-09-01 16:28:23.202471+00:00 \n", - "1 5006 2023-09-01 18:06:33.350981+00:00 \n", - "2 5007 2023-09-01 18:15:08.636935+00:00 \n", - "3 5011 2023-09-01 18:28:24.588312+00:00 \n", - "4 5013 2023-09-01 18:35:32.121030+00:00 \n", - "\n", - " convo \\\n", - "0 {'id': 'edabed51-e581-48f7-865c-89a4b1d9d120',... \n", - "1 {'id': 'c42e70a8-03c0-4c9e-ac7f-ee718b4ac968',... \n", - "2 {'id': 'd7191543-c517-4007-a2fb-ae28660ef37c',... \n", - "3 {'id': '5c2d89b3-3e41-4a8a-84be-f93c7b8127c3',... \n", - "4 {'id': 'fc91f681-54e3-4df5-8fd4-952d02b8feba',... \n", - "\n", - " convo_id course_name \\\n", - "0 edabed51-e581-48f7-865c-89a4b1d9d120 cropwizard-beta \n", - "1 c42e70a8-03c0-4c9e-ac7f-ee718b4ac968 cropwizard-beta \n", - "2 d7191543-c517-4007-a2fb-ae28660ef37c cropwizard-beta \n", - "3 5c2d89b3-3e41-4a8a-84be-f93c7b8127c3 cropwizard-beta \n", - "4 fc91f681-54e3-4df5-8fd4-952d02b8feba cropwizard-beta \n", - "\n", - " user_email \n", - "0 dabholkar.asmita@gmail.com \n", - "1 hetarth2@illinois.edu \n", - "2 None \n", - "3 rohan13@illinois.edu \n", - "4 rohan13@illinois.edu " - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Timestamp('2023-09-01 16:28:23.202471+0000', tz='UTC')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.iloc[0]['created_at']" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "course_list = df['course_name'].unique()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21\n", - "21\n" - ] - } - ], - "source": [ - "import datetime\n", - "\n", - "metadata = []\n", - "user_queries = []\n", - "i = 1\n", - "\n", - "# log conversation instead of individual messages\n", - "for index, row in df.iterrows():\n", - " user_email = row['user_email']\n", - " created_at = row['created_at']\n", - "\n", - " current_time = time.time()\n", - " dt_object = datetime.datetime.fromtimestamp(current_time)\n", - " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", - "\n", - "\n", - "\n", - " convo = row['convo']\n", - " messages = convo['messages']\n", - " first_message = messages[0]['content']\n", - " \n", - " user_queries.append(first_message)\n", - " # create metadata for multi-turn conversation\n", - " conversation = \"\"\n", - " for message in messages:\n", - " # string of role: content, role: content, ...\n", - " if message['role'] == 'user':\n", - " emoji = \"🙋\"\n", - " else:\n", - " emoji = \"🤖\"\n", - " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", - " # add to metadata\n", - " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", - " \"modified_at\": formatted_timestamp}\n", - " metadata.append(metadata_row)\n", - " i += 1\n", - "\n", - "print(len(user_queries))\n", - "print(len(metadata))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"{'course': 'cropwizard-beta', 'conversation': \"\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n>>> 🤖assistant: Weed management in soybean fields involves a combination of cultural, mechanical, and chemical strategies. Here are some recommended practices:\\n\\n1. **Crop Rotation**: Rotating crops can help reduce the presence of certain weed species that are more prevalent in soybean fields. \\n\\n2. **Cover Crops**: Planting cover crops can suppress weed growth by competing for sunlight, water, and nutrients. \\n\\n3. **Tillage**: Tillage can be used to disrupt weed growth. However, it should be used judiciously to prevent soil erosion.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop compete better against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergent and post-emergent herbicides. Pre-emergent herbicides are applied before the weeds germinate, while post-emergent herbicides are applied after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: Rotate herbicides with different modes of action to prevent weeds from developing resistance.\\n\\n7. **Scouting and Monitoring**: Regularly scout and monitor your fields to identify weed problems early and respond quickly.\\n\\n8. **Clean Equipment**: Clean your farming equipment to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. It's also essential to consider the specific types of weeds you're dealing with, as different species may require different control methods.\\n\\n>>> 🙋user: What is the best approach to prevent weeds in my soybean field?\\n\\n\\n\\n\\n>>> 🤖assistant: Managing weeds in soybean fields requires a comprehensive and integrated approach. Here are some best practices:\\n\\n1. **Crop Rotation**: This practice can help break the life cycle of certain weeds. By alternating the types of crops in each field, you can naturally reduce the presence of weeds that thrive in soybean fields.\\n\\n2. **Cover Crops**: Planting cover crops in the off-season can suppress weed growth by outcompeting them for resources such as sunlight, water, and nutrients.\\n\\n3. **Tillage**: Tillage can help control weeds by physically removing or burying them. However, excessive tillage can lead to soil erosion, so it should be used judiciously.\\n\\n4. **Planting Date and Seeding Rate**: Planting soybeans early and at a higher seeding rate can help the crop establish itself and compete more effectively against weeds.\\n\\n5. **Herbicide Application**: Use pre-emergence and post-emergence herbicides to control weeds. Pre-emergence herbicides are used before the weeds germinate, while post-emergence herbicides are used after the weeds have sprouted. \\n\\n6. **Herbicide Rotation**: To prevent herbicide resistance, rotate the types of herbicides used, ensuring they have different modes of action.\\n\\n7. **Regular Scouting**: Regularly inspect your fields to identify weed problems early. This allows for timely intervention and can prevent the spread of weeds.\\n\\n8. **Clean Equipment**: Ensure that your farming equipment is cleaned regularly to prevent the spread of weed seeds from one field to another.\\n\\nRemember, the most effective weed management strategies often involve a combination of these practices. 
Tailor your approach based on the specific types of weeds prevalent in your field, as different species may require different control strategies.\\n\", 'conversation_id': 'd6e83182-56d8-4151-b84e-a09dd84b8836', 'id': 1, 'user_email': 'dabholkar.asmita@gmail.com', 'first_query': 'What is the best approach to prevent weeds in my soybean field?', 'created_at': '2023-09-06T23:21:36.639848+00:00', 'modified_at': '2023-09-12 18:17:49.784979+00:00'}\n" - ] - } - ], - "source": [ - "print(metadata[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Mapping Conversations to Nomic" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cropwizard-beta\n", - "21\n", - "21\n", - "21\n", - "(21, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 22:36:00.418 | INFO | nomic.project:_create_project:790 - Creating project `Conversation Map for cropwizard-beta` in organization `dabholkar.asmita`\n", - "2023-09-12 22:36:01.434 | INFO | nomic.atlas:map_embeddings:108 - Uploading embeddings to Atlas.\n", - "2023-09-12 22:36:01.454 | WARNING | nomic.project:_validate_and_correct_arrow_upload:238 - Replacing 3 null values for field user_email with string 'null'. This behavior will change in a future version.\n", - "2023-09-12 22:36:01.459 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.11it/s]\n", - "2023-09-12 22:36:02.373 | INFO | nomic.project:_add_data:1422 - Upload succeeded.\n", - "2023-09-12 22:36:02.374 | INFO | nomic.atlas:map_embeddings:127 - Embedding upload succeeded.\n", - "2023-09-12 22:36:03.570 | INFO | nomic.project:create_index:1132 - Created map `Conversation Map for cropwizard-beta` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n", - "2023-09-12 22:36:03.571 | INFO | nomic.atlas:map_embeddings:140 - Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for cropwizard-beta: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/f5ad20c7-d1cb-4fe7-be16-04d0502af4fe]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-12 22:36:05.071 | INFO | nomic.project:create_index:1132 - Created map `cropwizard-beta_convo_index` in project `Conversation Map for cropwizard-beta`: https://atlas.nomic.ai/map/8634eb7a-22b5-4ab8-adc4-fbbc4bc654bf/9507bda2-b846-4e48-90d8-684a40fb3220\n" - ] - } - ], - "source": [ - "embeddings_model = OpenAIEmbeddings()\n", - "course_list = ['cropwizard-beta']\n", - "\n", - "for course in course_list:\n", - " print(course)\n", - " try:\n", - " response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").eq('course_name', course).execute()\n", - " data = response.data\n", - " course_df = pd.DataFrame(data)\n", - " print(len(course_df))\n", - "\n", - " if len(course_df) < 20:\n", - " continue\n", - " else:\n", - " \n", - " user_queries = []\n", - " metadata = []\n", - " i = 1\n", - "\n", - " # log conversation instead of individual messages\n", - " for index, row in course_df.iterrows():\n", - " user_email = row['user_email']\n", - " created_at = row['created_at']\n", - " 
convo = row['convo']\n", - " messages = convo['messages']\n", - " first_message = messages[0]['content']\n", - "\n", - " current_time = time.time()\n", - " dt_object = datetime.datetime.fromtimestamp(current_time)\n", - " formatted_timestamp = dt_object.strftime(\"%Y-%m-%d %H:%M:%S.%f+00:00\")\n", - " \n", - " user_queries.append(first_message)\n", - " # create metadata for multi-turn conversation\n", - " conversation = \"\"\n", - " for message in messages:\n", - " # string of role: content, role: content, ...\n", - " if message['role'] == 'user':\n", - " emoji = \"🙋\"\n", - " else:\n", - " emoji = \"🤖\"\n", - " conversation += \"\\n>>> \" + emoji + message['role'] + \": \" + message['content'] + \"\\n\"\n", - " # add to metadata\n", - " metadata_row = {\"course\": row['course_name'], \"conversation\": conversation, \"conversation_id\": convo['id'], \n", - " \"id\": i, \"user_email\": user_email, \"first_query\": first_message, \"created_at\": created_at,\n", - " \"modified_at\": formatted_timestamp}\n", - " metadata.append(metadata_row)\n", - " i += 1\n", - "\n", - " print(len(user_queries))\n", - " print(len(metadata))\n", - "\n", - " metadata = pd.DataFrame(metadata)\n", - " embeddings = embeddings_model.embed_documents(user_queries)\n", - " embeddings = np.array(embeddings)\n", - " print(embeddings.shape)\n", - "\n", - " # create an Atlas project\n", - " project_name = \"Conversation Map for \" + course\n", - " index_name = course + \"_convo_index\"\n", - " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='first_query',\n", - " name=project_name,\n", - " colorable_fields=['conversation_id', 'first_query'])\n", - " print(project.maps)\n", - "\n", - " project.create_index(index_name, build_topic_model=True)\n", - " except Exception as e:\n", - " print(\"course_name:\", course)\n", - " print(\"error: \", e)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'course': 'cropwizard', 'conversation': '\\n>>> user: What type of pest is most harmful to corn crops in Illinois and how can I prevent it?\\n\\n>>> assistant: The most harmful pest to corn crops in Illinois is the corn rootworm. Corn rootworms, specifically the western corn rootworm (Diabrotica virgifera virgifera), are the most impactful species in Illinois and the North Central U.S. These pests can cause significant damage to corn plants by feeding on the roots, leading to reduced plant vigor and yield.\\n\\nTo prevent corn rootworm infestations, several measures can be taken:\\n\\n1. Crop rotation: Implementing a crop rotation strategy by alternating corn with non-host crops, such as soybeans, can help reduce corn rootworm populations. The larvae are not capable of surviving on soybean or other broadleaf plants, so rotating crops disrupts their life cycle.\\n\\n2. Utilize Bt hybrids: Planting corn hybrids that have been genetically modified to express Bacillus thuringiensis (Bt) proteins targeting corn rootworm can provide effective control. These Bt hybrids produce toxins that are toxic to the pests, reducing their feeding and damage to corn plants.\\n\\n3. Insecticide treatments: Seed-applied insecticides containing neonicotinoid or diamide insecticides can be used to protect corn seedlings from early-season pests, including corn rootworms. 
Soil-applied insecticides can also be applied at planting to control corn rootworms and other early-season pests.\\n\\n4. Monitoring and scouting: Regular monitoring of corn fields for signs of corn rootworm damage, such as root feeding and lodging, is essential. Scouting helps determine if pest populations have reached economic thresholds that require action, such as insecticide application.\\n\\n5. Integrated Pest Management (IPM): Implementing an IPM approach involves combining multiple pest management strategies, including cultural practices, biological control, and chemical control, to minimize the impact of corn rootworms and other pests. This holistic approach aims to reduce reliance on chemical treatments and promote sustainable pest management.\\n\\nIt is important to consult with local agricultural extension services, seed dealers, or agricultural experts for specific recommendations tailored to your region and field conditions to effectively manage corn rootworm infestations.\\n', 'conversation_id': '3e5d4861-b128-4c64-96ac-87c74f3217e5', 'id': 2, 'user_email': 'avd6@illinois.edu', 'first_query': 'What type of pest is most harmful to corn crops in Illinois and how can I prevent it?'}\n" - ] - } - ], - "source": [ - "print(metadata[1])" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(22, 1536)\n" - ] - } - ], - "source": [ - "# initialize langchain OpenAI embeddings model\n", - "embeddings_model = OpenAIEmbeddings()\n", - "metadata = pd.DataFrame(metadata)\n", - "embeddings = embeddings_model.embed_documents(user_queries)\n", - "embeddings = np.array(embeddings)\n", - "print(embeddings.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 14:53:44.900 | INFO | nomic.project:_create_project:779 - Creating project `Conversation Map for cropwizard` in organization `dabholkar.asmita`\n", - "2023-09-07 14:53:45.794 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-09-07 14:53:45.798 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-09-07 14:53:46.743 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-09-07 14:53:46.744 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-09-07 14:53:47.568 | INFO | nomic.project:create_index:1111 - Created map `Conversation Map for cropwizard` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n", - "2023-09-07 14:53:47.570 | INFO | nomic.atlas:map_embeddings:139 - Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Conversation Map for cropwizard: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/f74b45b3-7213-45df-a4cb-c16ec9feb8b7]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-07 14:53:48.872 | INFO | nomic.project:create_index:1111 - Created map `cropwizard_convo_index_2` in project `Conversation Map for cropwizard`: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71\n" - ] - }, - { - "data": { - "text/html": [ - "Atlas Projection cropwizard_convo_index_2. Status Topic Modeling. view online" - ], - "text/plain": [ - "cropwizard_convo_index_2: https://atlas.nomic.ai/map/27ba0013-71ba-4fd6-a710-187d114442ab/3336aa7f-5995-4f02-831b-7161fd0c0b71" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# create an Atlas project\n", - "project_name = \"Conversation Map for \" + course\n", - "index_name = course + \"_convo_index\"\n", - "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='first_query',\n", - " name=project_name,\n", - " colorable_fields=['conversation_id', 'first_query'])\n", - "print(project.maps)\n", - "\n", - "project.create_index(index_name, build_topic_model=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "ename": "ReadTimeout", - "evalue": "The read operation timed out", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", - "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", - "[intermediate ssl/httpx/httpcore frames trimmed for readability -- the timeout surfaces in postgrest's SyncQueryRequestBuilder.execute() while this cell waits on supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()]", - "File \u001b[1;32m~\\...\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, 
SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", - "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" - ] - } - ], - "source": [ - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", - "data = response.data\n", - "df = pd.DataFrame(data)\n", - "len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Mapping Query-Response Pairs to Nomic" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "user queries: 1\n", - "metadata 1\n", - "------------------------\n" - ] - } - ], - "source": [ - "user_queries = []\n", - "metadata = []\n", - "i = 1\n", - "for convo in course_df:\n", - " messages = convo['messages']\n", - " print(len(messages))\n", - " pair_check = 0\n", - " for message in messages:\n", - " if message['role'] == 'user' and pair_check == 0:\n", - " query = message['content']\n", - " metadata_row = {'course': course, 'query': 
message['content']}\n", - " #print(\"metadata row: \", metadata_row)\n", - " pair_check += 1\n", - " if message['role'] == 'assistant' and pair_check == 1:\n", - " metadata_row['response'] = message['content']\n", - " metadata_row['id'] = i \n", - " #print(\"response metadata row: \", metadata_row)\n", - " i += 1\n", - " pair_check += 1\n", - " if pair_check == 2:\n", - " # one conversation pair is complete\n", - " user_queries.append(query)\n", - " metadata.append(metadata_row)\n", - " pair_check = 0\n", - "\n", - " \n", - "print(\"user queries: \", len(user_queries))\n", - "print(\"metadata\", len(metadata))\n", - "print(\"------------------------\")\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(590, 1536)\n" - ] - } - ], - "source": [ - "# initialize langchain OpenAI embeddings model\n", - "embeddings_model = OpenAIEmbeddings()\n", - "metadata = pd.DataFrame(metadata)\n", - "embeddings = embeddings_model.embed_documents(user_queries)\n", - "embeddings = np.array(embeddings)\n", - "print(embeddings.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-31 19:55:40.276 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE408FA23` in organization `dabholkar.asmita`\n", - "2023-08-31 19:55:41.466 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-31 19:55:41.491 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:02, 2.43s/it]\n", - "2023-08-31 19:55:43.932 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-31 19:55:43.932 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-31 19:55:45.475 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE408FA23` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n", - "2023-08-31 19:55:45.480 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ECE408FA23: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/b55cdca3-3ff4-454e-8f1b-a73ebc22cdff]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-31 19:55:47.813 | INFO | nomic.project:create_index:1111 - Created map `ECE408FA23_qr_index` in project `Query-Response Map for ECE408FA23`: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1\n" - ] - }, - { - "data": { - "text/html": [ - "Atlas Projection ECE408FA23_qr_index. Status Topic Modeling. 
view online" - ], - "text/plain": [ - "ECE408FA23_qr_index: https://atlas.nomic.ai/map/d8e02f68-501e-4a25-9060-502710c082ce/ff7276fc-942f-45cd-a199-e19a6e941db1" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# create an Atlas project\n", - "project_name = \"Query-Response Map for \" + course\n", - "index_name = course + \"_qr_index\"\n", - "project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='query',\n", - " name=project_name,\n", - " colorable_fields=['query'])\n", - "print(project.maps)\n", - "\n", - "project.create_index(index_name, build_topic_model=True)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "ReadTimeout", - "evalue": "The read operation timed out", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTimeoutError\u001b[0m Traceback (most recent call last)", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\_exceptions.py:10\u001b[0m, in \u001b[0;36mmap_exceptions\u001b[1;34m(map)\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 10\u001b[0m \u001b[39myield\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m exc: \u001b[39m# noqa: PIE786\u001b[39;00m\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpcore\\backends\\sync.py:28\u001b[0m, in \u001b[0;36mSyncStream.read\u001b[1;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sock\u001b[39m.\u001b[39msettimeout(timeout)\n\u001b[1;32m---> 28\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv(max_bytes)\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1259\u001b[0m, in \u001b[0;36mSSLSocket.recv\u001b[1;34m(self, buflen, flags)\u001b[0m\n\u001b[0;32m 1256\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 1257\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[0;32m 1258\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[1;32m-> 1259\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(buflen)\n\u001b[0;32m 1260\u001b[0m \u001b[39melse\u001b[39;00m:\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\ssl.py:1132\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[1;34m(self, len, buffer)\u001b[0m\n\u001b[0;32m 1131\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1132\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m)\n", - "\u001b[1;31mTimeoutError\u001b[0m: The read operation timed out", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[1;31mReadTimeout\u001b[0m Traceback (most recent call last)", - "[intermediate ssl/httpx/httpcore frames trimmed for readability -- same ReadTimeout path as above, raised while the all-course-map cell waits on supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()]", - "File \u001b[1;32m~\\...\\httpx\\_transports\\default.py:217\u001b[0m, in \u001b[0;36mHTTPTransport.handle_request\u001b[1;34m(self, 
request)\u001b[0m\n\u001b[0;32m 203\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(request\u001b[39m.\u001b[39mstream, SyncByteStream)\n\u001b[0;32m 205\u001b[0m req \u001b[39m=\u001b[39m httpcore\u001b[39m.\u001b[39mRequest(\n\u001b[0;32m 206\u001b[0m method\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mmethod,\n\u001b[0;32m 207\u001b[0m url\u001b[39m=\u001b[39mhttpcore\u001b[39m.\u001b[39mURL(\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 215\u001b[0m extensions\u001b[39m=\u001b[39mrequest\u001b[39m.\u001b[39mextensions,\n\u001b[0;32m 216\u001b[0m )\n\u001b[1;32m--> 217\u001b[0m \u001b[39mwith\u001b[39;00m map_httpcore_exceptions():\n\u001b[0;32m 218\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_pool\u001b[39m.\u001b[39mhandle_request(req)\n\u001b[0;32m 220\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39misinstance\u001b[39m(resp\u001b[39m.\u001b[39mstream, typing\u001b[39m.\u001b[39mIterable)\n", - "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.10_3.10.3056.0_x64__qbz5n2kfra8p0\\lib\\contextlib.py:153\u001b[0m, in \u001b[0;36m_GeneratorContextManager.__exit__\u001b[1;34m(self, typ, value, traceback)\u001b[0m\n\u001b[0;32m 151\u001b[0m value \u001b[39m=\u001b[39m typ()\n\u001b[0;32m 152\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 153\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgen\u001b[39m.\u001b[39;49mthrow(typ, value, traceback)\n\u001b[0;32m 154\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mStopIteration\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 155\u001b[0m \u001b[39m# Suppress StopIteration *unless* it's the same exception that\u001b[39;00m\n\u001b[0;32m 156\u001b[0m \u001b[39m# was passed to throw(). This prevents a StopIteration\u001b[39;00m\n\u001b[0;32m 157\u001b[0m \u001b[39m# raised inside the \"with\" statement from being suppressed.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[39mreturn\u001b[39;00m exc \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m value\n", - "File \u001b[1;32m~\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\httpx\\_transports\\default.py:77\u001b[0m, in \u001b[0;36mmap_httpcore_exceptions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[39mraise\u001b[39;00m\n\u001b[0;32m 76\u001b[0m message \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(exc)\n\u001b[1;32m---> 77\u001b[0m \u001b[39mraise\u001b[39;00m mapped_exc(message) \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n", - "\u001b[1;31mReadTimeout\u001b[0m: The read operation timed out" - ] - } - ], - "source": [ - "# cell for all course map creation\n", - "\n", - "response = supabase_client.table(\"llm-convo-monitor\").select(\"*\").execute()\n", - "data = response.data\n", - "print(len(data))" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "126" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame(data)\n", - "course_names = df['course_name'].unique()\n", - "len(course_names)" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: badm_550_ashley\n", - "(51, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:29.701 | INFO | 
nomic.project:_create_project:779 - Creating project `Query-Response Map for badm_550_ashley` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:31.242 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:31.255 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.03it/s]\n", - "2023-08-30 15:26:32.239 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:32.241 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:33.498 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm_550_ashley` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n", - "2023-08-30 15:26:33.500 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for badm_550_ashley: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/79d4480a-c4e5-4920-95cd-ed831d45f5d0]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:35.272 | INFO | nomic.project:create_index:1111 - Created map `badm_550_ashley_qr_index` in project `Query-Response Map for badm_550_ashley`: https://atlas.nomic.ai/map/e7fe1faf-be7a-4ca7-b89b-22863fffe432/2b997f7c-0084-4db7-8e9a-76eeb62d715b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ece120\n", - "(298, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:42.765 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:43.831 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:43.850 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:01, 1.20s/it]\n", - "2023-08-30 15:26:45.059 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:45.063 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:46.221 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n", - "2023-08-30 15:26:46.230 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ece120: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/0c8e9404-be65-4780-bba3-e3b44f14de9b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:48.040 | INFO | nomic.project:create_index:1111 - Created map `ece120_qr_index` in project `Query-Response Map for ece120`: https://atlas.nomic.ai/map/f35d2617-d029-4a29-ba4a-27a82088be24/61277149-b12d-4b59-8bcd-e9dd29fc58a4\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: badm-567-v3\n", - "(27, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:52.367 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for badm-567-v3` in organization `dabholkar.asmita`\n", - "2023-08-30 15:26:53.227 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:26:53.236 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-08-30 15:26:54.177 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:26:54.185 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:26:55.379 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for badm-567-v3` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n", - "2023-08-30 15:26:55.379 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for badm-567-v3: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/adabe0a1-d147-462a-8992-72af89cb5ae2]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:26:57.012 | INFO | nomic.project:create_index:1111 - Created map `badm-567-v3_qr_index` in project `Query-Response Map for badm-567-v3`: https://atlas.nomic.ai/map/022b4521-24df-401d-beff-bba4f881590f/e6c9798b-c154-43e7-917e-dd5cb71f116f\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: new-weather\n", - "(98, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:02.087 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for new-weather` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:03.117 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:03.125 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.07it/s]\n", - "2023-08-30 15:27:04.071 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:04.071 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:05.459 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for new-weather` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n", - "2023-08-30 15:27:05.461 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for new-weather: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/fd200444-86cb-4b1c-9be6-ebd0262e2428]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:07.001 | INFO | nomic.project:create_index:1111 - Created map `new-weather_qr_index` in project `Query-Response Map for new-weather`: https://atlas.nomic.ai/map/48db10b9-0a6e-4c43-83f7-f7fa2b406257/ff717c79-50cd-468b-9fcc-b391c8c167df\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: gies-online-mba-v2\n", - "(52, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:10.946 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for gies-online-mba-v2` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:11.862 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:11.868 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.33it/s]\n", - "2023-08-30 15:27:12.630 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:12.634 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:13.627 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for gies-online-mba-v2` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n", - "2023-08-30 15:27:13.627 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for gies-online-mba-v2: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/87144fbd-48a3-4d00-9136-ddc177685aa1]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:15.546 | INFO | nomic.project:create_index:1111 - Created map `gies-online-mba-v2_qr_index` in project `Query-Response Map for gies-online-mba-v2`: https://atlas.nomic.ai/map/8699b44f-8e2a-40e6-9562-0d2fc2898c3d/db13ea7a-f93d-4f97-b922-c51216d3d6e9\n", - "2023-08-30 15:27:15,670:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. 
Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:15,673:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:20,003:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:20,003:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: frontend\n", - "(24, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:28.373 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for frontend` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:29.396 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:29.405 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.11it/s]\n", - "2023-08-30 15:27:30.325 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:30.325 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:31.539 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for frontend` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n", - "2023-08-30 15:27:31.542 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for frontend: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/e427ecf0-339a-41bf-8f9a-dc7a2930ac70]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:33.028 | INFO | nomic.project:create_index:1111 - Created map `frontend_qr_index` in project `Query-Response Map for frontend`: https://atlas.nomic.ai/map/3a8ff45a-ca2d-4750-ab1b-ea033d35580d/006903b0-bb82-4432-9975-ff7c9ca80af9\n", - "2023-08-30 15:27:33,166:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:33,166:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:37,279:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:37,281:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:41,477:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:41,481:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ECE220FA23\n", - "(193, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:50.988 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ECE220FA23` in organization `dabholkar.asmita`\n", - "2023-08-30 15:27:51.867 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:27:51.878 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.01s/it]\n", - "2023-08-30 15:27:52.904 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:27:52.908 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:27:53.929 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ECE220FA23` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n", - "2023-08-30 15:27:53.929 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ECE220FA23: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/6b9b60f6-e2b7-4863-911c-fe84a63b35a1]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:27:55.645 | INFO | nomic.project:create_index:1111 - Created map `ECE220FA23_qr_index` in project `Query-Response Map for ECE220FA23`: https://atlas.nomic.ai/map/29b6f987-ee46-40be-808e-3d79a56d1b04/d8163c57-a2e8-41ca-90fc-043c8a9469b3\n", - "2023-08-30 15:27:55,758:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:55,759:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:27:59,841:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:27:59,841:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: Snowmass\n", - "(23, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:08.067 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Snowmass` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:09.006 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:09.014 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.20it/s]\n", - "2023-08-30 15:28:09.854 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:09.858 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:10.994 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Snowmass` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n", - "2023-08-30 15:28:10.994 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for Snowmass: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/51de6990-9d60-4cc4-9a8e-46bc7de97a0e]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:12.968 | INFO | nomic.project:create_index:1111 - Created map `Snowmass_qr_index` in project `Query-Response Map for Snowmass`: https://atlas.nomic.ai/map/aa415361-5d23-44ba-ada2-252ed1dfe9a0/4fdea35b-cda2-434e-afd1-e46e01430a97\n", - "2023-08-30 15:28:13,066:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:13,068:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:17,200:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:17,200:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:21,297:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:21,297:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NPRE247\n", - "(54, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:29.951 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NPRE247` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:31.043 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:31.051 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.64s/it]\n", - "2023-08-30 15:28:32.709 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:32.714 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:33.787 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NPRE247` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n", - "2023-08-30 15:28:33.790 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NPRE247: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/d06b7c47-850c-48f4-a5f9-984fbb0ca424]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:35.347 | INFO | nomic.project:create_index:1111 - Created map `NPRE247_qr_index` in project `Query-Response Map for NPRE247`: https://atlas.nomic.ai/map/3ae13050-5e00-4f2b-9dd4-f05e98945405/03471048-46aa-473c-b599-0bc812c679c0\n", - "2023-08-30 15:28:35,479:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:35,484:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:39,590:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:39,594:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: your-awesome-course\n", - "(30, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:50.102 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for your-awesome-course` in organization `dabholkar.asmita`\n", - "2023-08-30 15:28:51.013 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:28:51.022 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.10it/s]\n", - "2023-08-30 15:28:51.943 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:28:51.945 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:28:52.904 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for your-awesome-course` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n", - "2023-08-30 15:28:52.907 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for your-awesome-course: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/468942cc-d46a-45a2-82a6-11fac2f2be78]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:28:54.405 | INFO | nomic.project:create_index:1111 - Created map `your-awesome-course_qr_index` in project `Query-Response Map for your-awesome-course`: https://atlas.nomic.ai/map/dd5dfe40-c810-4402-9684-a43783ea6e52/e58f20e3-fa19-4c4c-8764-a185e0691c85\n", - "2023-08-30 15:28:54,549:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:54,549:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:28:58,646:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:28:58,653:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: pract\n", - "(44, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:07.007 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for pract` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:08.243 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:08.251 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.11s/it]\n", - "2023-08-30 15:29:09.368 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:09.368 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:10.392 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for pract` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n", - "2023-08-30 15:29:10.392 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for pract: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/669c8862-db60-41ad-94a9-fd815e1fdcd8]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:12.031 | INFO | nomic.project:create_index:1111 - Created map `pract_qr_index` in project `Query-Response Map for pract`: https://atlas.nomic.ai/map/15b51f99-fe4b-41ec-97fd-5579884e39fb/b15570eb-4db4-4b6f-9b4d-e80309d2dcb3\n", - "2023-08-30 15:29:12,113:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:12,115:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:16,201:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:16,209:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:20,282:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:20,285:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: ece120FL22\n", - "(53, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:28.994 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for ece120FL22` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:29.838 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:29.846 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.18it/s]\n", - "2023-08-30 15:29:30.708 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:30.710 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:31.828 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for ece120FL22` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n", - "2023-08-30 15:29:31.828 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for ece120FL22: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/888e5d82-2e4d-46ba-b920-915dc31b835b]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:33.354 | INFO | nomic.project:create_index:1111 - Created map `ece120FL22_qr_index` in project `Query-Response Map for ece120FL22`: https://atlas.nomic.ai/map/8e74c555-3192-451b-b4a1-2c86587df88b/eb4e553d-ecd3-4b11-9d75-468108ab08e2\n", - "2023-08-30 15:29:33,458:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:33,458:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:37,544:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:37,545:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:41,634:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:41,642:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: Law794-TransactionalDraftingAlam\n", - "(21, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:49.618 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for Law794-TransactionalDraftingAlam` in organization `dabholkar.asmita`\n", - "2023-08-30 15:29:50.718 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:29:50.731 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:01, 1.11s/it]\n", - "2023-08-30 15:29:51.849 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:29:51.851 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:29:53.034 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for Law794-TransactionalDraftingAlam` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n", - "2023-08-30 15:29:53.034 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for Law794-TransactionalDraftingAlam: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/0da00b97-bf94-4805-935e-458fb5c3d76f]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:29:54.605 | INFO | nomic.project:create_index:1111 - Created map `Law794-TransactionalDraftingAlam_qr_index` in project `Query-Response Map for Law794-TransactionalDraftingAlam`: https://atlas.nomic.ai/map/c4328a33-8958-462e-ba30-50f342736dae/9792fd4e-2196-4e39-bded-cc2bfd42abbf\n", - "2023-08-30 15:29:54,728:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. 
Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:54,731:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:29:58,804:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:29:58,804:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NCSA\n", - "(84, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:07.528 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSA` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:08.422 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:08.431 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.09it/s]\n", - "2023-08-30 15:30:09.361 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:09.361 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:10.325 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSA` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n", - "2023-08-30 15:30:10.325 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NCSA: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/8f176512-6a1e-4c8f-9d94-5794b1d7e619]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:11.917 | INFO | nomic.project:create_index:1111 - Created map `NCSA_qr_index` in project `Query-Response Map for NCSA`: https://atlas.nomic.ai/map/237ff7ed-c987-4259-96e0-3c5ed6e21207/2ce836ec-557b-4037-9ebd-d3e8982c0926\n", - "2023-08-30 15:30:12,004:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:12,004:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:16,092:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:16,092:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:20,157:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:20,164:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NCSADelta\n", - "(22, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:28.362 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NCSADelta` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:29.318 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:29.326 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. Converting to string from int32\n", - "1it [00:00, 1.10it/s]\n", - "2023-08-30 15:30:30.246 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:30.251 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:31.253 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NCSADelta` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n", - "2023-08-30 15:30:31.254 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NCSADelta: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/78d29eb7-be96-47c1-a856-b684c6419e34]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:33.104 | INFO | nomic.project:create_index:1111 - Created map `NCSADelta_qr_index` in project `Query-Response Map for NCSADelta`: https://atlas.nomic.ai/map/f0c381d5-e2e9-41fb-807a-ae61bc818122/b6e64fef-a829-435f-89b5-ed1a44c05202\n", - "2023-08-30 15:30:33,214:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' 
error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:33,214:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:37,289:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:37,290:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", - "2023-08-30 15:30:41,376:INFO - error_code=rate_limit_exceeded error_message='Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False\n", - "2023-08-30 15:30:41,382:WARNING - Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-4dafnkfdjctuHfuAPPkPsN46 on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "course name: NuclGPT-v1\n", - "(25, 1536)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:49.297 | INFO | nomic.project:_create_project:779 - Creating project `Query-Response Map for NuclGPT-v1` in organization `dabholkar.asmita`\n", - "2023-08-30 15:30:50.216 | INFO | nomic.atlas:map_embeddings:107 - Uploading embeddings to Atlas.\n", - "2023-08-30 15:30:50.222 | WARNING | nomic.project:_validate_and_correct_arrow_upload:261 - id_field is not a string. 
Converting to string from int32\n", - "1it [00:00, 1.23it/s]\n", - "2023-08-30 15:30:51.043 | INFO | nomic.project:_add_data:1401 - Upload succeeded.\n", - "2023-08-30 15:30:51.043 | INFO | nomic.atlas:map_embeddings:126 - Embedding upload succeeded.\n", - "2023-08-30 15:30:52.360 | INFO | nomic.project:create_index:1111 - Created map `Query-Response Map for NuclGPT-v1` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n", - "2023-08-30 15:30:52.360 | INFO | nomic.atlas:map_embeddings:139 - Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Query-Response Map for NuclGPT-v1: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/335097b0-a80d-4f38-8559-c6075a46a8b2]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-08-30 15:30:53.900 | INFO | nomic.project:create_index:1111 - Created map `NuclGPT-v1_qr_index` in project `Query-Response Map for NuclGPT-v1`: https://atlas.nomic.ai/map/76043322-073c-4d8c-8c90-8e563da8564f/441b7ff6-00ef-47f3-98a9-e45d327a1414\n" - ] - } - ], - "source": [ - "low_volume_courses = []\n", - "high_volume_courses = []\n", - "for course in course_names:\n", - " if course is None or course == 'ece408':\n", - " continue\n", - " \n", - " user_queries = []\n", - " metadata = []\n", - " i = 1\n", - " course_df = df[df['course_name'] == course]['convo']\n", - " for convo in course_df: # iterate through all conversations in a course\n", - " messages = convo['messages']\n", - "\n", - " # form query-response pairs out of the messages\n", - " pair_check = 0\n", - " for message in messages:\n", - " if message['role'] == 'user' and pair_check == 0:\n", - " query = message['content']\n", - " metadata_row = {'course': course, 'query': message['content']}\n", - " #print(\"metadata row: \", metadata_row)\n", - " pair_check += 1\n", - " \n", - " if message['role'] == 'assistant' and pair_check == 1:\n", - " metadata_row['response'] = message['content']\n", - " metadata_row['id'] = i \n", - " #print(\"response metadata row: \", metadata_row)\n", - " i += 1\n", - " pair_check += 1\n", - "\n", - " if pair_check == 2:\n", - " # one conversation pair is complete\n", - " user_queries.append(query)\n", - " metadata.append(metadata_row)\n", - " pair_check = 0\n", - " # after iterating every conversation in a course, create the map\n", - " if len(user_queries) < 20:\n", - " low_volume_courses.append(course)\n", - " continue\n", - "\n", - " if len(user_queries) > 500:\n", - " high_volume_courses.append(course)\n", - " continue\n", - " \n", - " metadata = pd.DataFrame(metadata)\n", - " embeddings = embeddings_model.embed_documents(user_queries)\n", - " embeddings = np.array(embeddings)\n", - " print(\"course name: \", course)\n", - " print(embeddings.shape)\n", - "\n", - " # create an Atlas project\n", - " project_name = \"Query-Response Map for \" + course\n", - " index_name = course + \"_qr_index\"\n", - " project = atlas.map_embeddings(embeddings=np.array(embeddings),\n", - " data=metadata,\n", - " id_field='id',\n", - " build_topic_model=True,\n", - " topic_label_field='query',\n", - " name=project_name,\n", - " colorable_fields=['query'])\n", - " print(project.maps)\n", - "\n", - " project.create_index(index_name, build_topic_model=True)\n", - "\n", - " \n" - ] - }, - { - "cell_type": 
"code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "low volume courses: ['test-video-ingest', 'badm-567', 'test-video-ingest-28', 'pdeploy999', 'badm-350-summer', 'previewtesting1', 'localtest2', 'your-favorite-url', 'mantine', 'test-video-ingest-2', 'badm_567_v2', 'erpnext', 'mip', 'farmdoc_test_kastan-v1', 'personalstatement', 'hrc', 'csv', 'star_nox', 'badm_567', 'SPaRCEd', 'localdemo8', 'badm_567_thumbnails', 'chatbot', 'erp', 'extreme', 'rohan_atree', 'zotero-extreme', 'test-video-ingest-20', 'gies-online-mba2', 'gies-online-mba', 'careerassistant', 'weather', 'lillian-wang-blog', 'local-test5', 'demo-for-vyriad', 'ag-gpt-beta', 'rtest', 'previewdeploy', 'r2test', 'personal-statement', 'rohan_excel', 'langchain-python', 'langchain', 'ncsa-live-demo', 'rohan_atree_individual', 'meta11-test', 'HealthyLivingGuide', 'rohan', 'babreu', 'test-video-ingest-31', 'p', 'test-video-ingest-17', 'summary', 'test-video-ingest-3', 'test-video-ingest-27', 'lillian-wang-blog-2', 'python-magic', 'ansible2', 'ece408fa23', 'farmdoc_test_josh_v2', 'local-test3', 'automata', 'SpaceFlorida-GT', 'GBSI-GT', 'newnew_ncsa', 'canvas', 'gbsi-gt', 'meditation-tutorial', 'profit', 'ansible', 'langchain-docs', 'testing_url_metadata_josh', 'test-india-biodiversity', 'vyriad', 'irc-workplans', 'kastanasdfasdfasdf', 'BADM-567-GT', 'mdt', 'vercel', 'gies-graduate-hub', 'test-video-ingest-12', 'test-video-ingest-13', 'Gies-graduate-hub', 'test_new_supa_scrape', 'doe-ascr-2023', 'arize', 'final-meta-test', 'preview-meta-test', 'gies-online-mba-V3', 'FoF-Drawdown-from-INVPEIV-5-24-23', 'FIN574-GT', 'test-video-ingest-30', 'test', 'NCSA-v2', 'conversational', 'clowder-docs', 'DA', 'test-video-ingest-21', 'test-video-ingest-25', 'test-ingest-10', 'eric-test-course', 'farmdoc-test', 'test-video-ingest-22', 'Academic-paper', 'starwars', 'AdamDemo']\n", - "high volume courses: ['gpt4', 'ECE408FA23']\n" - ] - } - ], - "source": [ - "print(\"low volume courses: \", low_volume_courses)\n", - "print(\"high volume courses: \", high_volume_courses)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From c20d5e21b43da6a3b98581602a042afbbcffac20 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 14 Sep 2023 18:54:09 -0500 Subject: [PATCH 54/61] fixed timestamp formats for nomic logs --- ai_ta_backend/main.py | 3 --- ai_ta_backend/nomic_logging.py | 38 ++++++++++++++-------------------- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 78fab519..72aa7990 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -401,9 +401,6 @@ def logToNomic(): conversation: str = request.args.get('conversation', default='', type=str) print("In /onResponseCompletion") - print("course_name: ", course_name) - print("conversation: ", conversation) - # print("print json: ", request.get_json()) data = request.get_json() print(len(data)) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index a32f74f5..1829751d 100644 --- a/ai_ta_backend/nomic_logging.py +++ 
b/ai_ta_backend/nomic_logging.py @@ -60,8 +60,8 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] print("prev_id: ", prev_id) - prev_created_at = prev_data['created_at'].values[0] - print("prev_created_at: ", prev_created_at) + created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S') + print("prev_created_at: ", created_at) print("before delete") # delete that convo data point from Nomic @@ -82,14 +82,12 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: prev_convo += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" # modified timestamp - current_time = time.time() - dt_object = datetime.datetime.fromtimestamp(current_time) - current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") # update metadata metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": prev_created_at, - "modified_at": current_timestamp}] + "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": created_at, + "modified_at": current_time}] else: print("conversation_id does not exist") @@ -107,13 +105,11 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: conversation_string += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" # modified timestamp - current_time = time.time() - dt_object = datetime.datetime.fromtimestamp(current_time) - current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": current_timestamp, - "modified_at": current_timestamp}] + "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": current_time, + "modified_at": current_time}] # create embeddings embeddings_model = OpenAIEmbeddings() @@ -201,7 +197,7 @@ def create_nomic_map(course_name: str, log_data: list): for index, row in df.iterrows(): user_email = row['user_email'] - created_at = row['created_at'] + created_at = pd.to_datetime(row['created_at']).strftime('%Y-%m-%d %H:%M:%S') convo = row['convo'] messages = convo['messages'] first_message = messages[0]['content'] @@ -227,15 +223,13 @@ def create_nomic_map(course_name: str, log_data: list): for m in log_messages: conversation += "\n>>> " + emoji + m['role'] + ": " + m['content'] + "\n" - # adding timestamp - current_time = time.time() - dt_object = datetime.datetime.fromtimestamp(current_time) - current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + # adding modified timestamp + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") # add to metadata metadata_row = {"course": row['course_name'], "conversation": conversation, "conversation_id": convo['id'], "id": i, "user_email": user_email, "first_query": first_message, "created_at": created_at, - "modified_at": current_timestamp} + "modified_at": current_time} metadata.append(metadata_row) i += 1 @@ -251,13 +245,11 @@ def create_nomic_map(course_name: str, log_data: list): conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" # 
adding timestamp - current_time = time.time() - dt_object = datetime.datetime.fromtimestamp(current_time) - current_timestamp = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f+00:00") + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") metadata_row = {"course": course_name, "conversation": conversation, "conversation_id": log_conversation_id, - "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content'], "created_at": current_timestamp, - "modified_at": current_timestamp} + "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content'], "created_at": current_time, + "modified_at": current_time} metadata.append(metadata_row) print("length of metadata: ", len(metadata)) From 65eddd9a94a161aab537e6fcb4159ff7f86a6b09 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 14 Sep 2023 19:11:03 -0500 Subject: [PATCH 55/61] checking for JSON serialization errors --- ai_ta_backend/nomic_logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 1829751d..ebff3c40 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -59,6 +59,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] + print("prev_id print: ", prev_data['id']) print("prev_id: ", prev_id) created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S') print("prev_created_at: ", created_at) From 041ff060c59d2caf8638e56ac3f4fab63d1b0fe7 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 14 Sep 2023 21:37:51 -0500 Subject: [PATCH 56/61] checking for JSON serialization errors --- ai_ta_backend/nomic_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index ebff3c40..f48de745 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -59,7 +59,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] - print("prev_id print: ", prev_data['id']) + print("prev_id print: ", [prev_id]) print("prev_id: ", prev_id) created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S') print("prev_created_at: ", created_at) From 5a84764331bda9a3d5b9bcf263413e23f6444b48 Mon Sep 17 00:00:00 2001 From: star-nox Date: Thu, 14 Sep 2023 21:48:52 -0500 Subject: [PATCH 57/61] typecasted previous convo ID to string for deletion --- ai_ta_backend/nomic_logging.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index f48de745..c86142ca 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -59,14 +59,13 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] - print("prev_id print: ", [prev_id]) print("prev_id: ", prev_id) created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S') print("prev_created_at: ", created_at) print("before delete") # delete that convo data point from Nomic - print(project.delete_data([prev_id])) + 
print(project.delete_data([str(prev_id)])) # prep for new point first_message = prev_convo.split("\n")[1].split(": ")[1] From fe0de52b7d5387c0ca746fc2ff3bd107044e7e66 Mon Sep 17 00:00:00 2001 From: Kastan Day Date: Fri, 15 Sep 2023 15:06:53 -0700 Subject: [PATCH 58/61] Yapf format ONLY --- ai_ta_backend/nomic_logging.py | 119 +++++++++++++++++++++------------ 1 file changed, 77 insertions(+), 42 deletions(-) diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index c86142ca..9e625f05 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -9,9 +9,10 @@ import pandas as pd import supabase -nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app +nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app NOMIC_MAP_NAME_PREFIX = 'Conversation Map for ' + def log_convo_to_nomic(course_name: str, conversation) -> str: """ Logs conversation to Nomic. @@ -24,22 +25,22 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: print("in log_convo_to_nomic()") print("conversation: ", conversation) - + messages = conversation['conversation']['messages'] user_email = conversation['conversation']['user_email'] conversation_id = conversation['conversation']['id'] #print("conversation: ", conversation) - + # we have to upload whole conversations # check what the fetched data looks like - pandas df or pyarrow table - # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it. + # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it. # will have current QA and historical QA from Nomic, append new data and add_embeddings() project_name = NOMIC_MAP_NAME_PREFIX + course_name start_time = time.monotonic() emoji = "" - + try: # fetch project metadata and embbeddings project = AtlasProject(name=project_name, add_datums_if_exists=True) @@ -48,29 +49,29 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: map_metadata_df['id'] = map_metadata_df['id'].astype(int) last_id = map_metadata_df['id'].max() print("last_id: ", last_id) - + if conversation_id in map_metadata_df.values: print("conversation_id exists") - + # store that convo metadata locally prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id] prev_index = prev_data.index.values[0] print("prev_index: ", prev_index) - embeddings = map_embeddings_df[prev_index-1].reshape(1, 1536) + embeddings = map_embeddings_df[prev_index - 1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] print("prev_id: ", prev_id) created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S') print("prev_created_at: ", created_at) print("before delete") - + # delete that convo data point from Nomic print(project.delete_data([str(prev_id)])) - + # prep for new point first_message = prev_convo.split("\n")[1].split(": ")[1] print("first_message: ", first_message) - + # select the last 2 messages and append new convo to prev convo messages_to_be_logged = messages[-2:] for message in messages_to_be_logged: @@ -78,16 +79,23 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: emoji = "🙋 " else: emoji = "🤖 " - + prev_convo += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" # modified timestamp current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") # update metadata - metadata = [{"course": course_name, "conversation": prev_convo, "conversation_id": conversation_id, - 
"id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": created_at, - "modified_at": current_time}] + metadata = [{ + "course": course_name, + "conversation": prev_convo, + "conversation_id": conversation_id, + "id": last_id + 1, + "user_email": user_email, + "first_query": first_message, + "created_at": created_at, + "modified_at": current_time + }] else: print("conversation_id does not exist") @@ -107,19 +115,26 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: # modified timestamp current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - metadata = [{"course": course_name, "conversation": conversation_string, "conversation_id": conversation_id, - "id": last_id+1, "user_email": user_email, "first_query": first_message, "created_at": current_time, - "modified_at": current_time}] - + metadata = [{ + "course": course_name, + "conversation": conversation_string, + "conversation_id": conversation_id, + "id": last_id + 1, + "user_email": user_email, + "first_query": first_message, + "created_at": current_time, + "modified_at": current_time + }] + # create embeddings embeddings_model = OpenAIEmbeddings() embeddings = embeddings_model.embed_documents(user_queries) - + # add embeddings to the project project = atlas.AtlasProject(name=project_name, add_datums_if_exists=True) project.add_embeddings(embeddings=np.array(embeddings), data=pd.DataFrame(metadata)) project.rebuild_maps() - + except Exception as e: # if project doesn't exist, create it print(e) @@ -155,13 +170,13 @@ def get_nomic_map(course_name: str): # Moved this to the logging function to keep our UI fast. # with project.wait_for_project_lock() as project: # project.rebuild_maps() - + map = project.get_map(project_name) print(f"⏰ Nomic Full Map Retrieval: {(time.monotonic() - start_time):.2f} seconds") - return {"map_id": f"iframe{map.id}", - "map_link": map.map_link} + return {"map_id": f"iframe{map.id}", "map_link": map.map_link} + def create_nomic_map(course_name: str, log_data: list): """ @@ -173,14 +188,14 @@ def create_nomic_map(course_name: str, log_data: list): print("in create_nomic_map()") # initialize supabase supabase_client = supabase.create_client( # type: ignore - supabase_url=os.getenv('SUPABASE_URL'), # type: ignore - supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore + supabase_url=os.getenv('SUPABASE_URL'), # type: ignore + supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore # fetch all conversations with this new course (we expect <=20 conversations, because otherwise the map should be made already) response = supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).execute() data = response.data df = pd.DataFrame(data) - + if len(data) < 19: return None else: @@ -197,7 +212,7 @@ def create_nomic_map(course_name: str, log_data: list): for index, row in df.iterrows(): user_email = row['user_email'] - created_at = pd.to_datetime(row['created_at']).strftime('%Y-%m-%d %H:%M:%S') + created_at = pd.to_datetime(row['created_at']).strftime('%Y-%m-%d %H:%M:%S') convo = row['convo'] messages = convo['messages'] first_message = messages[0]['content'] @@ -206,13 +221,13 @@ def create_nomic_map(course_name: str, log_data: list): # create metadata for multi-turn conversation conversation = "" if message['role'] == 'user': - emoji = "🙋 " + emoji = "🙋 " else: - emoji = "🤖 " + emoji = "🤖 " for message in messages: # string of role: content, role: content, ... 
conversation += "\n>>> " + emoji + message['role'] + ": " + message['content'] + "\n" - + # append current chat to previous chat if convo already exists if convo['id'] == log_conversation_id: conversation_exists = True @@ -225,11 +240,18 @@ def create_nomic_map(course_name: str, log_data: list): # adding modified timestamp current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - + # add to metadata - metadata_row = {"course": row['course_name'], "conversation": conversation, "conversation_id": convo['id'], - "id": i, "user_email": user_email, "first_query": first_message, "created_at": created_at, - "modified_at": current_time} + metadata_row = { + "course": row['course_name'], + "conversation": conversation, + "conversation_id": convo['id'], + "id": i, + "user_email": user_email, + "first_query": first_message, + "created_at": created_at, + "modified_at": current_time + } metadata.append(metadata_row) i += 1 @@ -247,27 +269,40 @@ def create_nomic_map(course_name: str, log_data: list): # adding timestamp current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - metadata_row = {"course": course_name, "conversation": conversation, "conversation_id": log_conversation_id, - "id": i, "user_email": log_user_email, "first_query": log_messages[0]['content'], "created_at": current_time, - "modified_at": current_time} + metadata_row = { + "course": course_name, + "conversation": conversation, + "conversation_id": log_conversation_id, + "id": i, + "user_email": log_user_email, + "first_query": log_messages[0]['content'], + "created_at": current_time, + "modified_at": current_time + } metadata.append(metadata_row) print("length of metadata: ", len(metadata)) metadata = pd.DataFrame(metadata) - embeddings_model = OpenAIEmbeddings() # type: ignore + embeddings_model = OpenAIEmbeddings() # type: ignore embeddings = embeddings_model.embed_documents(user_queries) # create Atlas project project_name = NOMIC_MAP_NAME_PREFIX + course_name index_name = course_name + "_convo_index" print("project_name: ", project_name) - project = atlas.map_embeddings(embeddings=np.array(embeddings), data=metadata, # type: ignore -- this is actually the correc type, the function signature from Nomic is incomplete - id_field='id', build_topic_model=True, topic_label_field='first_query', - name=project_name, colorable_fields=['conversation_id', 'first_query']) + project = atlas.map_embeddings( + embeddings=np.array(embeddings), + data=metadata, # type: ignore -- this is actually the correc type, the function signature from Nomic is incomplete + id_field='id', + build_topic_model=True, + topic_label_field='first_query', + name=project_name, + colorable_fields=['conversation_id', 'first_query']) project.create_index(index_name, build_topic_model=True) print("project: ", project) return f"Successfully created Nomic map for {course_name}" + if __name__ == '__main__': pass From adb1b50780c9950779cbd08cf2ce3ea0bc637b20 Mon Sep 17 00:00:00 2001 From: Kastan Day Date: Fri, 15 Sep 2023 15:11:15 -0700 Subject: [PATCH 59/61] revert all files with no changes except whitespace on newlines --- .env.template | 68 +- .github/workflows/yapf-format.yml | 2 +- .gitignore | 334 +++---- ai_ta_backend/extreme_context_stuffing.py | 1082 ++++++++++----------- ai_ta_backend/utils_tokenization.py | 270 ++--- ai_ta_backend/web_scrape.py | 934 +++++++++--------- 6 files changed, 1345 insertions(+), 1345 deletions(-) diff --git a/.env.template b/.env.template index 5c5520de..ba04c704 100644 --- a/.env.template +++ b/.env.template 
@@ -1,34 +1,34 @@ -# Supabase SQL -SUPABASE_URL= -SUPABASE_API_KEY= -SUPABASE_READ_ONLY= -SUPABASE_JWT_SECRET= - -MATERIALS_SUPABASE_TABLE=uiuc_chatbot -NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE=documents - -# QDRANT -QDRANT_COLLECTION_NAME=uiuc-chatbot -DEV_QDRANT_COLLECTION_NAME=dev -QDRANT_URL= -QDRANT_API_KEY= - -REFACTORED_MATERIALS_SUPABASE_TABLE= - -# AWS -S3_BUCKET_NAME=uiuc-chatbot -AWS_ACCESS_KEY_ID= -AWS_SECRET_ACCESS_KEY= - -OPENAI_API_KEY= - -NOMIC_API_KEY= -LINTRULE_SECRET= - -# Github Agent -GITHUB_APP_ID= -GITHUB_APP_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY----- - ------END RSA PRIVATE KEY-----" - -NUMEXPR_MAX_THREADS=2 +# Supabase SQL +SUPABASE_URL= +SUPABASE_API_KEY= +SUPABASE_READ_ONLY= +SUPABASE_JWT_SECRET= + +MATERIALS_SUPABASE_TABLE=uiuc_chatbot +NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE=documents + +# QDRANT +QDRANT_COLLECTION_NAME=uiuc-chatbot +DEV_QDRANT_COLLECTION_NAME=dev +QDRANT_URL= +QDRANT_API_KEY= + +REFACTORED_MATERIALS_SUPABASE_TABLE= + +# AWS +S3_BUCKET_NAME=uiuc-chatbot +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= + +OPENAI_API_KEY= + +NOMIC_API_KEY= +LINTRULE_SECRET= + +# Github Agent +GITHUB_APP_ID= +GITHUB_APP_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY----- + +-----END RSA PRIVATE KEY-----" + +NUMEXPR_MAX_THREADS=2 diff --git a/.github/workflows/yapf-format.yml b/.github/workflows/yapf-format.yml index 3c0c1321..52d2df31 100644 --- a/.github/workflows/yapf-format.yml +++ b/.github/workflows/yapf-format.yml @@ -14,7 +14,7 @@ jobs: - name: pip install yapf run: pip install yapf - name: Format code with yapf - run: yapf --in-place --recursive --parallel --style="{based_on_style: google, column_limit: 140, indent_width: 2}" --exclude '*.env' . + run: yapf --in-place --recursive --parallel --style='{based_on_style: google, column_limit: 140, indent_width: 2}' --exclude '*.env' . - name: Commit changes uses: EndBug/add-and-commit@v4 with: diff --git a/.gitignore b/.gitignore index 3db8ad0c..70babf88 100644 --- a/.gitignore +++ b/.gitignore @@ -1,167 +1,167 @@ -# don't sync coursera docs -coursera-dl/ -*parsed.json -wandb - -# don't expose env files -dummy.ipynb -.env -# Created by https://www.toptal.com/developers/gitignore/api/python -# Edit at https://www.toptal.com/developers/gitignore?templates=python - -### Python ### -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coveage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -pytestdebug.log - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ -doc/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
-# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pythonenv* - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# profiling data -.prof - -# Virtualenv -# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ -.Python -[Bb]in -[Ii]nclude -[Ll]ib -[Ll]ib64 -[Ll]ocal -[Ss]cripts -pyvenv.cfg -.venv -pip-selfcheck.json - - -# End of https://www.toptal.com/developers/gitignore/api/python -.aider* +# don't sync coursera docs +coursera-dl/ +*parsed.json +wandb + +# don't expose env files +dummy.ipynb +.env +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coveage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +pytestdebug.log + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +doc/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pythonenv* + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# profiling data +.prof + +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +.Python +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +.venv +pip-selfcheck.json + + +# End of https://www.toptal.com/developers/gitignore/api/python +.aider* diff --git a/ai_ta_backend/extreme_context_stuffing.py b/ai_ta_backend/extreme_context_stuffing.py index 03b56e86..ed133a6a 100644 --- a/ai_ta_backend/extreme_context_stuffing.py +++ b/ai_ta_backend/extreme_context_stuffing.py @@ -1,541 +1,541 @@ -""" -API REQUEST PARALLEL PROCESSOR - -Using the OpenAI API to process lots of text quickly takes some care. -If you trickle in a million API requests one by one, they'll take days to complete. -If you flood a million API requests in parallel, they'll exceed the rate limits and fail with errors. -To maximize throughput, parallel requests need to be throttled to stay under rate limits. - -This script parallelizes requests to the OpenAI API while throttling to stay under rate limits. - -Features: -- Streams requests from file, to avoid running out of memory for giant jobs -- Makes requests concurrently, to maximize throughput -- Throttles request and token usage, to stay under rate limits -- Retries failed requests up to {max_attempts} times, to avoid missing data -- Logs errors, to diagnose problems with requests - -Example command to call script: -``` -python examples/api_request_parallel_processor.py \ - --requests_filepath examples/data/example_requests_to_parallel_process.jsonl \ - --save_filepath examples/data/example_requests_to_parallel_process_results.jsonl \ - --request_url https://api.openai.com/v1/embeddings \ - --max_requests_per_minute 1500 \ - --max_tokens_per_minute 6250000 \ - --token_encoding_name cl100k_base \ - --max_attempts 5 \ - --logging_level 20 -``` - -Inputs: -- requests_filepath : str - - path to the file containing the requests to be processed - - file should be a jsonl file, where each line is a json object with API parameters and an optional metadata field - - e.g., {"model": "text-embedding-ada-002", "input": "embed me", "metadata": {"row_id": 1}} - - as with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically) - - an example file is provided at examples/data/example_requests_to_parallel_process.jsonl - - the code to generate the example file is appended to the bottom of this script -- save_filepath : str, optional - - path to the file where the results will be saved - - file will be a jsonl file, where each line is an array with the original request plus the API response - - e.g., [{"model": "text-embedding-ada-002", "input": "embed me"}, {...}] - - if omitted, results will be saved to {requests_filename}_results.jsonl -- request_url : str, optional - - URL of the API endpoint to call - - if omitted, will default to "https://api.openai.com/v1/embeddings" -- api_key : str, optional - - API key to use - - if omitted, the script will attempt to read it from an environment variable 
{os.getenv("OPENAI_API_KEY")} -- max_requests_per_minute : float, optional - - target number of requests to make per minute (will make less if limited by tokens) - - leave headroom by setting this to 50% or 75% of your limit - - if requests are limiting you, try batching multiple embeddings or completions into one request - - if omitted, will default to 1,500 -- max_tokens_per_minute : float, optional - - target number of tokens to use per minute (will use less if limited by requests) - - leave headroom by setting this to 50% or 75% of your limit - - if omitted, will default to 125,000 -- token_encoding_name : str, optional - - name of the token encoding used, as defined in the `tiktoken` package - - if omitted, will default to "cl100k_base" (used by `text-embedding-ada-002`) -- max_attempts : int, optional - - number of times to retry a failed request before giving up - - if omitted, will default to 5 -- logging_level : int, optional - - level of logging to use; higher numbers will log fewer messages - - 40 = ERROR; will log only when requests fail after all retries - - 30 = WARNING; will log when requests his rate limits or other errors - - 20 = INFO; will log when requests start and the status at finish - - 10 = DEBUG; will log various things as the loop runs to see when they occur - - if omitted, will default to 20 (INFO). - -The script is structured as follows: - - Imports - - Define main() - - Initialize things - - In main loop: - - Get next request if one is not already waiting for capacity - - Update available token & request capacity - - If enough capacity available, call API - - The loop pauses if a rate limit error is hit - - The loop breaks when no tasks remain - - Define dataclasses - - StatusTracker (stores script metadata counters; only one instance is created) - - APIRequest (stores API inputs, outputs, metadata; one method to call API) - - Define functions - - api_endpoint_from_url (extracts API endpoint from request URL) - - append_to_jsonl (writes to results file) - - num_tokens_consumed_from_request (bigger function to infer token usage from request) - - task_id_generator_function (yields 1, 2, 3, ...) 
- - Run main() -""" - -# import argparse -# import subprocess -# import tempfile -# from langchain.llms import OpenAI -import asyncio -import json -import logging -import os -import re -import time -from dataclasses import ( # for storing API inputs, outputs, and metadata - dataclass, field) -from typing import Any, List - -import aiohttp # for making API calls concurrently -import tiktoken # for counting tokens -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.vectorstores import Qdrant -from qdrant_client import QdrantClient, models - - -class OpenAIAPIProcessor: - - def __init__(self, input_prompts_list, request_url, api_key, max_requests_per_minute, max_tokens_per_minute, token_encoding_name, - max_attempts, logging_level): - self.request_url = request_url - self.api_key = api_key - self.max_requests_per_minute = max_requests_per_minute - self.max_tokens_per_minute = max_tokens_per_minute - self.token_encoding_name = token_encoding_name - self.max_attempts = max_attempts - self.logging_level = logging_level - self.input_prompts_list: List[dict] = input_prompts_list - self.results = [] - self.cleaned_results: List[str] = [] - - async def process_api_requests_from_file(self): - """Processes API requests in parallel, throttling to stay under rate limits.""" - # constants - seconds_to_pause_after_rate_limit_error = 15 - seconds_to_sleep_each_loop = 0.001 # 1 ms limits max throughput to 1,000 requests per second - - # initialize logging - logging.basicConfig(level=self.logging_level) - logging.debug(f"Logging initialized at level {self.logging_level}") - - # infer API endpoint and construct request header - api_endpoint = api_endpoint_from_url(self.request_url) - request_header = {"Authorization": f"Bearer {self.api_key}"} - - # initialize trackers - queue_of_requests_to_retry = asyncio.Queue() - task_id_generator = task_id_generator_function() # generates integer IDs of 1, 2, 3, ... - status_tracker = StatusTracker() # single instance to track a collection of variables - next_request = None # variable to hold the next request to call - - # initialize available capacity counts - available_request_capacity = self.max_requests_per_minute - available_token_capacity = self.max_tokens_per_minute - last_update_time = time.time() - - # initialize flags - file_not_finished = True # after file is empty, we'll skip reading it - logging.debug(f"Initialization complete.") - - requests = self.input_prompts_list.__iter__() - - logging.debug(f"File opened. 
Entering main loop") - - task_list = [] - - while True: - # get next request (if one is not already waiting for capacity) - if next_request is None: - if not queue_of_requests_to_retry.empty(): - next_request = queue_of_requests_to_retry.get_nowait() - logging.debug(f"Retrying request {next_request.task_id}: {next_request}") - elif file_not_finished: - try: - # get new request - # request_json = json.loads(next(requests)) - request_json = next(requests) - - next_request = APIRequest(task_id=next(task_id_generator), - request_json=request_json, - token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint, - self.token_encoding_name), - attempts_left=self.max_attempts, - metadata=request_json.pop("metadata", None)) - status_tracker.num_tasks_started += 1 - status_tracker.num_tasks_in_progress += 1 - logging.debug(f"Reading request {next_request.task_id}: {next_request}") - except StopIteration: - # if file runs out, set flag to stop reading it - logging.debug("Read file exhausted") - file_not_finished = False - - # update available capacity - current_time = time.time() - seconds_since_update = current_time - last_update_time - available_request_capacity = min( - available_request_capacity + self.max_requests_per_minute * seconds_since_update / 60.0, - self.max_requests_per_minute, - ) - available_token_capacity = min( - available_token_capacity + self.max_tokens_per_minute * seconds_since_update / 60.0, - self.max_tokens_per_minute, - ) - last_update_time = current_time - - # if enough capacity available, call API - if next_request: - next_request_tokens = next_request.token_consumption - if (available_request_capacity >= 1 and available_token_capacity >= next_request_tokens): - # update counters - available_request_capacity -= 1 - available_token_capacity -= next_request_tokens - next_request.attempts_left -= 1 - - # call API - # TODO: NOT SURE RESPONSE WILL WORK HERE - task = asyncio.create_task( - next_request.call_api( - request_url=self.request_url, - request_header=request_header, - retry_queue=queue_of_requests_to_retry, - status_tracker=status_tracker, - )) - task_list.append(task) - next_request = None # reset next_request to empty - - # print("status_tracker.num_tasks_in_progress", status_tracker.num_tasks_in_progress) - # one_task_result = task.result() - # print("one_task_result", one_task_result) - - # if all tasks are finished, break - if status_tracker.num_tasks_in_progress == 0: - break - - # main loop sleeps briefly so concurrent tasks can run - await asyncio.sleep(seconds_to_sleep_each_loop) - - # if a rate limit error was hit recently, pause to cool down - seconds_since_rate_limit_error = (time.time() - status_tracker.time_of_last_rate_limit_error) - if seconds_since_rate_limit_error < seconds_to_pause_after_rate_limit_error: - remaining_seconds_to_pause = (seconds_to_pause_after_rate_limit_error - seconds_since_rate_limit_error) - await asyncio.sleep(remaining_seconds_to_pause) - # ^e.g., if pause is 15 seconds and final limit was hit 5 seconds ago - logging.warn( - f"Pausing to cool down until {time.ctime(status_tracker.time_of_last_rate_limit_error + seconds_to_pause_after_rate_limit_error)}" - ) - - # after finishing, log final status - logging.info(f"""Parallel processing complete. 
About to return.""") - if status_tracker.num_tasks_failed > 0: - logging.warning(f"{status_tracker.num_tasks_failed} / {status_tracker.num_tasks_started} requests failed.") - if status_tracker.num_rate_limit_errors > 0: - logging.warning(f"{status_tracker.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate.") - - # asyncio wait for task_list - await asyncio.wait(task_list) - - for task in task_list: - openai_completion = task.result() - self.results.append(openai_completion) - - self.cleaned_results: List[str] = extract_context_from_results(self.results) - - -def extract_context_from_results(results: List[Any]) -> List[str]: - assistant_contents = [] - total_prompt_tokens = 0 - total_completion_tokens = 0 - - for element in results: - if element is not None: - for item in element: - if 'choices' in item: - for choice in item['choices']: - if choice['message']['role'] == 'assistant': - assistant_contents.append(choice['message']['content']) - total_prompt_tokens += item['usage']['prompt_tokens'] - total_completion_tokens += item['usage']['completion_tokens'] - # Note: I don't think the prompt_tokens or completion_tokens is working quite right... - - return assistant_contents - - -# dataclasses - - -@dataclass -class StatusTracker: - """Stores metadata about the script's progress. Only one instance is created.""" - - num_tasks_started: int = 0 - num_tasks_in_progress: int = 0 # script ends when this reaches 0 - num_tasks_succeeded: int = 0 - num_tasks_failed: int = 0 - num_rate_limit_errors: int = 0 - num_api_errors: int = 0 # excluding rate limit errors, counted above - num_other_errors: int = 0 - time_of_last_rate_limit_error: float = 0 # used to cool off after hitting rate limits - - -@dataclass -class APIRequest: - """Stores an API request's inputs, outputs, and other metadata. Contains a method to make an API call.""" - - task_id: int - request_json: dict - token_consumption: int - attempts_left: int - metadata: dict - result: list = field(default_factory=list) - - async def call_api( - self, - request_url: str, - request_header: dict, - retry_queue: asyncio.Queue, - status_tracker: StatusTracker, - ): - """Calls the OpenAI API and saves results.""" - # logging.info(f"Starting request #{self.task_id}") - error = None - try: - async with aiohttp.ClientSession() as session: - async with session.post(url=request_url, headers=request_header, json=self.request_json) as response: - response = await response.json() - if "error" in response: - logging.warning(f"Request {self.task_id} failed with error {response['error']}") - status_tracker.num_api_errors += 1 - error = response - if "Rate limit" in response["error"].get("message", ""): - status_tracker.time_of_last_rate_limit_error = time.time() - status_tracker.num_rate_limit_errors += 1 - status_tracker.num_api_errors -= 1 # rate limit errors are counted separately - - except Exception as e: # catching naked exceptions is bad practice, but in this case we'll log & save them - logging.warning(f"Request {self.task_id} failed with Exception {e}") - status_tracker.num_other_errors += 1 - error = e - if error: - self.result.append(error) - if self.attempts_left: - retry_queue.put_nowait(self) - else: - logging.error(f"Request {self.request_json} failed after all attempts. 
Saving errors: {self.result}") - data = ([self.request_json, [str(e) for e in self.result], self.metadata] - if self.metadata else [self.request_json, [str(e) for e in self.result]]) - #append_to_jsonl(data, save_filepath) - status_tracker.num_tasks_in_progress -= 1 - status_tracker.num_tasks_failed += 1 - return data - else: - data = ([self.request_json, response, self.metadata] if self.metadata else [self.request_json, response]) # type: ignore - #append_to_jsonl(data, save_filepath) - status_tracker.num_tasks_in_progress -= 1 - status_tracker.num_tasks_succeeded += 1 - # logging.debug(f"Request {self.task_id} saved to {save_filepath}") - - return data - - -# functions - - -def api_endpoint_from_url(request_url: str): - """Extract the API endpoint from the request URL.""" - match = re.search('^https://[^/]+/v\\d+/(.+)$', request_url) - return match[1] # type: ignore - - -def append_to_jsonl(data, filename: str) -> None: - """Append a json payload to the end of a jsonl file.""" - json_string = json.dumps(data) - with open(filename, "a") as f: - f.write(json_string + "\n") - - -def num_tokens_consumed_from_request( - request_json: dict, - api_endpoint: str, - token_encoding_name: str, -): - """Count the number of tokens in the request. Only supports completion and embedding requests.""" - encoding = tiktoken.get_encoding(token_encoding_name) - # if completions request, tokens = prompt + n * max_tokens - if api_endpoint.endswith("completions"): - max_tokens = request_json.get("max_tokens", 15) - n = request_json.get("n", 1) - completion_tokens = n * max_tokens - - # chat completions - if api_endpoint.startswith("chat/"): - num_tokens = 0 - for message in request_json["messages"]: - num_tokens += 4 # every message follows {role/name}\n{content}\n - for key, value in message.items(): - num_tokens += len(encoding.encode(value)) - if key == "name": # if there's a name, the role is omitted - num_tokens -= 1 # role is always required and always 1 token - num_tokens += 2 # every reply is primed with assistant - return num_tokens + completion_tokens - # normal completions - else: - prompt = request_json["prompt"] - if isinstance(prompt, str): # single prompt - prompt_tokens = len(encoding.encode(prompt)) - num_tokens = prompt_tokens + completion_tokens - return num_tokens - elif isinstance(prompt, list): # multiple prompts - prompt_tokens = sum([len(encoding.encode(p)) for p in prompt]) - num_tokens = prompt_tokens + completion_tokens * len(prompt) - return num_tokens - else: - raise TypeError('Expecting either string or list of strings for "prompt" field in completion request') - # if embeddings request, tokens = input tokens - elif api_endpoint == "embeddings": - input = request_json["input"] - if isinstance(input, str): # single input - num_tokens = len(encoding.encode(input)) - return num_tokens - elif isinstance(input, list): # multiple inputs - num_tokens = sum([len(encoding.encode(i)) for i in input]) - return num_tokens - else: - raise TypeError('Expecting either string or list of strings for "inputs" field in embedding request') - # more logic needed to support other API calls (e.g., edits, inserts, DALL-E) - else: - raise NotImplementedError(f'API endpoint "{api_endpoint}" not implemented in this script') - - -def task_id_generator_function(): - """Generate integers 0, 1, 2, and so on.""" - task_id = 0 - while True: - yield task_id - task_id += 1 - -if __name__ == '__main__': - pass - -# run script -# if __name__ == "__main__": -# qdrant_client = QdrantClient( -# 
url=os.getenv('QDRANT_URL'), -# api_key=os.getenv('QDRANT_API_KEY'), -# ) -# vectorstore = Qdrant( -# client=qdrant_client, -# collection_name=os.getenv('QDRANT_COLLECTION_NAME'), # type: ignore -# embeddings=OpenAIEmbeddings()) # type: ignore - -# user_question = "What is the significance of Six Sigma?" -# k = 4 -# fetch_k = 200 -# found_docs = vectorstore.max_marginal_relevance_search(user_question, k=k, fetch_k=200) - -# requests = [] -# for i, doc in enumerate(found_docs): -# dictionary = { -# "model": "gpt-3.5-turbo-0613", # 4k context -# "messages": [{ -# "role": "system", -# "content": "You are a factual summarizer of partial documents. Stick to the facts (including partial info when necessary to avoid making up potentially incorrect details), and say I don't know when necessary." -# }, { -# "role": -# "user", -# "content": -# f"What is a comprehensive summary of the given text, based on the question:\n{doc.page_content}\nQuestion: {user_question}\nThe summary should cover all the key points only relevant to the question, while also condensing the information into a concise and easy-to-understand format. Please ensure that the summary includes relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. Feel free to include references, sentence fragments, keywords, or anything that could help someone learn about it, only as it relates to the given question. The length of the summary should be as short as possible, without losing relevant information.\n" -# }], -# "n": 1, -# "max_tokens": 500, -# "metadata": doc.metadata -# } -# requests.append(dictionary) - -# oai = OpenAIAPIProcessor( -# input_prompts_list=requests, -# request_url='https://api.openai.com/v1/chat/completions', -# api_key=os.getenv("OPENAI_API_KEY"), -# max_requests_per_minute=1500, -# max_tokens_per_minute=90000, -# token_encoding_name='cl100k_base', -# max_attempts=5, -# logging_level=20, -# ) -# # run script -# asyncio.run(oai.process_api_requests_from_file()) - -# assistant_contents = [] -# total_prompt_tokens = 0 -# total_completion_tokens = 0 - -# print("Results, end of main: ", oai.results) -# print("-"*50) - -# # jsonObject = json.loads(oai.results) -# for element in oai.results: -# for item in element: -# if 'choices' in item: -# for choice in item['choices']: -# if choice['message']['role'] == 'assistant': -# assistant_contents.append(choice['message']['content']) -# total_prompt_tokens += item['usage']['prompt_tokens'] -# total_completion_tokens += item['usage']['completion_tokens'] - -# print("Assistant Contents:", assistant_contents) -# print("Total Prompt Tokens:", total_prompt_tokens) -# print("Total Completion Tokens:", total_completion_tokens) -# turbo_total_cost = (total_prompt_tokens * 0.0015) + (total_completion_tokens * 0.002) -# print("Total cost (3.5-turbo):", (total_prompt_tokens * 0.0015), " + Completions: ", (total_completion_tokens * 0.002), " = ", turbo_total_cost) - -# gpt4_total_cost = (total_prompt_tokens * 0.03) + (total_completion_tokens * 0.06) -# print("Hypothetical cost for GPT-4:", (total_prompt_tokens * 0.03), " + Completions: ", (total_completion_tokens * 0.06), " = ", gpt4_total_cost) -# print("GPT-4 cost premium: ", (gpt4_total_cost / turbo_total_cost), "x") - ''' - Pricing: - GPT4: - * $0.03 prompt - * $0.06 completions - 3.5-turbo: - * $0.0015 prompt - * $0.002 completions - ''' -""" -APPENDIX - -The example requests file at openai-cookbook/examples/data/example_requests_to_parallel_process.jsonl contains 10,000 
requests to text-embedding-ada-002. - -It was generated with the following code: - -```python -import json - -filename = "data/example_requests_to_parallel_process.jsonl" -n_requests = 10_000 -jobs = [{"model": "text-embedding-ada-002", "input": str(x) + "\n"} for x in range(n_requests)] -with open(filename, "w") as f: - for job in jobs: - json_string = json.dumps(job) - f.write(json_string + "\n") -``` - -As with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically). -""" +""" +API REQUEST PARALLEL PROCESSOR + +Using the OpenAI API to process lots of text quickly takes some care. +If you trickle in a million API requests one by one, they'll take days to complete. +If you flood a million API requests in parallel, they'll exceed the rate limits and fail with errors. +To maximize throughput, parallel requests need to be throttled to stay under rate limits. + +This script parallelizes requests to the OpenAI API while throttling to stay under rate limits. + +Features: +- Streams requests from file, to avoid running out of memory for giant jobs +- Makes requests concurrently, to maximize throughput +- Throttles request and token usage, to stay under rate limits +- Retries failed requests up to {max_attempts} times, to avoid missing data +- Logs errors, to diagnose problems with requests + +Example command to call script: +``` +python examples/api_request_parallel_processor.py \ + --requests_filepath examples/data/example_requests_to_parallel_process.jsonl \ + --save_filepath examples/data/example_requests_to_parallel_process_results.jsonl \ + --request_url https://api.openai.com/v1/embeddings \ + --max_requests_per_minute 1500 \ + --max_tokens_per_minute 6250000 \ + --token_encoding_name cl100k_base \ + --max_attempts 5 \ + --logging_level 20 +``` + +Inputs: +- requests_filepath : str + - path to the file containing the requests to be processed + - file should be a jsonl file, where each line is a json object with API parameters and an optional metadata field + - e.g., {"model": "text-embedding-ada-002", "input": "embed me", "metadata": {"row_id": 1}} + - as with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically) + - an example file is provided at examples/data/example_requests_to_parallel_process.jsonl + - the code to generate the example file is appended to the bottom of this script +- save_filepath : str, optional + - path to the file where the results will be saved + - file will be a jsonl file, where each line is an array with the original request plus the API response + - e.g., [{"model": "text-embedding-ada-002", "input": "embed me"}, {...}] + - if omitted, results will be saved to {requests_filename}_results.jsonl +- request_url : str, optional + - URL of the API endpoint to call + - if omitted, will default to "https://api.openai.com/v1/embeddings" +- api_key : str, optional + - API key to use + - if omitted, the script will attempt to read it from an environment variable {os.getenv("OPENAI_API_KEY")} +- max_requests_per_minute : float, optional + - target number of requests to make per minute (will make less if limited by tokens) + - leave headroom by setting this to 50% or 75% of your limit + - if requests are limiting you, try batching multiple embeddings or completions into one request + - if omitted, will default to 1,500 +- max_tokens_per_minute : float, optional + - target number of tokens to use per minute (will use less if limited by requests) + - leave 
headroom by setting this to 50% or 75% of your limit + - if omitted, will default to 125,000 +- token_encoding_name : str, optional + - name of the token encoding used, as defined in the `tiktoken` package + - if omitted, will default to "cl100k_base" (used by `text-embedding-ada-002`) +- max_attempts : int, optional + - number of times to retry a failed request before giving up + - if omitted, will default to 5 +- logging_level : int, optional + - level of logging to use; higher numbers will log fewer messages + - 40 = ERROR; will log only when requests fail after all retries + - 30 = WARNING; will log when requests his rate limits or other errors + - 20 = INFO; will log when requests start and the status at finish + - 10 = DEBUG; will log various things as the loop runs to see when they occur + - if omitted, will default to 20 (INFO). + +The script is structured as follows: + - Imports + - Define main() + - Initialize things + - In main loop: + - Get next request if one is not already waiting for capacity + - Update available token & request capacity + - If enough capacity available, call API + - The loop pauses if a rate limit error is hit + - The loop breaks when no tasks remain + - Define dataclasses + - StatusTracker (stores script metadata counters; only one instance is created) + - APIRequest (stores API inputs, outputs, metadata; one method to call API) + - Define functions + - api_endpoint_from_url (extracts API endpoint from request URL) + - append_to_jsonl (writes to results file) + - num_tokens_consumed_from_request (bigger function to infer token usage from request) + - task_id_generator_function (yields 1, 2, 3, ...) + - Run main() +""" + +# import argparse +# import subprocess +# import tempfile +# from langchain.llms import OpenAI +import asyncio +import json +import logging +import os +import re +import time +from dataclasses import ( # for storing API inputs, outputs, and metadata + dataclass, field) +from typing import Any, List + +import aiohttp # for making API calls concurrently +import tiktoken # for counting tokens +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.vectorstores import Qdrant +from qdrant_client import QdrantClient, models + + +class OpenAIAPIProcessor: + + def __init__(self, input_prompts_list, request_url, api_key, max_requests_per_minute, max_tokens_per_minute, token_encoding_name, + max_attempts, logging_level): + self.request_url = request_url + self.api_key = api_key + self.max_requests_per_minute = max_requests_per_minute + self.max_tokens_per_minute = max_tokens_per_minute + self.token_encoding_name = token_encoding_name + self.max_attempts = max_attempts + self.logging_level = logging_level + self.input_prompts_list: List[dict] = input_prompts_list + self.results = [] + self.cleaned_results: List[str] = [] + + async def process_api_requests_from_file(self): + """Processes API requests in parallel, throttling to stay under rate limits.""" + # constants + seconds_to_pause_after_rate_limit_error = 15 + seconds_to_sleep_each_loop = 0.001 # 1 ms limits max throughput to 1,000 requests per second + + # initialize logging + logging.basicConfig(level=self.logging_level) + logging.debug(f"Logging initialized at level {self.logging_level}") + + # infer API endpoint and construct request header + api_endpoint = api_endpoint_from_url(self.request_url) + request_header = {"Authorization": f"Bearer {self.api_key}"} + + # initialize trackers + queue_of_requests_to_retry = asyncio.Queue() + task_id_generator = 
task_id_generator_function() # generates integer IDs of 1, 2, 3, ... + status_tracker = StatusTracker() # single instance to track a collection of variables + next_request = None # variable to hold the next request to call + + # initialize available capacity counts + available_request_capacity = self.max_requests_per_minute + available_token_capacity = self.max_tokens_per_minute + last_update_time = time.time() + + # initialize flags + file_not_finished = True # after file is empty, we'll skip reading it + logging.debug(f"Initialization complete.") + + requests = self.input_prompts_list.__iter__() + + logging.debug(f"File opened. Entering main loop") + + task_list = [] + + while True: + # get next request (if one is not already waiting for capacity) + if next_request is None: + if not queue_of_requests_to_retry.empty(): + next_request = queue_of_requests_to_retry.get_nowait() + logging.debug(f"Retrying request {next_request.task_id}: {next_request}") + elif file_not_finished: + try: + # get new request + # request_json = json.loads(next(requests)) + request_json = next(requests) + + next_request = APIRequest(task_id=next(task_id_generator), + request_json=request_json, + token_consumption=num_tokens_consumed_from_request(request_json, api_endpoint, + self.token_encoding_name), + attempts_left=self.max_attempts, + metadata=request_json.pop("metadata", None)) + status_tracker.num_tasks_started += 1 + status_tracker.num_tasks_in_progress += 1 + logging.debug(f"Reading request {next_request.task_id}: {next_request}") + except StopIteration: + # if file runs out, set flag to stop reading it + logging.debug("Read file exhausted") + file_not_finished = False + + # update available capacity + current_time = time.time() + seconds_since_update = current_time - last_update_time + available_request_capacity = min( + available_request_capacity + self.max_requests_per_minute * seconds_since_update / 60.0, + self.max_requests_per_minute, + ) + available_token_capacity = min( + available_token_capacity + self.max_tokens_per_minute * seconds_since_update / 60.0, + self.max_tokens_per_minute, + ) + last_update_time = current_time + + # if enough capacity available, call API + if next_request: + next_request_tokens = next_request.token_consumption + if (available_request_capacity >= 1 and available_token_capacity >= next_request_tokens): + # update counters + available_request_capacity -= 1 + available_token_capacity -= next_request_tokens + next_request.attempts_left -= 1 + + # call API + # TODO: NOT SURE RESPONSE WILL WORK HERE + task = asyncio.create_task( + next_request.call_api( + request_url=self.request_url, + request_header=request_header, + retry_queue=queue_of_requests_to_retry, + status_tracker=status_tracker, + )) + task_list.append(task) + next_request = None # reset next_request to empty + + # print("status_tracker.num_tasks_in_progress", status_tracker.num_tasks_in_progress) + # one_task_result = task.result() + # print("one_task_result", one_task_result) + + # if all tasks are finished, break + if status_tracker.num_tasks_in_progress == 0: + break + + # main loop sleeps briefly so concurrent tasks can run + await asyncio.sleep(seconds_to_sleep_each_loop) + + # if a rate limit error was hit recently, pause to cool down + seconds_since_rate_limit_error = (time.time() - status_tracker.time_of_last_rate_limit_error) + if seconds_since_rate_limit_error < seconds_to_pause_after_rate_limit_error: + remaining_seconds_to_pause = (seconds_to_pause_after_rate_limit_error - 
seconds_since_rate_limit_error) + await asyncio.sleep(remaining_seconds_to_pause) + # ^e.g., if pause is 15 seconds and final limit was hit 5 seconds ago + logging.warn( + f"Pausing to cool down until {time.ctime(status_tracker.time_of_last_rate_limit_error + seconds_to_pause_after_rate_limit_error)}" + ) + + # after finishing, log final status + logging.info(f"""Parallel processing complete. About to return.""") + if status_tracker.num_tasks_failed > 0: + logging.warning(f"{status_tracker.num_tasks_failed} / {status_tracker.num_tasks_started} requests failed.") + if status_tracker.num_rate_limit_errors > 0: + logging.warning(f"{status_tracker.num_rate_limit_errors} rate limit errors received. Consider running at a lower rate.") + + # asyncio wait for task_list + await asyncio.wait(task_list) + + for task in task_list: + openai_completion = task.result() + self.results.append(openai_completion) + + self.cleaned_results: List[str] = extract_context_from_results(self.results) + + +def extract_context_from_results(results: List[Any]) -> List[str]: + assistant_contents = [] + total_prompt_tokens = 0 + total_completion_tokens = 0 + + for element in results: + if element is not None: + for item in element: + if 'choices' in item: + for choice in item['choices']: + if choice['message']['role'] == 'assistant': + assistant_contents.append(choice['message']['content']) + total_prompt_tokens += item['usage']['prompt_tokens'] + total_completion_tokens += item['usage']['completion_tokens'] + # Note: I don't think the prompt_tokens or completion_tokens is working quite right... + + return assistant_contents + + +# dataclasses + + +@dataclass +class StatusTracker: + """Stores metadata about the script's progress. Only one instance is created.""" + + num_tasks_started: int = 0 + num_tasks_in_progress: int = 0 # script ends when this reaches 0 + num_tasks_succeeded: int = 0 + num_tasks_failed: int = 0 + num_rate_limit_errors: int = 0 + num_api_errors: int = 0 # excluding rate limit errors, counted above + num_other_errors: int = 0 + time_of_last_rate_limit_error: float = 0 # used to cool off after hitting rate limits + + +@dataclass +class APIRequest: + """Stores an API request's inputs, outputs, and other metadata. 
Contains a method to make an API call.""" + + task_id: int + request_json: dict + token_consumption: int + attempts_left: int + metadata: dict + result: list = field(default_factory=list) + + async def call_api( + self, + request_url: str, + request_header: dict, + retry_queue: asyncio.Queue, + status_tracker: StatusTracker, + ): + """Calls the OpenAI API and saves results.""" + # logging.info(f"Starting request #{self.task_id}") + error = None + try: + async with aiohttp.ClientSession() as session: + async with session.post(url=request_url, headers=request_header, json=self.request_json) as response: + response = await response.json() + if "error" in response: + logging.warning(f"Request {self.task_id} failed with error {response['error']}") + status_tracker.num_api_errors += 1 + error = response + if "Rate limit" in response["error"].get("message", ""): + status_tracker.time_of_last_rate_limit_error = time.time() + status_tracker.num_rate_limit_errors += 1 + status_tracker.num_api_errors -= 1 # rate limit errors are counted separately + + except Exception as e: # catching naked exceptions is bad practice, but in this case we'll log & save them + logging.warning(f"Request {self.task_id} failed with Exception {e}") + status_tracker.num_other_errors += 1 + error = e + if error: + self.result.append(error) + if self.attempts_left: + retry_queue.put_nowait(self) + else: + logging.error(f"Request {self.request_json} failed after all attempts. Saving errors: {self.result}") + data = ([self.request_json, [str(e) for e in self.result], self.metadata] + if self.metadata else [self.request_json, [str(e) for e in self.result]]) + #append_to_jsonl(data, save_filepath) + status_tracker.num_tasks_in_progress -= 1 + status_tracker.num_tasks_failed += 1 + return data + else: + data = ([self.request_json, response, self.metadata] if self.metadata else [self.request_json, response]) # type: ignore + #append_to_jsonl(data, save_filepath) + status_tracker.num_tasks_in_progress -= 1 + status_tracker.num_tasks_succeeded += 1 + # logging.debug(f"Request {self.task_id} saved to {save_filepath}") + + return data + + +# functions + + +def api_endpoint_from_url(request_url: str): + """Extract the API endpoint from the request URL.""" + match = re.search('^https://[^/]+/v\\d+/(.+)$', request_url) + return match[1] # type: ignore + + +def append_to_jsonl(data, filename: str) -> None: + """Append a json payload to the end of a jsonl file.""" + json_string = json.dumps(data) + with open(filename, "a") as f: + f.write(json_string + "\n") + + +def num_tokens_consumed_from_request( + request_json: dict, + api_endpoint: str, + token_encoding_name: str, +): + """Count the number of tokens in the request. 
Only supports completion and embedding requests.""" + encoding = tiktoken.get_encoding(token_encoding_name) + # if completions request, tokens = prompt + n * max_tokens + if api_endpoint.endswith("completions"): + max_tokens = request_json.get("max_tokens", 15) + n = request_json.get("n", 1) + completion_tokens = n * max_tokens + + # chat completions + if api_endpoint.startswith("chat/"): + num_tokens = 0 + for message in request_json["messages"]: + num_tokens += 4 # every message follows {role/name}\n{content}\n + for key, value in message.items(): + num_tokens += len(encoding.encode(value)) + if key == "name": # if there's a name, the role is omitted + num_tokens -= 1 # role is always required and always 1 token + num_tokens += 2 # every reply is primed with assistant + return num_tokens + completion_tokens + # normal completions + else: + prompt = request_json["prompt"] + if isinstance(prompt, str): # single prompt + prompt_tokens = len(encoding.encode(prompt)) + num_tokens = prompt_tokens + completion_tokens + return num_tokens + elif isinstance(prompt, list): # multiple prompts + prompt_tokens = sum([len(encoding.encode(p)) for p in prompt]) + num_tokens = prompt_tokens + completion_tokens * len(prompt) + return num_tokens + else: + raise TypeError('Expecting either string or list of strings for "prompt" field in completion request') + # if embeddings request, tokens = input tokens + elif api_endpoint == "embeddings": + input = request_json["input"] + if isinstance(input, str): # single input + num_tokens = len(encoding.encode(input)) + return num_tokens + elif isinstance(input, list): # multiple inputs + num_tokens = sum([len(encoding.encode(i)) for i in input]) + return num_tokens + else: + raise TypeError('Expecting either string or list of strings for "inputs" field in embedding request') + # more logic needed to support other API calls (e.g., edits, inserts, DALL-E) + else: + raise NotImplementedError(f'API endpoint "{api_endpoint}" not implemented in this script') + + +def task_id_generator_function(): + """Generate integers 0, 1, 2, and so on.""" + task_id = 0 + while True: + yield task_id + task_id += 1 + +if __name__ == '__main__': + pass + +# run script +# if __name__ == "__main__": +# qdrant_client = QdrantClient( +# url=os.getenv('QDRANT_URL'), +# api_key=os.getenv('QDRANT_API_KEY'), +# ) +# vectorstore = Qdrant( +# client=qdrant_client, +# collection_name=os.getenv('QDRANT_COLLECTION_NAME'), # type: ignore +# embeddings=OpenAIEmbeddings()) # type: ignore + +# user_question = "What is the significance of Six Sigma?" +# k = 4 +# fetch_k = 200 +# found_docs = vectorstore.max_marginal_relevance_search(user_question, k=k, fetch_k=200) + +# requests = [] +# for i, doc in enumerate(found_docs): +# dictionary = { +# "model": "gpt-3.5-turbo-0613", # 4k context +# "messages": [{ +# "role": "system", +# "content": "You are a factual summarizer of partial documents. Stick to the facts (including partial info when necessary to avoid making up potentially incorrect details), and say I don't know when necessary." +# }, { +# "role": +# "user", +# "content": +# f"What is a comprehensive summary of the given text, based on the question:\n{doc.page_content}\nQuestion: {user_question}\nThe summary should cover all the key points only relevant to the question, while also condensing the information into a concise and easy-to-understand format. 
Please ensure that the summary includes relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. Feel free to include references, sentence fragments, keywords, or anything that could help someone learn about it, only as it relates to the given question. The length of the summary should be as short as possible, without losing relevant information.\n" +# }], +# "n": 1, +# "max_tokens": 500, +# "metadata": doc.metadata +# } +# requests.append(dictionary) + +# oai = OpenAIAPIProcessor( +# input_prompts_list=requests, +# request_url='https://api.openai.com/v1/chat/completions', +# api_key=os.getenv("OPENAI_API_KEY"), +# max_requests_per_minute=1500, +# max_tokens_per_minute=90000, +# token_encoding_name='cl100k_base', +# max_attempts=5, +# logging_level=20, +# ) +# # run script +# asyncio.run(oai.process_api_requests_from_file()) + +# assistant_contents = [] +# total_prompt_tokens = 0 +# total_completion_tokens = 0 + +# print("Results, end of main: ", oai.results) +# print("-"*50) + +# # jsonObject = json.loads(oai.results) +# for element in oai.results: +# for item in element: +# if 'choices' in item: +# for choice in item['choices']: +# if choice['message']['role'] == 'assistant': +# assistant_contents.append(choice['message']['content']) +# total_prompt_tokens += item['usage']['prompt_tokens'] +# total_completion_tokens += item['usage']['completion_tokens'] + +# print("Assistant Contents:", assistant_contents) +# print("Total Prompt Tokens:", total_prompt_tokens) +# print("Total Completion Tokens:", total_completion_tokens) +# turbo_total_cost = (total_prompt_tokens * 0.0015) + (total_completion_tokens * 0.002) +# print("Total cost (3.5-turbo):", (total_prompt_tokens * 0.0015), " + Completions: ", (total_completion_tokens * 0.002), " = ", turbo_total_cost) + +# gpt4_total_cost = (total_prompt_tokens * 0.03) + (total_completion_tokens * 0.06) +# print("Hypothetical cost for GPT-4:", (total_prompt_tokens * 0.03), " + Completions: ", (total_completion_tokens * 0.06), " = ", gpt4_total_cost) +# print("GPT-4 cost premium: ", (gpt4_total_cost / turbo_total_cost), "x") + ''' + Pricing: + GPT4: + * $0.03 prompt + * $0.06 completions + 3.5-turbo: + * $0.0015 prompt + * $0.002 completions + ''' +""" +APPENDIX + +The example requests file at openai-cookbook/examples/data/example_requests_to_parallel_process.jsonl contains 10,000 requests to text-embedding-ada-002. + +It was generated with the following code: + +```python +import json + +filename = "data/example_requests_to_parallel_process.jsonl" +n_requests = 10_000 +jobs = [{"model": "text-embedding-ada-002", "input": str(x) + "\n"} for x in range(n_requests)] +with open(filename, "w") as f: + for job in jobs: + json_string = json.dumps(job) + f.write(json_string + "\n") +``` + +As with all jsonl files, take care that newlines in the content are properly escaped (json.dumps does this automatically). +""" diff --git a/ai_ta_backend/utils_tokenization.py b/ai_ta_backend/utils_tokenization.py index 5b000e5f..096e2bb6 100644 --- a/ai_ta_backend/utils_tokenization.py +++ b/ai_ta_backend/utils_tokenization.py @@ -1,136 +1,136 @@ -import json -import os -from typing import Any, List - -import supabase -import tiktoken - - -def count_tokens_and_cost(prompt: str, completion: str = '', openai_model_name: str = "gpt-3.5-turbo"): # -> tuple[int, float] | tuple[int, float, int, float]: - """ - Returns the number of tokens in a text string. 
- - Only the first parameter is required, a string of text to measure. The completion and model name are optional. - - num_tokens, prompt_cost = count_tokens_and_cost(prompt="hello there") - num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost = count_tokens_and_cost(prompt="hello there", completion="how are you?") - - Args: - prompt (str): _description_ - completion (str, optional): _description_. Defaults to ''. - openai_model_name (str, optional): _description_. Defaults to "gpt-3.5-turbo". - - Returns: - tuple[int, float] | tuple[int, float, int, float]: Returns the number of tokens consumed and the cost. The total cost you'll be billed is the sum of each individual cost (prompt_cost + completion_cost) - """ - # encoding = tiktoken.encoding_for_model(openai_model_name) - openai_model_name = openai_model_name.lower() - encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # I think they all use the same encoding - prompt_cost = 0 - completion_cost = 0 - - prompt_token_cost = 0 - completion_token_cost = 0 - - if openai_model_name.startswith("gpt-3.5-turbo"): - if "16k" in openai_model_name: - prompt_token_cost: float = 0.003 / 1_000 - completion_token_cost: float = 0.004 / 1_000 - else: - # 3.5-turbo regular (4k context) - prompt_token_cost: float = 0.0015 / 1_000 - completion_token_cost: float = 0.002 / 1_000 - - elif openai_model_name.startswith("gpt-4"): - if "32k" in openai_model_name: - prompt_token_cost = 0.06 / 1_000 - completion_token_cost = 0.12 / 1_000 - else: - # gpt-4 regular (8k context) - prompt_token_cost = 0.03 / 1_000 - completion_token_cost = 0.06 / 1_000 - elif openai_model_name.startswith("text-embedding-ada-002"): - prompt_token_cost = 0.0001 / 1_000 - completion_token_cost = 0.0001 / 1_000 - else: - # no idea of cost - print(f"NO IDEA OF COST, pricing not supported for model model: `{openai_model_name}`") - prompt_token_cost = 0 - completion_token_cost = 0 - - if completion == '': - num_tokens_prompt: int = len(encoding.encode(prompt)) - prompt_cost = float(prompt_token_cost * num_tokens_prompt) - return num_tokens_prompt, prompt_cost - elif prompt == '': - num_tokens_completion: int = len(encoding.encode(completion)) - completion_cost = float(completion_token_cost * num_tokens_completion) - return num_tokens_completion, completion_cost - else: - num_tokens_prompt: int = len(encoding.encode(prompt)) - num_tokens_completion: int = len(encoding.encode(completion)) - prompt_cost = float(prompt_token_cost * num_tokens_prompt) - completion_cost = float(completion_token_cost * num_tokens_completion) - return num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost - -# from dotenv import load_dotenv - -# load_dotenv() - -def analyze_conversations(supabase_client: Any = None): - - if supabase_client is None: - supabase_client = supabase.create_client( # type: ignore - supabase_url=os.getenv('SUPABASE_URL'), # type: ignore - supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore - # Get all conversations - response = supabase_client.table('llm-convo-monitor').select('convo').execute() - # print("total entries", response.data.count) - - total_convos = 0 - total_messages = 0 - total_prompt_cost = 0 - total_completion_cost = 0 - - # Iterate through all conversations - # for convo in response['data']: - for convo in response.data: - total_convos += 1 - # print(convo) - # prase json from convo - # parse json into dict - # print(type(convo)) - # convo = json.loads(convo) - convo = convo['convo'] - messages = convo['messages'] - model_name = 
convo['model']['name'] - - # Iterate through all messages in each conversation - for message in messages: - total_messages += 1 - role = message['role'] - content = message['content'] - - # If the message is from the user, it's a prompt - # TODO: Fix these - # WARNING: Fix these error messages... they are the sign of a logic bug. - if role == 'user': - num_tokens, cost = count_tokens_and_cost(prompt=content, openai_model_name=model_name) - total_prompt_cost += cost - print(f'User Prompt: {content}, Tokens: {num_tokens}, cost: {cost}') - - # If the message is from the assistant, it's a completion - elif role == 'assistant': - num_tokens_completion, cost_completion = count_tokens_and_cost(prompt='', completion=content, openai_model_name=model_name) - total_completion_cost += cost_completion - print(f'Assistant Completion: {content}\nTokens: {num_tokens_completion}, cost: {cost_completion}') - return total_convos, total_messages, total_prompt_cost, total_completion_cost - -if __name__ == '__main__': - pass - -# if __name__ == '__main__': -# print('starting main') -# total_convos, total_messages, total_prompt_cost, total_completion_cost = analyze_conversations() -# print(f'total_convos: {total_convos}, total_messages: {total_messages}') +import json +import os +from typing import Any, List + +import supabase +import tiktoken + + +def count_tokens_and_cost(prompt: str, completion: str = '', openai_model_name: str = "gpt-3.5-turbo"): # -> tuple[int, float] | tuple[int, float, int, float]: + """ + Returns the number of tokens in a text string. + + Only the first parameter is required, a string of text to measure. The completion and model name are optional. + + num_tokens, prompt_cost = count_tokens_and_cost(prompt="hello there") + num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost = count_tokens_and_cost(prompt="hello there", completion="how are you?") + + Args: + prompt (str): _description_ + completion (str, optional): _description_. Defaults to ''. + openai_model_name (str, optional): _description_. Defaults to "gpt-3.5-turbo". + + Returns: + tuple[int, float] | tuple[int, float, int, float]: Returns the number of tokens consumed and the cost. 
The total cost you'll be billed is the sum of each individual cost (prompt_cost + completion_cost) + """ + # encoding = tiktoken.encoding_for_model(openai_model_name) + openai_model_name = openai_model_name.lower() + encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # I think they all use the same encoding + prompt_cost = 0 + completion_cost = 0 + + prompt_token_cost = 0 + completion_token_cost = 0 + + if openai_model_name.startswith("gpt-3.5-turbo"): + if "16k" in openai_model_name: + prompt_token_cost: float = 0.003 / 1_000 + completion_token_cost: float = 0.004 / 1_000 + else: + # 3.5-turbo regular (4k context) + prompt_token_cost: float = 0.0015 / 1_000 + completion_token_cost: float = 0.002 / 1_000 + + elif openai_model_name.startswith("gpt-4"): + if "32k" in openai_model_name: + prompt_token_cost = 0.06 / 1_000 + completion_token_cost = 0.12 / 1_000 + else: + # gpt-4 regular (8k context) + prompt_token_cost = 0.03 / 1_000 + completion_token_cost = 0.06 / 1_000 + elif openai_model_name.startswith("text-embedding-ada-002"): + prompt_token_cost = 0.0001 / 1_000 + completion_token_cost = 0.0001 / 1_000 + else: + # no idea of cost + print(f"NO IDEA OF COST, pricing not supported for model model: `{openai_model_name}`") + prompt_token_cost = 0 + completion_token_cost = 0 + + if completion == '': + num_tokens_prompt: int = len(encoding.encode(prompt)) + prompt_cost = float(prompt_token_cost * num_tokens_prompt) + return num_tokens_prompt, prompt_cost + elif prompt == '': + num_tokens_completion: int = len(encoding.encode(completion)) + completion_cost = float(completion_token_cost * num_tokens_completion) + return num_tokens_completion, completion_cost + else: + num_tokens_prompt: int = len(encoding.encode(prompt)) + num_tokens_completion: int = len(encoding.encode(completion)) + prompt_cost = float(prompt_token_cost * num_tokens_prompt) + completion_cost = float(completion_token_cost * num_tokens_completion) + return num_tokens_prompt, prompt_cost, num_tokens_completion, completion_cost + +# from dotenv import load_dotenv + +# load_dotenv() + +def analyze_conversations(supabase_client: Any = None): + + if supabase_client is None: + supabase_client = supabase.create_client( # type: ignore + supabase_url=os.getenv('SUPABASE_URL'), # type: ignore + supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore + # Get all conversations + response = supabase_client.table('llm-convo-monitor').select('convo').execute() + # print("total entries", response.data.count) + + total_convos = 0 + total_messages = 0 + total_prompt_cost = 0 + total_completion_cost = 0 + + # Iterate through all conversations + # for convo in response['data']: + for convo in response.data: + total_convos += 1 + # print(convo) + # prase json from convo + # parse json into dict + # print(type(convo)) + # convo = json.loads(convo) + convo = convo['convo'] + messages = convo['messages'] + model_name = convo['model']['name'] + + # Iterate through all messages in each conversation + for message in messages: + total_messages += 1 + role = message['role'] + content = message['content'] + + # If the message is from the user, it's a prompt + # TODO: Fix these + # WARNING: Fix these error messages... they are the sign of a logic bug. 
+ if role == 'user': + num_tokens, cost = count_tokens_and_cost(prompt=content, openai_model_name=model_name) + total_prompt_cost += cost + print(f'User Prompt: {content}, Tokens: {num_tokens}, cost: {cost}') + + # If the message is from the assistant, it's a completion + elif role == 'assistant': + num_tokens_completion, cost_completion = count_tokens_and_cost(prompt='', completion=content, openai_model_name=model_name) + total_completion_cost += cost_completion + print(f'Assistant Completion: {content}\nTokens: {num_tokens_completion}, cost: {cost_completion}') + return total_convos, total_messages, total_prompt_cost, total_completion_cost + +if __name__ == '__main__': + pass + +# if __name__ == '__main__': +# print('starting main') +# total_convos, total_messages, total_prompt_cost, total_completion_cost = analyze_conversations() +# print(f'total_convos: {total_convos}, total_messages: {total_messages}') # print(f'total_prompt_cost: {total_prompt_cost}, total_completion_cost: {total_completion_cost}') \ No newline at end of file diff --git a/ai_ta_backend/web_scrape.py b/ai_ta_backend/web_scrape.py index f77d695a..36158db9 100644 --- a/ai_ta_backend/web_scrape.py +++ b/ai_ta_backend/web_scrape.py @@ -1,467 +1,467 @@ -import os -import re -import shutil -import time -from tempfile import NamedTemporaryFile -from zipfile import ZipFile - -import boto3 # type: ignore -import requests -from bs4 import BeautifulSoup - -import supabase - -from ai_ta_backend.aws import upload_data_files_to_s3 -from ai_ta_backend.vector_database import Ingest -import mimetypes - -def get_file_extension(filename): - match = re.search(r'\.([a-zA-Z0-9]+)$', filename) - valid_filetypes = list(mimetypes.types_map.keys()) - valid_filetypes = valid_filetypes + ['.html', '.py', '.vtt', '.pdf', '.txt', '.srt', '.docx', '.ppt', '.pptx'] - if match: - filetype = "." + match.group(1) - if filetype in valid_filetypes: - return filetype - else: - return '.html' - else: - return '.html' - -def valid_url(url): - '''Returns the URL and it's content if it's good, otherwise returns false. 
Prints the status code.''' - try: - response = requests.get(url, allow_redirects=True, timeout=20) - - redirect_loop_counter = 0 - while response.status_code == 301: - # Check for permanent redirect - if redirect_loop_counter > 3: - print("❌ Redirect loop (on 301 error) exceeded redirect limit of:", redirect_loop_counter, "❌") - return False - redirect_url = response.headers['Location'] - response = requests.head(redirect_url) - redirect_loop_counter += 1 - if response.status_code == 200: - filetype = get_file_extension(response.url) - print("file extension:", filetype) - if filetype == '.html': - content = BeautifulSoup(response.content, "html.parser") - if " len(urls): - max_urls = max_urls - len(urls) - elif max_urls < len(urls): - urls = urls[:max_urls] - max_urls = 0 - else: - max_urls = 0 - # We grab content out of these urls - - for url in urls: - if base_url_on: - if url.startswith(site): - url, s, filetype = valid_url(url) - if url: - print("Scraped:", url) - url_contents.append((url, s, filetype)) - else: - _invalid_urls.append(url) - else: - pass - else: - url, s, filetype = valid_url(url) - if url: - print("Scraped:", url) - url_contents.append((url, s, filetype)) - else: - _invalid_urls.append(url) - print("existing urls", _existing_urls) - url_contents = remove_duplicates(url_contents, _existing_urls) - max_urls = max_urls - len(url_contents) - print(max_urls, "urls left") - - # recursively go through crawler until we reach the max amount of urls. - for url in url_contents: - if url[0] not in _invalid_urls: - if max_urls > 0: - if _depth < max_depth: - temp_data = crawler(url[0], max_urls, max_depth, timeout, _invalid_urls, _depth, url[1], url[2]) - print("existing urls", _existing_urls) - temp_data = remove_duplicates(temp_data, _existing_urls) - max_urls = max_urls - len(temp_data) - print(max_urls, "urls left") - url_contents.extend(temp_data) - url_contents = remove_duplicates(url_contents, _existing_urls) - else: - print("Depth exceeded:", _depth+1, "out of", max_depth) - break - else: - break - else: - pass - - if _depth == 0: - if len(url_contents) < amount: - print("Max URLS not reached, returning all urls found:", len(url_contents), "out of", amount) - elif len(url_contents) == amount: - print("Max URLS reached:", len(url_contents), "out of", amount) - else: - print("Exceeded Max URLS, found:", len(url_contents), "out of", amount) - print(len(url_contents), "urls found") - - # Free up memory - # del url_contents[:] - # del urls[:] - # if _invalid_urls is not None: - # del _invalid_urls[:] - # if _existing_urls is not None: - # del _existing_urls[:] - # gc.collect() - - return url_contents - -def main_crawler(url:str, course_name:str, max_urls:int=100, max_depth:int=3, timeout:int=1, stay_on_baseurl:bool=False): - """ - Crawl a site and scrape its content and PDFs, then upload the data to S3 and ingest it. - - Args: - url (str): The URL of the site to crawl. - course_name (str): The name of the course to associate with the crawled data. - max_urls (int, optional): The maximum number of URLs to crawl. Defaults to 100. - max_depth (int, optional): The maximum depth of URLs to crawl. Defaults to 3. - timeout (int, optional): The number of seconds to wait between requests. Defaults to 1. 
- - Returns: - None - """ - print("\n") - max_urls = int(max_urls) - max_depth = int(max_depth) - timeout = int(timeout) - stay_on_baseurl = bool(stay_on_baseurl) - if stay_on_baseurl: - stay_on_baseurl = base_url(url) - print(stay_on_baseurl) - - ingester = Ingest() - s3_client = boto3.client( - 's3', - aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'), - aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'), - ) - - # Check for GitHub repository coming soon - if url.startswith("https://github.com/"): - print("Begin Ingesting GitHub page") - results = ingester.ingest_github(url, course_name) - print("Finished ingesting GitHub page") - del ingester - return results - else: - try: - print("Gathering existing urls from Supabase") - supabase_client = supabase.create_client( # type: ignore - supabase_url=os.getenv('SUPABASE_URL'), # type: ignore - supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore - urls = supabase_client.table(os.getenv('NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE')).select('course_name, url, contexts').eq('course_name', course_name).execute() - del supabase_client - if urls.data == []: - existing_urls = None - else: - existing_urls = [] - for thing in urls.data: - whole = '' - for t in thing['contexts']: - whole += t['text'] - existing_urls.append((thing['url'], whole)) - print("Finished gathering existing urls from Supabase") - except Exception as e: - print("Error:", e) - print("Could not gather existing urls from Supabase") - existing_urls = None - - print("Begin Ingesting Web page") - data = crawler(url=url, max_urls=max_urls, max_depth=max_depth, timeout=timeout, base_url_on=stay_on_baseurl, _existing_urls=existing_urls) - - # Clean some keys for a proper file name - # todo: have a default title - # titles = [value[1][1].title.string for value in data] - - titles = [] - for value in data: - try: - titles.append(value[1].title.string) - except AttributeError as e: - # if no title - try: - placeholder_title = re.findall(pattern=r'[a-zA-Z0-9.]*[a-z]', string=value[0])[1] - except Exception as e: - placeholder_title = "Title Not Found" - titles.append(placeholder_title) - print(f"URL is missing a title, using this title instead: {placeholder_title}") - - try: - clean = [re.match(r"[a-zA-Z0-9\s]*", title).group(0) for title in titles] # type: ignore - except Exception as e: - print("Error:", e) - clean = titles - print("title names after regex before cleaning", clean) - path_name = [] - counter = 0 - for value in clean: - value = value.strip() if value else "" - # value = value.strip() - value = value.replace(" ", "_") - if value == "403_Forbidden": - print("Found Forbidden Key, deleting data") - del data[counter] - counter -= 1 - else: - path_name.append(value) - counter += 1 - print("Cleaned title names", path_name) - - # Upload each html to S3 - print("Uploading files to S3") - paths = [] - counter = 0 - try: - for i, key in enumerate(data): - with NamedTemporaryFile(suffix=key[2]) as temp_file: - if key[1] != "" or key[1] != None: - if key[2] == ".html": - print("Writing", key[2] ,"to temp file") - temp_file.write(key[1].encode('utf-8')) - else: - print("Writing", key[2] ,"to temp file") - temp_file.write(key[1]) - temp_file.seek(0) - s3_upload_path = "courses/"+ course_name + "/" + path_name[i] + key[2] - paths.append(s3_upload_path) - with open(temp_file.name, 'rb') as f: - print("Uploading", key[2] ,"to S3") - s3_client.upload_fileobj(f, os.getenv('S3_BUCKET_NAME'), s3_upload_path) - ingester.bulk_ingest(s3_upload_path, course_name=course_name, url=key[0], 
base_url=url) - counter += 1 - else: - print("No", key[2] ,"to upload", key[1]) - except Exception as e: - print("Error in upload:", e) - finally: - del ingester - - print(f"Successfully uploaded files to s3: {counter}") - print("Finished /web-scrape") - -# Download an MIT course using its url -def mit_course_download(url:str, course_name:str, local_dir:str): - ingester = Ingest() - base = "https://ocw.mit.edu" - if url.endswith("download"): - pass - else: - url = url + "download" - - r = requests.get(url) - soup = BeautifulSoup(r.text,"html.parser") - - zip = '' - for ref in soup.find_all("a"): - if ref.attrs['href'].endswith("zip"): - zip = ref.attrs['href'] - - site = zip - print('site', site) - r = requests.get(url=site, stream=True) - - zip_file = local_dir + ".zip" - - try: - with open(zip_file, 'wb') as fd: - for chunk in r.iter_content(chunk_size=128): - fd.write(chunk) - print("course downloaded!") - except Exception as e: - print("Error:", e, site) - - with ZipFile(zip_file, 'r') as zObject: - zObject.extractall( - path=local_dir) - - shutil.move(local_dir+"/"+"robots.txt", local_dir+"/static_resources") - s3_paths = upload_data_files_to_s3(course_name, local_dir+"/static_resources") - success_fail = ingester.bulk_ingest(s3_paths, course_name) # type: ignore - - shutil.move(zip_file, local_dir) - shutil.rmtree(local_dir) - del ingester - print("Finished Ingest") - return success_fail - -if __name__ == '__main__': - pass +import os +import re +import shutil +import time +from tempfile import NamedTemporaryFile +from zipfile import ZipFile + +import boto3 # type: ignore +import requests +from bs4 import BeautifulSoup + +import supabase + +from ai_ta_backend.aws import upload_data_files_to_s3 +from ai_ta_backend.vector_database import Ingest +import mimetypes + +def get_file_extension(filename): + match = re.search(r'\.([a-zA-Z0-9]+)$', filename) + valid_filetypes = list(mimetypes.types_map.keys()) + valid_filetypes = valid_filetypes + ['.html', '.py', '.vtt', '.pdf', '.txt', '.srt', '.docx', '.ppt', '.pptx'] + if match: + filetype = "." + match.group(1) + if filetype in valid_filetypes: + return filetype + else: + return '.html' + else: + return '.html' + +def valid_url(url): + '''Returns the URL and it's content if it's good, otherwise returns false. 
Prints the status code.''' + try: + response = requests.get(url, allow_redirects=True, timeout=20) + + redirect_loop_counter = 0 + while response.status_code == 301: + # Check for permanent redirect + if redirect_loop_counter > 3: + print("❌ Redirect loop (on 301 error) exceeded redirect limit of:", redirect_loop_counter, "❌") + return False + redirect_url = response.headers['Location'] + response = requests.head(redirect_url) + redirect_loop_counter += 1 + if response.status_code == 200: + filetype = get_file_extension(response.url) + print("file extension:", filetype) + if filetype == '.html': + content = BeautifulSoup(response.content, "html.parser") + if " len(urls): + max_urls = max_urls - len(urls) + elif max_urls < len(urls): + urls = urls[:max_urls] + max_urls = 0 + else: + max_urls = 0 + # We grab content out of these urls + + for url in urls: + if base_url_on: + if url.startswith(site): + url, s, filetype = valid_url(url) + if url: + print("Scraped:", url) + url_contents.append((url, s, filetype)) + else: + _invalid_urls.append(url) + else: + pass + else: + url, s, filetype = valid_url(url) + if url: + print("Scraped:", url) + url_contents.append((url, s, filetype)) + else: + _invalid_urls.append(url) + print("existing urls", _existing_urls) + url_contents = remove_duplicates(url_contents, _existing_urls) + max_urls = max_urls - len(url_contents) + print(max_urls, "urls left") + + # recursively go through crawler until we reach the max amount of urls. + for url in url_contents: + if url[0] not in _invalid_urls: + if max_urls > 0: + if _depth < max_depth: + temp_data = crawler(url[0], max_urls, max_depth, timeout, _invalid_urls, _depth, url[1], url[2]) + print("existing urls", _existing_urls) + temp_data = remove_duplicates(temp_data, _existing_urls) + max_urls = max_urls - len(temp_data) + print(max_urls, "urls left") + url_contents.extend(temp_data) + url_contents = remove_duplicates(url_contents, _existing_urls) + else: + print("Depth exceeded:", _depth+1, "out of", max_depth) + break + else: + break + else: + pass + + if _depth == 0: + if len(url_contents) < amount: + print("Max URLS not reached, returning all urls found:", len(url_contents), "out of", amount) + elif len(url_contents) == amount: + print("Max URLS reached:", len(url_contents), "out of", amount) + else: + print("Exceeded Max URLS, found:", len(url_contents), "out of", amount) + print(len(url_contents), "urls found") + + # Free up memory + # del url_contents[:] + # del urls[:] + # if _invalid_urls is not None: + # del _invalid_urls[:] + # if _existing_urls is not None: + # del _existing_urls[:] + # gc.collect() + + return url_contents + +def main_crawler(url:str, course_name:str, max_urls:int=100, max_depth:int=3, timeout:int=1, stay_on_baseurl:bool=False): + """ + Crawl a site and scrape its content and PDFs, then upload the data to S3 and ingest it. + + Args: + url (str): The URL of the site to crawl. + course_name (str): The name of the course to associate with the crawled data. + max_urls (int, optional): The maximum number of URLs to crawl. Defaults to 100. + max_depth (int, optional): The maximum depth of URLs to crawl. Defaults to 3. + timeout (int, optional): The number of seconds to wait between requests. Defaults to 1. 
+ + Returns: + None + """ + print("\n") + max_urls = int(max_urls) + max_depth = int(max_depth) + timeout = int(timeout) + stay_on_baseurl = bool(stay_on_baseurl) + if stay_on_baseurl: + stay_on_baseurl = base_url(url) + print(stay_on_baseurl) + + ingester = Ingest() + s3_client = boto3.client( + 's3', + aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'), + aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'), + ) + + # Check for GitHub repository coming soon + if url.startswith("https://github.com/"): + print("Begin Ingesting GitHub page") + results = ingester.ingest_github(url, course_name) + print("Finished ingesting GitHub page") + del ingester + return results + else: + try: + print("Gathering existing urls from Supabase") + supabase_client = supabase.create_client( # type: ignore + supabase_url=os.getenv('SUPABASE_URL'), # type: ignore + supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore + urls = supabase_client.table(os.getenv('NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE')).select('course_name, url, contexts').eq('course_name', course_name).execute() + del supabase_client + if urls.data == []: + existing_urls = None + else: + existing_urls = [] + for thing in urls.data: + whole = '' + for t in thing['contexts']: + whole += t['text'] + existing_urls.append((thing['url'], whole)) + print("Finished gathering existing urls from Supabase") + except Exception as e: + print("Error:", e) + print("Could not gather existing urls from Supabase") + existing_urls = None + + print("Begin Ingesting Web page") + data = crawler(url=url, max_urls=max_urls, max_depth=max_depth, timeout=timeout, base_url_on=stay_on_baseurl, _existing_urls=existing_urls) + + # Clean some keys for a proper file name + # todo: have a default title + # titles = [value[1][1].title.string for value in data] + + titles = [] + for value in data: + try: + titles.append(value[1].title.string) + except AttributeError as e: + # if no title + try: + placeholder_title = re.findall(pattern=r'[a-zA-Z0-9.]*[a-z]', string=value[0])[1] + except Exception as e: + placeholder_title = "Title Not Found" + titles.append(placeholder_title) + print(f"URL is missing a title, using this title instead: {placeholder_title}") + + try: + clean = [re.match(r"[a-zA-Z0-9\s]*", title).group(0) for title in titles] # type: ignore + except Exception as e: + print("Error:", e) + clean = titles + print("title names after regex before cleaning", clean) + path_name = [] + counter = 0 + for value in clean: + value = value.strip() if value else "" + # value = value.strip() + value = value.replace(" ", "_") + if value == "403_Forbidden": + print("Found Forbidden Key, deleting data") + del data[counter] + counter -= 1 + else: + path_name.append(value) + counter += 1 + print("Cleaned title names", path_name) + + # Upload each html to S3 + print("Uploading files to S3") + paths = [] + counter = 0 + try: + for i, key in enumerate(data): + with NamedTemporaryFile(suffix=key[2]) as temp_file: + if key[1] != "" or key[1] != None: + if key[2] == ".html": + print("Writing", key[2] ,"to temp file") + temp_file.write(key[1].encode('utf-8')) + else: + print("Writing", key[2] ,"to temp file") + temp_file.write(key[1]) + temp_file.seek(0) + s3_upload_path = "courses/"+ course_name + "/" + path_name[i] + key[2] + paths.append(s3_upload_path) + with open(temp_file.name, 'rb') as f: + print("Uploading", key[2] ,"to S3") + s3_client.upload_fileobj(f, os.getenv('S3_BUCKET_NAME'), s3_upload_path) + ingester.bulk_ingest(s3_upload_path, course_name=course_name, url=key[0], 
base_url=url) + counter += 1 + else: + print("No", key[2] ,"to upload", key[1]) + except Exception as e: + print("Error in upload:", e) + finally: + del ingester + + print(f"Successfully uploaded files to s3: {counter}") + print("Finished /web-scrape") + +# Download an MIT course using its url +def mit_course_download(url:str, course_name:str, local_dir:str): + ingester = Ingest() + base = "https://ocw.mit.edu" + if url.endswith("download"): + pass + else: + url = url + "download" + + r = requests.get(url) + soup = BeautifulSoup(r.text,"html.parser") + + zip = '' + for ref in soup.find_all("a"): + if ref.attrs['href'].endswith("zip"): + zip = ref.attrs['href'] + + site = zip + print('site', site) + r = requests.get(url=site, stream=True) + + zip_file = local_dir + ".zip" + + try: + with open(zip_file, 'wb') as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + print("course downloaded!") + except Exception as e: + print("Error:", e, site) + + with ZipFile(zip_file, 'r') as zObject: + zObject.extractall( + path=local_dir) + + shutil.move(local_dir+"/"+"robots.txt", local_dir+"/static_resources") + s3_paths = upload_data_files_to_s3(course_name, local_dir+"/static_resources") + success_fail = ingester.bulk_ingest(s3_paths, course_name) # type: ignore + + shutil.move(zip_file, local_dir) + shutil.rmtree(local_dir) + del ingester + print("Finished Ingest") + return success_fail + +if __name__ == '__main__': + pass From cf2284927a463fc86a2d2af272ebd4877117decb Mon Sep 17 00:00:00 2001 From: Kastan Day Date: Fri, 15 Sep 2023 15:23:09 -0700 Subject: [PATCH 60/61] cleanup prints, looks good to me --- ai_ta_backend/main.py | 24 +++------------ ai_ta_backend/nomic_logging.py | 54 +++++++++++----------------------- 2 files changed, 21 insertions(+), 57 deletions(-) diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 72aa7990..d64447db 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -1,20 +1,18 @@ import gc -import gc +import json import os import time from typing import List -import json from dotenv import load_dotenv -from flask import Flask, Response, Response, abort, jsonify, request +from flask import Flask, Response, abort, jsonify, request from flask_cors import CORS from flask_executor import Executor from sqlalchemy import JSON +from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic from ai_ta_backend.vector_database import Ingest from ai_ta_backend.web_scrape import main_crawler, mit_course_download -from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic -from flask_executor import Executor app = Flask(__name__) CORS(app) @@ -397,21 +395,9 @@ def nomic_map(): @app.route('/onResponseCompletion', methods=['POST']) def logToNomic(): - course_name: str = request.args.get('course_name', default='', type=str) - conversation: str = request.args.get('conversation', default='', type=str) - print("In /onResponseCompletion") - - # print("print json: ", request.get_json()) data = request.get_json() - print(len(data)) - print(type(data)) - course_name = data['course_name'] conversation = data['conversation'] - - # print("course_name: ", course_name) - # print("conversation: ", conversation) - if course_name == '' or conversation == '': # proper web error "400 Bad request" abort( @@ -419,14 +405,12 @@ def logToNomic(): description= f"Missing one or more required parameters: 'course_name' and 'conversation' must be provided. 
Course name: `{course_name}`, Conversation: `{conversation}`" ) + print(f"In /onResponseCompletion for course: {course_name}") - #conversation_json = json.loads(conversation) - # background execution of tasks!! response = executor.submit(log_convo_to_nomic, course_name, data) response = jsonify({'outcome': 'success'}) response.headers.add('Access-Control-Allow-Origin', '*') - return response diff --git a/ai_ta_backend/nomic_logging.py b/ai_ta_backend/nomic_logging.py index 9e625f05..374313d4 100644 --- a/ai_ta_backend/nomic_logging.py +++ b/ai_ta_backend/nomic_logging.py @@ -1,13 +1,13 @@ +import datetime import os +import time + import nomic -from nomic import atlas -from nomic import AtlasProject -from langchain.embeddings import OpenAIEmbeddings import numpy as np -import time -import datetime import pandas as pd import supabase +from langchain.embeddings import OpenAIEmbeddings +from nomic import AtlasProject, atlas nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app NOMIC_MAP_NAME_PREFIX = 'Conversation Map for ' @@ -22,16 +22,12 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: - if no, add new data point 3. Keep current logic for map doesn't exist - update metadata """ - print("in log_convo_to_nomic()") - - print("conversation: ", conversation) + print(f"in log_convo_to_nomic() for course: {course_name}") messages = conversation['conversation']['messages'] user_email = conversation['conversation']['user_email'] conversation_id = conversation['conversation']['id'] - #print("conversation: ", conversation) - # we have to upload whole conversations # check what the fetched data looks like - pandas df or pyarrow table # check if conversation ID exists in Nomic, if yes fetch all data from it and delete it. @@ -44,33 +40,25 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: try: # fetch project metadata and embbeddings project = AtlasProject(name=project_name, add_datums_if_exists=True) - map_metadata_df = project.maps[1].data.df + map_metadata_df = project.maps[1].data.df # type: ignore map_embeddings_df = project.maps[1].embeddings.latent map_metadata_df['id'] = map_metadata_df['id'].astype(int) last_id = map_metadata_df['id'].max() - print("last_id: ", last_id) if conversation_id in map_metadata_df.values: - print("conversation_id exists") - # store that convo metadata locally prev_data = map_metadata_df[map_metadata_df['conversation_id'] == conversation_id] prev_index = prev_data.index.values[0] - print("prev_index: ", prev_index) embeddings = map_embeddings_df[prev_index - 1].reshape(1, 1536) prev_convo = prev_data['conversation'].values[0] prev_id = prev_data['id'].values[0] - print("prev_id: ", prev_id) created_at = pd.to_datetime(prev_data['created_at'].values[0]).strftime('%Y-%m-%d %H:%M:%S') - print("prev_created_at: ", created_at) - print("before delete") - # delete that convo data point from Nomic - print(project.delete_data([str(prev_id)])) + # delete that convo data point from Nomic, and print result + print("Deleting point from nomic:", project.delete_data([str(prev_id)])) # prep for new point first_message = prev_convo.split("\n")[1].split(": ")[1] - print("first_message: ", first_message) # select the last 2 messages and append new convo to prev convo messages_to_be_logged = messages[-2:] @@ -127,7 +115,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: }] # create embeddings - embeddings_model = OpenAIEmbeddings() + embeddings_model = OpenAIEmbeddings() # type: ignore embeddings = 
embeddings_model.embed_documents(user_queries) # add embeddings to the project @@ -137,7 +125,7 @@ def log_convo_to_nomic(course_name: str, conversation) -> str: except Exception as e: # if project doesn't exist, create it - print(e) + print("ERROR in log_convo_to_nomic():", e) result = create_nomic_map(course_name, conversation) if result is None: print("Nomic map does not exist yet, probably because you have less than 20 queries on your project: ", e) @@ -167,10 +155,6 @@ def get_nomic_map(course_name: str): print(err) return {"map_id": None, "map_link": None} - # Moved this to the logging function to keep our UI fast. - # with project.wait_for_project_lock() as project: - # project.rebuild_maps() - map = project.get_map(project_name) print(f"⏰ Nomic Full Map Retrieval: {(time.monotonic() - start_time):.2f} seconds") @@ -185,7 +169,7 @@ def create_nomic_map(course_name: str, log_data: list): 2. appends current embeddings and metadata to it 2. creates map if there are at least 20 queries """ - print("in create_nomic_map()") + print(f"in create_nomic_map() for {course_name}") # initialize supabase supabase_client = supabase.create_client( # type: ignore supabase_url=os.getenv('SUPABASE_URL'), # type: ignore @@ -206,9 +190,9 @@ def create_nomic_map(course_name: str, log_data: list): conversation_exists = False # current log details - log_messages = log_data['conversation']['messages'] - log_user_email = log_data['conversation']['user_email'] - log_conversation_id = log_data['conversation']['id'] + log_messages = log_data['conversation']['messages'] # type: ignore + log_user_email = log_data['conversation']['user_email'] # type: ignore + log_conversation_id = log_data['conversation']['id'] # type: ignore for index, row in df.iterrows(): user_email = row['user_email'] @@ -220,7 +204,7 @@ def create_nomic_map(course_name: str, log_data: list): # create metadata for multi-turn conversation conversation = "" - if message['role'] == 'user': + if message['role'] == 'user': # type: ignore emoji = "🙋 " else: emoji = "🤖 " @@ -231,7 +215,7 @@ def create_nomic_map(course_name: str, log_data: list): # append current chat to previous chat if convo already exists if convo['id'] == log_conversation_id: conversation_exists = True - if m['role'] == 'user': + if m['role'] == 'user': # type: ignore emoji = "🙋 " else: emoji = "🤖 " @@ -281,16 +265,13 @@ def create_nomic_map(course_name: str, log_data: list): } metadata.append(metadata_row) - print("length of metadata: ", len(metadata)) metadata = pd.DataFrame(metadata) - embeddings_model = OpenAIEmbeddings() # type: ignore embeddings = embeddings_model.embed_documents(user_queries) # create Atlas project project_name = NOMIC_MAP_NAME_PREFIX + course_name index_name = course_name + "_convo_index" - print("project_name: ", project_name) project = atlas.map_embeddings( embeddings=np.array(embeddings), data=metadata, # type: ignore -- this is actually the correc type, the function signature from Nomic is incomplete @@ -300,7 +281,6 @@ def create_nomic_map(course_name: str, log_data: list): name=project_name, colorable_fields=['conversation_id', 'first_query']) project.create_index(index_name, build_topic_model=True) - print("project: ", project) return f"Successfully created Nomic map for {course_name}" From b461282d49390f95fc68c7b3e8b677435050523c Mon Sep 17 00:00:00 2001 From: Kastan Day Date: Fri, 15 Sep 2023 15:26:57 -0700 Subject: [PATCH 61/61] minor cleanup of when logging happens --- ai_ta_backend/main.py | 3 --- 1 file changed, 3 deletions(-) diff --git 
a/ai_ta_backend/main.py b/ai_ta_backend/main.py index d64447db..3a640ed9 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -145,9 +145,6 @@ def getTopContexts() -> Response: found_documents = ingester.getTopContexts(search_query, course_name, token_limit) del ingester - # background execution of tasks!! - #executor.submit(log_query_to_nomic, course_name, search_query) - response = jsonify(found_documents) response.headers.add('Access-Control-Allow-Origin', '*') return response
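
A minimal sketch of exercising the `/onResponseCompletion` route from the last patches, assuming the Flask app is running locally on port 8000; the course name, conversation id, and email below are placeholder values, and the JSON body simply mirrors the fields read by `logToNomic()` and `log_convo_to_nomic()` (a top-level `course_name` plus a `conversation` object carrying `id`, `user_email`, and `messages`):

```python
# Hypothetical client call to the /onResponseCompletion endpoint defined above.
# Assumptions: the backend is running locally on port 8000; the course name,
# conversation id, and email are placeholders, not real values.
import requests

payload = {
    "course_name": "example-course",
    "conversation": {
        "id": "placeholder-conversation-id",
        "user_email": "student@example.com",
        "messages": [
            {"role": "user", "content": "What is the significance of Six Sigma?"},
            {"role": "assistant", "content": "Six Sigma is a set of techniques for process improvement..."},
        ],
    },
}

resp = requests.post("http://localhost:8000/onResponseCompletion", json=payload, timeout=30)
print(resp.status_code, resp.json())  # expect 200 and {'outcome': 'success'}
```

Because `logToNomic()` hands the actual logging to `executor.submit(log_convo_to_nomic, course_name, data)`, the POST returns as soon as the task is queued and the Nomic map update runs in the background, which is what keeps the chat UI responsive.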