-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added .md ingest and fixed .py ingest
- Loading branch information
Showing
4 changed files
with
214 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import os | ||
import nomic | ||
from nomic import atlas | ||
from langchain.embeddings import OpenAIEmbeddings | ||
import numpy as np | ||
|
||
class DataLog():
    """Logs user search queries and their retrieved contexts to Nomic Atlas."""

    def __init__(self):
        # Authenticate with Nomic using the token from the NOMIC_API_KEY
        # environment variable; the login result is kept on the instance.
        self.login = nomic.login(os.getenv('NOMIC_API_KEY'))

    def nomic_log(self, course_name: str, search_query: str, retrieved_contexts) -> str:
        """
        Logs user query and retrieved contexts to Nomic.

        The query and the concatenation of all retrieved context texts are
        embedded with OpenAI and appended to the "Search Query Viz" Atlas
        project, after which the project's maps are rebuilt.

        Args:
            course_name: Stored in the 'course' field of every uploaded datum.
            search_query: The user's query text.
            retrieved_contexts: Iterable of mappings, each with a 'text' key.

        Returns:
            The placeholder string "WIP" (the feature is still in progress).
        """
        print("course_name: ", course_name)
        print("search_query: ", search_query)
        print("retrieved_contexts: ", len(retrieved_contexts))

        # concat all retrieved contexts into one string
        context_string = " ".join(context['text'] for context in retrieved_contexts)
        print("context_string: ", context_string)

        # Only non-empty texts are embedded; an empty string carries no
        # information and is not a meaningful embedding input.
        texts = [text for text in (search_query, context_string) if text]
        if not texts:
            print("nothing to log")
            return "WIP"

        # convert query and context to embeddings
        # (real embeddings, not random placeholder vectors, so the resulting
        # map reflects the actual queries)
        embeddings_model = OpenAIEmbeddings()
        embeddings = np.array(embeddings_model.embed_documents(texts))

        # NOTE(review): ids restart at 0 on every call; if Atlas requires ids
        # to be unique across the whole project, repeated calls may collide --
        # confirm against the Nomic API before relying on this.
        data = [{'course': course_name, 'id': i} for i in range(len(embeddings))]
        print("len of data: ", len(data))
        print("len of embeddings: ", embeddings.shape)

        project = atlas.AtlasProject(name="Search Query Viz", add_datums_if_exists=True)
        print(project.name)

        # Hold the project lock while appending so the upload and the map
        # rebuild do not race with another operation on the same project.
        with project.wait_for_project_lock() as project:
            project.add_embeddings(embeddings=embeddings, data=data)
            project.rebuild_maps()

        print("done")
        return "WIP"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import nomic\n", | ||
"from nomic import atlas\n", | ||
"import supabase\n", | ||
"import os\n", | ||
"\n", | ||
"# Read the Nomic token from the environment; credentials must never be committed with the notebook.\n", | ||
"nomic.login(os.getenv(\"NOMIC_API_KEY\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"https://twzwfuydgnnjcaopyfdv.supabase.co\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# create supabase client\n", | ||
"# The service-role key bypasses row-level security, so it is read from the environment instead of being stored here.\n", | ||
"# NOTE(review): the env var name SUPABASE_API_KEY is an assumption -- align it with the backend's configuration.\n", | ||
"url = \"https://twzwfuydgnnjcaopyfdv.supabase.co\"\n", | ||
"key = os.getenv(\"SUPABASE_API_KEY\")\n", | ||
"\n", | ||
"client = supabase.create_client(supabase_url=url, supabase_key=key)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 21, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"response = client.table(\"documents\").select(\"*\").eq(\"course_name\", \"canvas\").limit(2).execute()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 22, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"data=[{'id': 46461, 'created_at': '2023-08-10T20:32:20.802466+00:00', 's3_path': 'courses/canvas/How_businesses_and_websites_can_use_third-party_data_to_target_advertising_through_LinkedIn_|_LinkedIn_Help.html', 'readable_filename': '8/10/23 How businesses and websites can use third-party data to target advertising through LinkedIn | LinkedIn Help', 'course_name': 'canvas', 'url': 'https://www.linkedin.com/help/linkedin/answer/a426264?trk=microsites-frontend_legal_privacy-policy&lang=en', 'contexts': [{'text': 'How businesses and websites can use third-party data to target advertising through LinkedIn | LinkedIn Help\\nDue to high support volume, it may take longer than usual to hear back from our Support Agents. \\xa0dismiss this messageAttention screen reader users, you are in a mobile optimized view and content may not appear where you expect it to be. To return the screen to its desktop view, please maximize your browser.Skip to contentSkip to searchClose jump menuClose menuGet help with:LinkedInSales NavigatorTalent HubRecruiterSales InsightsMarketing SolutionsTalent InsightsCorporate BillingLearningGo to LinkedInSign inHelpSign inContact usEnglish (English)Čeština (Czech)Deutsch (German)Bahasa Indonesia (Indonesian)Español (Spanish)Türkçe (Turkish)Français (French)हिंदी (Hindi)Italiano (Italian)日本語 (Japanese)Nederlands (Dutch)Português (Portuguese)Svenska (Swedish)Polski (Polish)Bahasa Malaysia (Malay)한국어 (Korean)Dansk (Danish)Norsk (Norwegian)Română (Romanian)Русский (Russian)Українська (Ukrainian)简体中文 (Chinese (Simplified))正體中文 (Chinese (Traditional))ภาษาไทย (Thai)العربية (Arabic)LinkedIn Corporation © 2023AboutTransparency CenterPrivacy and TermsCookiesCopyrightTermsPrivacyGuest controlsYour California Privacy ChoicesDismiss privacy menuLinkedIn Corporation © 2023', 'embedding': None, 'timestamp': None, 'pagenumber': ''}, {'text': 'How businesses and websites can use third-party data to target advertising through LinkedIn | LinkedIn Help\\nDue to high 
support volume, it may take longer than usual to hear back from our Support Agents. \\xa0dismiss this messageAttention screen reader users, you are in a mobile optimized view and content may not appear where you expect it to be. To return the screen to its desktop view, please maximize your browser.Skip to contentSkip to searchClose jump menuClose menuGet help with:LinkedInLearningCorporate BillingTalent InsightsMarketing SolutionsSales InsightsTalent HubSales NavigatorRecruiterGo to LinkedInSign inHelpSign inContact usEnglish (English)Čeština (Czech)Deutsch (German)Bahasa Indonesia (Indonesian)Español (Spanish)Türkçe (Turkish)Français (French)हिंदी (Hindi)Italiano (Italian)日本語 (Japanese)Nederlands (Dutch)Português (Portuguese)Svenska (Swedish)Polski (Polish)Bahasa Malaysia (Malay)한국어 (Korean)Dansk (Danish)Norsk (Norwegian)Română (Romanian)Русский (Russian)Українська (Ukrainian)简体中文 (Chinese (Simplified))正體中文 (Chinese (Traditional))ภาษาไทย (Thai)العربية (Arabic)LinkedIn Corporation © 2023AboutTransparency CenterPrivacy and TermsCookiesCopyrightTermsPrivacyGuest controlsYour California Privacy ChoicesDismiss privacy menuLinkedIn Corporation © 2023', 'embedding': None, 'timestamp': None, 'pagenumber': ''}], 'base_url': None}, {'id': 46519, 'created_at': '2023-08-10T20:32:30.368445+00:00', 's3_path': 'courses/canvas/Off-LinkedIn_Visibility_|_LinkedIn_Help.html', 'readable_filename': '8/10/23 Off-LinkedIn Visibility | LinkedIn Help', 'course_name': 'canvas', 'url': 'https://www.linkedin.com/help/linkedin/answer/a1340507?trk=microsites-frontend_legal_privacy-policy&lang=en', 'contexts': [{'text': 'Off-LinkedIn Visibility | LinkedIn Help\\nDue to high support volume, it may take longer than usual to hear back from our Support Agents. \\xa0dismiss this messageAttention screen reader users, you are in a mobile optimized view and content may not appear where you expect it to be. 
To return the screen to its desktop view, please maximize your browser.Skip to contentSkip to searchClose jump menuClose menuGet help with:LinkedInLearningRecruiterSales NavigatorTalent HubSales InsightsMarketing SolutionsCorporate BillingTalent InsightsGo to LinkedInSign inHelpSign inContact usEnglish (English)Čeština (Czech)Deutsch (German)Bahasa Indonesia (Indonesian)Español (Spanish)Türkçe (Turkish)Français (French)हिंदी (Hindi)Italiano (Italian)日本語 (Japanese)Nederlands (Dutch)Português (Portuguese)Svenska (Swedish)Polski (Polish)Bahasa Malaysia (Malay)한국어 (Korean)Dansk (Danish)Norsk (Norwegian)Română (Romanian)Русский (Russian)Українська (Ukrainian)简体中文 (Chinese (Simplified))正體中文 (Chinese (Traditional))ภาษาไทย (Thai)العربية (Arabic)LinkedIn Corporation © 2023AboutTransparency CenterPrivacy and TermsCookiesCopyrightTermsPrivacyGuest controlsYour California Privacy ChoicesDismiss privacy menuLinkedIn Corporation © 2023', 'embedding': None, 'timestamp': None, 'pagenumber': ''}, {'text': 'Off-LinkedIn Visibility | LinkedIn Help\\nDue to high support volume, it may take longer than usual to hear back from our Support Agents. \\xa0dismiss this messageAttention screen reader users, you are in a mobile optimized view and content may not appear where you expect it to be. 
To return the screen to its desktop view, please maximize your browser.Skip to contentSkip to searchClose jump menuClose menuGet help with:LinkedInSales NavigatorCorporate BillingTalent HubRecruiterMarketing SolutionsTalent InsightsLearningSales InsightsGo to LinkedInSign inHelpSign inContact usEnglish (English)Čeština (Czech)Deutsch (German)Bahasa Indonesia (Indonesian)Español (Spanish)Türkçe (Turkish)Français (French)हिंदी (Hindi)Italiano (Italian)日本語 (Japanese)Nederlands (Dutch)Português (Portuguese)Svenska (Swedish)Polski (Polish)Bahasa Malaysia (Malay)한국어 (Korean)Dansk (Danish)Norsk (Norwegian)Română (Romanian)Русский (Russian)Українська (Ukrainian)简体中文 (Chinese (Simplified))正體中文 (Chinese (Traditional))ภาษาไทย (Thai)العربية (Arabic)LinkedIn Corporation © 2023AboutTransparency CenterPrivacy and TermsCookiesCopyrightTermsPrivacyGuest controlsYour California Privacy ChoicesDismiss privacy menuLinkedIn Corporation © 2023', 'embedding': None, 'timestamp': None, 'pagenumber': ''}], 'base_url': None}] count=None\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"print(response)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"[Document(page_content='import os\\nfrom canvasapi import Canvas\\nimport requests\\nfrom zipfile import ZipFile\\nfrom ai_ta_backend.aws import upload_data_files_to_s3\\nfrom ai_ta_backend.vector_database import Ingest\\n\\n\\nclass CanvasAPI():\\n def __init__(self):\\n self.canvas_client = Canvas(\"https://canvas.illinois.edu\", \\n os.getenv(\\'CANVAS_ACCESS_TOKEN\\'))\\n \\n def add_users(self, canvas_course_id: str, course_name: str):\\n \"\"\"\\n Get all users in a course\\n \"\"\"\\n course = self.canvas_client.get_course(canvas_course_id)\\n users = course.get_users()\\n user_emails = []\\n for user in users:\\n net_id = user.sis_user_id\\n email_id = net_id + \"@illinois.edu\"\\n user_emails.append(email_id)\\n \\n print(user_emails)\\n \\n if len(user_emails) > 0:\\n return \"Success\"\\n else:\\n return \"Failed\"\\n \\n def ingest_course_content(self, canvas_course_id: str, course_name: str):\\n \"\"\"\\n Ingests all Canvas course materials through the course ID.\\n \"\"\"\\n print(\"In ingest_course_content\")\\n\\n api_path = \"https://canvas.illinois.edu/api/v1/courses/\" + str(canvas_course_id)\\n headers = {\"Authorization\": \"Bearer \" + os.getenv(\\'CANVAS_ACCESS_TOKEN\\')}\\n\\n try:\\n # Start the content export\\n content_export_api_path = api_path + \"/content_exports?export_type=zip\"\\n start_content_export = requests.post(content_export_api_path, headers=headers)\\n content_export_id = start_content_export.json()[\\'id\\']\\n progress_url = start_content_export.json()[\\'progress_url\\']\\n\\n # Wait for the content export to finish\\n export_progress = requests.get(progress_url, headers=headers)\\n while export_progress.json()[\\'workflow_state\\'] != \\'completed\\':\\n export_progress = requests.get(progress_url, headers=headers)\\n \\n # View content export and get download URL\\n show_content_export_api_path = api_path + \"/content_exports/\" + str(content_export_id)\\n print(\"Show export path: \", 
show_content_export_api_path)\\n\\n show_content_export = requests.get(show_content_export_api_path, headers=headers)\\n download_url = show_content_export.json()[\\'attachment\\'][\\'url\\']\\n file_name = show_content_export.json()[\\'attachment\\'][\\'filename\\']\\n\\n # Create a directory for the content\\n directory = os.path.join(os.getcwd(), \"course_content\")\\n if not os.path.exists(directory):\\n os.mkdir(directory)\\n\\n # Download zip and save to directory\\n download = requests.get(download_url, headers=headers)\\n with open(os.path.join(directory, file_name), \\'wb\\') as f:\\n f.write(download.content)\\n print(\"Downloaded!\")\\n\\n # Extract and read from zip file\\n filepath = \"course_content/\" + file_name\\n with ZipFile(filepath, \\'r\\') as zip:\\n zip.printdir()\\n zip.extractall(\"course_content\")\\n print(\\'Done!\\')\\n os.remove(filepath)\\n\\n # Upload files to S3 and call bulk_ingest\\n s3_paths = upload_data_files_to_s3(course_name, \"course_content\")\\n ingest = Ingest()\\n canvas_ingest = ingest.bulk_ingest(s3_paths, course_name=course_name)\\n \\n return canvas_ingest\\n \\n except Exception as e:\\n print(e)\\n return \"Failed\"\\n\\n\\n ', metadata={'source': 'canvas.py'})]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from langchain.document_loaders import PythonLoader\n", | ||
"\n", | ||
"file = \"canvas.py\"\n", | ||
"\n", | ||
"loader = PythonLoader(file)\n", | ||
"data = loader.load()\n", | ||
"\n", | ||
"print(data)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.16" | ||
}, | ||
"orig_nbformat": 4 | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
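This is why the embeddings look normally distributed: random placeholder vectors from `np.random.rand` are being logged instead of the real query/context embeddings.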