Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding various Canvas functions #33

Merged
merged 24 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c42f606
added the add_users() for Canvas
star-nox Aug 10, 2023
6854205
added canvas course ingest
star-nox Aug 13, 2023
54e3fb0
updated requirements
star-nox Aug 13, 2023
07238a2
added .md ingest and fixed .py ingest
star-nox Aug 15, 2023
deceb15
deleted test ipynb file
star-nox Aug 15, 2023
27383e1
added nomic viz
star-nox Aug 16, 2023
6f08340
added canvas file update function
Aug 21, 2023
34cbbdc
completed update function
star-nox Aug 25, 2023
efd9048
updated course export to include all contents
star-nox Aug 25, 2023
bf3726b
modified to handle diff file structures of downloaded content
star-nox Aug 25, 2023
93646ac
modified canvas update
Aug 30, 2023
05ab444
modified add_users() and ingest_course_content() functions
Sep 21, 2023
f5655ab
modified ingest function
star-nox Sep 21, 2023
6f80b96
modified update_files() for file replacement
star-nox Sep 22, 2023
0223a22
removed the extra os.remove()
star-nox Sep 22, 2023
2e10cc8
fix underscore to dash in for pip
KastanDay Sep 29, 2023
a38fb90
removed json import and added abort to canvas functions
star-nox Oct 2, 2023
79142c5
Merge branch 'main' into canvas
star-nox Oct 2, 2023
7a9c21d
removed file update functions from this PR
star-nox Oct 2, 2023
65b2ba1
added ability to receive canvas checkbox data
Maxwell-Lindsey Oct 23, 2023
acd61e7
Merge branch 'main' into canvas
Maxwell-Lindsey Oct 23, 2023
7c4a839
removed unused import
Maxwell-Lindsey Oct 23, 2023
8de8900
added comment for debugging
Maxwell-Lindsey Oct 23, 2023
46646c7
removing pypandoc because it appears to be unused
KastanDay Oct 23, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added .github/workflows/rules.yml
Empty file.
252 changes: 252 additions & 0 deletions ai_ta_backend/canvas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
import os
import shutil
from canvasapi import Canvas
import requests
from zipfile import ZipFile
from ai_ta_backend.aws import upload_data_files_to_s3
from ai_ta_backend.vector_database import Ingest
from pathlib import Path


class CanvasAPI():
    """Download content from a UIUC Canvas course and feed it to the ingest pipeline.

    Every ``download_*`` method writes into a caller-supplied folder and
    reports its outcome as a status string: ``"Success"`` or
    ``"Failed! Error: <details>"``. They do not raise.
    """

    BASE_URL = "https://canvas.illinois.edu"
    # Seconds to wait on any single HTTP connect/read before giving up, so a
    # stalled server cannot hang an ingest forever.
    REQUEST_TIMEOUT = 30
    # Canvas list endpoints are paginated; ask for large pages to cut round trips.
    PAGE_SIZE = 100

    def __init__(self):
        """Create the canvasapi client and the auth header for raw REST calls.

        Reads the token from the ``CANVAS_ACCESS_TOKEN`` environment variable.
        NOTE: if the variable is unset, the string concatenation below raises
        ``TypeError`` (the ``Canvas`` constructor may reject it even earlier).
        """
        access_token = os.getenv('CANVAS_ACCESS_TOKEN')
        self.canvas_client = Canvas(self.BASE_URL, access_token)
        self.headers = {"Authorization": "Bearer " + access_token}

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _safe_filename(name) -> str:
        """Return ``name`` made safe to use as a single path component.

        Path separators and NUL bytes are replaced with ``_`` so that a title
        such as ``"Week 1/2"`` can neither break the write nor escape the
        destination folder. Empty or dot-only names become ``"untitled"``.
        """
        cleaned = "".join("_" if ch in "/\\\0" else ch for ch in str(name)).strip()
        if cleaned in ("", ".", ".."):
            return "untitled"
        return cleaned

    def _write_html(self, dest_folder: str, file_name: str, html: str) -> None:
        """Write ``html`` to ``dest_folder/file_name`` (name sanitised) as UTF-8."""
        target = os.path.join(dest_folder, self._safe_filename(file_name))
        # Explicit UTF-8: course content routinely contains non-ASCII text and
        # the platform default encoding may not be able to represent it.
        with open(target, 'w', encoding='utf-8') as html_file:
            html_file.write(html)

    def _get_json(self, url: str):
        """GET ``url`` with the Canvas auth header and return the decoded JSON.

        Raises:
            requests.HTTPError: on a 4xx/5xx response.
        """
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def _get_paginated(self, url: str) -> list:
        """Return every item of a paginated Canvas list endpoint.

        Follows the ``Link: <...>; rel="next"`` response header until no next
        page is advertised.

        Raises:
            requests.HTTPError: on a 4xx/5xx response.
            ValueError: if a page is not a JSON list.
        """
        items = []
        params = {"per_page": self.PAGE_SIZE}
        while url:
            response = requests.get(url, headers=self.headers, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            page = response.json()
            if not isinstance(page, list):
                raise ValueError("Expected a JSON list from " + url)
            items.extend(page)
            url = response.links.get("next", {}).get("url")
            # The "next" link already carries its own query parameters.
            params = None
        return items

    # --------------------------------------------------------------- public API

    def add_users(self, canvas_course_id: str, course_name: str):
        """Collect the names of all users enrolled in a Canvas course.

        - Student profile does not have access to emails.
        - Currently only collects and prints the names; ``course_name`` is not
          used yet.

        Returns:
            ``"Success"`` if at least one name was collected, else ``"Failed"``.
        """
        course = self.canvas_client.get_course(canvas_course_id)
        user_names = [user.name for user in course.get_users()]

        print("Collected names: ", user_names)

        return "Success" if user_names else "Failed"

    def download_course_content(self, canvas_course_id: int, dest_folder: str, content_ingest_dict: dict) -> str:
        """Download the selected kinds of course content into ``dest_folder``.

        Args:
            canvas_course_id: Canvas course ID.
            dest_folder: Existing local directory to write into.
            content_ingest_dict: Maps a content type (``'files'``, ``'pages'``,
                ``'modules'``, ``'syllabus'``, ``'assignments'``,
                ``'discussions'``) to whether it should be downloaded.
                Unknown keys are ignored.

        Returns:
            ``"Success"`` if every selected download succeeded, otherwise
            ``"Failed! Error: ..."`` naming the content types that failed.
            All selected types are attempted even if an earlier one fails.
        """
        print("In download_course_content")

        try:
            api_path = self.BASE_URL + "/api/v1/courses/" + str(canvas_course_id)

            downloaders = {
                'files': self.download_files,
                'pages': self.download_pages,
                'modules': self.download_modules,
                'syllabus': self.download_syllabus,
                'assignments': self.download_assignments,
                'discussions': self.download_discussions,
            }

            failures = []
            for key, value in content_ingest_dict.items():
                downloader = downloaders.get(key)
                # Deliberately `== True` rather than truthiness: checkbox data
                # arriving as a string such as "false" must not enable a download.
                if downloader is None or not (value == True):  # noqa: E712
                    continue
                status = downloader(dest_folder, api_path)
                if status != "Success":
                    print("Download of " + key + " did not succeed: ", status)
                    failures.append(key + ": " + str(status))

            # at this point, we have all extracted files in the dest_folder.
            if failures:
                return "Failed! Error: " + "; ".join(failures)
            return "Success"
        except Exception as e:
            return "Failed! Error: " + str(e)

    def ingest_course_content(self, canvas_course_id: int, course_name: str, content_ingest_dict: dict = None) -> str:
        """Ingest Canvas course materials, identified by course ID.

        1. Download the selected content into a local working directory.
        2. Upload all files to S3.
        3. Delete the local working directory (also when step 1 or 2 raises).
        4. Call bulk_ingest() to ingest all uploaded files into QDRANT.

        Args:
            canvas_course_id: Canvas course ID.
            course_name: Course name passed to the S3 upload and to bulk_ingest().
            content_ingest_dict: Content types to ingest, see
                ``download_course_content``. ``None`` selects everything.

        Returns:
            Whatever ``Ingest.bulk_ingest`` returns, or ``"Failed"`` if any
            step raised.
        """
        print("In ingest_course_content")
        try:
            # a dictionary of all contents we want to ingest - files, pages, modules, syllabus, assignments, discussions.
            if content_ingest_dict is None:
                content_ingest_dict = {
                    'files': True,
                    'pages': True,
                    'modules': True,
                    'syllabus': True,
                    'assignments': True,
                    'discussions': True,
                }

            # Create a canvas directory with a course folder inside it.
            canvas_dir = "canvas_materials"
            folder_name = "canvas_course_" + str(canvas_course_id) + "_ingest"
            folder_path = canvas_dir + "/" + folder_name
            os.makedirs(folder_path, exist_ok=True)

            try:
                # Download course content. A partial failure is only logged:
                # whatever was downloaded is still uploaded and ingested.
                download_status = self.download_course_content(canvas_course_id, folder_path, content_ingest_dict)
                if download_status != "Success":
                    print("Some course content could not be downloaded: ", download_status)

                # Upload files to S3
                s3_paths = upload_data_files_to_s3(course_name, folder_path)
            finally:
                # Delete files from local directory, even if the upload failed,
                # so failed runs do not leave course material on disk.
                shutil.rmtree(folder_path, ignore_errors=True)

            # Ingest files into QDRANT
            ingest = Ingest()
            canvas_ingest = ingest.bulk_ingest(s3_paths, course_name=course_name)
            return canvas_ingest

        except Exception as e:
            print(e)
            return "Failed"

    def download_files(self, dest_folder: str, api_path: str) -> str:
        """Download all files in a Canvas course into the given folder.

        Each file is streamed to disk. A file that cannot be downloaded is
        skipped so the remaining files are still fetched, but it makes the
        overall result a failure.

        Returns:
            ``"Success"`` or ``"Failed! Error: ..."``.
        """
        try:
            files = self._get_paginated(api_path + "/files")

            failed = []
            for file in files:
                file_name = file['filename']
                print("Downloading file: ", file_name)
                target = os.path.join(dest_folder, self._safe_filename(file_name))

                try:
                    with requests.get(file['url'], headers=self.headers, stream=True,
                                      timeout=self.REQUEST_TIMEOUT) as file_download:
                        file_download.raise_for_status()
                        with open(target, 'wb') as f:
                            for chunk in file_download.iter_content(chunk_size=1024 * 1024):
                                f.write(chunk)
                except (requests.RequestException, OSError) as e:
                    print("Could not download file: ", file_name, e)
                    failed.append(str(file_name))

            if failed:
                return "Failed! Error: could not download " + ", ".join(failed)
            return "Success"
        except Exception as e:
            return "Failed! Error: " + str(e)

    def download_pages(self, dest_folder: str, api_path: str) -> str:
        """Download all course pages as HTML files into the given folder.

        Pages without an ``html_url`` or without a body are skipped.

        Returns:
            ``"Success"`` or ``"Failed! Error: ..."``.
        """
        print("In download_pages")
        try:
            pages = self._get_paginated(api_path + "/pages")

            for page in pages:
                if page['html_url'] == '':
                    continue
                # The list endpoint does not include the body; fetch each page.
                page_details = self._get_json(api_path + "/pages/" + str(page['page_id']))
                page_body = page_details['body']
                if page_body is None:
                    continue
                self._write_html(dest_folder, page['url'] + ".html", page_body)

            return "Success"
        except Exception as e:
            return "Failed! Error: " + str(e)

    def download_syllabus(self, dest_folder: str, api_path: str) -> str:
        """Download the course syllabus as ``syllabus.html`` into the given folder.

        A course without a syllabus body writes nothing and still succeeds.

        Returns:
            ``"Success"`` or ``"Failed! Error: ..."``.
        """
        print("In download_syllabus")
        try:
            course_settings = self._get_json(api_path + "?include=syllabus_body")
            syllabus_body = course_settings['syllabus_body']

            if syllabus_body is None:
                print("Course has no syllabus body")
            else:
                self._write_html(dest_folder, "syllabus.html", syllabus_body)
            return "Success"
        except Exception as e:
            return "Failed! Error: " + str(e)

    def download_modules(self, dest_folder: str, api_path: str) -> str:
        """Download every external URL linked from course modules as HTML.

        Modules may contain: assignments, quizzes, files, pages, discussions,
        external tools and external urls. Only external urls are handled
        here; the rest are covered by the other download functions.

        External URLs that are unreachable or do not answer with HTTP 200 are
        skipped.

        Returns:
            ``"Success"`` or ``"Failed! Error: ..."``.
        """
        print("In download_modules")
        try:
            modules = self._get_paginated(api_path + "/modules?include=items")

            for module in modules:
                # 'items' may be missing from a module entry; treat that as empty.
                for item in module.get('items') or []:
                    if item['type'] != 'ExternalUrl':
                        continue
                    external_url = item['external_url']
                    url_title = item['title']

                    # Download external url as HTML. No Canvas auth header is
                    # sent: this is a third-party site.
                    try:
                        response = requests.get(external_url, timeout=self.REQUEST_TIMEOUT)
                    except requests.RequestException as e:
                        print("Skipping external url: ", external_url, e)
                        continue
                    if response.status_code == 200:
                        self._write_html(dest_folder, url_title + ".html", response.text)
            return "Success"
        except Exception as e:
            return "Failed! Error: " + str(e)

    def download_assignments(self, dest_folder: str, api_path: str) -> str:
        """Download assignment descriptions as HTML files into the given folder.

        The description attribute has the assignment content in HTML format;
        each one is stored as ``assignment_<id>.html``. Assignments without a
        description are skipped.

        Returns:
            ``"Success"`` or ``"Failed! Error: ..."``.
        """
        print("In download_assignments")
        try:
            assignments = self._get_paginated(api_path + "/assignments")

            for assignment in assignments:
                assignment_description = assignment['description']
                if assignment_description is None:
                    continue
                assignment_name = "assignment_" + str(assignment['id']) + ".html"
                self._write_html(dest_folder, assignment_name, assignment_description)
            return "Success"
        except Exception as e:
            return "Failed! Error: " + str(e)

    def download_discussions(self, dest_folder: str, api_path: str) -> str:
        """Download course discussion topics as HTML files into the given folder.

        Each topic's message is stored as ``<title>.html``. Topics without a
        message are skipped.

        Returns:
            ``"Success"`` or ``"Failed! Error: ..."``.
        """
        print("In download_discussions")
        try:
            discussions = self._get_paginated(api_path + "/discussion_topics")

            for discussion in discussions:
                discussion_content = discussion['message']
                if discussion_content is None:
                    continue
                self._write_html(dest_folder, discussion['title'] + ".html", discussion_content)

            return "Success"
        except Exception as e:
            return "Failed! Error: " + str(e)

Loading