Skip to content

Commit

Permalink
modified update_files() for file replacement
Browse files: browse the repository at this point in the history
  • Loading branch information
star-nox committed Sep 22, 2023
1 parent f5655ab commit 6f80b96
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 50 deletions.
37 changes: 29 additions & 8 deletions ai_ta_backend/canvas.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def ingest_course_content(self, canvas_course_id: int, course_name: str)-> str:
os.mkdir(canvas_dir + "/" + folder_name)
print("Course folder created")


# Download course content
self.download_course_content(canvas_course_id, folder_path, content_to_ingest)

Expand Down Expand Up @@ -135,16 +134,38 @@ def update_course_content(self, canvas_course_id: int, course_name: str) -> str:
"""
print("In update_course_content")

api_path = "https://canvas.illinois.edu/api/v1/courses/" + str(canvas_course_id)
headers = {"Authorization": "Bearer " + os.getenv('CANVAS_ACCESS_TOKEN')}

try:
# a dictionary of all contents we want to ingest - files, pages, modules, syllabus, assignments, discussions.
content_to_ingest = {
'files': True,
'pages': True,
'modules': True,
'syllabus': True,
'assignments': True,
'discussions': True
}

# Create a canvas directory with a course folder inside it.
canvas_dir = "canvas_materials"
folder_name = "canvas_course_" + str(canvas_course_id) + "_ingest"
folder_path = canvas_dir + "/" + folder_name

if os.path.exists(canvas_dir):
print("Canvas directory already exists")
else:
os.mkdir(canvas_dir)
print("Canvas directory created")

if os.path.exists(canvas_dir + "/" + folder_name):
print("Course folder already exists")
else:
os.mkdir(canvas_dir + "/" + folder_name)
print("Course folder created")

# Download course content
folder_name = "canvas_course_" + str(canvas_course_id) + "_update"
folder_path = os.path.join(os.getcwd(), "canvas_materials/" + folder_name)
self.download_course_content(canvas_course_id, folder_path)
self.download_course_content(canvas_course_id, folder_path, content_to_ingest)
print("Downloaded and extracted canvas materials")

# Call diff function
response = update_materials.update_files(folder_path, course_name)
print(response)
Expand Down
79 changes: 37 additions & 42 deletions ai_ta_backend/update_materials.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,10 @@ def update_files(source_path: str, course_name: str):
"""
print("In update_files")



ingester = Ingest()
# Get S3 paths of files for given course_name
s3_files = ingester.getAll(course_name)
print("s3 files: ", s3_files)


# Access checksum of s3 files
Expand All @@ -42,51 +41,47 @@ def update_files(source_path: str, course_name: str):
# Compute checksum of every file in source_path folder
total_files = 0
files_removed = 0
for root, subdirs, files in os.walk(source_path):
for file in files:
total_files += 1
print("file: ", file)
filepath = os.path.join(root, file)
file_checksum = generate_checksum(filepath)

# compare file checksum with checksum of all s3 files
for s3_file in s3_files:
s3_path = s3_file['s3_path']
#print("existing s3 file: ", s3_path)
s3_object = s3_client.get_object(Bucket=os.getenv('S3_BUCKET_NAME'), Key=s3_path)
s3_checksum = s3_object['ETag']

# remove file from the folder if checksums match
if str(file_checksum) == s3_checksum[1:-1]:
print("checksums match: ", file)
os.remove(filepath)
files_removed += 1
continue

files = os.listdir(source_path)

for file in files:
filepath = os.path.join(source_path, file)
total_files += 1
#print("file: ", file)
#print("filepath: ", filepath)
file_checksum = generate_checksum(filepath)

# compare this checksum with checksum of all s3 files
for s3_file in s3_files:
s3_path = s3_file['s3_path']
#print("existing s3 file: ", s3_path)

s3_object = s3_client.get_object(Bucket=os.getenv('S3_BUCKET_NAME'), Key=s3_path)
s3_checksum = s3_object['ETag']

# remove file from the folder if checksums match
if str(file_checksum) == s3_checksum[1:-1]:
print("checksums match: ", filepath)
os.remove(filepath)
files_removed += 1
continue

# different checksums but same file name - delete the file from s3
elif str(file_checksum) != s3_checksum[1:-1] and file == s3_path.split('/')[-1]:
print("in replace file condition: ", file)
delete_s3_file = ingester.delete_data(s3_path, course_name)
print("deletion update: ", delete_s3_file)
s3_files.remove(s3_file)
break

print("total files: ", total_files)
print("files removed: ", files_removed)
if total_files > 0:
new_s3_paths = upload_data_files_to_s3(course_name, source_path)
subdir_ingest = ingester.bulk_ingest(new_s3_paths, course_name=course_name)

# # Upload remaining files to S3 - canvas export contains subdirectories
# subdirectories = [subdir for subdir in os.listdir(source_path) if os.path.isdir(os.path.join(source_path, subdir))]
# print("subdirs: ", subdirectories)
if total_files - files_removed > 0:
# Upload files to S3 and ingest
new_s3_paths = upload_data_files_to_s3(course_name, source_path)
file_ingest = ingester.bulk_ingest(new_s3_paths, course_name=course_name)

# if len(subdirectories) == 0:
# # pass the source path
# new_s3_paths = upload_data_files_to_s3(course_name, source_path)
# else:
# # pass the subdirectory paths
# for subdir in subdirectories:
# subdir_path = os.path.join(source_path, subdir)
# if len(os.listdir(subdir_path)) == 0:
# continue
# new_s3_paths = upload_data_files_to_s3(course_name, subdir_path)
# print("----------------------------------")
# print("new s3 paths: ", new_s3_paths)
# subdir_ingest = ingester.bulk_ingest(new_s3_paths, course_name=course_name)

# Delete files from local directory
shutil.rmtree(source_path)

Expand Down

0 comments on commit 6f80b96

Please sign in to comment.