From 472c4e4bd401006a5f1bb83237d7f8ac3ec9107e Mon Sep 17 00:00:00 2001 From: CanvasSIS Date: Wed, 24 Jan 2024 14:18:26 +1100 Subject: [PATCH 01/23] First Deployemnt Changes to requirements.txt - additional packages and - moved env variables value --- sis/canvas_synergetic_integration/main.py | 99 ++++++++++++++----- .../requirements.txt | 8 +- 2 files changed, 78 insertions(+), 29 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index ce9fb42..59db800 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -11,22 +11,25 @@ import os import time import pyodbc +import datetime + +now = datetime.datetime.now() + from contextlib import contextmanager working_dir = './canvas/' -base_url = 'https://******.instructure.com/api/v1/accounts/self/' +base_url = os.environ['base_url'] token = os.environ['TOKEN'] header = {'Authorization' : 'Bearer {token}'.format(token=token)} payload = {'import_type' : 'instructure_csv', 'extension' : 'zip'} -DB_URL = os.environ['DB_URL'] -DB = os.environ['DB'] -DB_USER = os.environ['DB_USER'] -DB_PASSWORD = os.environ['DB_PASSWORD'] -DRIVER = os.environ['DRIVER'] +SERVER = os.environ['SERVER'] +DATABASE = os.environ['DATABASE'] +USERNAME = os.environ['USERNAME'] +PASSWORD = os.environ['PASSWORD'] -conn_string = 'DRIVER={DRIVER};SERVER={DB_URL};DATABASE={DB};UID={DB_USER};PWD={DB_PASSWORD}'.format(DRIVER=DRIVER, DB_URL=DB_URL, DB=DB, DB_USER=DB_USER, DB_PASSWORD=DB_PASSWORD) +conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' @contextmanager @@ -70,16 +73,13 @@ def get_canvas_users(): results = [[ 'user_id', - 'integration_id', 'login_id', - 'password', - 'ssha_password', 'first_name', 'last_name', 'full_name', 'sortable_name', - 'short_name', 'email', + 'declared_user_type', 'status' ]] @@ -93,7 +93,7 @@ def get_canvas_courses(): Selects canvas courses from uvCanvasCourses ''' with db_connect(conn_string) as cur: - cur.execute("select * from uvCanvasCourses") + cur.execute("SELECT * FROM uvCanvasCourses") canvas_accounts = cur.fetchall() results = [[ @@ -103,11 +103,7 @@ def get_canvas_courses(): 'account_id', 'term_id', 'status', - 'integration_id', - 'start_date', - 'end_date', - 'course_format', - 'blueprint_course_id' + 'course_format' ]] for row in canvas_accounts: @@ -117,23 +113,20 @@ def get_canvas_courses(): def get_canvas_enrolments(): ''' - Selects canvas enrolments from uvCanvasEnrolments + Selects canvas enrolments from uvCanvasEnrollments ''' with db_connect(conn_string) as cur: - cur.execute("select * from uvCanvasEnrolments") + cur.execute("select * from uvCanvasEnrollments") canvas_accounts = cur.fetchall() results = [[ 'course_id', - 'root_account', + 'start_date', + 'end_date', 'user_id', - 'user_integration_id', 'role', - 'role_id', 'section_id', - 'status', - 'associated_user_id', - 'limit_section_priviledges' + 'status' ]] for row in canvas_accounts: @@ -141,6 +134,49 @@ def get_canvas_enrolments(): return results +def get_canvas_terms(): + ''' + Selects canvas enrolments from uvCanvasTerms + ''' + with db_connect(conn_string) as cur: + cur.execute("select * from uvCanvasTerms") + canvas_terms = cur.fetchall() + + results = [[ + 'term_id', + 'name', + 'status', + 'start_date', + 'end_date' + + ]] + + for row in canvas_terms: + results.append(row) + + return results + +def get_canvas_sections(): + ''' + Selects canvas enrolments from uvCanvasSections + ''' + with db_connect(conn_string) as cur: + cur.execute("select * from uvCanvasSections") + canvas_sections = cur.fetchall() + + results = [[ + 'section_id', + 'course_id', + 'name', + 'status' + + ]] + + for row in canvas_sections: + results.append(row) + + return results + def zipdir(path, ziph): ''' Helper function for zipping the directory @@ -157,7 +193,7 @@ def post_data(base_url, header, payload): r = requests.post(base_url + "/sis_imports/", headers=header, params=payload, data=data) - print(r.text) + print(now.strftime("%Y-%m-%d %H:%M:%S"), r.text) if __name__ == '__main__': @@ -181,6 +217,14 @@ def post_data(base_url, header, payload): writer = csv.writer(f) writer.writerows(get_canvas_enrolments()) + with open(working_dir + "terms.csv", "w", newline='', encoding='utf-8') as f: + writer = csv.writer(f) + writer.writerows(get_canvas_terms()) + + with open(working_dir + "sections.csv", "w", newline='', encoding='utf-8') as f: + writer = csv.writer(f) + writer.writerows(get_canvas_sections()) + # ============= # ZIP Directory # ============= @@ -191,4 +235,5 @@ def post_data(base_url, header, payload): # =================== # Post Data to Canvas # =================== - post_data(base_url, header, payload) \ No newline at end of file + post_data(base_url, header, payload) + diff --git a/sis/canvas_synergetic_integration/requirements.txt b/sis/canvas_synergetic_integration/requirements.txt index a2c118f..b4cd7f6 100755 --- a/sis/canvas_synergetic_integration/requirements.txt +++ b/sis/canvas_synergetic_integration/requirements.txt @@ -1,2 +1,6 @@ -requests>=2.20.0 -pyodbc==4.0.21 +requests +pyodbc +urllib3 +idna +charset-normalizer +certifi \ No newline at end of file From 522bcaa19c2799e93fef6abebd3d5d5eca11b6f2 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Thu, 25 Jan 2024 16:29:20 +1100 Subject: [PATCH 02/23] Refactor canvas_synergetic_integration to use logging. --- sis/canvas_synergetic_integration/main.py | 99 +++++++++++++------ .../requirements.txt | 3 +- 2 files changed, 70 insertions(+), 32 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 59db800..c592b1e 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -12,25 +12,10 @@ import time import pyodbc import datetime - -now = datetime.datetime.now() - from contextlib import contextmanager +import logging, logging.handlers -working_dir = './canvas/' - -base_url = os.environ['base_url'] -token = os.environ['TOKEN'] -header = {'Authorization' : 'Bearer {token}'.format(token=token)} -payload = {'import_type' : 'instructure_csv', 'extension' : 'zip'} - -SERVER = os.environ['SERVER'] -DATABASE = os.environ['DATABASE'] -USERNAME = os.environ['USERNAME'] -PASSWORD = os.environ['PASSWORD'] - -conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' - +logger = logging.getLogger('canvas_SI') @contextmanager def db_connect(conn_string): @@ -43,7 +28,7 @@ def db_connect(conn_string): yield cur cnxn.close() -def get_canvas_accounts(): +def get_canvas_accounts(conn_string): ''' Selects canvas accounts from uvCanvasAccounts ''' @@ -63,7 +48,7 @@ def get_canvas_accounts(): return results -def get_canvas_users(): +def get_canvas_users(conn_string): ''' Selects canvas users from uvCanvasUsers ''' @@ -88,7 +73,7 @@ def get_canvas_users(): return results -def get_canvas_courses(): +def get_canvas_courses(conn_string): ''' Selects canvas courses from uvCanvasCourses ''' @@ -111,7 +96,7 @@ def get_canvas_courses(): return results -def get_canvas_enrolments(): +def get_canvas_enrolments(conn_string): ''' Selects canvas enrolments from uvCanvasEnrollments ''' @@ -134,7 +119,7 @@ def get_canvas_enrolments(): return results -def get_canvas_terms(): +def get_canvas_terms(conn_string): ''' Selects canvas enrolments from uvCanvasTerms ''' @@ -156,7 +141,7 @@ def get_canvas_terms(): return results -def get_canvas_sections(): +def get_canvas_sections(conn_string): ''' Selects canvas enrolments from uvCanvasSections ''' @@ -193,37 +178,86 @@ def post_data(base_url, header, payload): r = requests.post(base_url + "/sis_imports/", headers=header, params=payload, data=data) - print(now.strftime("%Y-%m-%d %H:%M:%S"), r.text) + logger.info(r.text) -if __name__ == '__main__': +def main(): + + + + logger.setLevel(logging.INFO) + + # Setup a logging to stderr + se = logging.StreamHandler() + se.setLevel(logging.WARNING) + logger.addHandler(se) + # And logging to event viewer + nt = logging.handlers.NTEventLogHandler(appname=__name__,) + nt.setLevel(logging.INFO) + nt.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(nt) + + # And logging to a local file. Might make this optional later one, when I setup some command line arguments. + now = datetime.datetime.now() + logging_filename = now.strftime('canvas_SI_%Y-%m.log') + fl = logging.FileHandler(logging_filename, encoding='utf-8') + fl.setLevel(logging.INFO) + fl.setFormatter(logging.Formatter("%(asctime)s %(message)s")) + logger.addHandler(fl) + + working_dir = './canvas/' + + try: + base_url = os.environ['base_url'] + token = os.environ['TOKEN'] + header = {'Authorization' : 'Bearer {token}'.format(token=token)} + payload = {'import_type' : 'instructure_csv', 'extension' : 'zip'} + + SERVER = os.environ['SERVER'] + DATABASE = os.environ['DATABASE'] + USERNAME = os.environ['USERNAME'] + PASSWORD = os.environ['PASSWORD'] + except KeyError as e: + logger.exception("missing required environment variable") + raise SystemExit() + + conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' + + try: + go(conn_string, working_dir, base_url, header, payload) + except Exception as e: + logger.exception("Unhandled exception while running") + raise e + + +def go(conn_string, working_dir, base_url, header, payload): # =========== # Create CSVs # =========== with open(working_dir + "accounts.csv", "w", newline='', encoding='utf-8') as f: writer = csv.writer(f) - writer.writerows(get_canvas_accounts()) + writer.writerows(get_canvas_accounts(conn_string)) with open(working_dir + "users.csv", "w", newline='', encoding='utf-8') as f: writer = csv.writer(f) - writer.writerows(get_canvas_users()) + writer.writerows(get_canvas_users(conn_string)) with open(working_dir + "courses.csv", "w", newline='', encoding='utf-8') as f: writer = csv.writer(f) - writer.writerows(get_canvas_courses()) + writer.writerows(get_canvas_courses(conn_string)) with open(working_dir + "enrolments.csv", "w", newline='', encoding='utf-8') as f: writer = csv.writer(f) - writer.writerows(get_canvas_enrolments()) + writer.writerows(get_canvas_enrolments(conn_string)) with open(working_dir + "terms.csv", "w", newline='', encoding='utf-8') as f: writer = csv.writer(f) - writer.writerows(get_canvas_terms()) + writer.writerows(get_canvas_terms(conn_string)) with open(working_dir + "sections.csv", "w", newline='', encoding='utf-8') as f: writer = csv.writer(f) - writer.writerows(get_canvas_sections()) + writer.writerows(get_canvas_sections(conn_string)) # ============= # ZIP Directory @@ -237,3 +271,6 @@ def post_data(base_url, header, payload): # =================== post_data(base_url, header, payload) +if __name__ == '__main__': + main() + diff --git a/sis/canvas_synergetic_integration/requirements.txt b/sis/canvas_synergetic_integration/requirements.txt index b4cd7f6..8ecece6 100755 --- a/sis/canvas_synergetic_integration/requirements.txt +++ b/sis/canvas_synergetic_integration/requirements.txt @@ -3,4 +3,5 @@ pyodbc urllib3 idna charset-normalizer -certifi \ No newline at end of file +certifi +pywin32 \ No newline at end of file From 7f5bbd7c4325e4dccb593da50f02e85fc8248f85 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Thu, 25 Jan 2024 16:49:42 +1100 Subject: [PATCH 03/23] Refactoring away DRY code. The functions that execute the SQL statemetns all do exactly the same thing, but with different parameters. So I'm refactoring that data into a data structre, and just writing the code once. --- sis/canvas_synergetic_integration/main.py | 293 +++++++++------------- 1 file changed, 125 insertions(+), 168 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index c592b1e..8b9902b 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -14,9 +14,23 @@ import datetime from contextlib import contextmanager import logging, logging.handlers +from dataclasses import dataclass +from typing import List logger = logging.getLogger('canvas_SI') + +@contextmanager +def db_cursor(conn): + ''' + Context manager for database cursor. Allows for + complete cursor close on failure. + ''' + + cur = conn.cursor() + yield cur + cur.close() + @contextmanager def db_connect(conn_string): ''' @@ -24,143 +38,102 @@ def db_connect(conn_string): complete connection close on failure. ''' cnxn = pyodbc.connect(conn_string) - cur = cnxn.cursor() - yield cur + yield cnxn cnxn.close() -def get_canvas_accounts(conn_string): - ''' - Selects canvas accounts from uvCanvasAccounts - ''' - with db_connect(conn_string) as cur: - cur.execute("select * from uvCanvasAccounts ORDER BY parent_account_id") - canvas_accounts = cur.fetchall() - - results = [[ - 'account_id', - 'parent_account_id', - 'name', - 'status' - ]] - - for row in canvas_accounts: - results.append(row) - - return results - -def get_canvas_users(conn_string): - ''' - Selects canvas users from uvCanvasUsers - ''' - with db_connect(conn_string) as cur: - cur.execute("select * from uvCanvasUsers") - canvas_accounts = cur.fetchall() - - results = [[ - 'user_id', - 'login_id', - 'first_name', - 'last_name', - 'full_name', - 'sortable_name', - 'email', - 'declared_user_type', - 'status' - ]] - - for row in canvas_accounts: - results.append(row) - - return results - -def get_canvas_courses(conn_string): - ''' - Selects canvas courses from uvCanvasCourses - ''' - with db_connect(conn_string) as cur: - cur.execute("SELECT * FROM uvCanvasCourses") - canvas_accounts = cur.fetchall() - - results = [[ - 'course_id', - 'short_name', - 'long_name', - 'account_id', - 'term_id', - 'status', - 'course_format' - ]] - - for row in canvas_accounts: - results.append(row) - - return results - -def get_canvas_enrolments(conn_string): - ''' - Selects canvas enrolments from uvCanvasEnrollments - ''' - with db_connect(conn_string) as cur: - cur.execute("select * from uvCanvasEnrollments") - canvas_accounts = cur.fetchall() - - results = [[ - 'course_id', - 'start_date', - 'end_date', - 'user_id', - 'role', - 'section_id', - 'status' - ]] - - for row in canvas_accounts: - results.append(row) - - return results - -def get_canvas_terms(conn_string): - ''' - Selects canvas enrolments from uvCanvasTerms - ''' - with db_connect(conn_string) as cur: - cur.execute("select * from uvCanvasTerms") - canvas_terms = cur.fetchall() - - results = [[ - 'term_id', - 'name', - 'status', - 'start_date', - 'end_date' - - ]] - - for row in canvas_terms: - results.append(row) - - return results - -def get_canvas_sections(conn_string): - ''' - Selects canvas enrolments from uvCanvasSections - ''' - with db_connect(conn_string) as cur: - cur.execute("select * from uvCanvasSections") - canvas_sections = cur.fetchall() - - results = [[ - 'section_id', - 'course_id', - 'name', - 'status' - - ]] - - for row in canvas_sections: - results.append(row) - return results +@dataclass +class SIS_Datatype: + name: str + sql_view: str + result_columns : List[str] + + def run(self, cur): + cur.execute(self.sql_view) + + results = self.results_columns + for row in cur.fetch(): + results.append(row) + return results + +views = [ + SIS_Datatype( + name="canvas_accounts", + sql_view="select * from uvCanvasAccounts ORDER BY parent_account_id", + result_columns = [ + 'account_id', + 'parent_account_id', + 'name', + 'status', + ] + ), + SIS_Datatype( + name="canvas_users", + sql_view="select * from uvCanvasUsers", + result_columns = [ + 'user_id', + 'login_id', + 'first_name', + 'last_name', + 'full_name', + 'sortable_name', + 'email', + 'declared_user_type', + 'status', + ] + ), + SIS_Datatype( + name="canvas_courses", + sql_view="SELECT * FROM uvCanvasCourses", + result_columns = [ + 'course_id', + 'short_name', + 'long_name', + 'account_id', + 'term_id', + 'status', + 'course_format' + ] + ), + + SIS_Datatype( + name="canvas_enrolments", + sql_view="select * from uvCanvasEnrollments", + result_columns = [ + 'course_id', + 'start_date', + 'end_date', + 'user_id', + 'role', + 'section_id', + 'status' + ] + ), + + SIS_Datatype( + name="canvas_terms", + sql_view="select * from uvCanvasTerms", + result_columns = [ + 'term_id', + 'name', + 'status', + 'start_date', + 'end_date' + ] + ), + + SIS_Datatype( + name="canvas_sections", + sql_view="select * from uvCanvasSections", + result_columns = [ + 'section_id', + 'course_id', + 'name', + 'status' + ] + ), + +] def zipdir(path, ziph): ''' @@ -224,40 +197,24 @@ def main(): conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' - try: - go(conn_string, working_dir, base_url, header, payload) - except Exception as e: - logger.exception("Unhandled exception while running") - raise e - - -def go(conn_string, working_dir, base_url, header, payload): - # =========== - # Create CSVs - # =========== - with open(working_dir + "accounts.csv", "w", newline='', encoding='utf-8') as f: - writer = csv.writer(f) - writer.writerows(get_canvas_accounts(conn_string)) - - with open(working_dir + "users.csv", "w", newline='', encoding='utf-8') as f: - writer = csv.writer(f) - writer.writerows(get_canvas_users(conn_string)) - - with open(working_dir + "courses.csv", "w", newline='', encoding='utf-8') as f: - writer = csv.writer(f) - writer.writerows(get_canvas_courses(conn_string)) - - with open(working_dir + "enrolments.csv", "w", newline='', encoding='utf-8') as f: - writer = csv.writer(f) - writer.writerows(get_canvas_enrolments(conn_string)) - - with open(working_dir + "terms.csv", "w", newline='', encoding='utf-8') as f: - writer = csv.writer(f) - writer.writerows(get_canvas_terms(conn_string)) - - with open(working_dir + "sections.csv", "w", newline='', encoding='utf-8') as f: - writer = csv.writer(f) - writer.writerows(get_canvas_sections(conn_string)) + with db_connect(conn_string) as conn: + try: + go(conn, working_dir, base_url, header, payload) + except Exception as e: + logger.exception("Unhandled exception while running") + raise e + + + +def go(conn, working_dir, base_url, header, payload): + + + for view in views: + with db_cursor(conn) as cur: + with open(working_dir + view.name, newline='', encoding='utf-8') as f: + writer = csv.writer(f) + writer.writerow(view.run(cur)) + # ============= # ZIP Directory From 17ee3c0411a936866b72d7791a2faf4f100a40e5 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 31 Jan 2024 12:02:08 +1100 Subject: [PATCH 04/23] Use temporary directory instead of a hard coded working directory. --- sis/canvas_synergetic_integration/main.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 8b9902b..268ad76 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -16,6 +16,7 @@ import logging, logging.handlers from dataclasses import dataclass from typing import List +from tempfile import TemporaryDirectory logger = logging.getLogger('canvas_SI') @@ -156,8 +157,6 @@ def post_data(base_url, header, payload): def main(): - - logger.setLevel(logging.INFO) # Setup a logging to stderr @@ -179,8 +178,6 @@ def main(): fl.setFormatter(logging.Formatter("%(asctime)s %(message)s")) logger.addHandler(fl) - working_dir = './canvas/' - try: base_url = os.environ['base_url'] token = os.environ['TOKEN'] @@ -197,7 +194,7 @@ def main(): conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' - with db_connect(conn_string) as conn: + with db_connect(conn_string) as conn, TemporaryDirectory() as working_dir: try: go(conn, working_dir, base_url, header, payload) except Exception as e: From 9ad7f06618d15f17adc702836e4d19518b3f5025 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 31 Jan 2024 12:15:15 +1100 Subject: [PATCH 05/23] Small fixes for temporary directory change, and others. The previous hard coded working_directory variable had a trailing folder separator. But the directory name generated by TemporaryDirectory does not have a separator, leading to the script failing. While here I also fixed the name of the file, as I'd forgotten to add the .csv file extension, and I corrected the mode used when opening the csv file, so it's writeable. --- sis/canvas_synergetic_integration/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 268ad76..03da95f 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -17,6 +17,7 @@ from dataclasses import dataclass from typing import List from tempfile import TemporaryDirectory +from pathlib import Path logger = logging.getLogger('canvas_SI') @@ -205,10 +206,10 @@ def main(): def go(conn, working_dir, base_url, header, payload): - for view in views: with db_cursor(conn) as cur: - with open(working_dir + view.name, newline='', encoding='utf-8') as f: + csv_filename = (Path(working_dir) / view.name).with_suffix('.csv') + with open(csv_filename, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(view.run(cur)) From 81bed617dc96084250d59001381e6dbd68b075fc Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 31 Jan 2024 12:26:02 +1100 Subject: [PATCH 06/23] A couple of small fixes. I think I had gotten the column handling wrong in the SIS_Datatype class. And I found that cur.fetch() doesn't exist, hence changed it to cur.fetchall(). --- sis/canvas_synergetic_integration/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 03da95f..db4dffe 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -53,8 +53,9 @@ class SIS_Datatype: def run(self, cur): cur.execute(self.sql_view) - results = self.results_columns - for row in cur.fetch(): + # Copy the column names into results. + results = [self.result_columns[:]] + for row in cur.fetchall(): results.append(row) return results From 4e30b24846c006bc6162bd703135a9ad1d6bf8c2 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 31 Jan 2024 16:19:07 +1100 Subject: [PATCH 07/23] Start refactoring for uploading single CSV files. --- sis/canvas_synergetic_integration/main.py | 67 ++++++++++++++++------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index db4dffe..a639747 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -138,6 +138,16 @@ def run(self, cur): ] +class SI_Exception(Exception): + pass + +class SIS_ImportError(SI_Exception): + def __init__(self, message, status_code, text): + super().__init__(message) + self.status_code = status_code + self.text = text + + def zipdir(path, ziph): ''' Helper function for zipping the directory @@ -146,16 +156,26 @@ def zipdir(path, ziph): for file in files: ziph.write(os.path.join(root, file)) -def post_data(base_url, header, payload): +def post_data(base_url, header, filename): +def post_data(base_url, header, filename): ''' - Posts data to the canvas api endpoint + Posts data to the canvas api endpoint. Returns identifier for import ''' - data = open('results.zip', 'rb').read() + + extension = filename.suffix[1:] + # Setup URL parameters + payload = {'import_type' : 'instructure_csv', 'extension': extension} + + data = open(filename, 'rb').read() r = requests.post(base_url + "/sis_imports/", headers=header, params=payload, data=data) - logger.info(r.text) - + # Rather than just log the output, I would like to inspect the r object and check the return code. If there is an error I want to log that. If there is no error... I might log the output as well, but at a INFO level? + if r.status_code != 200: + raise SIS_ImportError(status_code=r.status_code, text=r.text) + r_json = r.json() + logger.info(f"Import started for {filename}, import id {r_json['id']}") + return r_json['id'] def main(): @@ -184,7 +204,7 @@ def main(): base_url = os.environ['base_url'] token = os.environ['TOKEN'] header = {'Authorization' : 'Bearer {token}'.format(token=token)} - payload = {'import_type' : 'instructure_csv', 'extension' : 'zip'} + SERVER = os.environ['SERVER'] DATABASE = os.environ['DATABASE'] @@ -198,34 +218,43 @@ def main(): with db_connect(conn_string) as conn, TemporaryDirectory() as working_dir: try: - go(conn, working_dir, base_url, header, payload) + go(conn, working_dir, base_url, header) except Exception as e: logger.exception("Unhandled exception while running") raise e -def go(conn, working_dir, base_url, header, payload): +def go(conn, working_dir, base_url, header, zip=False): + imports_started = [] for view in views: with db_cursor(conn) as cur: csv_filename = (Path(working_dir) / view.name).with_suffix('.csv') with open(csv_filename, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(view.run(cur)) + if not zip: + import_id = post_data(base_url=base_url, header=header, filename=csv_filename) + imports_started.append(import_id) - # ============= - # ZIP Directory - # ============= - zipf = zipfile.ZipFile('results.zip', 'w') - zipdir(working_dir, zipf) - zipf.close() - - # =================== - # Post Data to Canvas - # =================== - post_data(base_url, header, payload) + if zip: + # ============= + # ZIP Directory + # ============= + zipf = zipfile.ZipFile('results.zip', 'w') + zipdir(working_dir, zipf) + zipf.close() + + # =================== + # Post Data to Canvas + # =================== + import_id = post_data(base_url=base_url, header=header, filename='results.zip') + imports_started.append(import_id) + + # TODO: Use GET /api/v1/accounts/:account_id/sis_imports/:id for each of the imports to track the status of the imports and log the outcome. + if __name__ == '__main__': main() From 66d387c7b437a7bc238c6e267b47f2ae73f04c96 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Mon, 5 Feb 2024 17:13:32 +1100 Subject: [PATCH 08/23] Add diffing parameters Modify post_data so that a variety of parameters are sent when in diffing mode. Also extend main() to prompt for those parameters. --- sis/canvas_synergetic_integration/main.py | 58 ++++++++++++++++++----- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index a639747..c3eeba2 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -5,11 +5,9 @@ # ============================= import requests -import json import csv import zipfile import os -import time import pyodbc import datetime from contextlib import contextmanager @@ -18,9 +16,34 @@ from typing import List from tempfile import TemporaryDirectory from pathlib import Path +import argparse logger = logging.getLogger('canvas_SI') +@dataclass +class SIS_Diffing_Parameter: + name: str + type: type + default: str + description: str + + +diffing_params = [ + SIS_Diffing_Parameter(name='diffing_drop_status', default='inactive', type=str, + description='''If diffing_drop_status is passed, this SIS import will use this status for enrollments that are +not included in the sis_batch. Defaults to ‘deleted’. +Allowed values: +deleted, completed, inactive'''), + SIS_Diffing_Parameter(name='diffing_data_set_identifier', default='canvas_syn_integration', type=str, + description='''If set on a CSV import, Canvas will attempt to optimize the SIS import by comparing this set of CSVs to +the previous set that has the same data set identifier, and only applying the difference between the two'''), + SIS_Diffing_Parameter(name='diffing_user_remove_status', default='suspended', type=str, + description='''For users removed from one batch to the next one using the same diffing_data_set_identifier, set their +status to the value of this argument.'''), + SIS_Diffing_Parameter(name='change_threshold_percent', default=10, type=int, + description='If set with diffing, diffing will not be performed if the files are greater than the threshold as a percent.'), +] + @contextmanager def db_cursor(conn): @@ -58,6 +81,9 @@ def run(self, cur): for row in cur.fetchall(): results.append(row) return results + + + views = [ SIS_Datatype( @@ -156,19 +182,22 @@ def zipdir(path, ziph): for file in files: ziph.write(os.path.join(root, file)) -def post_data(base_url, header, filename): -def post_data(base_url, header, filename): +def post_data(base_url, header, filename, cli_args=None, diffing_mode=False): ''' Posts data to the canvas api endpoint. Returns identifier for import ''' extension = filename.suffix[1:] # Setup URL parameters - payload = {'import_type' : 'instructure_csv', 'extension': extension} + url_params = {'import_type' : 'instructure_csv', 'extension': extension} + if diffing_mode: + for arg in vars(cli_args): + url_params[arg] = getattr(cli_args, arg) + data = open(filename, 'rb').read() - r = requests.post(base_url + "/sis_imports/", headers=header, params=payload, data=data) + r = requests.post(base_url + "/sis_imports/", headers=header, params=url_params, data=data) # Rather than just log the output, I would like to inspect the r object and check the return code. If there is an error I want to log that. If there is no error... I might log the output as well, but at a INFO level? if r.status_code != 200: @@ -177,7 +206,13 @@ def post_data(base_url, header, filename): logger.info(f"Import started for {filename}, import id {r_json['id']}") return r_json['id'] -def main(): +def main(arg_strs): + + ap = argparse.ArgumentParser() + for param in diffing_params: + ap.add_argument('--'+param.name, type=param.type, help=param.description, default=param.default) + + args = ap.parse_args(args=arg_strs) logger.setLevel(logging.INFO) @@ -218,14 +253,14 @@ def main(): with db_connect(conn_string) as conn, TemporaryDirectory() as working_dir: try: - go(conn, working_dir, base_url, header) + go(conn, working_dir, base_url, header, args) except Exception as e: logger.exception("Unhandled exception while running") raise e -def go(conn, working_dir, base_url, header, zip=False): +def go(conn, working_dir, base_url, header, cli_args, zip=False): imports_started = [] for view in views: @@ -235,7 +270,7 @@ def go(conn, working_dir, base_url, header, zip=False): writer = csv.writer(f) writer.writerow(view.run(cur)) if not zip: - import_id = post_data(base_url=base_url, header=header, filename=csv_filename) + import_id = post_data(base_url=base_url, header=header, filename=csv_filename, cli_args=cli_args, diffing_mode=True) imports_started.append(import_id) @@ -257,5 +292,6 @@ def go(conn, working_dir, base_url, header, zip=False): if __name__ == '__main__': - main() + import sys + main(arg_strs=sys.argv[1:]) From 4271eacc8e54da57216d01d9b4e3e3af96aebf59 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 7 Feb 2024 09:16:37 +1100 Subject: [PATCH 09/23] Ignore .log files --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 15b52c4..9ed1659 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,6 @@ env #NodeJS -node_modules/ \ No newline at end of file +node_modules/ + +*.log \ No newline at end of file From d3b586dc3fe980262e8892f75d35aeb7e17cf03c Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 7 Feb 2024 09:20:05 +1100 Subject: [PATCH 10/23] Tweak handling of missing environment variable --- sis/canvas_synergetic_integration/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index c3eeba2..de8229c 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -246,7 +246,7 @@ def main(arg_strs): USERNAME = os.environ['USERNAME'] PASSWORD = os.environ['PASSWORD'] except KeyError as e: - logger.exception("missing required environment variable") + logger.critical(f"missing required environment variable {e.args[0]}") raise SystemExit() conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' From 1a194268b8388a0596707ec466388da7a4614950 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Thu, 8 Feb 2024 11:53:28 +1100 Subject: [PATCH 11/23] Alter main() to use arguments supplied. And store into the global dict the diffing parameters we will use. --- sis/canvas_synergetic_integration/main.py | 40 ++++++++++++++--------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index de8229c..ce88801 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -22,27 +22,30 @@ @dataclass class SIS_Diffing_Parameter: - name: str type: type - default: str + value: str description: str -diffing_params = [ - SIS_Diffing_Parameter(name='diffing_drop_status', default='inactive', type=str, +diffing_params = { + 'diffing_drop_status': + SIS_Diffing_Parameter(value='inactive', type=str, description='''If diffing_drop_status is passed, this SIS import will use this status for enrollments that are not included in the sis_batch. Defaults to ‘deleted’. Allowed values: deleted, completed, inactive'''), - SIS_Diffing_Parameter(name='diffing_data_set_identifier', default='canvas_syn_integration', type=str, + 'diffing_data_set_identifier': + SIS_Diffing_Parameter(value='canvas_syn_integration', type=str, description='''If set on a CSV import, Canvas will attempt to optimize the SIS import by comparing this set of CSVs to the previous set that has the same data set identifier, and only applying the difference between the two'''), - SIS_Diffing_Parameter(name='diffing_user_remove_status', default='suspended', type=str, + 'diffing_user_remove_status': + SIS_Diffing_Parameter(value='suspended', type=str, description='''For users removed from one batch to the next one using the same diffing_data_set_identifier, set their status to the value of this argument.'''), - SIS_Diffing_Parameter(name='change_threshold_percent', default=10, type=int, + 'change_threshold_percent': + SIS_Diffing_Parameter(value=10, type=int, description='If set with diffing, diffing will not be performed if the files are greater than the threshold as a percent.'), -] +} @contextmanager @@ -182,7 +185,7 @@ def zipdir(path, ziph): for file in files: ziph.write(os.path.join(root, file)) -def post_data(base_url, header, filename, cli_args=None, diffing_mode=False): +def post_data(base_url, header, filename, diffing_mode=False): ''' Posts data to the canvas api endpoint. Returns identifier for import ''' @@ -192,8 +195,8 @@ def post_data(base_url, header, filename, cli_args=None, diffing_mode=False): url_params = {'import_type' : 'instructure_csv', 'extension': extension} if diffing_mode: - for arg in vars(cli_args): - url_params[arg] = getattr(cli_args, arg) + for name, param in diffing_params.items(): + url_params[name] = param.value data = open(filename, 'rb').read() @@ -209,11 +212,16 @@ def post_data(base_url, header, filename, cli_args=None, diffing_mode=False): def main(arg_strs): ap = argparse.ArgumentParser() - for param in diffing_params: - ap.add_argument('--'+param.name, type=param.type, help=param.description, default=param.default) + for name, param in diffing_params.items(): + ap.add_argument('--'+name, type=param.type, help=param.description, default=param.value) args = ap.parse_args(args=arg_strs) + diffing_params['diffing_data_set_identifier'].value = args.diffing_data_set_identifier + diffing_params['change_threshold_percent'].value = args.change_threshold_percent + diffing_params['diffing_drop_status'].value = args.diffing_drop_status + diffing_params['diffing_user_remove_status'].value = args.diffing_user_remove_status + logger.setLevel(logging.INFO) # Setup a logging to stderr @@ -253,14 +261,14 @@ def main(arg_strs): with db_connect(conn_string) as conn, TemporaryDirectory() as working_dir: try: - go(conn, working_dir, base_url, header, args) + go(conn, working_dir, base_url, header) except Exception as e: logger.exception("Unhandled exception while running") raise e -def go(conn, working_dir, base_url, header, cli_args, zip=False): +def go(conn, working_dir, base_url, header, zip=False): imports_started = [] for view in views: @@ -270,7 +278,7 @@ def go(conn, working_dir, base_url, header, cli_args, zip=False): writer = csv.writer(f) writer.writerow(view.run(cur)) if not zip: - import_id = post_data(base_url=base_url, header=header, filename=csv_filename, cli_args=cli_args, diffing_mode=True) + import_id = post_data(base_url=base_url, header=header, filename=csv_filename, diffing_mode=True) imports_started.append(import_id) From 98abad1de9056277b2ef734239b8b11f4456f9f3 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Thu, 8 Feb 2024 12:54:56 +1100 Subject: [PATCH 12/23] Minor fix for writing CSV files. I'm also changing the wording slightly on a log message, as I think this phrasing is slightly more correct. --- sis/canvas_synergetic_integration/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index ce88801..0d83bf2 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -206,7 +206,7 @@ def post_data(base_url, header, filename, diffing_mode=False): if r.status_code != 200: raise SIS_ImportError(status_code=r.status_code, text=r.text) r_json = r.json() - logger.info(f"Import started for {filename}, import id {r_json['id']}") + logger.info(f"Import upload for {filename}, import id {r_json['id']}") return r_json['id'] def main(arg_strs): @@ -276,7 +276,7 @@ def go(conn, working_dir, base_url, header, zip=False): csv_filename = (Path(working_dir) / view.name).with_suffix('.csv') with open(csv_filename, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) - writer.writerow(view.run(cur)) + writer.writerows(view.run(cur)) if not zip: import_id = post_data(base_url=base_url, header=header, filename=csv_filename, diffing_mode=True) imports_started.append(import_id) From 2da271bd0d7ce54c1884abe45da16ea0f13aa93f Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Thu, 29 Feb 2024 15:11:36 +1100 Subject: [PATCH 13/23] Let operator name view in main.py Normally this iterates over all the defined views. This allows an operator to optionally name the views to run on. --- sis/canvas_synergetic_integration/main.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 0d83bf2..6035ff9 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -212,6 +212,8 @@ def post_data(base_url, header, filename, diffing_mode=False): def main(arg_strs): ap = argparse.ArgumentParser() + views_help = ' '.join([i.name for i in views]) + ap.add_argument('views', nargs='*', help=f'Names of views to process in this execution. One of {views_help}') for name, param in diffing_params.items(): ap.add_argument('--'+name, type=param.type, help=param.description, default=param.value) @@ -261,17 +263,22 @@ def main(arg_strs): with db_connect(conn_string) as conn, TemporaryDirectory() as working_dir: try: - go(conn, working_dir, base_url, header) + go(conn, working_dir, base_url, header, args.views) except Exception as e: logger.exception("Unhandled exception while running") raise e -def go(conn, working_dir, base_url, header, zip=False): +def go(conn, working_dir, base_url, header, arg_views, zip=False): imports_started = [] for view in views: + + # If the operator has named a view, but this view is not one of them, skip it. + if len(arg_views) > 0 and view.name not in arg_views: + continue + with db_cursor(conn) as cur: csv_filename = (Path(working_dir) / view.name).with_suffix('.csv') with open(csv_filename, 'w', newline='', encoding='utf-8') as f: From e87cbf89e8b37a713c31069762bd5420bc0b2378 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Thu, 29 Feb 2024 17:29:39 +1100 Subject: [PATCH 14/23] Make canvas_synergetic_integration/main.py log status of imports. --- sis/canvas_synergetic_integration/main.py | 77 +++++++++++++++++++---- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 6035ff9..6ec546d 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -8,7 +8,8 @@ import csv import zipfile import os -import pyodbc +import time +import pprint import datetime from contextlib import contextmanager import logging, logging.handlers @@ -18,6 +19,8 @@ from pathlib import Path import argparse +import pyodbc + logger = logging.getLogger('canvas_SI') @dataclass @@ -176,6 +179,12 @@ def __init__(self, message, status_code, text): self.status_code = status_code self.text = text +class SIS_ImportStatusError(SI_Exception): + def __init__(self, message, status_code, text): + super().__init__(message) + self.status_code = status_code + self.text = text + def zipdir(path, ziph): ''' @@ -206,9 +215,19 @@ def post_data(base_url, header, filename, diffing_mode=False): if r.status_code != 200: raise SIS_ImportError(status_code=r.status_code, text=r.text) r_json = r.json() - logger.info(f"Import upload for {filename}, import id {r_json['id']}") return r_json['id'] +def get_status(base_url, header, import_id): + ''' + Returns a dictionary showing the result of an import + ''' + url = base_url + f"/sis_imports/{import_id}" + resp = requests.get(url, headers=header) + if resp.status_code != 200: + raise SIS_ImportStatusError(status_code=resp.status_code, message="Failed to get status", text=resp.text) + return resp.json() + + def main(arg_strs): ap = argparse.ArgumentParser() @@ -224,16 +243,16 @@ def main(arg_strs): diffing_params['diffing_drop_status'].value = args.diffing_drop_status diffing_params['diffing_user_remove_status'].value = args.diffing_user_remove_status - logger.setLevel(logging.INFO) + logger.setLevel(logging.DEBUG) # Setup a logging to stderr se = logging.StreamHandler() - se.setLevel(logging.WARNING) + se.setLevel(logging.INFO) logger.addHandler(se) # And logging to event viewer nt = logging.handlers.NTEventLogHandler(appname=__name__,) - nt.setLevel(logging.INFO) + nt.setLevel(logging.DEBUG) nt.setFormatter(logging.Formatter("%(message)s")) logger.addHandler(nt) @@ -241,7 +260,7 @@ def main(arg_strs): now = datetime.datetime.now() logging_filename = now.strftime('canvas_SI_%Y-%m.log') fl = logging.FileHandler(logging_filename, encoding='utf-8') - fl.setLevel(logging.INFO) + fl.setLevel(logging.DEBUG) fl.setFormatter(logging.Formatter("%(asctime)s %(message)s")) logger.addHandler(fl) @@ -264,10 +283,8 @@ def main(arg_strs): with db_connect(conn_string) as conn, TemporaryDirectory() as working_dir: try: go(conn, working_dir, base_url, header, args.views) - except Exception as e: - logger.exception("Unhandled exception while running") - raise e - + except SI_Exception as e: + logger.exception(f"Unhandled SI exception while running: {e}") def go(conn, working_dir, base_url, header, arg_views, zip=False): @@ -286,7 +303,8 @@ def go(conn, working_dir, base_url, header, arg_views, zip=False): writer.writerows(view.run(cur)) if not zip: import_id = post_data(base_url=base_url, header=header, filename=csv_filename, diffing_mode=True) - imports_started.append(import_id) + logger.debug(f"SI Import for {view.name} started, import ID {import_id}") + imports_started.append( (import_id, view.name) ) if zip: @@ -301,9 +319,42 @@ def go(conn, working_dir, base_url, header, arg_views, zip=False): # Post Data to Canvas # =================== import_id = post_data(base_url=base_url, header=header, filename='results.zip') - imports_started.append(import_id) + imports_started.append((import_id, 'results.zip')) + + + # Get the status of the imports we have started. These may not be + # immediately available, so the script will loop until we've successfully + # retrieved those statuses. But not loop forever. + pp = pprint.PrettyPrinter(sort_dicts=False) + status_loop_count = 0 + while len(imports_started) > 0: + status_loop_count += 1 + tmp_list_imports = imports_started[:] + imports_started = [] + for import_id, view_name in tmp_list_imports: + status = get_status(base_url=base_url, header=header, import_id=import_id) + if status is None: + imports_started.append( (import_id, view_name) ) + continue + + progress = status['progress'] + if progress != 100: + logger.info(f"Import for {view_name} in progress, progress = {progress}") + imports_started.append( (import_id, view_name) ) + continue + + workflow_state = status['workflow_state'] + logger.warning(f"Import for {view_name} finished, result: {workflow_state}") + logger.debug(f"Import for {view_name} details: {pp.pformat(status)}") + + # Don't loop forever + if status_loop_count > 1000: + logger.warning(f"Import status has taken to long, quiting") + break + + if len(imports_started) > 0: + time.sleep(2) - # TODO: Use GET /api/v1/accounts/:account_id/sis_imports/:id for each of the imports to track the status of the imports and log the outcome. if __name__ == '__main__': From dde29036dd714bad733132e34c2cbd1445ebf5f9 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Mon, 4 Mar 2024 10:10:14 +1100 Subject: [PATCH 15/23] Add --no-upload and --diffing-mode options to canvas_synergetic_integration While working this I've also tried to improve some comments, and I've stopped using "from import" style of imports. --- sis/canvas_synergetic_integration/main.py | 69 +++++++++++++++-------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 6ec546d..05d9ca8 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -11,47 +11,54 @@ import time import pprint import datetime -from contextlib import contextmanager +import shutil +import contextlib import logging, logging.handlers -from dataclasses import dataclass -from typing import List -from tempfile import TemporaryDirectory -from pathlib import Path +import dataclasses +import typing +import tempfile +import pathlib import argparse import pyodbc logger = logging.getLogger('canvas_SI') -@dataclass +@dataclasses.dataclass class SIS_Diffing_Parameter: type: type value: str description: str +# Various URL parameters we can use in the Canvas SI Import API. This +# dictionary both documents which ones we are using, and the default value. +# This script uses this as a global variable so that the main() method can +# setup options for the CLI, and store the chosen values back in after the CLI +# arguments are parsed. Then this dictionary is referred to when building up +# the URL parameters. diffing_params = { 'diffing_drop_status': SIS_Diffing_Parameter(value='inactive', type=str, - description='''If diffing_drop_status is passed, this SIS import will use this status for enrollments that are + description='''If this script is in diffing mode then this script will pass diffing_drop_status as a URL parameter to the POST. Hence the CANVAS SIS import will use this status for enrollments that are not included in the sis_batch. Defaults to ‘deleted’. Allowed values: deleted, completed, inactive'''), 'diffing_data_set_identifier': SIS_Diffing_Parameter(value='canvas_syn_integration', type=str, - description='''If set on a CSV import, Canvas will attempt to optimize the SIS import by comparing this set of CSVs to + description='''If this scrip is running in diffing mode then diffing_data_set_identifier is used as a URL parameter in the POST to CANVAS. Hence Canvas will attempt to optimize the SIS import by comparing this set of CSVs to the previous set that has the same data set identifier, and only applying the difference between the two'''), 'diffing_user_remove_status': SIS_Diffing_Parameter(value='suspended', type=str, - description='''For users removed from one batch to the next one using the same diffing_data_set_identifier, set their + description='''If this script is running in diffing mode then this is used as a URL parameter in the POST to Canvas as a parameter. For users removed from one batch to the next one using the same diffing_data_set_identifier, set their status to the value of this argument.'''), 'change_threshold_percent': SIS_Diffing_Parameter(value=10, type=int, - description='If set with diffing, diffing will not be performed if the files are greater than the threshold as a percent.'), + description='If this script is running in diffing mode then this parameter is used as a URL parameter in the POST to Canvas as a parameter. Canvas diffing will not be performed if the files are greater than the threshold as a percent.'), } -@contextmanager +@contextlib.contextmanager def db_cursor(conn): ''' Context manager for database cursor. Allows for @@ -62,7 +69,7 @@ def db_cursor(conn): yield cur cur.close() -@contextmanager +@contextlib.contextmanager def db_connect(conn_string): ''' Context manager for database connection. Allows for @@ -73,11 +80,11 @@ def db_connect(conn_string): cnxn.close() -@dataclass +@dataclasses.dataclass class SIS_Datatype: name: str sql_view: str - result_columns : List[str] + result_columns : typing.List[str] def run(self, cur): cur.execute(self.sql_view) @@ -233,6 +240,9 @@ def main(arg_strs): ap = argparse.ArgumentParser() views_help = ' '.join([i.name for i in views]) ap.add_argument('views', nargs='*', help=f'Names of views to process in this execution. One of {views_help}') + ap.add_argument('--diffing-mode', action='store_true', help='Instead of building a zip file of all the views upload individual synergetic views as CSV files and enable diffing mode. See Canvas documentation for more details on diffing mode') + ap.add_argument('--no-upload', action='store_true', help='Do not POST to canvas, instead only pull the data from Synergetic and save the relevant csv or zip files and save them to the local directory') + ap.add_argument('--output-dir', help='If running with --no-upload use the named directory instead of the current directory') for name, param in diffing_params.items(): ap.add_argument('--'+name, type=param.type, help=param.description, default=param.value) @@ -280,14 +290,25 @@ def main(arg_strs): conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' - with db_connect(conn_string) as conn, TemporaryDirectory() as working_dir: + with db_connect(conn_string) as conn, tempfile.TemporaryDirectory() as working_dir: try: - go(conn, working_dir, base_url, header, args.views) + go(conn, working_dir, base_url, header, args.views, diffing_mode=args.diffing_mode, no_upload=args.no_upload) except SI_Exception as e: logger.exception(f"Unhandled SI exception while running: {e}") + finally: + # If the operator said to not upload, then we will copy all the generated files out of the temporary directory before they are deleted. + if args.no_upload: + if args.output_dir: + output_dir = pathlib.Path(args.output_dir) + else: + output_dir = pathlib.Path(".") + for filename in pathlib.Path(working_dir).iterdir(): + shutil.copy(filename, output_dir) + -def go(conn, working_dir, base_url, header, arg_views, zip=False): + +def go(conn, working_dir, base_url, header, arg_views, diffing_mode=False, no_upload=False): imports_started = [] for view in views: @@ -297,17 +318,20 @@ def go(conn, working_dir, base_url, header, arg_views, zip=False): continue with db_cursor(conn) as cur: - csv_filename = (Path(working_dir) / view.name).with_suffix('.csv') + csv_filename = (pathlib.Path(working_dir) / view.name).with_suffix('.csv') with open(csv_filename, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerows(view.run(cur)) - if not zip: + + # If in diffing mode upload these CSV files individually + if diffing_mode and not no_upload: import_id = post_data(base_url=base_url, header=header, filename=csv_filename, diffing_mode=True) logger.debug(f"SI Import for {view.name} started, import ID {import_id}") imports_started.append( (import_id, view.name) ) - if zip: + # If we are not running in diffing mode create a zip file of all the CSV files created. + if not diffing_mode: # ============= # ZIP Directory # ============= @@ -318,8 +342,9 @@ def go(conn, working_dir, base_url, header, arg_views, zip=False): # =================== # Post Data to Canvas # =================== - import_id = post_data(base_url=base_url, header=header, filename='results.zip') - imports_started.append((import_id, 'results.zip')) + if not no_upload: + import_id = post_data(base_url=base_url, header=header, filename='results.zip', diffing_mode=False) + imports_started.append((import_id, 'results.zip')) # Get the status of the imports we have started. These may not be From 82fc359d21489900f788c3994f0b4b5db87ba867 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 6 Mar 2024 14:01:04 +1100 Subject: [PATCH 16/23] Correct path for zip file The zip file was not being created in the temporary directory. Instead it was being created in the operators working directory. --- sis/canvas_synergetic_integration/main.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 05d9ca8..d26028c 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -332,18 +332,12 @@ def go(conn, working_dir, base_url, header, arg_views, diffing_mode=False, no_up # If we are not running in diffing mode create a zip file of all the CSV files created. if not diffing_mode: - # ============= - # ZIP Directory - # ============= - zipf = zipfile.ZipFile('results.zip', 'w') + zip_filename = pathlib.Path(working_dir) / 'results.zip' + zipf = zipfile.ZipFile(zip_filename, 'w') zipdir(working_dir, zipf) zipf.close() - - # =================== - # Post Data to Canvas - # =================== if not no_upload: - import_id = post_data(base_url=base_url, header=header, filename='results.zip', diffing_mode=False) + import_id = post_data(base_url=base_url, header=header, filename=zip_filename, diffing_mode=False) imports_started.append((import_id, 'results.zip')) From 1d04286926f0fae90744eddee5173fc94fbc3111 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 6 Mar 2024 14:23:51 +1100 Subject: [PATCH 17/23] Fix so ziping directory doesn't try to include zipfile itself. --- sis/canvas_synergetic_integration/main.py | 24 +++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index d26028c..e7745d2 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -193,14 +193,6 @@ def __init__(self, message, status_code, text): self.text = text -def zipdir(path, ziph): - ''' - Helper function for zipping the directory - ''' - for root, dirs, files in os.walk(path): - for file in files: - ziph.write(os.path.join(root, file)) - def post_data(base_url, header, filename, diffing_mode=False): ''' Posts data to the canvas api endpoint. Returns identifier for import @@ -290,7 +282,9 @@ def main(arg_strs): conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' - with db_connect(conn_string) as conn, tempfile.TemporaryDirectory() as working_dir: + with db_connect(conn_string) as conn, tempfile.TemporaryDirectory() as working_dir_name: + working_dir = pathlib.Path(working_dir_name) + try: go(conn, working_dir, base_url, header, args.views, diffing_mode=args.diffing_mode, no_upload=args.no_upload) except SI_Exception as e: @@ -303,7 +297,7 @@ def main(arg_strs): output_dir = pathlib.Path(args.output_dir) else: output_dir = pathlib.Path(".") - for filename in pathlib.Path(working_dir).iterdir(): + for filename in working_dir.iterdir(): shutil.copy(filename, output_dir) @@ -318,7 +312,7 @@ def go(conn, working_dir, base_url, header, arg_views, diffing_mode=False, no_up continue with db_cursor(conn) as cur: - csv_filename = (pathlib.Path(working_dir) / view.name).with_suffix('.csv') + csv_filename = (working_dir / view.name).with_suffix('.csv') with open(csv_filename, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerows(view.run(cur)) @@ -332,9 +326,13 @@ def go(conn, working_dir, base_url, header, arg_views, diffing_mode=False, no_up # If we are not running in diffing mode create a zip file of all the CSV files created. if not diffing_mode: - zip_filename = pathlib.Path(working_dir) / 'results.zip' + zip_filename = working_dir / 'results.zip' zipf = zipfile.ZipFile(zip_filename, 'w') - zipdir(working_dir, zipf) + + # zip the directory + for obj in working_dir.rglob('*.csv'): + if obj.is_file(): + zipf.write(obj) zipf.close() if not no_upload: import_id = post_data(base_url=base_url, header=header, filename=zip_filename, diffing_mode=False) From 5021a31e8991792ef966bcc94c3098685559eb79 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Thu, 7 Mar 2024 09:53:40 +1100 Subject: [PATCH 18/23] Add --diffing-remaster-dataset option to sis/canvas_synergetic_integration/main.py --- sis/canvas_synergetic_integration/main.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index e7745d2..bd29adf 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -26,9 +26,10 @@ @dataclasses.dataclass class SIS_Diffing_Parameter: - type: type value: str description: str + type: typing.Optional[type] = None + action: typing.Optional[str] = None # Various URL parameters we can use in the Canvas SI Import API. This @@ -55,6 +56,11 @@ class SIS_Diffing_Parameter: 'change_threshold_percent': SIS_Diffing_Parameter(value=10, type=int, description='If this script is running in diffing mode then this parameter is used as a URL parameter in the POST to Canvas as a parameter. Canvas diffing will not be performed if the files are greater than the threshold as a percent.'), + 'diffing-remaster-dataset': + SIS_Diffing_Parameter(value=False, action='store_true', + description='''If changes are made to SIS-managed objects outside of the normal import process, it may be necessary to process a SIS import with the same data set identifier, but apply the entire import rather than applying just the diff. To enable this mode, set the diffing_remaster_data_set=true option when creating the import, and it will be applied without diffing. The next import for the same data set will still diff against that import. + When scheduling this script to run frequently you should not set this option. + ''') } @@ -235,8 +241,18 @@ def main(arg_strs): ap.add_argument('--diffing-mode', action='store_true', help='Instead of building a zip file of all the views upload individual synergetic views as CSV files and enable diffing mode. See Canvas documentation for more details on diffing mode') ap.add_argument('--no-upload', action='store_true', help='Do not POST to canvas, instead only pull the data from Synergetic and save the relevant csv or zip files and save them to the local directory') ap.add_argument('--output-dir', help='If running with --no-upload use the named directory instead of the current directory') + + # Add all of the diffing parameters to the scripts arguments/options. for name, param in diffing_params.items(): - ap.add_argument('--'+name, type=param.type, help=param.description, default=param.value) + kwargs = { + 'help': param.description, + 'default': param.value, + } + if param.action: + kwargs['action'] = param.action + if param.type: + kwargs['type'] = param.type + ap.add_argument('--'+name, **kwargs) args = ap.parse_args(args=arg_strs) From dcbe80c106a4babbd96ded0a4f1fe0a4ff25d976 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Fri, 8 Mar 2024 15:02:02 +1100 Subject: [PATCH 19/23] Fix SIS_ImportError call. --- sis/canvas_synergetic_integration/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index bd29adf..b26032b 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -218,7 +218,7 @@ def post_data(base_url, header, filename, diffing_mode=False): # Rather than just log the output, I would like to inspect the r object and check the return code. If there is an error I want to log that. If there is no error... I might log the output as well, but at a INFO level? if r.status_code != 200: - raise SIS_ImportError(status_code=r.status_code, text=r.text) + raise SIS_ImportError(status_code=r.status_code, message="SIS Import post failed", text=r.text) r_json = r.json() return r_json['id'] From e89a371aaa1a5abbda7369d1c6ee5808aa8bc2f6 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Mon, 4 Nov 2024 16:23:35 +1100 Subject: [PATCH 20/23] Set a diffing identifier that's based on the view name. --- sis/canvas_synergetic_integration/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index b26032b..8415226 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -256,7 +256,6 @@ def main(arg_strs): args = ap.parse_args(args=arg_strs) - diffing_params['diffing_data_set_identifier'].value = args.diffing_data_set_identifier diffing_params['change_threshold_percent'].value = args.change_threshold_percent diffing_params['diffing_drop_status'].value = args.diffing_drop_status diffing_params['diffing_user_remove_status'].value = args.diffing_user_remove_status @@ -335,6 +334,10 @@ def go(conn, working_dir, base_url, header, arg_views, diffing_mode=False, no_up # If in diffing mode upload these CSV files individually if diffing_mode and not no_upload: + + # But first, set the identifier to be specific to this view. + diffing_params['diffing_data_set_identifier'].value = view.name + import_id = post_data(base_url=base_url, header=header, filename=csv_filename, diffing_mode=True) logger.debug(f"SI Import for {view.name} started, import ID {import_id}") imports_started.append( (import_id, view.name) ) From 3e046129810986c3ae9fc35b083af84728f172b0 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Mon, 6 Jan 2025 15:22:11 +1100 Subject: [PATCH 21/23] Change diffing_drop_status parameter Rachelle Ferrer suggested the diffing_drop_status parameter be changed from inactive to deleted_last_completed. She explained that this option is not technically a status, but combines the deleted and completed statuses. If there is at least one other active enrollment in the course, enrollments marked as deleted_last_completed will be deleted. If it is the last enrollment in the course, the deleted_last_completed enrollment will be set as complete. Rachelle Ferrer also recommended diffing is only done for enrolments, but this script has not yet been modified to account for that recommendation. --- sis/canvas_synergetic_integration/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index b26032b..4a70803 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -40,7 +40,7 @@ class SIS_Diffing_Parameter: # the URL parameters. diffing_params = { 'diffing_drop_status': - SIS_Diffing_Parameter(value='inactive', type=str, + SIS_Diffing_Parameter(value='deleted_last_completed', type=str, description='''If this script is in diffing mode then this script will pass diffing_drop_status as a URL parameter to the POST. Hence the CANVAS SIS import will use this status for enrollments that are not included in the sis_batch. Defaults to ‘deleted’. Allowed values: From bf2b18a945e9515ee7c2463fd99a6f661f3f9417 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Fri, 19 Sep 2025 16:58:27 +1000 Subject: [PATCH 22/23] Try to add more sanity checking of views user passes on command line. --- sis/canvas_synergetic_integration/main.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 8175883..6c67772 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -295,6 +295,16 @@ def main(arg_strs): logger.critical(f"missing required environment variable {e.args[0]}") raise SystemExit() + if args.views: + incorrect_view_name = False + views_help = ' '.join([i.name for i in views]) + for view in args.views: + if view not in names_of_defined_views: + logger.warning(f"You have named view '{view}', but that is not a valid view name. Valid vewnames are: {views_help}") + incorrect_view_name = True + if incorrect_view_name: + raise SystemExit() + conn_string = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={SERVER};DATABASE={DATABASE};UID={USERNAME};PWD={PASSWORD}' with db_connect(conn_string) as conn, tempfile.TemporaryDirectory() as working_dir_name: From 2261cc59a0b4b4994c950798b30f55575d17c6a7 Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Wed, 24 Sep 2025 11:07:53 +1000 Subject: [PATCH 23/23] Fix up logic of checking view names. --- sis/canvas_synergetic_integration/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sis/canvas_synergetic_integration/main.py b/sis/canvas_synergetic_integration/main.py index 6c67772..131603e 100755 --- a/sis/canvas_synergetic_integration/main.py +++ b/sis/canvas_synergetic_integration/main.py @@ -297,9 +297,8 @@ def main(arg_strs): if args.views: incorrect_view_name = False - views_help = ' '.join([i.name for i in views]) for view in args.views: - if view not in names_of_defined_views: + if view not in views_help: logger.warning(f"You have named view '{view}', but that is not a valid view name. Valid vewnames are: {views_help}") incorrect_view_name = True if incorrect_view_name: