berkeley-dsep-infra · shaneknapp · Aug 16, 2024 · Jul 2, 2024 · Jul 5, 2024 · Jul 5, 2024
diff --git a/notebooks/logs_visualization_dashboard.ipynb b/notebooks/logs_visualization_dashboard.ipynb
@@ -2395,7 +2395,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,

diff --git a/scripts/README.md b/scripts/README.md
@@ -0,0 +1,58 @@
+## DataHub Usage Analysis (scripts/test)
+
+## Setup Instructions
+
+1. **Change Directory to the `test` Folder inside the `scripts`**:
+```bash
+cd /datahub-usage-analysis/scripts/test
+```
+
+2. **Run this in the terminal, replacing the values in quotes with your own credentials**:
+```bash
+export APP_ID='13eb564e'
+export APP_KEY=''
+```
+
+3. **Run this to check that the credentials were set correctly**:
+```bash
+echo $APP_ID
+echo $APP_KEY
+```
+
+4. **Test by running these commands from the terminal**:
+
+```bash
+python course_info_fetcher.py data100 data8 compsci189
+```
+or interactively:
+
+```bash
+python course_info_fetcher.py
+```
+e.g.:
+Please enter the term ID: 2242
+Please enter the class names (separated by commas): data100, data8, compsci189
+
+Expected output:
+
+{
+    "title": "Principles & Techniques of Data Science",
+    "display_name": "2024 Spring DATA C100 001 LEC 001",
+    "department": "Data Science Undergrad Studies",
+    "enrollment_count": 1132,
+    "instructor_PI": "'Joseph E. Gonzalez', 'Narges Norouzi'"
+}
+{
+    "title": "Foundations of Data Science",
+    "display_name": "2024 Spring DATA C8 001 LEC 001",
+    "department": "Data Science Undergrad Studies",
+    "enrollment_count": 1287,
+    "instructor_PI": "'Swupnil K Sahai', 'Muhammad R Khan'"
+}
+{
+    "title": "Introduction to Machine Learning",
+    "display_name": "2024 Spring COMPSCI 189 001 LEC 001",
+    "department": "Electrical Eng & Computer Sci",
+    "enrollment_count": 704,
+    "instructor_PI": "'Jonathan Shewchuk'"
+}
diff --git a/scripts/api.py b/scripts/api.py
@@ -0,0 +1,117 @@
+import json
+import logging
+import os
+import requests
+
+from helpers import extract_subject_area_and_number, construct_url
+
+BASE_URL = "https://gateway.api.berkeley.edu/uat/sis/v1/classes/sections?"
+
+# Seconds to wait on the API before abandoning a request, so a stalled
+# connection cannot hang the script indefinitely.
+REQUEST_TIMEOUT = 30
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
+
+def extract_info_from_response(response):
+    """
+    Extract summary fields for the first class section in an API response.
+    Args:
+        response (requests.Response): Response from the class sections endpoint.
+    Returns: dict: title, display_name, department, enrollment_count and instructor_PI.
+    Raises: ValueError: If the body is not the expected JSON or lists no class sections.
+    """
+    try:
+        data = response.json()['apiResponse']['response']
+        class_sections = data.get('classSections') or []
+    except (ValueError, KeyError, TypeError, AttributeError) as e:
+        raise ValueError("Unexpected API response format") from e
+
+    if not class_sections:
+        raise ValueError("API response contains no class sections")
+    class_section = class_sections[0]
+
+    # Extract title, department, display name, and enrollment count
+    title = class_section.get('class', {}).get('course', {}).get('title', 'N/A')
+    display_name = class_section.get('displayName', 'N/A')
+    department = class_section.get('academicOrganization', {}).get('description', 'N/A')
+    enrollment_count = class_section.get('enrollmentStatus', {}).get('enrolledCount', 0)
+
+    # Extract primary instructors names. An instructor assigned to several
+    # meetings is listed once; entries without a usable name are skipped.
+    pi_names = []
+    for meeting in class_section.get('meetings', []):
+        for instructor in meeting.get('assignedInstructors', []):
+            if instructor.get('role', {}).get('code') != 'PI':
+                continue
+            names = (instructor.get('instructor') or {}).get('names') or [{}]
+            formatted_name = names[0].get('formattedName')
+            if formatted_name and formatted_name not in pi_names:
+                pi_names.append(formatted_name)
+    instructor_PI = ', '.join(f"'{name}'" for name in pi_names)
+
+    return {
+        "title": title,
+        "display_name": display_name,
+        "department": department,
+        "enrollment_count": enrollment_count,
+        "instructor_PI": instructor_PI
+    }
+
+
+def fetch_course_info(term_id, class_name):
+    """
+    Fetch course information from the Berkeley API, trying both with and without 'C' in the catalog number if needed.
+    Args:
+        term_id (int): The term ID.
+        class_name (str): The class name.
+    Returns: dict: The extracted course information or an error message.
+    """
+    api_id = os.getenv('APP_ID')
+    api_key = os.getenv('APP_KEY')
+
+    headers = {
+        "app_id": api_id,
+        "app_key": api_key,
+    }
+    failure = {"error": f"Failed to retrieve data for the given class name: {class_name}"}
+
+    # Extract the subject area code and number part from the class name
+    try:
+        subject_area_code, number = extract_subject_area_and_number(class_name)
+    except (AttributeError, ValueError) as e:
+        # The helper cannot parse names without a subject (e.g. "" or "100")
+        logging.error(f"Could not parse class name {class_name!r}. Error: {str(e)}")
+        return failure
+
+    # Try the catalog number as given, then retry with a 'C' prefix
+    last_error = None
+    for catalog_number in (number, 'C' + number):
+        full_url = construct_url(term_id, subject_area_code, catalog_number, BASE_URL)
+        try:
+            response = requests.get(full_url, headers=headers, timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()  # Raise an exception for HTTP errors
+            return extract_info_from_response(response)
+        except (requests.exceptions.RequestException, ValueError) as e:
+            # Covers HTTP/network failures and unusable or empty payloads
+            last_error = e
+
+    logging.error(f"Failed to retrieve data. Error: {str(last_error)}")
+    return failure
+
+
+def get_course_information(term_id, class_name):
+    """
+    Fetch course information from the Berkeley API.
+    Args:
+        term_id (int): The term ID.
+        class_name (str): The class name.
+    Returns: str: JSON string with extracted information or error message.
+    """
+    # Fetch the course information from the API
+    result = fetch_course_info(term_id, class_name)
+
+    # Return the result as a JSON string
+    return json.dumps(result, indent=4)
diff --git a/scripts/course_info_fetcher.py b/scripts/course_info_fetcher.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+import argparse
+import logging
+import os
+import sys
+
+from api import get_course_information
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
+# Term used when the command line does not start with a term ID.
+DEFAULT_TERM_ID = 2232
+
+
+def main(argv=None):
+    """
+    Main function to handle command-line arguments and fetch course information.
+    Args:
+        argv (list, optional): Arguments to parse. Defaults to sys.argv[1:].
+    Returns: int: 0 on success, 1 if credentials are missing or the entered term ID is not an integer.
+    """
+    parser = argparse.ArgumentParser(
+        description="Fetch course information based on term ID and class name.",
+        usage="%(prog)s [-h] [term_id] [class_names ...]"
+    )
+
+    # Both values are collected in one positional list: argparse would
+    # otherwise hand the first class name to an optional integer term_id
+    # and reject calls such as `course_info_fetcher.py data100 data8`.
+    parser.add_argument('positionals',
+                        type=str,
+                        nargs='*',
+                        metavar='term_id/class_names',
+                        help="Optional term ID (e.g., 2232) followed by one or "
+                             "more class names (e.g., data8, compsci189).")
+
+    args = parser.parse_args(argv)
+
+    if not os.getenv('APP_ID') or not os.getenv('APP_KEY'):
+        logging.error("The environment variables APP_ID and APP_KEY must be set.")
+        return 1
+
+    # A leading all-digit argument is the term ID; class names are expected to contain letters
+    class_names = list(args.positionals)
+    term_id = DEFAULT_TERM_ID
+    if class_names and class_names[0].isdecimal():
+        term_id = int(class_names.pop(0))
+
+    if not class_names:
+        try:
+            term_id = int(input("Please enter the term ID: "))
+        except ValueError:
+            logging.error("The term ID must be an integer.")
+            return 1
+        entered = input("Please enter the class names (separated by commas): ").split(',')
+        # Drop blanks left by stray or trailing commas
+        class_names = [name.strip() for name in entered if name.strip()]
+
+    for class_name in class_names:
+        result = get_course_information(term_id, class_name)
+        print(result)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/example_query_courseid.txt b/scripts/example_query_courseid.txt
diff --git a/scripts/helpers.py b/scripts/helpers.py
@@ -0,0 +1,56 @@
+import re
+from urllib.parse import urlencode
+
+
+def has_digits(input_string):
+    """
+    Check if the input string contains any digits.
+    Args: input_string (str): The input string.
+    Returns: bool: True if the input string contains digits, False otherwise.
+    """
+    return any(char.isdigit() for char in input_string)
+
+
+def extract_subject_area_and_number(class_name):
+    """
+    Extract the subject area code and catalog number from the class name.
+    Args: class_name (str): The class name, e.g. "data100" or "compsci61a".
+    Returns: tuple: The upper-cased subject area code and the catalog number
+        (first run of digits plus any letter suffix, or "" without digits).
+    Raises: ValueError: If the class name contains no letters.
+    """
+    # Extract the subject area code from the class name
+    subject_match = re.search(r'([a-zA-Z]+)', class_name)
+    if subject_match is None:
+        raise ValueError(f"No subject area code found in class name: {class_name!r}")
+    subject_area_code = subject_match.group(1).upper()
+
+    # Extract the number part, keeping a trailing letter suffix ("61a" -> "61A")
+    # that joining only the digits would silently drop
+    number_match = re.search(r'\d+[a-zA-Z]*', class_name)
+    number = number_match.group(0).upper() if number_match else ''
+
+    return subject_area_code, number
+
+
+def construct_url(term_id, subject_area_code, catalog_number, base_url, page_number=1, page_size=100):
+    """
+    Construct URL parameters for the API request.
+    Args:
+        term_id (int): The term ID.
+        subject_area_code (str): The subject area code.
+        catalog_number (str): The catalog number.
+        base_url (str): The base URL for the API.
+        page_number (int, optional): The page number. Defaults to 1.
+        page_size (int, optional): The page size. Defaults to 100.
+    Returns: str: The constructed full URL for the API request.
+    """
+    # urlencode escapes reserved characters so a value cannot corrupt the query
+    url_params = urlencode({
+        "term-id": term_id,
+        "subject-area-code": subject_area_code,
+        "catalog-number": catalog_number,
+        "page-number": page_number,
+        "page-size": page_size,
+    })
+    return base_url + url_params