From 36d658f24c1c6125eb5d20bbf3617a80083bd102 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Mon, 8 Dec 2025 15:52:44 -0500 Subject: [PATCH 01/15] CI: Add debugging to the notebooks using nsightful ncu that are failing for mysterious reasons in CI. --- ...40__kernel_authoring__copy__SOLUTION.ipynb | 20 +++++++++++++++++-- ..._authoring__book_histogram__SOLUTION.ipynb | 20 +++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb b/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb index 4c16f24f..143d5cdc 100644 --- a/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb +++ b/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -299,6 +299,14 @@ "source": [ "import nsightful\n", "\n", + "# DEBUG: Print CSV info for CI debugging\n", + "print(f\"DEBUG: copy_blocked_csv type: {type(copy_blocked_csv)}\")\n", + "print(f\"DEBUG: copy_blocked_csv length: {len(copy_blocked_csv)}\")\n", + "if len(copy_blocked_csv) > 0:\n", + " print(f\"DEBUG: First 10 lines of copy_blocked_csv:\")\n", + " for i, line in enumerate(copy_blocked_csv[:10]):\n", + " print(f\" {i}: {repr(line)}\")\n", + "\n", "nsightful.display_ncu_csv_in_notebook(copy_blocked_csv)" ] }, @@ -473,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -607,6 +615,14 @@ } ], "source": [ + "# DEBUG: Print CSV info for CI debugging\n", + "print(f\"DEBUG: copy_optimized_csv type: {type(copy_optimized_csv)}\")\n", + "print(f\"DEBUG: copy_optimized_csv length: {len(copy_optimized_csv)}\")\n", + "if len(copy_optimized_csv) > 0:\n", + " print(f\"DEBUG: First 10 lines of copy_optimized_csv:\")\n", + " for i, line in enumerate(copy_optimized_csv[:10]):\n", + " print(f\" {i}: {repr(line)}\")\n", + "\n", "nsightful.display_ncu_csv_in_notebook(copy_optimized_csv)" ] } diff --git a/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb b/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb index 733d392e..838b55e8 100644 --- a/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb +++ b/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb @@ -281,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "ad12380e-253b-4410-ab34-9479411fdf81", "metadata": { "colab": { @@ -418,6 +418,14 @@ } ], "source": [ + "# DEBUG: Print CSV info for CI debugging\n", + "print(f\"DEBUG: histogram_global_csv type: {type(histogram_global_csv)}\")\n", + "print(f\"DEBUG: histogram_global_csv length: {len(histogram_global_csv)}\")\n", + "if len(histogram_global_csv) > 0:\n", + " print(f\"DEBUG: First 10 lines of histogram_global_csv:\")\n", + " for i, line in enumerate(histogram_global_csv[:10]):\n", + " print(f\" {i}: {repr(line)}\")\n", + "\n", "nsightful.display_ncu_csv_in_notebook(histogram_global_csv)" ] }, @@ -635,7 +643,7 @@ }, { "cell_type": "code", - 
"execution_count": 13, + "execution_count": null, "id": "114e8ff7-b6fb-42ad-abda-f6d53479c052", "metadata": { "colab": { @@ -772,6 +780,14 @@ } ], "source": [ + "# DEBUG: Print CSV info for CI debugging\n", + "print(f\"DEBUG: histogram_localized_csv type: {type(histogram_localized_csv)}\")\n", + "print(f\"DEBUG: histogram_localized_csv length: {len(histogram_localized_csv)}\")\n", + "if len(histogram_localized_csv) > 0:\n", + " print(f\"DEBUG: First 10 lines of histogram_localized_csv:\")\n", + " for i, line in enumerate(histogram_localized_csv[:10]):\n", + " print(f\" {i}: {repr(line)}\")\n", + "\n", "nsightful.display_ncu_csv_in_notebook(histogram_localized_csv)" ] }, From 8edf00e386f44268e709c9f62edd93d48ff000a8 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Mon, 8 Dec 2025 16:32:09 -0500 Subject: [PATCH 02/15] CI: Print out ncu command output. --- ...40__kernel_authoring__copy__SOLUTION.ipynb | 32 ++++++++++++++++--- ..._authoring__book_histogram__SOLUTION.ipynb | 32 ++++++++++++++++--- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb b/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb index 143d5cdc..84a8db90 100644 --- a/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb +++ b/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb @@ -125,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -146,7 +146,19 @@ } ], "source": [ - "!ncu -f --kernel-name regex:copy_blocked --set full -o copy_blocked python copy_blocked.py\n", + "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", + "ncu_profile_output = !ncu -f --kernel-name regex:copy_blocked --set full -o copy_blocked python copy_blocked.py\n", + "print(\"DEBUG: NCU profiling output:\")\n", + "for line in ncu_profile_output:\n", + " print(f\" {line}\")\n", + "\n", + "# Check if report file was created\n", + "import os\n", + "report_exists = os.path.exists(\"copy_blocked.ncu-rep\")\n", + "print(f\"DEBUG: copy_blocked.ncu-rep exists: {report_exists}\")\n", + "if report_exists:\n", + " print(f\"DEBUG: copy_blocked.ncu-rep size: {os.path.getsize('copy_blocked.ncu-rep')} bytes\")\n", + "\n", "copy_blocked_csv = !ncu --import copy_blocked.ncu-rep --csv" ] }, @@ -445,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -466,7 +478,19 @@ } ], "source": [ - "!ncu -f --kernel-name regex:copy_optimized --set full -o copy_optimized python copy_optimized.py\n", + "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", + "ncu_profile_output = !ncu -f --kernel-name regex:copy_optimized --set full -o copy_optimized python copy_optimized.py\n", + "print(\"DEBUG: NCU profiling output:\")\n", + "for line in ncu_profile_output:\n", + " print(f\" {line}\")\n", + "\n", + "# Check if report file was created\n", + "import os\n", + "report_exists = os.path.exists(\"copy_optimized.ncu-rep\")\n", + "print(f\"DEBUG: copy_optimized.ncu-rep exists: {report_exists}\")\n", + "if report_exists:\n", + " print(f\"DEBUG: copy_optimized.ncu-rep size: {os.path.getsize('copy_optimized.ncu-rep')} bytes\")\n", + "\n", "copy_optimized_csv = !ncu --import copy_optimized.ncu-rep 
--csv" ] }, diff --git a/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb b/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb index 838b55e8..e16a0f14 100644 --- a/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb +++ b/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb @@ -253,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "8dbd226c-66f2-43df-868a-6b024b1de24c", "metadata": { "colab": { @@ -275,7 +275,19 @@ } ], "source": [ - "!ncu -f --kernel-name regex:histogram_global --set full -o histogram_global python histogram_global.py\n", + "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", + "ncu_profile_output = !ncu -f --kernel-name regex:histogram_global --set full -o histogram_global python histogram_global.py\n", + "print(\"DEBUG: NCU profiling output:\")\n", + "for line in ncu_profile_output:\n", + " print(f\" {line}\")\n", + "\n", + "# Check if report file was created\n", + "import os\n", + "report_exists = os.path.exists(\"histogram_global.ncu-rep\")\n", + "print(f\"DEBUG: histogram_global.ncu-rep exists: {report_exists}\")\n", + "if report_exists:\n", + " print(f\"DEBUG: histogram_global.ncu-rep size: {os.path.getsize('histogram_global.ncu-rep')} bytes\")\n", + "\n", "histogram_global_csv = !ncu --import histogram_global.ncu-rep --csv" ] }, @@ -615,7 +627,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "d637b6b1-fb0b-4807-b70b-c80227c0fd6f", "metadata": { "colab": { @@ -637,7 +649,19 @@ } ], "source": [ - "!ncu -f --kernel-name regex:histogram_localized --set full -o histogram_localized python histogram_localized.py\n", + "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", + "ncu_profile_output = !ncu -f --kernel-name regex:histogram_localized --set full -o histogram_localized python histogram_localized.py\n", + "print(\"DEBUG: NCU profiling output:\")\n", + "for line in ncu_profile_output:\n", + " print(f\" {line}\")\n", + "\n", + "# Check if report file was created\n", + "import os\n", + "report_exists = os.path.exists(\"histogram_localized.ncu-rep\")\n", + "print(f\"DEBUG: histogram_localized.ncu-rep exists: {report_exists}\")\n", + "if report_exists:\n", + " print(f\"DEBUG: histogram_localized.ncu-rep size: {os.path.getsize('histogram_localized.ncu-rep')} bytes\")\n", + "\n", "histogram_localized_csv = !ncu --import histogram_localized.ncu-rep --csv" ] }, From bffb081b5ded4be335b345ff4b03a3913e1901d9 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Mon, 8 Dec 2025 23:17:32 -0500 Subject: [PATCH 03/15] CI: Print out all cell outputs if a notebook fails. 
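The new extract_cell_outputs helper walks every executed code cell and
collects its stream and error output. For illustration, a minimal sketch of
what the helper reports, built around a hypothetical cell constructed with
nbformat (already a dependency of the test harness); the sample source and
output below are made up:

    import nbformat.v4 as nbf

    nb = nbf.new_notebook()
    cell = nbf.new_code_cell("print('hi')")
    # Attach a fake stream output, as if the cell had been executed.
    cell.outputs = [nbf.new_output("stream", name="stdout", text="hi\n")]
    nb.cells.append(cell)

    print(extract_cell_outputs(nb))
    # --- Cell 0: print('hi')... ---
    # [stdout] hi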
--- .../accelerated-python/test/test_notebooks.py | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tutorials/accelerated-python/test/test_notebooks.py b/tutorials/accelerated-python/test/test_notebooks.py index 9ef7b510..7ea9f26a 100644 --- a/tutorials/accelerated-python/test/test_notebooks.py +++ b/tutorials/accelerated-python/test/test_notebooks.py @@ -18,6 +18,28 @@ notebook_ids = [nb.relative_to(NOTEBOOKS_DIR).as_posix() for nb in solution_notebooks] +def extract_cell_outputs(nb): + """Extract stdout/stderr from all executed cells for debugging.""" + outputs = [] + for i, cell in enumerate(nb.cells): + if cell.cell_type != 'code': + continue + cell_outputs = [] + for output in cell.get('outputs', []): + if output.get('output_type') == 'stream': + stream_name = output.get('name', 'stdout') + text = output.get('text', '') + cell_outputs.append(f"[{stream_name}] {text}") + elif output.get('output_type') == 'error': + ename = output.get('ename', 'Error') + evalue = output.get('evalue', '') + cell_outputs.append(f"[error] {ename}: {evalue}") + if cell_outputs: + source_preview = cell.source[:100].replace('\n', ' ') + outputs.append(f"--- Cell {i}: {source_preview}... ---\n" + ''.join(cell_outputs)) + return '\n'.join(outputs) + + @pytest.mark.parametrize('notebook_path', solution_notebooks, ids=notebook_ids) def test_solution_notebook_executes(notebook_path): """ @@ -49,8 +71,10 @@ def test_solution_notebook_executes(notebook_path): client.execute() except CellExecutionError as e: # Provide detailed error information - # CellExecutionError stores the error message in str(e) - pytest.fail(f"Notebook execution failed:\n{str(e)}") + # Include output from ALL cells, not just the failing one + all_outputs = extract_cell_outputs(nb) + pytest.fail(f"Notebook execution failed:\n{str(e)}\n\n=== ALL CELL OUTPUTS ===\n{all_outputs}") except Exception as e: # Catch any other execution errors - pytest.fail(f"Notebook execution failed: {str(e)}") + all_outputs = extract_cell_outputs(nb) + pytest.fail(f"Notebook execution failed: {str(e)}\n\n=== ALL CELL OUTPUTS ===\n{all_outputs}") From aaf7114506f521f651f709089d6ef914f690fd35 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Mon, 8 Dec 2025 23:32:32 -0500 Subject: [PATCH 04/15] CI: Try to fix Docker caching and add caching diagnostics. --- .../workflows/build-brev-tutorial-docker-images.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-brev-tutorial-docker-images.yml b/.github/workflows/build-brev-tutorial-docker-images.yml index 9686b734..1d24fb07 100644 --- a/.github/workflows/build-brev-tutorial-docker-images.yml +++ b/.github/workflows/build-brev-tutorial-docker-images.yml @@ -181,15 +181,22 @@ jobs: command: | cd ${{ matrix.tutorial }}/brev + # Debug: Show buildx version and check for cache images + docker buildx version + echo "Checking for existing cache images..." 
+          docker manifest inspect "${IMAGE_NAME}:buildcache-${DOCKER_TAG_BRANCH}" > /dev/null 2>&1 && echo "✓ Branch cache exists" || echo "✗ Branch cache not found"
+          docker manifest inspect "${IMAGE_NAME}:buildcache-main" > /dev/null 2>&1 && echo "✓ Main cache exists" || echo "✗ Main cache not found"
+
           docker buildx bake \
+            --progress=plain \
             --allow=fs.read=/home/runner \
             --set "base.output=type=registry" \
             --set "base.tags=${IMAGE_NAME}:${DOCKER_TAG_BRANCH}-latest" \
             --set "base.tags=${IMAGE_NAME}:${DOCKER_TAG_BRANCH}-git-${GIT_SHORT_SHA}" \
             $([ "${GIT_BRANCH_NAME}" = "main" ] && echo "--set base.tags=${IMAGE_NAME}:latest") \
-            --set "base.cache-from=type=registry,ref=${IMAGE_NAME}:buildcache-${DOCKER_TAG_BRANCH},oci-mediatypes=true" \
-            --set "base.cache-from=type=registry,ref=${IMAGE_NAME}:buildcache-main,oci-mediatypes=true" \
-            --set "base.cache-to=type=registry,ref=${IMAGE_NAME}:buildcache-${DOCKER_TAG_BRANCH},mode=max,oci-mediatypes=true,compression=zstd,compression-level=3" \
+            --set "base.cache-from=type=registry,ref=${IMAGE_NAME}:buildcache-${DOCKER_TAG_BRANCH}" \
+            --set "base.cache-from=type=registry,ref=${IMAGE_NAME}:buildcache-main" \
+            --set "base.cache-to=type=registry,ref=${IMAGE_NAME}:buildcache-${DOCKER_TAG_BRANCH},mode=max,image-manifest=true,compression=zstd,compression-level=3" \
             --set "base.platform=linux/amd64" \
             -f docker-compose.yml \
             base

From 23e6483316ba96b73cbed755cdb560dcc79e6218 Mon Sep 17 00:00:00 2001
From: Bryce Adelstein Lelbach aka wash
Date: Mon, 8 Dec 2025 23:47:48 -0500
Subject: [PATCH 05/15] CI: DCGM strikes again; it appears to be running on
 the nv-gha-runners.

---
 .../workflows/test-brev-tutorial-docker-images.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml
index a4f0c86a..750317ff 100644
--- a/.github/workflows/test-brev-tutorial-docker-images.yml
+++ b/.github/workflows/test-brev-tutorial-docker-images.yml
@@ -63,6 +63,20 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}

+      - name: Stop DCGM to allow NCU profiling
+        run: |
+          # DCGM (Data Center GPU Manager) locks the GPU and prevents NCU from profiling.
+          # Stop it before running the container tests.
+          echo "Stopping DCGM services..."
+          sudo systemctl stop nvidia-dcgm || echo "nvidia-dcgm service not found or already stopped"
+          sudo systemctl stop dcgm || echo "dcgm service not found or already stopped"
+          # Also try nv-hostengine which DCGM uses
+          sudo systemctl stop nv-hostengine || echo "nv-hostengine service not found or already stopped"
+          # Kill any remaining dcgm processes
+          sudo pkill -9 nv-hostengine || echo "No nv-hostengine processes found"
+          sudo pkill -9 dcgm || echo "No dcgm processes found"
+          echo "DCGM services stopped."
+
       - name: Test Docker Compose
         id: test
         run: |

From a69ae0974bed3b43af5fee72e86a176e44148167 Mon Sep 17 00:00:00 2001
From: Bryce Adelstein Lelbach aka wash
Date: Tue, 9 Dec 2025 11:01:05 -0500
Subject: [PATCH 06/15] CI: Clean up CI logs by disabling Docker progress
 output, and add more debugging output to get to the bottom of why NCU can't
 run on the NV GHA T4 runners.
--- .../build-brev-tutorial-docker-images.yml | 1 + .github/workflows/mirror-base-images.yml | 8 +- .../test-brev-tutorial-docker-images.yml | 107 ++++++++++++++++++ brev/test-docker-compose.bash | 2 +- 4 files changed, 114 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-brev-tutorial-docker-images.yml b/.github/workflows/build-brev-tutorial-docker-images.yml index 1d24fb07..ff462f80 100644 --- a/.github/workflows/build-brev-tutorial-docker-images.yml +++ b/.github/workflows/build-brev-tutorial-docker-images.yml @@ -174,6 +174,7 @@ jobs: uses: nick-fields/retry@v3 env: DOCKER_BUILDKIT: 1 + BUILDKIT_PROGRESS: plain with: timeout_minutes: 60 max_attempts: 3 diff --git a/.github/workflows/mirror-base-images.yml b/.github/workflows/mirror-base-images.yml index 5bde54cf..2e127b4b 100644 --- a/.github/workflows/mirror-base-images.yml +++ b/.github/workflows/mirror-base-images.yml @@ -59,9 +59,11 @@ jobs: - name: Pull source image from Docker Hub if: steps.check_image.outputs.exists != 'true' || github.event.inputs.force_pull == 'true' + env: + BUILDKIT_PROGRESS: plain run: | echo "Pulling ${{ matrix.image.source }} from Docker Hub..." - docker pull ${{ matrix.image.source }} + docker pull --quiet ${{ matrix.image.source }} - name: Tag image for GHCR if: steps.check_image.outputs.exists != 'true' || github.event.inputs.force_pull == 'true' @@ -72,7 +74,7 @@ jobs: if: steps.check_image.outputs.exists != 'true' || github.event.inputs.force_pull == 'true' run: | echo "Pushing ${TARGET_IMAGE} to GHCR..." - docker push ${TARGET_IMAGE} + docker push --quiet ${TARGET_IMAGE} echo "✓ Successfully mirrored ${{ matrix.image.source }}" - name: Skipped (image exists) @@ -84,6 +86,6 @@ jobs: - name: Verify mirrored image if: steps.check_image.outputs.exists != 'true' || github.event.inputs.force_pull == 'true' run: | - docker pull ${TARGET_IMAGE} + docker pull --quiet ${TARGET_IMAGE} docker images ${TARGET_IMAGE} echo "✓ Image verified successfully" diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 750317ff..3867086c 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -25,6 +25,9 @@ jobs: working-directory: ${{ github.workspace }} permissions: statuses: write + env: + BUILDKIT_PROGRESS: plain + DOCKER_CLI_HINTS: false steps: - name: Show runner info run: | @@ -77,6 +80,110 @@ jobs: sudo pkill -9 dcgm || echo "No dcgm processes found" echo "DCGM services stopped." 
+ - name: Debug GPU and NCU configuration + run: | + echo "=== GPU Information ===" + nvidia-smi || echo "nvidia-smi failed" + echo "" + + echo "=== NVIDIA Driver Version ===" + cat /proc/driver/nvidia/version 2>/dev/null || echo "Could not read driver version" + echo "" + + echo "=== GPU Processes ===" + nvidia-smi pmon -c 1 2>/dev/null || echo "nvidia-smi pmon failed" + echo "" + + echo "=== DCGM/NCU Blocking Processes ===" + ps aux | grep -E "(dcgm|nv-hostengine|ncu)" | grep -v grep || echo "No DCGM/NCU processes found" + echo "" + + echo "=== Systemd Services (nvidia/dcgm related) ===" + systemctl list-units --type=service | grep -iE "(nvidia|dcgm|gpu)" || echo "No matching services" + echo "" + + echo "=== NCU on Host ===" + which ncu 2>/dev/null && ncu --version 2>/dev/null || echo "NCU not found on host" + echo "" + + echo "=== Profiling Permissions ===" + cat /proc/sys/kernel/perf_event_paranoid 2>/dev/null || echo "Could not read perf_event_paranoid" + cat /proc/sys/kernel/kptr_restrict 2>/dev/null || echo "Could not read kptr_restrict" + echo "" + + echo "=== NVIDIA Kernel Modules ===" + lsmod | grep nvidia || echo "No nvidia modules loaded" + echo "" + + echo "=== /dev/nvidia* devices ===" + ls -la /dev/nvidia* 2>/dev/null || echo "No /dev/nvidia* devices found" + echo "" + + echo "=== Docker GPU Access Test ===" + docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi 2>&1 || echo "Docker GPU test failed" + echo "" + + - name: Test NCU inside container + run: | + echo "=== Testing NCU profiling inside container ===" + # Pull the tutorial image and test NCU directly + COMPOSE_FILE="artifacts/commit-specific/${{ inputs.tutorial }}/brev/docker-compose.yml" + + # Extract the image name from the compose file + IMAGE=$(grep -A5 "base:" "$COMPOSE_FILE" | grep "image:" | head -1 | awk '{print $2}') + echo "Testing with image: $IMAGE" + + # Create a simple test script + cat > /tmp/test_ncu.py << 'EOF' + from numba import cuda + import numpy as np + + @cuda.jit + def simple_add(a, b, c): + i = cuda.grid(1) + if i < a.size: + c[i] = a[i] + b[i] + + n = 1024 + a = np.ones(n, dtype=np.float32) + b = np.ones(n, dtype=np.float32) + c = np.zeros(n, dtype=np.float32) + + d_a = cuda.to_device(a) + d_b = cuda.to_device(b) + d_c = cuda.to_device(c) + + simple_add[4, 256](d_a, d_b, d_c) + cuda.synchronize() + print("Kernel executed successfully") + EOF + + # Test NCU inside the container + echo "--- Running NCU profiling test ---" + docker run --rm --gpus all \ + -v /tmp/test_ncu.py:/test_ncu.py:ro \ + "$IMAGE" \ + bash -c ' + echo "NCU version:" + ncu --version || echo "NCU not found" + echo "" + echo "Running simple kernel without profiling:" + python /test_ncu.py + echo "" + echo "Running NCU profiling:" + ncu --set full -o /tmp/test_profile python /test_ncu.py 2>&1 + NCU_EXIT=$? + echo "NCU exit code: $NCU_EXIT" + echo "" + echo "Checking if profile was created:" + ls -la /tmp/test_profile.ncu-rep 2>&1 || echo "Profile file not created" + echo "" + if [ -f /tmp/test_profile.ncu-rep ]; then + echo "Profile file exists, testing CSV export:" + ncu --import /tmp/test_profile.ncu-rep --csv 2>&1 | head -20 + fi + ' 2>&1 || echo "Container NCU test failed" + - name: Test Docker Compose id: test run: | diff --git a/brev/test-docker-compose.bash b/brev/test-docker-compose.bash index 16417f1a..3a7fd34d 100755 --- a/brev/test-docker-compose.bash +++ b/brev/test-docker-compose.bash @@ -115,7 +115,7 @@ export ACH_RUN_TESTS=1 # Start container echo "📦 Starting containers..." 
echo "" -if docker compose -f "${COMPOSE_FILE}" up -d; then +if docker compose -f "${COMPOSE_FILE}" up -d --quiet-pull; then echo "" echo -e "${GREEN}✅ Containers started successfully${NC}" echo "" From 10d56056276d7111da6313ec3934cdbb17d7b100 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 11:17:06 -0500 Subject: [PATCH 07/15] CI: Use the buildkitd caching config setup on NVIDIA runners. --- .github/workflows/build-brev-tutorial-docker-images.yml | 8 +++++--- .github/workflows/mirror-base-images.yml | 2 +- .github/workflows/test-brev-tutorial-docker-images.yml | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-brev-tutorial-docker-images.yml b/.github/workflows/build-brev-tutorial-docker-images.yml index ff462f80..441688c1 100644 --- a/.github/workflows/build-brev-tutorial-docker-images.yml +++ b/.github/workflows/build-brev-tutorial-docker-images.yml @@ -162,6 +162,8 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + with: + buildkitd-config: /etc/buildkit/buildkitd.toml - name: Log in to GitHub Container Registry uses: docker/login-action@v3 @@ -174,7 +176,7 @@ jobs: uses: nick-fields/retry@v3 env: DOCKER_BUILDKIT: 1 - BUILDKIT_PROGRESS: plain + BUILDKIT_PROGRESS: quiet with: timeout_minutes: 60 max_attempts: 3 @@ -182,14 +184,14 @@ jobs: command: | cd ${{ matrix.tutorial }}/brev - # Debug: Show buildx version and check for cache images + # Show buildx version and check for cache images docker buildx version echo "Checking for existing cache images..." docker manifest inspect "${IMAGE_NAME}:buildcache-${DOCKER_TAG_BRANCH}" > /dev/null 2>&1 && echo "✓ Branch cache exists" || echo "✗ Branch cache not found" docker manifest inspect "${IMAGE_NAME}:buildcache-main" > /dev/null 2>&1 && echo "✓ Main cache exists" || echo "✗ Main cache not found" docker buildx bake \ - --progress=plain \ + --progress=quiet \ --allow=fs.read=/home/runner \ --set "base.output=type=registry" \ --set "base.tags=${IMAGE_NAME}:${DOCKER_TAG_BRANCH}-latest" \ diff --git a/.github/workflows/mirror-base-images.yml b/.github/workflows/mirror-base-images.yml index 2e127b4b..ec3ce8d7 100644 --- a/.github/workflows/mirror-base-images.yml +++ b/.github/workflows/mirror-base-images.yml @@ -60,7 +60,7 @@ jobs: - name: Pull source image from Docker Hub if: steps.check_image.outputs.exists != 'true' || github.event.inputs.force_pull == 'true' env: - BUILDKIT_PROGRESS: plain + BUILDKIT_PROGRESS: quiet run: | echo "Pulling ${{ matrix.image.source }} from Docker Hub..." docker pull --quiet ${{ matrix.image.source }} diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 3867086c..01685492 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -26,7 +26,7 @@ jobs: permissions: statuses: write env: - BUILDKIT_PROGRESS: plain + BUILDKIT_PROGRESS: quiet DOCKER_CLI_HINTS: false steps: - name: Show runner info From b3a8c1eea540e17f01f7a5bc28d34d0565118ee3 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 11:20:26 -0500 Subject: [PATCH 08/15] CI: Fix an issue in the NCU test image extraction. 
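The compose file declares the image with a YAML anchor, as in
"image: &image ghcr.io/...", so the old awk pipeline captured the literal
"&image" token rather than the registry URL. As a design note, here is a
sketch of an alternative that avoids the text munging by letting a YAML
parser resolve the anchor; it assumes PyYAML is available on the runner and
that the compose file defines a services.base.image key:

    import yaml  # assumption: PyYAML is installed on the runner

    with open("docker-compose.yml") as f:
        compose = yaml.safe_load(f)

    # safe_load resolves the &image anchor, so this prints the actual
    # registry URL rather than the anchor reference.
    print(compose["services"]["base"]["image"])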
--- .github/workflows/test-brev-tutorial-docker-images.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 01685492..06955878 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -130,7 +130,13 @@ jobs: COMPOSE_FILE="artifacts/commit-specific/${{ inputs.tutorial }}/brev/docker-compose.yml" # Extract the image name from the compose file - IMAGE=$(grep -A5 "base:" "$COMPOSE_FILE" | grep "image:" | head -1 | awk '{print $2}') + # The image is defined with a YAML anchor like: image: &image ghcr.io/... + # We need to extract the actual URL, not the anchor reference + IMAGE=$(grep "image: &image" "$COMPOSE_FILE" | sed 's/.*image: &image //') + if [ -z "$IMAGE" ]; then + # Fallback: try to find any ghcr.io image reference + IMAGE=$(grep -o 'ghcr.io/[^"]*' "$COMPOSE_FILE" | head -1) + fi echo "Testing with image: $IMAGE" # Create a simple test script From 31d5a43c9d82a82ea263e9660bb51135c27ec388 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 11:37:47 -0500 Subject: [PATCH 09/15] CI: Go back to plain Docker progress because quiet shows nothing. --- .github/workflows/build-brev-tutorial-docker-images.yml | 4 ++-- .github/workflows/test-brev-tutorial-docker-images.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-brev-tutorial-docker-images.yml b/.github/workflows/build-brev-tutorial-docker-images.yml index 441688c1..274cc059 100644 --- a/.github/workflows/build-brev-tutorial-docker-images.yml +++ b/.github/workflows/build-brev-tutorial-docker-images.yml @@ -176,7 +176,7 @@ jobs: uses: nick-fields/retry@v3 env: DOCKER_BUILDKIT: 1 - BUILDKIT_PROGRESS: quiet + BUILDKIT_PROGRESS: plain with: timeout_minutes: 60 max_attempts: 3 @@ -191,7 +191,7 @@ jobs: docker manifest inspect "${IMAGE_NAME}:buildcache-main" > /dev/null 2>&1 && echo "✓ Main cache exists" || echo "✗ Main cache not found" docker buildx bake \ - --progress=quiet \ + --progress=plain \ --allow=fs.read=/home/runner \ --set "base.output=type=registry" \ --set "base.tags=${IMAGE_NAME}:${DOCKER_TAG_BRANCH}-latest" \ diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 06955878..406f1a73 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -26,7 +26,7 @@ jobs: permissions: statuses: write env: - BUILDKIT_PROGRESS: quiet + BUILDKIT_PROGRESS: plain DOCKER_CLI_HINTS: false steps: - name: Show runner info From cd21d3d194d0d7f93b6385c36f7f7f54e54195e4 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 11:49:53 -0500 Subject: [PATCH 10/15] CI: Properly kill DCGM and fix permissions on NV GHA runners. --- .../test-brev-tutorial-docker-images.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 406f1a73..8aabeda0 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -71,14 +71,28 @@ jobs: # DCGM (Data Center GPU Manager) locks the GPU and prevents NCU from profiling. # Stop it before running the container tests. 
echo "Stopping DCGM services..." + + # Stop the dcgm-exporter Docker container (this is how it runs on GitHub runners) + echo "Stopping dcgm-exporter Docker container..." + docker stop dcgm-exporter 2>/dev/null && echo "Stopped dcgm-exporter container" || echo "dcgm-exporter container not running" + docker rm -f dcgm-exporter 2>/dev/null || echo "dcgm-exporter container already removed" + + # Stop systemd services + sudo systemctl stop dcgm-exporter || echo "dcgm-exporter service not found or already stopped" sudo systemctl stop nvidia-dcgm || echo "nvidia-dcgm service not found or already stopped" sudo systemctl stop dcgm || echo "dcgm service not found or already stopped" - # Also try nv-hostengine which DCGM uses sudo systemctl stop nv-hostengine || echo "nv-hostengine service not found or already stopped" + # Kill any remaining dcgm processes sudo pkill -9 nv-hostengine || echo "No nv-hostengine processes found" sudo pkill -9 dcgm || echo "No dcgm processes found" - echo "DCGM services stopped." + + # Relax profiling permissions (perf_event_paranoid=4 is very restrictive) + echo "Relaxing profiling permissions..." + sudo sysctl -w kernel.perf_event_paranoid=2 || echo "Could not set perf_event_paranoid" + sudo sysctl -w kernel.kptr_restrict=0 || echo "Could not set kptr_restrict" + + echo "DCGM services stopped and profiling permissions relaxed." - name: Debug GPU and NCU configuration run: | From c91bcf2ceaf4c64ed21aa761138e27bb7b7c8cd4 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 12:27:32 -0500 Subject: [PATCH 11/15] CI: Disable NCU sanity test that is improperly implemented. --- .../test-brev-tutorial-docker-images.yml | 77 +------------------ 1 file changed, 1 insertion(+), 76 deletions(-) diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 8aabeda0..8da78a86 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -72,7 +72,7 @@ jobs: # Stop it before running the container tests. echo "Stopping DCGM services..." - # Stop the dcgm-exporter Docker container (this is how it runs on GitHub runners) + # Stop the dcgm-exporter Docker container echo "Stopping dcgm-exporter Docker container..." 
docker stop dcgm-exporter 2>/dev/null && echo "Stopped dcgm-exporter container" || echo "dcgm-exporter container not running" docker rm -f dcgm-exporter 2>/dev/null || echo "dcgm-exporter container already removed" @@ -116,10 +116,6 @@ jobs: systemctl list-units --type=service | grep -iE "(nvidia|dcgm|gpu)" || echo "No matching services" echo "" - echo "=== NCU on Host ===" - which ncu 2>/dev/null && ncu --version 2>/dev/null || echo "NCU not found on host" - echo "" - echo "=== Profiling Permissions ===" cat /proc/sys/kernel/perf_event_paranoid 2>/dev/null || echo "Could not read perf_event_paranoid" cat /proc/sys/kernel/kptr_restrict 2>/dev/null || echo "Could not read kptr_restrict" @@ -133,77 +129,6 @@ jobs: ls -la /dev/nvidia* 2>/dev/null || echo "No /dev/nvidia* devices found" echo "" - echo "=== Docker GPU Access Test ===" - docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu24.04 nvidia-smi 2>&1 || echo "Docker GPU test failed" - echo "" - - - name: Test NCU inside container - run: | - echo "=== Testing NCU profiling inside container ===" - # Pull the tutorial image and test NCU directly - COMPOSE_FILE="artifacts/commit-specific/${{ inputs.tutorial }}/brev/docker-compose.yml" - - # Extract the image name from the compose file - # The image is defined with a YAML anchor like: image: &image ghcr.io/... - # We need to extract the actual URL, not the anchor reference - IMAGE=$(grep "image: &image" "$COMPOSE_FILE" | sed 's/.*image: &image //') - if [ -z "$IMAGE" ]; then - # Fallback: try to find any ghcr.io image reference - IMAGE=$(grep -o 'ghcr.io/[^"]*' "$COMPOSE_FILE" | head -1) - fi - echo "Testing with image: $IMAGE" - - # Create a simple test script - cat > /tmp/test_ncu.py << 'EOF' - from numba import cuda - import numpy as np - - @cuda.jit - def simple_add(a, b, c): - i = cuda.grid(1) - if i < a.size: - c[i] = a[i] + b[i] - - n = 1024 - a = np.ones(n, dtype=np.float32) - b = np.ones(n, dtype=np.float32) - c = np.zeros(n, dtype=np.float32) - - d_a = cuda.to_device(a) - d_b = cuda.to_device(b) - d_c = cuda.to_device(c) - - simple_add[4, 256](d_a, d_b, d_c) - cuda.synchronize() - print("Kernel executed successfully") - EOF - - # Test NCU inside the container - echo "--- Running NCU profiling test ---" - docker run --rm --gpus all \ - -v /tmp/test_ncu.py:/test_ncu.py:ro \ - "$IMAGE" \ - bash -c ' - echo "NCU version:" - ncu --version || echo "NCU not found" - echo "" - echo "Running simple kernel without profiling:" - python /test_ncu.py - echo "" - echo "Running NCU profiling:" - ncu --set full -o /tmp/test_profile python /test_ncu.py 2>&1 - NCU_EXIT=$? - echo "NCU exit code: $NCU_EXIT" - echo "" - echo "Checking if profile was created:" - ls -la /tmp/test_profile.ncu-rep 2>&1 || echo "Profile file not created" - echo "" - if [ -f /tmp/test_profile.ncu-rep ]; then - echo "Profile file exists, testing CSV export:" - ncu --import /tmp/test_profile.ncu-rep --csv 2>&1 | head -20 - fi - ' 2>&1 || echo "Container NCU test failed" - - name: Test Docker Compose id: test run: | From b19a0b92e3bbdb13cd0842c1a40173b972c80481 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 13:51:02 -0500 Subject: [PATCH 12/15] CI: Remove debugging output from notebooks. 
--- ...40__kernel_authoring__copy__SOLUTION.ipynb | 52 +++---------------- ..._authoring__book_histogram__SOLUTION.ipynb | 52 +++---------------- 2 files changed, 12 insertions(+), 92 deletions(-) diff --git a/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb b/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb index 84a8db90..4c16f24f 100644 --- a/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb +++ b/tutorials/accelerated-python/notebooks/kernels/solutions/40__kernel_authoring__copy__SOLUTION.ipynb @@ -125,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -146,19 +146,7 @@ } ], "source": [ - "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", - "ncu_profile_output = !ncu -f --kernel-name regex:copy_blocked --set full -o copy_blocked python copy_blocked.py\n", - "print(\"DEBUG: NCU profiling output:\")\n", - "for line in ncu_profile_output:\n", - " print(f\" {line}\")\n", - "\n", - "# Check if report file was created\n", - "import os\n", - "report_exists = os.path.exists(\"copy_blocked.ncu-rep\")\n", - "print(f\"DEBUG: copy_blocked.ncu-rep exists: {report_exists}\")\n", - "if report_exists:\n", - " print(f\"DEBUG: copy_blocked.ncu-rep size: {os.path.getsize('copy_blocked.ncu-rep')} bytes\")\n", - "\n", + "!ncu -f --kernel-name regex:copy_blocked --set full -o copy_blocked python copy_blocked.py\n", "copy_blocked_csv = !ncu --import copy_blocked.ncu-rep --csv" ] }, @@ -175,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -311,14 +299,6 @@ "source": [ "import nsightful\n", "\n", - "# DEBUG: Print CSV info for CI debugging\n", - "print(f\"DEBUG: copy_blocked_csv type: {type(copy_blocked_csv)}\")\n", - "print(f\"DEBUG: copy_blocked_csv length: {len(copy_blocked_csv)}\")\n", - "if len(copy_blocked_csv) > 0:\n", - " print(f\"DEBUG: First 10 lines of copy_blocked_csv:\")\n", - " for i, line in enumerate(copy_blocked_csv[:10]):\n", - " print(f\" {i}: {repr(line)}\")\n", - "\n", "nsightful.display_ncu_csv_in_notebook(copy_blocked_csv)" ] }, @@ -457,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -478,19 +458,7 @@ } ], "source": [ - "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", - "ncu_profile_output = !ncu -f --kernel-name regex:copy_optimized --set full -o copy_optimized python copy_optimized.py\n", - "print(\"DEBUG: NCU profiling output:\")\n", - "for line in ncu_profile_output:\n", - " print(f\" {line}\")\n", - "\n", - "# Check if report file was created\n", - "import os\n", - "report_exists = os.path.exists(\"copy_optimized.ncu-rep\")\n", - "print(f\"DEBUG: copy_optimized.ncu-rep exists: {report_exists}\")\n", - "if report_exists:\n", - " print(f\"DEBUG: copy_optimized.ncu-rep size: {os.path.getsize('copy_optimized.ncu-rep')} bytes\")\n", - "\n", + "!ncu -f --kernel-name regex:copy_optimized --set full -o copy_optimized python copy_optimized.py\n", "copy_optimized_csv = !ncu --import copy_optimized.ncu-rep --csv" ] }, @@ -505,7 +473,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -639,14 +607,6 @@ } 
], "source": [ - "# DEBUG: Print CSV info for CI debugging\n", - "print(f\"DEBUG: copy_optimized_csv type: {type(copy_optimized_csv)}\")\n", - "print(f\"DEBUG: copy_optimized_csv length: {len(copy_optimized_csv)}\")\n", - "if len(copy_optimized_csv) > 0:\n", - " print(f\"DEBUG: First 10 lines of copy_optimized_csv:\")\n", - " for i, line in enumerate(copy_optimized_csv[:10]):\n", - " print(f\" {i}: {repr(line)}\")\n", - "\n", "nsightful.display_ncu_csv_in_notebook(copy_optimized_csv)" ] } diff --git a/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb b/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb index e16a0f14..733d392e 100644 --- a/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb +++ b/tutorials/accelerated-python/notebooks/kernels/solutions/41__kernel_authoring__book_histogram__SOLUTION.ipynb @@ -253,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "8dbd226c-66f2-43df-868a-6b024b1de24c", "metadata": { "colab": { @@ -275,25 +275,13 @@ } ], "source": [ - "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", - "ncu_profile_output = !ncu -f --kernel-name regex:histogram_global --set full -o histogram_global python histogram_global.py\n", - "print(\"DEBUG: NCU profiling output:\")\n", - "for line in ncu_profile_output:\n", - " print(f\" {line}\")\n", - "\n", - "# Check if report file was created\n", - "import os\n", - "report_exists = os.path.exists(\"histogram_global.ncu-rep\")\n", - "print(f\"DEBUG: histogram_global.ncu-rep exists: {report_exists}\")\n", - "if report_exists:\n", - " print(f\"DEBUG: histogram_global.ncu-rep size: {os.path.getsize('histogram_global.ncu-rep')} bytes\")\n", - "\n", + "!ncu -f --kernel-name regex:histogram_global --set full -o histogram_global python histogram_global.py\n", "histogram_global_csv = !ncu --import histogram_global.ncu-rep --csv" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "ad12380e-253b-4410-ab34-9479411fdf81", "metadata": { "colab": { @@ -430,14 +418,6 @@ } ], "source": [ - "# DEBUG: Print CSV info for CI debugging\n", - "print(f\"DEBUG: histogram_global_csv type: {type(histogram_global_csv)}\")\n", - "print(f\"DEBUG: histogram_global_csv length: {len(histogram_global_csv)}\")\n", - "if len(histogram_global_csv) > 0:\n", - " print(f\"DEBUG: First 10 lines of histogram_global_csv:\")\n", - " for i, line in enumerate(histogram_global_csv[:10]):\n", - " print(f\" {i}: {repr(line)}\")\n", - "\n", "nsightful.display_ncu_csv_in_notebook(histogram_global_csv)" ] }, @@ -627,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "d637b6b1-fb0b-4807-b70b-c80227c0fd6f", "metadata": { "colab": { @@ -649,25 +629,13 @@ } ], "source": [ - "# DEBUG: Capture NCU profiling output to diagnose CI failures\n", - "ncu_profile_output = !ncu -f --kernel-name regex:histogram_localized --set full -o histogram_localized python histogram_localized.py\n", - "print(\"DEBUG: NCU profiling output:\")\n", - "for line in ncu_profile_output:\n", - " print(f\" {line}\")\n", - "\n", - "# Check if report file was created\n", - "import os\n", - "report_exists = os.path.exists(\"histogram_localized.ncu-rep\")\n", - "print(f\"DEBUG: histogram_localized.ncu-rep exists: {report_exists}\")\n", - "if report_exists:\n", - " print(f\"DEBUG: histogram_localized.ncu-rep 
size: {os.path.getsize('histogram_localized.ncu-rep')} bytes\")\n", - "\n", + "!ncu -f --kernel-name regex:histogram_localized --set full -o histogram_localized python histogram_localized.py\n", "histogram_localized_csv = !ncu --import histogram_localized.ncu-rep --csv" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "114e8ff7-b6fb-42ad-abda-f6d53479c052", "metadata": { "colab": { @@ -804,14 +772,6 @@ } ], "source": [ - "# DEBUG: Print CSV info for CI debugging\n", - "print(f\"DEBUG: histogram_localized_csv type: {type(histogram_localized_csv)}\")\n", - "print(f\"DEBUG: histogram_localized_csv length: {len(histogram_localized_csv)}\")\n", - "if len(histogram_localized_csv) > 0:\n", - " print(f\"DEBUG: First 10 lines of histogram_localized_csv:\")\n", - " for i, line in enumerate(histogram_localized_csv[:10]):\n", - " print(f\" {i}: {repr(line)}\")\n", - "\n", "nsightful.display_ncu_csv_in_notebook(histogram_localized_csv)" ] }, From 5b280499e3295563568b8ea20405ceb37e6d5761 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 15:17:43 -0500 Subject: [PATCH 13/15] CI: Switch to L4 runners to see if that helps with timeouts and flakiness. --- .github/workflows/test-brev-tutorial-docker-images.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 8da78a86..53611d93 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -19,7 +19,7 @@ on: jobs: test-tutorial: name: test-tutorial (${{ inputs.tutorial }}) - runs-on: linux-amd64-gpu-t4-latest-1 + runs-on: linux-amd64-gpu-l4-latest-1 defaults: run: working-directory: ${{ github.workspace }} From dacfed4b6752d70a31f80d7010230d03d4094df6 Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 19:19:10 -0500 Subject: [PATCH 14/15] CI: Pre-pull docker images on test node before running `docker compose` to execute the tests. --- .../test-brev-tutorial-docker-images.yml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.github/workflows/test-brev-tutorial-docker-images.yml b/.github/workflows/test-brev-tutorial-docker-images.yml index 53611d93..72237b97 100644 --- a/.github/workflows/test-brev-tutorial-docker-images.yml +++ b/.github/workflows/test-brev-tutorial-docker-images.yml @@ -129,6 +129,31 @@ jobs: ls -la /dev/nvidia* 2>/dev/null || echo "No /dev/nvidia* devices found" echo "" + - name: Pre-pull Docker images + run: | + COMPOSE_FILE="artifacts/commit-specific/${{ inputs.tutorial }}/brev/docker-compose.yml" + + # Extract all unique images from the compose file + # The main image is defined with a YAML anchor like: image: &image ghcr.io/... 
+ MAIN_IMAGE=$(grep "image: &image" "$COMPOSE_FILE" | sed 's/.*image: &image //') + if [ -z "$MAIN_IMAGE" ]; then + # Fallback: try to find any ghcr.io image reference + MAIN_IMAGE=$(grep -o 'ghcr.io/[^"]*' "$COMPOSE_FILE" | head -1) + fi + + # Extract the nsight image (nvcr.io) + NSIGHT_IMAGE=$(grep -o 'nvcr.io/[^"]*' "$COMPOSE_FILE" | head -1) + + echo "Pre-pulling main image: $MAIN_IMAGE" + docker pull "$MAIN_IMAGE" + + if [ -n "$NSIGHT_IMAGE" ]; then + echo "Pre-pulling nsight image: $NSIGHT_IMAGE" + docker pull "$NSIGHT_IMAGE" + fi + + echo "All images pulled successfully" + - name: Test Docker Compose id: test run: | From e58cc618f85aa861667b19e9fc23ad63c43b96bb Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach aka wash Date: Tue, 9 Dec 2025 19:20:38 -0500 Subject: [PATCH 15/15] Tutorials/Accelerated Python: During testing, output timing for each cell, perform a GPU status check, and never execute .ipynb_checkpoints notebooks. --- .../accelerated-python/brev/requirements.txt | 2 +- tutorials/accelerated-python/test/pytest.ini | 2 +- .../accelerated-python/test/test_notebooks.py | 77 ++++++++++++++++--- 3 files changed, 69 insertions(+), 12 deletions(-) diff --git a/tutorials/accelerated-python/brev/requirements.txt b/tutorials/accelerated-python/brev/requirements.txt index 894481dc..e5d12e35 100644 --- a/tutorials/accelerated-python/brev/requirements.txt +++ b/tutorials/accelerated-python/brev/requirements.txt @@ -23,7 +23,7 @@ nvidia-nvshmem-cu12 == 3.3.20 nvidia-cuda-nvcc-cu12 == 12.8.* nvidia-cuda-nvrtc-cu12 == 12.8.* -# NVIDIA devtools +# NVIDIA developer tools nvtx nsightful[notebook] @ git+https://github.com/brycelelbach/nsightful.git diff --git a/tutorials/accelerated-python/test/pytest.ini b/tutorials/accelerated-python/test/pytest.ini index aa23ff1f..1dd0dacc 100644 --- a/tutorials/accelerated-python/test/pytest.ini +++ b/tutorials/accelerated-python/test/pytest.ini @@ -1,2 +1,2 @@ [pytest] -addopts = -v --durations=0 --durations-min=0.0 +addopts = -v -s --durations=0 --durations-min=0.0 diff --git a/tutorials/accelerated-python/test/test_notebooks.py b/tutorials/accelerated-python/test/test_notebooks.py index 7ea9f26a..1e60c52f 100644 --- a/tutorials/accelerated-python/test/test_notebooks.py +++ b/tutorials/accelerated-python/test/test_notebooks.py @@ -4,6 +4,7 @@ import pytest from pathlib import Path +import time import nbformat from nbclient import NotebookClient from nbclient.exceptions import CellExecutionError @@ -11,14 +12,17 @@ # Define the path to the notebooks directory NOTEBOOKS_DIR = Path(__file__).resolve().parent.parent / 'notebooks' -# Discover all solution notebooks -solution_notebooks = sorted(NOTEBOOKS_DIR.rglob('*SOLUTION*.ipynb')) +# Discover all solution notebooks (excluding checkpoint files) +solution_notebooks = sorted([ + nb for nb in NOTEBOOKS_DIR.rglob('*SOLUTION*.ipynb') + if '.ipynb_checkpoints' not in str(nb) +]) # Create test IDs from notebook paths for better test output notebook_ids = [nb.relative_to(NOTEBOOKS_DIR).as_posix() for nb in solution_notebooks] -def extract_cell_outputs(nb): +def extract_cell_outputs(nb, cell_times=None): """Extract stdout/stderr from all executed cells for debugging.""" outputs = [] for i, cell in enumerate(nb.cells): @@ -36,10 +40,32 @@ def extract_cell_outputs(nb): cell_outputs.append(f"[error] {ename}: {evalue}") if cell_outputs: source_preview = cell.source[:100].replace('\n', ' ') - outputs.append(f"--- Cell {i}: {source_preview}... 
---\n" + ''.join(cell_outputs)) + time_str = f" ({cell_times.get(i, 0):.2f}s)" if cell_times else "" + outputs.append(f"--- Cell {i}{time_str}: {source_preview}... ---\n" + ''.join(cell_outputs)) return '\n'.join(outputs) +def check_gpu_state(): + """Print GPU state for debugging slow execution.""" + import subprocess + try: + result = subprocess.run( + ['nvidia-smi', '--query-gpu=name,compute_mode,clocks.current.sm,clocks.current.memory,power.draw,temperature.gpu,utilization.gpu', '--format=csv,noheader'], + capture_output=True, text=True, timeout=5 + ) + if result.returncode == 0: + print(f" GPU State: {result.stdout.strip()}") + # Also check for any processes using the GPU + result2 = subprocess.run( + ['nvidia-smi', '--query-compute-apps=pid,name,used_memory', '--format=csv,noheader'], + capture_output=True, text=True, timeout=5 + ) + if result2.returncode == 0 and result2.stdout.strip(): + print(f" GPU Processes: {result2.stdout.strip()}") + except Exception as e: + print(f" GPU State check failed: {e}") + + @pytest.mark.parametrize('notebook_path', solution_notebooks, ids=notebook_ids) def test_solution_notebook_executes(notebook_path): """ @@ -47,6 +73,10 @@ def test_solution_notebook_executes(notebook_path): Uses nbclient to execute all cells in the notebook. """ + print(f"\n=== Starting notebook: {notebook_path.name} ===") + check_gpu_state() + notebook_start = time.time() + # Read the notebook with open(notebook_path, 'r', encoding='utf-8') as f: nb = nbformat.read(f, as_version=4) @@ -66,15 +96,42 @@ def test_solution_notebook_executes(notebook_path): resources={'metadata': {'path': str(notebook_path.parent)}} ) - # Execute the notebook + # Execute the notebook cell by cell to get timing + cell_times = {} try: - client.execute() + with client.setup_kernel(): + for i, cell in enumerate(nb.cells): + if cell.cell_type != 'code': + continue + cell_start = time.time() + source_preview = cell.source[:60].replace('\n', ' ') + print(f" Cell {i}: {source_preview}...", end='', flush=True) + + # Check kernel is alive before executing + if not client.kc.is_alive(): + print(" [KERNEL DEAD!]") + raise RuntimeError(f"Kernel died before cell {i}") + + client.execute_cell(cell, i) + cell_time = time.time() - cell_start + cell_times[i] = cell_time + print(f" [{cell_time:.2f}s]") + + # Flush any pending output + import sys + sys.stdout.flush() + except CellExecutionError as e: # Provide detailed error information # Include output from ALL cells, not just the failing one - all_outputs = extract_cell_outputs(nb) - pytest.fail(f"Notebook execution failed:\n{str(e)}\n\n=== ALL CELL OUTPUTS ===\n{all_outputs}") + all_outputs = extract_cell_outputs(nb, cell_times) + total_time = time.time() - notebook_start + pytest.fail(f"Notebook execution failed (total time: {total_time:.2f}s):\n{str(e)}\n\n=== ALL CELL OUTPUTS ===\n{all_outputs}") except Exception as e: # Catch any other execution errors - all_outputs = extract_cell_outputs(nb) - pytest.fail(f"Notebook execution failed: {str(e)}\n\n=== ALL CELL OUTPUTS ===\n{all_outputs}") + all_outputs = extract_cell_outputs(nb, cell_times) + total_time = time.time() - notebook_start + pytest.fail(f"Notebook execution failed (total time: {total_time:.2f}s): {str(e)}\n\n=== ALL CELL OUTPUTS ===\n{all_outputs}") + + total_time = time.time() - notebook_start + print(f"=== Completed {notebook_path.name} in {total_time:.2f}s ===")
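For reference, with the per-cell timing and GPU-state checks above in place,
a single solution notebook's test can be run locally through pytest's Python
entry point. A minimal sketch, assuming the repository root as the working
directory; the -k substring is taken from one of the notebook ids used in
the earlier patches:

    import sys
    import pytest

    sys.exit(pytest.main([
        "tutorials/accelerated-python/test/test_notebooks.py",
        "-k", "40__kernel_authoring__copy",  # substring of the test id
    ]))

In both cases pytest discovers tutorials/accelerated-python/test/pytest.ini,
so the -v -s --durations=0 options apply automatically.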