Skip to content

Commit

Permalink
Create timing artifacts.
Browse files Browse the repository at this point in the history
  • Loading branch information
alliepiper committed May 16, 2024
1 parent 502b169 commit f121b1e
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 5 deletions.
45 changes: 42 additions & 3 deletions .github/actions/workflow-results/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,47 @@ runs:
name: dispatch-job-success
path: dispatch-job-success/

- name: Fetch workflow job info
if: ${{ inputs.github_token != ''}}
continue-on-error: true
uses: actions/github-script@v4
with:
github-token: ${{ inputs.github_token }}
script: |
const fs = require('fs');
const owner = context.repo.owner;
const repo = context.repo.repo;
const runId = context.runId;
github.paginate(
'GET /repos/{owner}/{repo}/actions/runs/{run_id}/jobs',
{
owner: owner,
repo: repo,
run_id: runId
}
)
.then(jobs => {
console.log('::group::Jobs JSON');
console.log(JSON.stringify(jobs, null, 2));
console.log('::endgroup::');
fs.mkdirSync("results", { recursive: true });
fs.writeFileSync('results/jobs.json', JSON.stringify(jobs, null, 2));
console.log(`Fetched ${jobs.length} jobs and saved to results/jobs.json`);
})
.catch(error => {
console.error(error);
});
- name: Parse job times
continue-on-error: true
shell: bash --noprofile --norc -euo pipefail {0}
run: |
echo "Parsing job times..."
python3 "${GITHUB_ACTION_PATH}/parse-job-times.py" workflow/workflow.json results/jobs.json
- name: Prepare execution summary
id: job-summary
continue-on-error: true
shell: bash --noprofile --norc -euo pipefail {0}
run: |
Expand Down Expand Up @@ -70,8 +109,8 @@ runs:
github-token: ${{ inputs.github_token }}
script: |
const pr_number = process.env.PR_NUMBER;
const owner = 'NVIDIA';
const repo = 'cccl';
const owner = context.repo.owner;
const repo = context.repo.repo;
// Decode URL-encoded string for proper display in comments
const commentBody = decodeURIComponent(process.env.COMMENT_BODY);
console.log('::group::Commenting on PR #' + pr_number + ' with the following message:')
Expand Down
134 changes: 134 additions & 0 deletions .github/actions/workflow-results/parse-job-times.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3

import argparse
import datetime
import json
import os
import sys


def get_jobs_json(jobs_file):
    """Load and return the parsed contents of the jobs.json artifact."""
    with open(jobs_file) as handle:
        return json.load(handle)


def get_workflow_json(workflow_file):
    """Load and return the parsed contents of the workflow.json artifact."""
    with open(workflow_file) as handle:
        return json.load(handle)


def generate_job_id_map(workflow):
    """Map each job's full GitHub display name to its workflow job id.

    Standalone jobs are named "<group> / s.<id> / <name>"; two-stage jobs
    are named "<group> / t.<stage id> / p.<id> / <name>" for producers and
    "... / c.<id> / <name>" for consumers.
    """
    mapping = {}
    for group_name, group in workflow.items():
        for job in group.get('standalone', []):
            mapping[f"{group_name} / s.{job['id']} / {job['name']}"] = job['id']
        for stage in group.get('two_stage', []):
            for job in stage['producers']:
                mapping[f"{group_name} / t.{stage['id']} / p.{job['id']} / {job['name']}"] = job['id']
            for job in stage['consumers']:
                mapping[f"{group_name} / t.{stage['id']} / c.{job['id']} / {job['name']}"] = job['id']
    return mapping


def main():
    """Parse job timing info and print per-job duration statistics.

    Command line: parse-job-times.py <workflow.json> <jobs.json>

    Prints a JSON map of job id -> timing stats, then a plain-text table
    of "job duration / command duration / utilization% / job name".
    """
    # Accept two command line arguments: <workflow.json> <jobs.json>
    parser = argparse.ArgumentParser(description='Parse job times')
    parser.add_argument('workflow', type=str, help='Path to workflow.json')
    parser.add_argument('jobs', type=str, help='Path to jobs.json')
    args = parser.parse_args()

    jobs = get_jobs_json(args.jobs)
    workflow = get_workflow_json(args.workflow)

    # Converts full github job names into job ids:
    job_id_map = generate_job_id_map(workflow)

    # Map of job id -> { <job stats> }
    result = {}

    for job in jobs:
        name = job['name']

        # Jobs not produced by the dispatch workflow (build workflow,
        # verify devcontainers, etc.) have no entry in the map; skip them.
        if name not in job_id_map:
            print(f"Skipping {name}")
            continue

        # Renamed from `id` to avoid shadowing the builtin.
        job_id = job_id_map[name]

        # Job times are formatted like 2024-05-09T06:52:20Z
        started_at = job['started_at']
        started_time = datetime.datetime.strptime(started_at, "%Y-%m-%dT%H:%M:%SZ")

        completed_at = job['completed_at']
        completed_time = datetime.datetime.strptime(completed_at, "%Y-%m-%dT%H:%M:%SZ")

        job_seconds = (completed_time - started_time).total_seconds()
        job_duration = str(datetime.timedelta(seconds=job_seconds))

        # Find the "Run command" step and record its duration:
        command_seconds = 0
        for step in job['steps']:
            if step['name'] == "Run command":
                # Step timers use a different format: 2024-05-09T05:26:46.000-04:00
                step_started_at = step['started_at']
                step_started_time = datetime.datetime.strptime(step_started_at, "%Y-%m-%dT%H:%M:%S.%f%z")
                step_completed_at = step['completed_at']
                step_completed_time = datetime.datetime.strptime(step_completed_at, "%Y-%m-%dT%H:%M:%S.%f%z")
                command_seconds = (step_completed_time - step_started_time).total_seconds()
                break

        command_duration = str(datetime.timedelta(seconds=command_seconds))

        result[job_id] = {
            'name': name,
            'started_at': started_at,
            'completed_at': completed_at,
            'job_duration': job_duration,
            'job_seconds': job_seconds,
            'command_seconds': command_seconds,
            'command_duration': command_duration,
        }

    print(json.dumps(result, indent=4))

    for job_id, stats in result.items():
        # Guard against zero-length jobs (identical start/complete
        # timestamps) which would raise ZeroDivisionError:
        if stats['job_seconds']:
            utilization = stats['command_seconds'] * 100 / stats['job_seconds']
        else:
            utilization = 0.0
        print(f"{stats['job_duration']:10} {stats['command_duration']:10} {utilization:3f} {stats['name']}")


"""
What the final table entry going to look like?
Current:
🔍 cxx_name: MSVC 🔍
🟩 clang (0% Fail) Failed: 0 -- Passed: 8 -- Total: 8
🟩 gcc (0% Fail) Failed: 0 -- Passed: 8 -- Total: 8
🔍 MSVC (75% Fail) Failed: 9 -- Passed: 3 -- Total: 12
3
New:
| Jobs | Fail | Pass | total | avg | ovrhd |
🔍 cxx_name: MSVC 🔍 | | | | | | |
🟩 clang | 256 | 100 | 156 | 5d10h12m | 1h10m | 8% |
🟩 gcc | 100 | 0 | 100 | 3d13h62m | 40m10s | 15% |
🔍 MSVC | 256 | 256 | 0 | 2d14h04m | 0h10m | 51% |
"""


if __name__ == "__main__":
main()
4 changes: 2 additions & 2 deletions .github/workflows/workflow-dispatch-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
echo "SCCACHE_IDLE_TIMEOUT=32768" >> $GITHUB_ENV
echo "SCCACHE_S3_USE_SSL=true" >> $GITHUB_ENV
echo "SCCACHE_S3_NO_CREDENTIALS=false" >> $GITHUB_ENV
- name: Run command
- name: Run command # Do not change this step's name, it is checked in parse-job-times.py
shell: su coder {0}
env:
# Dereferencing the command from and env var instead of a GHA input avoids issues with escaping
Expand Down Expand Up @@ -149,7 +149,7 @@ jobs:
role-duration-seconds: 43200 # 12 hours
- name: Fetch ${{ inputs.image }}
run: docker pull ${{ inputs.image }}
- name: Run Command
- name: Run command # Do not change this step's name, it is checked in parse-job-times.py
run: >-
docker run ${{ inputs.image }} powershell -c "[System.Environment]::SetEnvironmentVariable('AWS_ACCESS_KEY_ID','${{env.AWS_ACCESS_KEY_ID}}')
[System.Environment]::SetEnvironmentVariable('AWS_SECRET_ACCESS_KEY','${{env.AWS_SECRET_ACCESS_KEY}}')
Expand Down
2 changes: 2 additions & 0 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ workflows:
# - {jobs: ['build'], project['thrust'], std: 17, ctk: *ctk_curr, cxx: [*gcc12, *llvm16]}
#
override:
# A few jobs, just to capture timing info:
- {jobs: ['build'], std: 'all', project: 'thrust', cxx: [*gcc12, *llvm16]}

pull_request:
# default_projects: nvcc
Expand Down

0 comments on commit f121b1e

Please sign in to comment.