[HUD][CH migration] Functioning commit_jobs_query (#5566)
There's a lot of type casting/changes here because Rockset returns
slightly different types than ClickHouse does.

TODO when we switch: get rid of the type changes and just change the
code to use the ClickHouse types.

The query itself didn't actually change that much; most of the diff is
formatting.

Preview deployment:
https://torchci-git-csl-workingcommithudquery-fbopensource.vercel.app/pytorch/pytorch/commit/773a782249ebe982996f18988ce6b26c4a786aa3
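
For context, the mismatch is mostly about scalar encodings: ClickHouse's JSON
output serializes 64-bit integers as strings and DateTime values without a
timezone suffix, while Rockset returned plain numbers and ISO-8601 timestamps.
A minimal sketch of the resulting per-row coercion (the RawRow shape is
hypothetical; the real normalization lives in fetchCommit.ts below):

// Hypothetical shape of a row as ClickHouse's JSON output returns it:
// UInt64/Int64 columns arrive as strings, DateTime lacks a trailing "Z".
interface RawRow {
  id: string; // "0" stands in for Rockset's null
  durationS: string; // Int64 serialized as a string
  time: string; // e.g. "2024-08-20 17:03:11", no timezone marker
}

// Coerce a row back to the shapes the Rockset-era UI code expects.
function normalizeRow(row: RawRow) {
  return {
    id: row.id === "0" ? null : row.id,
    durationS: parseInt(row.durationS), // numbers again, as Rockset returned them
    time: row.time + "Z", // tag as UTC so Date parsing behaves the same
  };
}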
clee2000 authored Aug 20, 2024
1 parent 27442aa commit 164342e
Showing 5 changed files with 135 additions and 158 deletions.
262 changes: 111 additions & 151 deletions torchci/clickhouse_queries/commit_jobs_query/query.sql
@@ -1,158 +1,118 @@
 -- This query is used by HUD commit and pull request pages to get all jobs belong
 -- to specific commit hash. They can then be displayed on those pages.
 -- Based off of https://github.com/pytorch/test-infra/blob/c84f2b91cd104d3bbff5d99c4459059119050b95/torchci/rockset/commons/__sql/commit_jobs_query.sql#L1
 -- CircleCI has been removed
 WITH job AS (
-  SELECT
-    job.started_at AS time,
-    workflow.head_sha AS sha,
-    job.name AS job_name,
-    workflow.name AS workflow_name,
-    job.id,
-    workflow.id AS workflow_id,
-    workflow.artifacts_url AS github_artifact_url,
-    job.conclusion,
-    job.html_url,
-    IF(
-      {repo : String} = 'pytorch/pytorch',
-      CONCAT(
-        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
-        job.id :: String
-      ),
-      CONCAT(
-        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
-        {repo : String}, '/', job.id :: String
-      )
-    ) AS log_url,
-    DATE_DIFF(
-      'SECOND', job.created_at, job.started_at
-    ) AS queue_time_s,
-    DATE_DIFF(
-      'SECOND', job.started_at, job.completed_at
-    ) AS duration_s,
-    job.torchci_classification.line as line,
-    job.torchci_classification.captures as captures,
-    job.torchci_classification.line_num as line_num,
-    job.torchci_classification.context as context,
-    job.runner_name AS runner_name,
-    workflow.head_commit.'author'.'email' AS authorEmail
-  FROM
-    workflow_job job
-    INNER JOIN workflow_run workflow ON workflow.id = job.run_id
-  WHERE
-    job.name != 'ciflow_should_run'
-    AND job.name != 'generate-test-matrix'
-    AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
-    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
-    AND workflow.head_sha = {sha : String}
-    AND job.head_sha = {sha : String}
-    AND workflow.repository.'full_name' = {repo : String} -- UNION
-    -- -- Handle CircleCI
-    -- -- IMPORTANT: this needs to have the same order AS the query above
-    -- SELECT
-    --   job._event_time AS time,
-    --   job.pipeline.vcs.revision AS sha,
-    --   -- Swap workflow and job name for consistency with GHA naming style.
-    --   job.workflow.name AS job_name,
-    --   job.job.name AS workflow_name,
-    --   job.job.number AS id,
-    --   null AS workflow_id,
-    --   null AS github_artifact_id,
-    --   CASE
-    --     WHEN job.job.status = 'failed' THEN 'failure'
-    --     WHEN job.job.status = 'canceled' THEN 'cancelled'
-    --     ELSE job.job.status
-    --   END AS conclusion,
-    --   -- cirleci doesn't provide a url, piece one together out of the info we have
-    --   CONCAT(
-    --     'https://app.circleci.com/pipelines/github/',
-    --     : repo,
-    --     '/',
-    --     CAST(job.pipeline.number AS string),
-    --     '/workflows/',
-    --     job.workflow.id,
-    --     '/jobs/',
-    --     CAST(job.job.number AS string)
-    --   ) AS html_url,
-    --   -- logs aren't downloaded currently, just reuse html_url
-    --   html_url AS log_url,
-    --   null AS queue_time_s,
-    --   -- for circle ci, the event time comes after the end time, so its not reliable for queueing
-    --   DATE_DIFF(
-    --     'SECOND',
-    --     PARSE_TIMESTAMP_ISO8601(job.job.started_at),
-    --     PARSE_TIMESTAMP_ISO8601(job.job.stopped_at)
-    --   ) AS duration_s,
-    --   -- Classifications not yet supported
-    --   null,
-    --   null,
-    --   null,
-    --   null,
-    --   -- Don't care about runner name from CircleCI
-    --   null AS runner_name,
-    --   null AS authorEmail,
-    -- FROM
-    --   circleci.job job
-    -- WHERE
-    --   job.pipeline.vcs.revision =: sha
-    --   AND CONCAT(job.organization.name, '/', job.project.name) =: repo
-  UNION ALL
-  SELECT
-    workflow.created_at AS time,
-    workflow.head_sha AS sha,
-    workflow.name AS job_name,
-    'Workflow Startup Failure' AS workflow_name,
-    workflow.id,
-    0 AS workflow_id,
-    workflow.artifacts_url AS github_artifact_url,
-    if(
-      workflow.conclusion = ''
-      and workflow.status = 'queued',
-      'failure',
-      workflow.conclusion
-    ) as conclusion,
-    workflow.html_url,
-    '' AS log_url,
-    DATE_DIFF(
-      'SECOND', workflow.created_at, workflow.run_started_at
-    ) AS queue_time_s,
-    0 AS duration_s,
-    '' as line,
-    [] as captures,
-    0 as line_num,
-    [] as context,
-    '' AS runner_name,
-    workflow.head_commit.author.email AS authorEmail
-  FROM
-    workflow_run workflow
-  WHERE
-    workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
-    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
-    AND workflow.head_sha = {sha : String}
-    AND workflow.repository.full_name = {repo : String}
+  SELECT
+    job.started_at AS time,
+    workflow.head_sha AS sha,
+    job.name AS job_name,
+    workflow.name AS workflow_name,
+    job.id,
+    workflow.id AS workflow_id,
+    workflow.artifacts_url AS github_artifact_url,
+    job.conclusion,
+    job.html_url,
+    IF(
+      {repo: String } = 'pytorch/pytorch',
+      CONCAT(
+        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
+        job.id :: String
+      ),
+      CONCAT(
+        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
+        {repo: String },
+        '/',
+        job.id :: String
+      )
+    ) AS log_url,
+    if(
+      job.started_at = 0,
+      0,
+      DATE_DIFF('SECOND', job.created_at, job.started_at)
+    ) AS queue_time_s,
+    if(
+      job.completed_at = 0,
+      0,
+      DATE_DIFF('SECOND', job.started_at, job.completed_at)
+    ) AS duration_s,
+    job.torchci_classification.line as line,
+    job.torchci_classification.captures as captures,
+    job.torchci_classification.line_num as line_num,
+    job.torchci_classification.context as context,
+    job.runner_name AS runner_name,
+    workflow.head_commit.'author'.'email' AS authorEmail
+  FROM
+    workflow_job job final
+    INNER JOIN workflow_run workflow final ON workflow.id = job.run_id
+  WHERE
+    job.name != 'ciflow_should_run'
+    AND job.name != 'generate-test-matrix'
+    AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
+    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
+    AND workflow.head_sha = {sha: String }
+    AND job.head_sha = {sha: String }
+    AND workflow.repository.'full_name' = {repo: String } -- UNION
+  UNION ALL
+  SELECT
+    workflow.created_at AS time,
+    workflow.head_sha AS sha,
+    workflow.name AS job_name,
+    'Workflow Startup Failure' AS workflow_name,
+    workflow.id,
+    0 AS workflow_id,
+    workflow.artifacts_url AS github_artifact_url,
+    if(
+      workflow.conclusion = ''
+      and workflow.status = 'queued',
+      'failure',
+      workflow.conclusion
+    ) as conclusion,
+    workflow.html_url,
+    '' AS log_url,
+    DATE_DIFF(
+      'SECOND',
+      workflow.created_at,
+      workflow.run_started_at
+    ) AS queue_time_s,
+    0 AS duration_s,
+    '' as line,
+    [ ] as captures,
+    0 as line_num,
+    [ ] as context,
+    '' AS runner_name,
+    workflow.head_commit.author.email AS authorEmail
+  FROM
+    workflow_run workflow final
+  WHERE
+    workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
+    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
+    AND workflow.head_sha = {sha: String }
+    AND workflow.repository.full_name = {repo: String }
 )
 SELECT
-  sha,
-  workflow_name AS workflowName,
-  job_name AS jobName,
-  CONCAT(workflow_name, ' / ', job_name) AS name,
-  id AS id,
-  workflow_id AS workflowId,
-  github_artifact_url AS githubArtifactUrl,
-  if(
-    conclusion = '', 'pending', conclusion
-  ),
-  html_url AS htmlUrl,
-  log_url AS logUrl,
-  duration_s AS durationS,
-  queue_time_s AS queueTimeS,
-  line AS failureLines,
-  line_num AS failureLineNumbers,
-  captures AS failureCaptures,
-  context AS failureContext,
-  runner_name AS runnerName,
-  authorEmail,
-  time,
+  sha,
+  workflow_name AS workflowName,
+  job_name AS jobName,
+  CONCAT(workflow_name, ' / ', job_name) AS name,
+  id AS id,
+  workflow_id AS workflowId,
+  github_artifact_url AS githubArtifactUrl,
+  if(conclusion = '', 'pending', conclusion) as conclusion,
+  html_url AS htmlUrl,
+  log_url AS logUrl,
+  duration_s AS durationS,
+  queue_time_s AS queueTimeS,
+  -- Convert to arrays
+  if(line = '', [ ], [ line ]) AS failureLines,
+  if(line_num = 0, [ ], [ line_num ]) AS failureLineNumbers,
+  captures AS failureCaptures,
+  context AS failureContext,
+  runner_name AS runnerName,
+  authorEmail,
+  time,
 FROM
-  job
+  job
 ORDER BY
-  name,
-  time DESC
+  name,
+  time DESC
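
The {sha: String} and {repo: String} placeholders are ClickHouse server-side
query parameters, bound by the client rather than interpolated into the SQL.
A rough sketch of how this query would be executed with @clickhouse/client
(connection details here are placeholders; the repo's actual wrapper is
queryClickhouse in torchci/lib/clickhouse.ts below):

import { createClient } from "@clickhouse/client";

// Placeholder env var for the ClickHouse endpoint and credentials.
const client = createClient({ url: process.env.CLICKHOUSE_URL });

async function runCommitJobsQuery(query: string, sha: string, repo: string) {
  const res = await client.query({
    query,
    query_params: { sha, repo }, // fills {sha: String} and {repo: String}
    format: "JSONEachRow",
  });
  return await res.json();
}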
2 changes: 1 addition & 1 deletion torchci/components/JobLinks.tsx
@@ -40,7 +40,7 @@ export default function JobLinks({
     );
   }

-  if (job.failureCaptures != null) {
+  if (job.failureCaptures != null && job.failureLines?.length != 0) {
     subInfo.push(
       <a
         target="_blank"
6 changes: 6 additions & 0 deletions torchci/components/LogViewer.tsx
@@ -274,6 +274,12 @@ function LogWithLineSelector({
   });
   // undefined means that no line is selected, so the log viewer is closed
   const [currentLine, setCurrentLine] = useState<number | undefined>(undefined);
+  // TODO: Remove this. This is a hack to make sure that that the log viewer
+  // will always show up. It gets around some differences in output between
+  // rockset and clickhouse
+  if (lineNumbers.length === 0) {
+    lineNumbers = [0];
+  }
   return (
     <>
       {lineNumbers.map((line, index) => (
4 changes: 2 additions & 2 deletions torchci/lib/clickhouse.ts
@@ -12,7 +12,7 @@ export function getClickhouseClient() {
 export async function queryClickhouse(
   query: string,
   params: Record<string, unknown>
-) {
+): Promise<any[]> {
   /**
    * queryClickhouse
    * @param query: string, the sql query
@@ -25,7 +25,7 @@ export async function queryClickhouse(
     query_params: params,
   });

-  return await res.json();
+  return (await res.json()) as any[];
 }

 export async function queryClickhouseSaved(
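
With queryClickhouse's return type pinned to Promise<any[]>, call sites can
iterate rows without their own casts. An illustrative call (the query text
and sha are only examples, not code from this commit):

const rows = await queryClickhouse(
  "SELECT id, conclusion FROM workflow_job WHERE head_sha = {sha: String}",
  { sha: "773a782249ebe982996f18988ce6b26c4a786aa3" }
);
rows.forEach((row) => console.log(row.id, row.conclusion));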
19 changes: 15 additions & 4 deletions torchci/lib/fetchCommit.ts
@@ -1,18 +1,26 @@
 import _ from "lodash";
 import { Octokit } from "octokit";
 import rocksetVersions from "rockset/prodVersions.json";
-import { queryClickhouseSaved } from "./clickhouse";
+import { enableClickhouse, queryClickhouseSaved } from "./clickhouse";
 import { commitDataFromResponse, getOctokit } from "./github";
 import { removeCancelledJobAfterRetry } from "./jobUtils";
 import getRocksetClient from "./rockset";
 import { CommitData, JobData } from "./types";

 async function fetchDatabaseInfo(owner: string, repo: string, sha: string) {
-  if (false) {
-    const response = queryClickhouseSaved("commit_jobs_query", {
+  if (enableClickhouse()) {
+    const response = await queryClickhouseSaved("commit_jobs_query", {
       repo: `${owner}/${repo}`,
       sha: sha,
     });

+    for (const row of response) {
+      row.id = row.id == 0 ? null : row.id;
+      row.workflowId = row.workflowId == 0 ? null : row.workflowId;
+      row.durationS = parseInt(row.durationS);
+      row.queueTimeS = parseInt(row.queueTimeS);
+      row.time = row.time + "Z";
+    }
     return response;
   } else {
     const rocksetClient = getRocksetClient();
@@ -66,7 +74,10 @@ export default async function fetchCommit(
   // and then merging them back together
   const [workflows, onlyJobs] = _.partition(
     jobs,
-    (job) => job.workflowId === null || job.workflowId === undefined
+    (job) =>
+      job.workflowId === null ||
+      job.workflowId === undefined ||
+      job.workflowId === 0
   );

   const filteredJobs = removeCancelledJobAfterRetry<JobData>(onlyJobs);
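
The added workflowId === 0 case mirrors the 0 AS workflow_id sentinel that the
ClickHouse query emits for "Workflow Startup Failure" rows, where the Rockset
results had null. A standalone illustration of the partition (the job objects
here are made up):

import _ from "lodash";

const jobs = [
  { name: "Workflow Startup Failure", workflowId: 0 }, // ClickHouse sentinel
  { name: "pull / linux-build", workflowId: 123 },
];

// Split workflow-level rows from ordinary jobs before retry filtering.
const [workflows, onlyJobs] = _.partition(
  jobs,
  (job) =>
    job.workflowId === null ||
    job.workflowId === undefined ||
    job.workflowId === 0
);
// workflows -> [the startup-failure row]; onlyJobs -> [the build job]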
