From 164342e45f5aa6413845408d224fb396ef7909d3 Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Tue, 20 Aug 2024 09:45:04 -0700 Subject: [PATCH] [HUD][CH migration] Functioning commit_jobs_query (#5566) There's a lot of type casting/changes here because rockset returns slightly different types. TODO when we switch: get rid of the type changes and just change the code to use the ClickHouse types The query didn't actually change that much, most of it is formatting https://torchci-git-csl-workingcommithudquery-fbopensource.vercel.app/pytorch/pytorch/commit/773a782249ebe982996f18988ce6b26c4a786aa3 --- .../commit_jobs_query/query.sql | 262 ++++++++---------- torchci/components/JobLinks.tsx | 2 +- torchci/components/LogViewer.tsx | 6 + torchci/lib/clickhouse.ts | 4 +- torchci/lib/fetchCommit.ts | 19 +- 5 files changed, 135 insertions(+), 158 deletions(-) diff --git a/torchci/clickhouse_queries/commit_jobs_query/query.sql b/torchci/clickhouse_queries/commit_jobs_query/query.sql index 44c76b4b76..709ad0db73 100644 --- a/torchci/clickhouse_queries/commit_jobs_query/query.sql +++ b/torchci/clickhouse_queries/commit_jobs_query/query.sql @@ -1,158 +1,118 @@ -- This query is used by HUD commit and pull request pages to get all jobs belong -- to specific commit hash. They can then be displayed on those pages. +-- Based off of https://github.com/pytorch/test-infra/blob/c84f2b91cd104d3bbff5d99c4459059119050b95/torchci/rockset/commons/__sql/commit_jobs_query.sql#L1 +-- CircleCI has been removed WITH job AS ( - SELECT - job.started_at AS time, - workflow.head_sha AS sha, - job.name AS job_name, - workflow.name AS workflow_name, - job.id, - workflow.id AS workflow_id, - workflow.artifacts_url AS github_artifact_url, - job.conclusion, - job.html_url, - IF( - {repo : String} = 'pytorch/pytorch', - CONCAT( - 'https://ossci-raw-job-status.s3.amazonaws.com/log/', - job.id :: String - ), - CONCAT( - 'https://ossci-raw-job-status.s3.amazonaws.com/log/', - {repo : String}, '/', job.id :: String - ) - ) AS log_url, - DATE_DIFF( - 'SECOND', job.created_at, job.started_at - ) AS queue_time_s, - DATE_DIFF( - 'SECOND', job.started_at, job.completed_at - ) AS duration_s, - job.torchci_classification.line as line, - job.torchci_classification.captures as captures, - job.torchci_classification.line_num as line_num, - job.torchci_classification.context as context, - job.runner_name AS runner_name, - workflow.head_commit.'author'.'email' AS authorEmail - FROM - workflow_job job - INNER JOIN workflow_run workflow ON workflow.id = job.run_id - WHERE - job.name != 'ciflow_should_run' - AND job.name != 'generate-test-matrix' - AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA - AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA - AND workflow.head_sha = {sha : String} - AND job.head_sha = {sha : String} - AND workflow.repository.'full_name' = {repo : String} -- UNION - -- -- Handle CircleCI - -- -- IMPORTANT: this needs to have the same order AS the query above - -- SELECT - -- job._event_time AS time, - -- job.pipeline.vcs.revision AS sha, - -- -- Swap workflow and job name for consistency with GHA naming style. 
- -- job.workflow.name AS job_name, - -- job.job.name AS workflow_name, - -- job.job.number AS id, - -- null AS workflow_id, - -- null AS github_artifact_id, - -- CASE - -- WHEN job.job.status = 'failed' THEN 'failure' - -- WHEN job.job.status = 'canceled' THEN 'cancelled' - -- ELSE job.job.status - -- END AS conclusion, - -- -- cirleci doesn't provide a url, piece one together out of the info we have - -- CONCAT( - -- 'https://app.circleci.com/pipelines/github/', - -- : repo, - -- '/', - -- CAST(job.pipeline.number AS string), - -- '/workflows/', - -- job.workflow.id, - -- '/jobs/', - -- CAST(job.job.number AS string) - -- ) AS html_url, - -- -- logs aren't downloaded currently, just reuse html_url - -- html_url AS log_url, - -- null AS queue_time_s, - -- -- for circle ci, the event time comes after the end time, so its not reliable for queueing - -- DATE_DIFF( - -- 'SECOND', - -- PARSE_TIMESTAMP_ISO8601(job.job.started_at), - -- PARSE_TIMESTAMP_ISO8601(job.job.stopped_at) - -- ) AS duration_s, - -- -- Classifications not yet supported - -- null, - -- null, - -- null, - -- null, - -- -- Don't care about runner name from CircleCI - -- null AS runner_name, - -- null AS authorEmail, - -- FROM - -- circleci.job job - -- WHERE - -- job.pipeline.vcs.revision =: sha - -- AND CONCAT(job.organization.name, '/', job.project.name) =: repo - UNION ALL - SELECT - workflow.created_at AS time, - workflow.head_sha AS sha, - workflow.name AS job_name, - 'Workflow Startup Failure' AS workflow_name, - workflow.id, - 0 AS workflow_id, - workflow.artifacts_url AS github_artifact_url, - if( - workflow.conclusion = '' - and workflow.status = 'queued', - 'failure', - workflow.conclusion - ) as conclusion, - workflow.html_url, - '' AS log_url, - DATE_DIFF( - 'SECOND', workflow.created_at, workflow.run_started_at - ) AS queue_time_s, - 0 AS duration_s, - '' as line, - [] as captures, - 0 as line_num, - [] as context, - '' AS runner_name, - workflow.head_commit.author.email AS authorEmail - FROM - workflow_run workflow - WHERE - workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA - AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA - AND workflow.head_sha = {sha : String} - AND workflow.repository.full_name = {repo : String} + SELECT + job.started_at AS time, + workflow.head_sha AS sha, + job.name AS job_name, + workflow.name AS workflow_name, + job.id, + workflow.id AS workflow_id, + workflow.artifacts_url AS github_artifact_url, + job.conclusion, + job.html_url, + IF( + {repo: String } = 'pytorch/pytorch', + CONCAT( + 'https://ossci-raw-job-status.s3.amazonaws.com/log/', + job.id:: String + ), + CONCAT( + 'https://ossci-raw-job-status.s3.amazonaws.com/log/', + {repo: String }, + '/', + job.id:: String + ) + ) AS log_url, + if( + job.started_at = 0, + 0, + DATE_DIFF('SECOND', job.created_at, job.started_at) + ) AS queue_time_s, + if( + job.completed_at = 0, + 0, + DATE_DIFF('SECOND', job.started_at, job.completed_at) + ) AS duration_s, + job.torchci_classification.line as line, + job.torchci_classification.captures as captures, + job.torchci_classification.line_num as line_num, + job.torchci_classification.context as context, + job.runner_name AS runner_name, + workflow.head_commit. 
'author'.'email' AS authorEmail + FROM + workflow_job job final + INNER JOIN workflow_run workflow final ON workflow.id = job.run_id + WHERE + job.name != 'ciflow_should_run' + AND job.name != 'generate-test-matrix' + AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA + AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA + AND workflow.head_sha = {sha: String } + AND job.head_sha = {sha: String } + AND workflow.repository. 'full_name' = {repo: String } -- UNION + UNION ALL + SELECT + workflow.created_at AS time, + workflow.head_sha AS sha, + workflow.name AS job_name, + 'Workflow Startup Failure' AS workflow_name, + workflow.id, + 0 AS workflow_id, + workflow.artifacts_url AS github_artifact_url, + if( + workflow.conclusion = '' + and workflow.status = 'queued', + 'failure', + workflow.conclusion + ) as conclusion, + workflow.html_url, + '' AS log_url, + DATE_DIFF( + 'SECOND', + workflow.created_at, + workflow.run_started_at + ) AS queue_time_s, + 0 AS duration_s, + '' as line, + [ ] as captures, + 0 as line_num, + [ ] as context, + '' AS runner_name, + workflow.head_commit.author.email AS authorEmail + FROM + workflow_run workflow final + WHERE + workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA + AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA + AND workflow.head_sha = {sha: String } + AND workflow.repository.full_name = {repo: String } ) SELECT - sha, - workflow_name AS workflowName, - job_name AS jobName, - CONCAT(workflow_name, ' / ', job_name) AS name, - id AS id, - workflow_id AS workflowId, - github_artifact_url AS githubArtifactUrl, - if( - conclusion = '', 'pending', conclusion - ), - html_url AS htmlUrl, - log_url AS logUrl, - duration_s AS durationS, - queue_time_s AS queueTimeS, - line AS failureLines, - line_num AS failureLineNumbers, - captures AS failureCaptures, - context AS failureContext, - runner_name AS runnerName, - authorEmail, - time, + sha, + workflow_name AS workflowName, + job_name AS jobName, + CONCAT(workflow_name, ' / ', job_name) AS name, + id AS id, + workflow_id AS workflowId, + github_artifact_url AS githubArtifactUrl, + if(conclusion = '', 'pending', conclusion) as conclusion, + html_url AS htmlUrl, + log_url AS logUrl, + duration_s AS durationS, + queue_time_s AS queueTimeS, + -- Convert to arrays + if(line = '', [ ], [ line ]) AS failureLines, + if(line_num = 0, [ ], [ line_num ]) AS failureLineNumbers, + captures AS failureCaptures, + context AS failureContext, + runner_name AS runnerName, + authorEmail, + time, FROM - job + job ORDER BY - name, - time DESC + name, + time DESC diff --git a/torchci/components/JobLinks.tsx b/torchci/components/JobLinks.tsx index 3cee6aa0d6..d9a48dcbad 100644 --- a/torchci/components/JobLinks.tsx +++ b/torchci/components/JobLinks.tsx @@ -40,7 +40,7 @@ export default function JobLinks({ ); } - if (job.failureCaptures != null) { + if (job.failureCaptures != null && job.failureLines?.length != 0) { subInfo.push( (undefined); + // TODO: Remove this. This is a hack to make sure that that the log viewer + // will always show up. 
It gets around some differences in output between + // rockset and clickhouse + if (lineNumbers.length === 0) { + lineNumbers = [0]; + } return ( <> {lineNumbers.map((line, index) => ( diff --git a/torchci/lib/clickhouse.ts b/torchci/lib/clickhouse.ts index aeb6db7c95..44cdc8c122 100644 --- a/torchci/lib/clickhouse.ts +++ b/torchci/lib/clickhouse.ts @@ -12,7 +12,7 @@ export function getClickhouseClient() { export async function queryClickhouse( query: string, params: Record -) { +): Promise { /** * queryClickhouse * @param query: string, the sql query @@ -25,7 +25,7 @@ export async function queryClickhouse( query_params: params, }); - return await res.json(); + return (await res.json()) as any[]; } export async function queryClickhouseSaved( diff --git a/torchci/lib/fetchCommit.ts b/torchci/lib/fetchCommit.ts index 56a8056892..8fb2a0823d 100644 --- a/torchci/lib/fetchCommit.ts +++ b/torchci/lib/fetchCommit.ts @@ -1,18 +1,26 @@ import _ from "lodash"; import { Octokit } from "octokit"; import rocksetVersions from "rockset/prodVersions.json"; -import { queryClickhouseSaved } from "./clickhouse"; +import { enableClickhouse, queryClickhouseSaved } from "./clickhouse"; import { commitDataFromResponse, getOctokit } from "./github"; import { removeCancelledJobAfterRetry } from "./jobUtils"; import getRocksetClient from "./rockset"; import { CommitData, JobData } from "./types"; async function fetchDatabaseInfo(owner: string, repo: string, sha: string) { - if (false) { - const response = queryClickhouseSaved("commit_jobs_query", { + if (enableClickhouse()) { + const response = await queryClickhouseSaved("commit_jobs_query", { repo: `${owner}/${repo}`, sha: sha, }); + + for (const row of response) { + row.id = row.id == 0 ? null : row.id; + row.workflowId = row.workflowId == 0 ? null : row.workflowId; + row.durationS = parseInt(row.durationS); + row.queueTimeS = parseInt(row.queueTimeS); + row.time = row.time + "Z"; + } return response; } else { const rocksetClient = getRocksetClient(); @@ -66,7 +74,10 @@ export default async function fetchCommit( // and then merging them back together const [workflows, onlyJobs] = _.partition( jobs, - (job) => job.workflowId === null || job.workflowId === undefined + (job) => + job.workflowId === null || + job.workflowId === undefined || + job.workflowId === 0 ); const filteredJobs = removeCancelledJobAfterRetry(onlyJobs);
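
The type fix-ups in fetchCommit.ts boil down to the normalization sketched below. This is a minimal illustration rather than code from this PR as-is: the RawRow shape and normalizeRow helper are hypothetical names, and it assumes ClickHouse returns 0 for absent Int64 ids, stringified Int64 values for durationS/queueTimeS, and DateTime strings without a timezone suffix, which is what the inline loop in fetchDatabaseInfo is compensating for.

// Sketch of the per-row normalization applied to ClickHouse results so they
// match what the Rockset query used to return. Names are illustrative only.

interface RawRow {
  id: number;          // assumed: 0 when the job id is absent
  workflowId: number;  // assumed: 0 when the workflow id is absent
  durationS: string;   // assumed: Int64 serialized as a string over JSON
  queueTimeS: string;  // assumed: Int64 serialized as a string over JSON
  time: string;        // assumed: DateTime without a trailing "Z"
  [key: string]: unknown;
}

function normalizeRow(row: RawRow) {
  return {
    ...row,
    // Rockset used null for "no id"; ClickHouse defaults a missing Int64 to 0.
    id: row.id === 0 ? null : row.id,
    workflowId: row.workflowId === 0 ? null : row.workflowId,
    // Convert stringified Int64 columns back to numbers for the UI.
    durationS: parseInt(row.durationS, 10),
    queueTimeS: parseInt(row.queueTimeS, 10),
    // Mark the timestamp as UTC so new Date(...) parses it consistently.
    time: row.time + "Z",
  };
}

// Example:
//   normalizeRow({ id: 0, workflowId: 123, durationS: "42", queueTimeS: "7",
//                  time: "2024-08-20 16:45:04" })
//   => { id: null, workflowId: 123, durationS: 42, queueTimeS: 7,
//        time: "2024-08-20 16:45:04Z" }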