[HUD][CH migration] Functioning commit_jobs_query (#5566)
There's a lot of type casting/changes here because Rockset returns
slightly different types than ClickHouse does.

TODO when we switch: get rid of the type changes and just change the
code to use the ClickHouse types.

The query itself didn't actually change that much; most of the diff is
formatting.

Preview deployment:
https://torchci-git-csl-workingcommithudquery-fbopensource.vercel.app/pytorch/pytorch/commit/773a782249ebe982996f18988ce6b26c4a786aa3
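
For context, the mismatch is mostly about scalar encodings: ClickHouse's JSON
output serializes 64-bit integers as strings and DateTime values without a
timezone suffix, while Rockset returned plain numbers and ISO-8601 timestamps.
A minimal sketch of the resulting per-row coercion (the RawRow shape is
hypothetical; the real normalization lives in fetchCommit.ts below):

// Hypothetical shape of a row as ClickHouse's JSON output returns it:
// UInt64/Int64 columns arrive as strings, DateTime lacks a trailing "Z".
interface RawRow {
  id: string; // "0" stands in for Rockset's null
  durationS: string; // Int64 serialized as a string
  time: string; // e.g. "2024-08-20 17:03:11", no timezone marker
}

// Coerce a row back to the shapes the Rockset-era UI code expects.
function normalizeRow(row: RawRow) {
  return {
    id: row.id === "0" ? null : row.id,
    durationS: parseInt(row.durationS), // numbers again, as Rockset returned them
    time: row.time + "Z", // tag as UTC so Date parsing behaves the same
  };
}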
clee2000 authored Aug 20, 2024
1 parent 27442aa commit 164342e
Showing 5 changed files with 135 additions and 158 deletions.
262 changes: 111 additions & 151 deletions torchci/clickhouse_queries/commit_jobs_query/query.sql
@@ -1,158 +1,118 @@
 -- This query is used by HUD commit and pull request pages to get all jobs belong
 -- to specific commit hash. They can then be displayed on those pages.
 -- Based off of https://github.com/pytorch/test-infra/blob/c84f2b91cd104d3bbff5d99c4459059119050b95/torchci/rockset/commons/__sql/commit_jobs_query.sql#L1
 -- CircleCI has been removed
 WITH job AS (
-  SELECT
-    job.started_at AS time,
-    workflow.head_sha AS sha,
-    job.name AS job_name,
-    workflow.name AS workflow_name,
-    job.id,
-    workflow.id AS workflow_id,
-    workflow.artifacts_url AS github_artifact_url,
-    job.conclusion,
-    job.html_url,
-    IF(
-      {repo : String} = 'pytorch/pytorch',
-      CONCAT(
-        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
-        job.id :: String
-      ),
-      CONCAT(
-        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
-        {repo : String}, '/', job.id :: String
-      )
-    ) AS log_url,
-    DATE_DIFF(
-      'SECOND', job.created_at, job.started_at
-    ) AS queue_time_s,
-    DATE_DIFF(
-      'SECOND', job.started_at, job.completed_at
-    ) AS duration_s,
-    job.torchci_classification.line as line,
-    job.torchci_classification.captures as captures,
-    job.torchci_classification.line_num as line_num,
-    job.torchci_classification.context as context,
-    job.runner_name AS runner_name,
-    workflow.head_commit.'author'.'email' AS authorEmail
-  FROM
-    workflow_job job
-    INNER JOIN workflow_run workflow ON workflow.id = job.run_id
-  WHERE
-    job.name != 'ciflow_should_run'
-    AND job.name != 'generate-test-matrix'
-    AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
-    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
-    AND workflow.head_sha = {sha : String}
-    AND job.head_sha = {sha : String}
-    AND workflow.repository.'full_name' = {repo : String} -- UNION
-    -- -- Handle CircleCI
-    -- -- IMPORTANT: this needs to have the same order AS the query above
-    -- SELECT
-    --   job._event_time AS time,
-    --   job.pipeline.vcs.revision AS sha,
-    --   -- Swap workflow and job name for consistency with GHA naming style.
-    --   job.workflow.name AS job_name,
-    --   job.job.name AS workflow_name,
-    --   job.job.number AS id,
-    --   null AS workflow_id,
-    --   null AS github_artifact_id,
-    --   CASE
-    --     WHEN job.job.status = 'failed' THEN 'failure'
-    --     WHEN job.job.status = 'canceled' THEN 'cancelled'
-    --     ELSE job.job.status
-    --   END AS conclusion,
-    --   -- cirleci doesn't provide a url, piece one together out of the info we have
-    --   CONCAT(
-    --     'https://app.circleci.com/pipelines/github/',
-    --     : repo,
-    --     '/',
-    --     CAST(job.pipeline.number AS string),
-    --     '/workflows/',
-    --     job.workflow.id,
-    --     '/jobs/',
-    --     CAST(job.job.number AS string)
-    --   ) AS html_url,
-    --   -- logs aren't downloaded currently, just reuse html_url
-    --   html_url AS log_url,
-    --   null AS queue_time_s,
-    --   -- for circle ci, the event time comes after the end time, so its not reliable for queueing
-    --   DATE_DIFF(
-    --     'SECOND',
-    --     PARSE_TIMESTAMP_ISO8601(job.job.started_at),
-    --     PARSE_TIMESTAMP_ISO8601(job.job.stopped_at)
-    --   ) AS duration_s,
-    --   -- Classifications not yet supported
-    --   null,
-    --   null,
-    --   null,
-    --   null,
-    --   -- Don't care about runner name from CircleCI
-    --   null AS runner_name,
-    --   null AS authorEmail,
-    -- FROM
-    --   circleci.job job
-    -- WHERE
-    --   job.pipeline.vcs.revision =: sha
-    --   AND CONCAT(job.organization.name, '/', job.project.name) =: repo
-  UNION ALL
-  SELECT
-    workflow.created_at AS time,
-    workflow.head_sha AS sha,
-    workflow.name AS job_name,
-    'Workflow Startup Failure' AS workflow_name,
-    workflow.id,
-    0 AS workflow_id,
-    workflow.artifacts_url AS github_artifact_url,
-    if(
-      workflow.conclusion = ''
-      and workflow.status = 'queued',
-      'failure',
-      workflow.conclusion
-    ) as conclusion,
-    workflow.html_url,
-    '' AS log_url,
-    DATE_DIFF(
-      'SECOND', workflow.created_at, workflow.run_started_at
-    ) AS queue_time_s,
-    0 AS duration_s,
-    '' as line,
-    [] as captures,
-    0 as line_num,
-    [] as context,
-    '' AS runner_name,
-    workflow.head_commit.author.email AS authorEmail
-  FROM
-    workflow_run workflow
-  WHERE
-    workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
-    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
-    AND workflow.head_sha = {sha : String}
-    AND workflow.repository.full_name = {repo : String}
+  SELECT
+    job.started_at AS time,
+    workflow.head_sha AS sha,
+    job.name AS job_name,
+    workflow.name AS workflow_name,
+    job.id,
+    workflow.id AS workflow_id,
+    workflow.artifacts_url AS github_artifact_url,
+    job.conclusion,
+    job.html_url,
+    IF(
+      {repo: String } = 'pytorch/pytorch',
+      CONCAT(
+        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
+        job.id :: String
+      ),
+      CONCAT(
+        'https://ossci-raw-job-status.s3.amazonaws.com/log/',
+        {repo: String },
+        '/',
+        job.id :: String
+      )
+    ) AS log_url,
+    if(
+      job.started_at = 0,
+      0,
+      DATE_DIFF('SECOND', job.created_at, job.started_at)
+    ) AS queue_time_s,
+    if(
+      job.completed_at = 0,
+      0,
+      DATE_DIFF('SECOND', job.started_at, job.completed_at)
+    ) AS duration_s,
+    job.torchci_classification.line as line,
+    job.torchci_classification.captures as captures,
+    job.torchci_classification.line_num as line_num,
+    job.torchci_classification.context as context,
+    job.runner_name AS runner_name,
+    workflow.head_commit.'author'.'email' AS authorEmail
+  FROM
+    workflow_job job final
+    INNER JOIN workflow_run workflow final ON workflow.id = job.run_id
+  WHERE
+    job.name != 'ciflow_should_run'
+    AND job.name != 'generate-test-matrix'
+    AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
+    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
+    AND workflow.head_sha = {sha: String }
+    AND job.head_sha = {sha: String }
+    AND workflow.repository.'full_name' = {repo: String } -- UNION
+  UNION ALL
+  SELECT
+    workflow.created_at AS time,
+    workflow.head_sha AS sha,
+    workflow.name AS job_name,
+    'Workflow Startup Failure' AS workflow_name,
+    workflow.id,
+    0 AS workflow_id,
+    workflow.artifacts_url AS github_artifact_url,
+    if(
+      workflow.conclusion = ''
+      and workflow.status = 'queued',
+      'failure',
+      workflow.conclusion
+    ) as conclusion,
+    workflow.html_url,
+    '' AS log_url,
+    DATE_DIFF(
+      'SECOND',
+      workflow.created_at,
+      workflow.run_started_at
+    ) AS queue_time_s,
+    0 AS duration_s,
+    '' as line,
+    [ ] as captures,
+    0 as line_num,
+    [ ] as context,
+    '' AS runner_name,
+    workflow.head_commit.author.email AS authorEmail
+  FROM
+    workflow_run workflow final
+  WHERE
+    workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
+    AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
+    AND workflow.head_sha = {sha: String }
+    AND workflow.repository.full_name = {repo: String }
 )
 SELECT
-  sha,
-  workflow_name AS workflowName,
-  job_name AS jobName,
-  CONCAT(workflow_name, ' / ', job_name) AS name,
-  id AS id,
-  workflow_id AS workflowId,
-  github_artifact_url AS githubArtifactUrl,
-  if(
-    conclusion = '', 'pending', conclusion
-  ),
-  html_url AS htmlUrl,
-  log_url AS logUrl,
-  duration_s AS durationS,
-  queue_time_s AS queueTimeS,
-  line AS failureLines,
-  line_num AS failureLineNumbers,
-  captures AS failureCaptures,
-  context AS failureContext,
-  runner_name AS runnerName,
-  authorEmail,
-  time,
+  sha,
+  workflow_name AS workflowName,
+  job_name AS jobName,
+  CONCAT(workflow_name, ' / ', job_name) AS name,
+  id AS id,
+  workflow_id AS workflowId,
+  github_artifact_url AS githubArtifactUrl,
+  if(conclusion = '', 'pending', conclusion) as conclusion,
+  html_url AS htmlUrl,
+  log_url AS logUrl,
+  duration_s AS durationS,
+  queue_time_s AS queueTimeS,
+  -- Convert to arrays
+  if(line = '', [ ], [ line ]) AS failureLines,
+  if(line_num = 0, [ ], [ line_num ]) AS failureLineNumbers,
+  captures AS failureCaptures,
+  context AS failureContext,
+  runner_name AS runnerName,
+  authorEmail,
+  time,
 FROM
-  job
+  job
 ORDER BY
-  name,
-  time DESC
+  name,
+  time DESC
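
The {sha: String} and {repo: String} placeholders are ClickHouse server-side
query parameters, bound by the client rather than interpolated into the SQL.
A rough sketch of how this query would be executed with @clickhouse/client
(connection details here are placeholders; the repo's actual wrapper is
queryClickhouse in torchci/lib/clickhouse.ts below):

import { createClient } from "@clickhouse/client";

// Placeholder env var for the ClickHouse endpoint and credentials.
const client = createClient({ url: process.env.CLICKHOUSE_URL });

async function runCommitJobsQuery(query: string, sha: string, repo: string) {
  const res = await client.query({
    query,
    query_params: { sha, repo }, // fills {sha: String} and {repo: String}
    format: "JSONEachRow",
  });
  return await res.json();
}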
2 changes: 1 addition & 1 deletion torchci/components/JobLinks.tsx
@@ -40,7 +40,7 @@ export default function JobLinks({
     );
   }

-  if (job.failureCaptures != null) {
+  if (job.failureCaptures != null && job.failureLines?.length != 0) {
     subInfo.push(
       <a
         target="_blank"
6 changes: 6 additions & 0 deletions torchci/components/LogViewer.tsx
@@ -274,6 +274,12 @@ function LogWithLineSelector({
   });
   // undefined means that no line is selected, so the log viewer is closed
   const [currentLine, setCurrentLine] = useState<number | undefined>(undefined);
+  // TODO: Remove this. This is a hack to make sure that that the log viewer
+  // will always show up. It gets around some differences in output between
+  // rockset and clickhouse
+  if (lineNumbers.length === 0) {
+    lineNumbers = [0];
+  }
   return (
     <>
       {lineNumbers.map((line, index) => (
4 changes: 2 additions & 2 deletions torchci/lib/clickhouse.ts
@@ -12,7 +12,7 @@ export function getClickhouseClient() {
 export async function queryClickhouse(
   query: string,
   params: Record<string, unknown>
-) {
+): Promise<any[]> {
   /**
    * queryClickhouse
    * @param query: string, the sql query
@@ -25,7 +25,7 @@ export async function queryClickhouse(
     query_params: params,
   });

-  return await res.json();
+  return (await res.json()) as any[];
 }

 export async function queryClickhouseSaved(
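
With queryClickhouse's return type pinned to Promise<any[]>, call sites can
iterate rows without their own casts. An illustrative call (the query text
and sha are only examples, not code from this commit):

const rows = await queryClickhouse(
  "SELECT id, conclusion FROM workflow_job WHERE head_sha = {sha: String}",
  { sha: "773a782249ebe982996f18988ce6b26c4a786aa3" }
);
rows.forEach((row) => console.log(row.id, row.conclusion));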
19 changes: 15 additions & 4 deletions torchci/lib/fetchCommit.ts
@@ -1,18 +1,26 @@
 import _ from "lodash";
 import { Octokit } from "octokit";
 import rocksetVersions from "rockset/prodVersions.json";
-import { queryClickhouseSaved } from "./clickhouse";
+import { enableClickhouse, queryClickhouseSaved } from "./clickhouse";
 import { commitDataFromResponse, getOctokit } from "./github";
 import { removeCancelledJobAfterRetry } from "./jobUtils";
 import getRocksetClient from "./rockset";
 import { CommitData, JobData } from "./types";

 async function fetchDatabaseInfo(owner: string, repo: string, sha: string) {
-  if (false) {
-    const response = queryClickhouseSaved("commit_jobs_query", {
+  if (enableClickhouse()) {
+    const response = await queryClickhouseSaved("commit_jobs_query", {
       repo: `${owner}/${repo}`,
       sha: sha,
     });

+    for (const row of response) {
+      row.id = row.id == 0 ? null : row.id;
+      row.workflowId = row.workflowId == 0 ? null : row.workflowId;
+      row.durationS = parseInt(row.durationS);
+      row.queueTimeS = parseInt(row.queueTimeS);
+      row.time = row.time + "Z";
+    }
     return response;
   } else {
     const rocksetClient = getRocksetClient();
@@ -66,7 +74,10 @@ export default async function fetchCommit(
   // and then merging them back together
   const [workflows, onlyJobs] = _.partition(
     jobs,
-    (job) => job.workflowId === null || job.workflowId === undefined
+    (job) =>
+      job.workflowId === null ||
+      job.workflowId === undefined ||
+      job.workflowId === 0
   );

   const filteredJobs = removeCancelledJobAfterRetry<JobData>(onlyJobs);
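
The added workflowId === 0 case mirrors the 0 AS workflow_id sentinel that the
ClickHouse query emits for "Workflow Startup Failure" rows, where the Rockset
results had null. A standalone illustration of the partition (the job objects
here are made up):

import _ from "lodash";

const jobs = [
  { name: "Workflow Startup Failure", workflowId: 0 }, // ClickHouse sentinel
  { name: "pull / linux-build", workflowId: 123 },
];

// Split workflow-level rows from ordinary jobs before retry filtering.
const [workflows, onlyJobs] = _.partition(
  jobs,
  (job) =>
    job.workflowId === null ||
    job.workflowId === undefined ||
    job.workflowId === 0
);
// workflows -> [the startup-failure row]; onlyJobs -> [the build job]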
