Skip to content

Commit 3e516f7

Browse files
authored
[CI] Track whether a commit is a revert (#601)
This change adds additional fields for tracking whether a commit to llvm-project is a revert and which commit/pull request it is reverting. Such commits are detected by pattern matching against the commit message.
1 parent e49a696 commit 3e516f7

File tree

2 files changed

+64
-19
lines changed

2 files changed

+64
-19
lines changed

premerge/bigquery_schema/llvm_commits_table_schema.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,24 @@
4747
"mode": "REPEATED",
4848
"description": "List of GitHub users who reviewed the pull request for this commit"
4949
},
50+
{
51+
"name": "is_revert",
52+
"type": "BOOLEAN",
53+
"mode": "NULLABLE",
54+
"description": "Whether or not this commit is a revert"
55+
},
56+
{
57+
"name": "pull_request_reverted",
58+
"type": "INTEGER",
59+
"mode": "NULLABLE",
60+
"description": "Pull request matched in revert message. Not reliable for determining if a PR was reverted, `commit_reverted` may contain a commit belonging to a PR"
61+
},
62+
{
63+
"name": "commit_reverted",
64+
"type": "STRING",
65+
"mode": "NULLABLE",
66+
"description": "Commit sha matched in revert message. Not reliable for determining if a commit was reverted, `pull_request_reverted` may contain a PR contributing a commit"
67+
},
5068
{
5169
"name": "diff",
5270
"type": "RECORD",

premerge/ops-container/process_llvm_commits.py

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import logging
44
import math
55
import os
6+
import re
67
import git
78
from google.cloud import bigquery
89
import requests
@@ -64,6 +65,9 @@ class LLVMCommitInfo:
6465
is_reviewed: bool = False
6566
is_approved: bool = False
6667
reviewers: set[str] = dataclasses.field(default_factory=set)
68+
is_revert: bool = False
69+
pull_request_reverted: int | None = None
70+
commit_reverted: str | None = None
6771

6872

6973
def scrape_new_commits_by_date(
@@ -113,26 +117,49 @@ def query_for_reviews(
113117
List of LLVMCommitInfo objects for each commit's review information.
114118
"""
115119
# Create a map of commit sha to info
116-
new_commits = {
117-
commit.hexsha: LLVMCommitInfo(
118-
commit_sha=commit.hexsha,
119-
commit_timestamp_seconds=commit.committed_date,
120-
diff=[
121-
{
122-
"file": file,
123-
"additions": line_stats["insertions"],
124-
"deletions": line_stats["deletions"],
125-
"total": line_stats["lines"],
126-
}
127-
for file, line_stats in commit.stats.files.items()
128-
],
129-
)
130-
for commit in new_commits
131-
}
120+
new_commits_info = {}
121+
for commit in new_commits:
122+
# Check if this commit is a revert
123+
is_revert = (
124+
re.match(
125+
r"^Revert \".*\"( \(#\d+\))?", commit.message, flags=re.IGNORECASE
126+
)
127+
is not None
128+
)
129+
130+
# Check which pull request or commit is being reverted (if any)
131+
pull_request_match = re.search(
132+
r"Reverts? (?:llvm\/llvm-project)?#(\d+)", commit.message, flags=re.IGNORECASE
133+
)
134+
commit_match = re.search(
135+
r"This reverts commit (\w+)", commit.message, flags=re.IGNORECASE
136+
)
137+
pull_request_reverted = (
138+
int(pull_request_match.group(1)) if pull_request_match else None
139+
)
140+
commit_reverted = commit_match.group(1) if commit_match else None
141+
142+
# Add entry
143+
new_commits_info[commit.hexsha] = LLVMCommitInfo(
144+
commit_sha=commit.hexsha,
145+
commit_timestamp_seconds=commit.committed_date,
146+
diff=[
147+
{
148+
"file": file,
149+
"additions": line_stats["insertions"],
150+
"deletions": line_stats["deletions"],
151+
"total": line_stats["lines"],
152+
}
153+
for file, line_stats in commit.stats.files.items()
154+
],
155+
is_revert=is_revert,
156+
pull_request_reverted=pull_request_reverted,
157+
commit_reverted=commit_reverted,
158+
)
132159

133160
# Create GraphQL subqueries for each commit
134161
commit_subqueries = []
135-
for commit_sha in new_commits:
162+
for commit_sha in new_commits_info:
136163
commit_subqueries.append(
137164
COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit_sha)
138165
)
@@ -180,7 +207,7 @@ def query_for_reviews(
180207
# Amend commit information with GitHub data
181208
for commit_sha, data in api_commit_data.items():
182209
commit_sha = commit_sha.removeprefix("commit_")
183-
commit_info = new_commits[commit_sha]
210+
commit_info = new_commits_info[commit_sha]
184211
commit_info.commit_author = data["author"]["user"]["login"]
185212

186213
# If commit has no pull requests, skip it. No data to update.
@@ -201,7 +228,7 @@ def query_for_reviews(
201228
# against what we want to measure, so remove them from the set of reviewers.
202229
commit_info.reviewers.discard(commit_info.commit_author)
203230

204-
return list(new_commits.values())
231+
return list(new_commits_info.values())
205232

206233

207234
def upload_daily_metrics_to_bigquery(

0 commit comments

Comments
 (0)