From 7089ca4deaf3a16e8e0f3a6622eb0f99869e49bb Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 20 Aug 2022 16:26:32 +0200 Subject: [PATCH 01/10] Add .gitignore file --- .gitignore | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..aa74832 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +__pycache__/ +venv/ + +# Generated by review action +clang-tidy-review-output.json +clang-tidy-review-metadata.json + +# Generated by clang-tidy +clang_tidy_review.yaml From bc228f35d090bcd5462964dce92eb6526ab6e0fd Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 20 Aug 2022 16:26:42 +0200 Subject: [PATCH 02/10] Add .dockerignore file --- .dockerignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..9da3019 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +**/venv +**/__pycache__ +.git From 8097d6d5cb9f88da11bbb76ae41f954267cca22a Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 21 Aug 2022 12:14:35 +0200 Subject: [PATCH 03/10] Move all shared logic from review.py into the clang_tidy_review library --- post/__init__.py | 0 post/clang_tidy_review/__init__.py | 634 +++++++++++++++++++++++++++++ review.py | 621 +--------------------------- 3 files changed, 645 insertions(+), 610 deletions(-) create mode 100644 post/__init__.py create mode 100644 post/clang_tidy_review/__init__.py diff --git a/post/__init__.py b/post/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py new file mode 100644 index 0000000..77c0a7e --- /dev/null +++ b/post/clang_tidy_review/__init__.py @@ -0,0 +1,634 @@ +# clang-tidy review +# Copyright (c) 2020 Peter Hill +# SPDX-License-Identifier: MIT +# See LICENSE for more information + +import argparse +import itertools +import json +import os +from operator import itemgetter +import pprint +import pathlib +import requests +import subprocess +import textwrap +import unidiff +import yaml +import contextlib +import datetime +import subprocess +from github import Github +from github.Requester import Requester +from github.PaginatedList import PaginatedList +from typing import Any, List, Optional + +DIFF_HEADER_LINE_LENGTH = 5 +FIXES_FILE = "clang_tidy_review.yaml" + + +class PullRequest: + """Add some convenience functions not in PyGithub""" + + def __init__(self, repo: str, pr_number: int, token: str) -> None: + self.repo = repo + self.pr_number = pr_number + self.token = token + + github = Github(token) + repo_object = github.get_repo(f"{repo}") + self._pull_request = repo_object.get_pull(pr_number) + + def headers(self, media_type: str): + return { + "Accept": f"application/vnd.github.{media_type}", + "Authorization": f"token {self.token}", + } + + @property + def base_url(self): + return f"https://api.github.com/repos/{self.repo}/pulls/{self.pr_number}" + + def get(self, media_type: str, extra: str = "") -> str: + url = f"{self.base_url}{extra}" + response = requests.get(url, headers=self.headers(media_type)) + response.raise_for_status() + return response.text + + def get_pr_diff(self) -> List[unidiff.PatchSet]: + """Download the PR diff, return a list of PatchedFile""" + + diffs = self.get("v3.diff") + + # PatchSet is the easiest way to construct what we want, but the + # diff_line_no property on lines is counted from the top of the + # whole PatchSet, whereas GitHub is expecting the "position" + # property to be line count within each file's diff. So we need to + # do this little bit of faff to get a list of file-diffs with + # their own diff_line_no range + diff = [unidiff.PatchSet(str(file))[0] for file in unidiff.PatchSet(diffs)] + return diff + + def get_pr_comments(self): + """Download the PR review comments using the comfort-fade preview headers""" + + def get_element( + requester: Requester, headers: dict, element: dict, completed: bool + ): + return element + + return PaginatedList( + get_element, + self._pull_request._requester, + f"{self.base_url}/comments", + None, + ) + + def post_lgtm_comment(self, body: str): + """Post a "LGTM" comment if everything's clean, making sure not to spam""" + + if not body: + return + + comments = self.get_pr_comments() + + for comment in comments: + if comment["body"] == body: + print("Already posted, no need to update") + return + + self._pull_request.create_issue_comment(body) + + def post_review(self, review): + """Submit a completed review""" + headers = { + "Accept": "application/vnd.github.comfort-fade-preview+json", + "Authorization": f"token {self.token}", + } + url = f"{self.base_url}/reviews" + + post_review_response = requests.post(url, json=review, headers=headers) + print(post_review_response.text) + post_review_response.raise_for_status() + + +@contextlib.contextmanager +def message_group(title: str): + print(f"::group::{title}", flush=True) + try: + yield + finally: + print("::endgroup::", flush=True) + + +def make_file_line_lookup(diff): + """Get a lookup table for each file in diff, to convert between source + line number to line number in the diff + + """ + lookup = {} + for file in diff: + filename = file.target_file[2:] + lookup[filename] = {} + for hunk in file: + for line in hunk: + if line.diff_line_no is None: + continue + if not line.is_removed: + lookup[filename][line.target_line_no] = ( + line.diff_line_no - DIFF_HEADER_LINE_LENGTH + ) + return lookup + + +def make_file_offset_lookup(filenames): + """Create a lookup table to convert between character offset and line + number for the list of files in `filenames`. + + This is a dict of the cumulative sum of the line lengths for each file. + + """ + lookup = {} + + for filename in filenames: + with open(filename, "r") as file: + lines = file.readlines() + # Length of each line + line_lengths = map(len, lines) + # Cumulative sum of line lengths => offset at end of each line + lookup[os.path.abspath(filename)] = [0] + list( + itertools.accumulate(line_lengths) + ) + + return lookup + + +def get_diagnostic_file_path(clang_tidy_diagnostic, build_dir): + + # Sometimes, clang-tidy gives us an absolute path, so everything is fine. + # Sometimes however it gives us a relative path that is realtive to the + # build directory, so we prepend that. + + # Modern clang-tidy + if ("DiagnosticMessage" in clang_tidy_diagnostic) and ( + "FilePath" in clang_tidy_diagnostic["DiagnosticMessage"] + ): + file_path = clang_tidy_diagnostic["DiagnosticMessage"]["FilePath"] + if file_path == "": + return "" + elif os.path.isabs(file_path): + return os.path.normpath(os.path.abspath(file_path)) + else: + # Make the relative path absolute with the build dir + if "BuildDirectory" in clang_tidy_diagnostic: + return os.path.normpath( + os.path.abspath( + os.path.join(clang_tidy_diagnostic["BuildDirectory"], file_path) + ) + ) + else: + return os.path.normpath(os.path.abspath(file_path)) + + # Pre-clang-tidy-9 format + elif "FilePath" in clang_tidy_diagnostic: + file_path = clang_tidy_diagnostic["FilePath"] + if file_path == "": + return "" + else: + return os.path.normpath(os.path.abspath(os.path.join(build_dir, file_path))) + + else: + return "" + + +def find_line_number_from_offset(offset_lookup, filename, offset): + """Work out which line number `offset` corresponds to using `offset_lookup`. + + The line number (0-indexed) is the index of the first line offset + which is larger than `offset`. + + """ + name = str(pathlib.Path(filename).resolve().absolute()) + + if name not in offset_lookup: + # Let's make sure we've the file offsets for this other file + offset_lookup.update(make_file_offset_lookup([name])) + + for line_num, line_offset in enumerate(offset_lookup[name]): + if line_offset > offset: + return line_num - 1 + return -1 + + +def read_one_line(filename, line_offset): + """Read a single line from a source file""" + # Could cache the files instead of opening them each time? + with open(filename, "r") as file: + file.seek(line_offset) + return file.readline().rstrip("\n") + + +def collate_replacement_sets(diagnostic, offset_lookup): + """Return a dict of replacements on the same or consecutive lines, indexed by line number + + We need this as we have to apply all the replacements on one line at the same time + + This could break if there are replacements in with the same line + number but in different files. + + """ + + # First, make sure each replacement contains "LineNumber", and + # "EndLineNumber" in case it spans multiple lines + for replacement in diagnostic["Replacements"]: + # It's possible the replacement is needed in another file? + # Not really sure how that could come about, but let's + # cover our behinds in case it does happen: + if replacement["FilePath"] not in offset_lookup: + # Let's make sure we've the file offsets for this other file + offset_lookup.update(make_file_offset_lookup([replacement["FilePath"]])) + + replacement["LineNumber"] = find_line_number_from_offset( + offset_lookup, replacement["FilePath"], replacement["Offset"] + ) + replacement["EndLineNumber"] = find_line_number_from_offset( + offset_lookup, + replacement["FilePath"], + replacement["Offset"] + replacement["Length"], + ) + + # Now we can group them into consecutive lines + groups = [] + for index, replacement in enumerate(diagnostic["Replacements"]): + if index == 0: + # First one starts a new group, always + groups.append([replacement]) + elif ( + replacement["LineNumber"] == groups[-1][-1]["LineNumber"] + or replacement["LineNumber"] - 1 == groups[-1][-1]["LineNumber"] + ): + # Same or adjacent line to the last line in the last group + # goes in the same group + groups[-1].append(replacement) + else: + # Otherwise, start a new group + groups.append([replacement]) + + # Turn the list into a dict + return {g[0]["LineNumber"]: g for g in groups} + + +def replace_one_line(replacement_set, line_num, offset_lookup): + """Apply all the replacements in replacement_set at the same time""" + + filename = replacement_set[0]["FilePath"] + # File offset at the start of the first line + line_offset = offset_lookup[filename][line_num] + + # List of (start, end) offsets from line_offset + insert_offsets = [(0, 0)] + # Read all the source lines into a dict so we only get one copy of + # each line, though we might read the same line in multiple times + source_lines = {} + for replacement in replacement_set: + start = replacement["Offset"] - line_offset + end = start + replacement["Length"] + insert_offsets.append((start, end)) + + # Make sure to read any extra lines we need too + for replacement_line_num in range( + replacement["LineNumber"], replacement["EndLineNumber"] + 1 + ): + replacement_line_offset = offset_lookup[filename][replacement_line_num] + source_lines[replacement_line_num] = ( + read_one_line(filename, replacement_line_offset) + "\n" + ) + + # Replacements might cross multiple lines, so squash them all together + source_line = "".join(source_lines.values()).rstrip("\n") + + insert_offsets.append((None, None)) + + fragments = [] + for (_, start), (end, _) in zip(insert_offsets[:-1], insert_offsets[1:]): + fragments.append(source_line[start:end]) + + new_line = "" + for fragment, replacement in zip(fragments, replacement_set): + new_line += fragment + replacement["ReplacementText"] + + return source_line, new_line + fragments[-1] + + +def format_ordinary_line(source_line, line_offset): + """Format a single C++ line with a diagnostic indicator""" + + return textwrap.dedent( + f"""\ + ```cpp + {source_line} + {line_offset * " " + "^"} + ``` + """ + ) + + +def format_diff_line(diagnostic, offset_lookup, source_line, line_offset, line_num): + """Format a replacement as a Github suggestion or diff block""" + + end_line = line_num + + # We're going to be appending to this + code_blocks = "" + + replacement_sets = collate_replacement_sets(diagnostic, offset_lookup) + + for replacement_line_num, replacement_set in replacement_sets.items(): + old_line, new_line = replace_one_line( + replacement_set, replacement_line_num, offset_lookup + ) + + print(f"----------\n{old_line=}\n{new_line=}\n----------") + + # If the replacement is for the same line as the + # diagnostic (which is where the comment will be), then + # format the replacement as a suggestion. Otherwise, + # format it as a diff + if replacement_line_num == line_num: + code_blocks += f""" +```suggestion +{new_line} +``` +""" + end_line = replacement_set[-1]["EndLineNumber"] + else: + # Prepend each line in the replacement line with "+ " + # in order to make a nice diff block. The extra + # whitespace is so the multiline dedent-ed block below + # doesn't come out weird. + whitespace = "\n " + new_line = whitespace.join([f"+ {line}" for line in new_line.splitlines()]) + old_line = whitespace.join([f"- {line}" for line in old_line.splitlines()]) + + rel_path = try_relative(replacement_set[0]["FilePath"]) + code_blocks += textwrap.dedent( + f"""\ + + {rel_path}:{replacement_line_num}: + ```diff + {old_line} + {new_line} + ``` + """ + ) + return code_blocks, end_line + + +def try_relative(path): + """Try making `path` relative to current directory, otherwise make it an absolute path""" + try: + here = pathlib.Path.cwd() + return pathlib.Path(path).relative_to(here) + except ValueError: + return pathlib.Path(path).resolve() + + +def format_notes(notes, offset_lookup): + """Format an array of notes into a single string""" + + code_blocks = "" + + for note in notes: + filename = note["FilePath"] + + if filename == "": + return note["Message"] + + resolved_path = str(pathlib.Path(filename).resolve().absolute()) + + line_num = find_line_number_from_offset( + offset_lookup, resolved_path, note["FileOffset"] + ) + line_offset = note["FileOffset"] - offset_lookup[resolved_path][line_num] + source_line = read_one_line( + resolved_path, offset_lookup[resolved_path][line_num] + ) + + path = try_relative(resolved_path) + message = f"**{path}:{line_num}:** {note['Message']}" + code = format_ordinary_line(source_line, line_offset) + code_blocks += f"{message}\n{code}" + + return code_blocks + + +def make_comment_from_diagnostic( + diagnostic_name, diagnostic, filename, offset_lookup, notes +): + """Create a comment from a diagnostic + + Comment contains the diagnostic message, plus its name, along with + code block(s) containing either the exact location of the + diagnostic, or suggested fix(es). + + """ + + line_num = find_line_number_from_offset( + offset_lookup, filename, diagnostic["FileOffset"] + ) + line_offset = diagnostic["FileOffset"] - offset_lookup[filename][line_num] + + source_line = read_one_line(filename, offset_lookup[filename][line_num]) + end_line = line_num + + print( + f"""{diagnostic} + {line_num=}; {line_offset=}; {source_line=} + """ + ) + + if diagnostic["Replacements"]: + code_blocks, end_line = format_diff_line( + diagnostic, offset_lookup, source_line, line_offset, line_num + ) + else: + # No fixit, so just point at the problem + code_blocks = format_ordinary_line(source_line, line_offset) + + code_blocks += format_notes(notes, offset_lookup) + + comment_body = ( + f"warning: {diagnostic['Message']} [{diagnostic_name}]\n{code_blocks}" + ) + + return comment_body, end_line + 1 + + +def make_review(diagnostics, diff_lookup, offset_lookup, build_dir): + """Create a Github review from a set of clang-tidy diagnostics""" + + comments = [] + + for diagnostic in diagnostics: + try: + diagnostic_message = diagnostic["DiagnosticMessage"] + except KeyError: + # Pre-clang-tidy-9 format + diagnostic_message = diagnostic + + if diagnostic_message["FilePath"] == "": + continue + + comment_body, end_line = make_comment_from_diagnostic( + diagnostic["DiagnosticName"], + diagnostic_message, + get_diagnostic_file_path(diagnostic, build_dir), + offset_lookup, + notes=diagnostic.get("Notes", []), + ) + + rel_path = str(try_relative(get_diagnostic_file_path(diagnostic, build_dir))) + # diff lines are 1-indexed + source_line = 1 + find_line_number_from_offset( + offset_lookup, + get_diagnostic_file_path(diagnostic, build_dir), + diagnostic_message["FileOffset"], + ) + + if rel_path not in diff_lookup or end_line not in diff_lookup[rel_path]: + print( + f"WARNING: Skipping comment for file '{rel_path}' not in PR changeset. Comment body is:\n{comment_body}" + ) + continue + + comments.append( + { + "path": rel_path, + "body": comment_body, + "side": "RIGHT", + "line": end_line, + } + ) + # If this is a multiline comment, we need a couple more bits: + if end_line != source_line: + comments[-1].update( + { + "start_side": "RIGHT", + "start_line": source_line, + } + ) + + review = { + "body": "clang-tidy made some suggestions", + "event": "COMMENT", + "comments": comments, + } + return review + + +def get_line_ranges(diff, files): + """Return the line ranges of added lines in diff, suitable for the + line-filter argument of clang-tidy + + """ + + lines_by_file = {} + for filename in diff: + if filename.target_file[2:] not in files: + continue + added_lines = [] + for hunk in filename: + for line in hunk: + if line.is_added: + added_lines.append(line.target_line_no) + + for _, group in itertools.groupby( + enumerate(added_lines), lambda ix: ix[0] - ix[1] + ): + groups = list(map(itemgetter(1), group)) + lines_by_file.setdefault(filename.target_file[2:], []).append( + [groups[0], groups[-1]] + ) + + line_filter_json = [] + for name, lines in lines_by_file.items(): + line_filter_json.append(str({"name": name, "lines": lines})) + return json.dumps(line_filter_json, separators=(",", ":")) + + +def get_clang_tidy_warnings( + line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files + ): + """Run clang-tidy on the given files and save output into FIXES_FILE""" + + if config_file != "": + config = f'-config-file="{config_file}"' + else: + config = f"-checks={clang_tidy_checks}" + + print(f"Using config: {config}") + + command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}" + + start = datetime.datetime.now() + try: + with message_group(f"Running:\n\t{command}"): + output = subprocess.run( + command, capture_output=True, shell=True, check=True, encoding="utf-8" + ) + except subprocess.CalledProcessError as e: + print( + f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}" + ) + end = datetime.datetime.now() + + print(f"Took: {end - start}") + + try: + with open(FIXES_FILE, "r") as fixes_file: + return yaml.safe_load(fixes_file) + except FileNotFoundError: + return {} + + +def cull_comments(pull_request: PullRequest, review, max_comments): + """Remove comments from review that have already been posted, and keep + only the first max_comments + + """ + + comments = pull_request.get_pr_comments() + + for comment in comments: + review["comments"] = list( + filter( + lambda review_comment: not ( + review_comment["path"] == comment["path"] + and review_comment["line"] == comment["line"] + and review_comment["body"] == comment["body"] + ), + review["comments"], + ) + ) + + if len(review["comments"]) > max_comments: + review["body"] += ( + "\n\nThere were too many comments to post at once. " + f"Showing the first {max_comments} out of {len(review['comments'])}. " + "Check the log or trigger a new build to see more." + ) + review["comments"] = review["comments"][:max_comments] + + return review + + +def strip_enclosing_quotes(string: str) -> str: + """Strip leading/trailing whitespace and remove any enclosing quotes""" + stripped = string.strip() + + # Need to check double quotes again in case they're nested inside + # single quotes + for quote in ['"', "'", '"']: + if stripped.startswith(quote) and stripped.endswith(quote): + stripped = stripped[1:-1] + return stripped diff --git a/review.py b/review.py index d07288f..318c038 100755 --- a/review.py +++ b/review.py @@ -6,615 +6,28 @@ # See LICENSE for more information import argparse -import contextlib -import datetime -import itertools import fnmatch import json import os -from operator import itemgetter import pathlib import pprint import re -import requests import subprocess -import textwrap -import unidiff -import yaml -from github import Github -from github.Requester import Requester -from github.PaginatedList import PaginatedList -from typing import List -BAD_CHARS_APT_PACKAGES_PATTERN = "[;&|($]" -DIFF_HEADER_LINE_LENGTH = 5 -FIXES_FILE = "clang_tidy_review.yaml" - - -class PullRequest: - """Add some convenience functions not in PyGithub""" - - def __init__(self, repo: str, pr_number: int, token: str): - self.repo = repo - self.pr_number = pr_number - self.token = token - - github = Github(token) - repo_object = github.get_repo(f"{repo}") - self._pull_request = repo_object.get_pull(pr_number) - - def headers(self, media_type: str): - return { - "Accept": f"application/vnd.github.{media_type}", - "Authorization": f"token {self.token}", - } - - @property - def base_url(self): - return f"https://api.github.com/repos/{self.repo}/pulls/{self.pr_number}" - - def get(self, media_type: str, extra: str = "") -> str: - url = f"{self.base_url}{extra}" - response = requests.get(url, headers=self.headers(media_type)) - response.raise_for_status() - return response.text - - def get_pr_diff(self) -> List[unidiff.PatchSet]: - """Download the PR diff, return a list of PatchedFile""" - - diffs = self.get("v3.diff") - - # PatchSet is the easiest way to construct what we want, but the - # diff_line_no property on lines is counted from the top of the - # whole PatchSet, whereas GitHub is expecting the "position" - # property to be line count within each file's diff. So we need to - # do this little bit of faff to get a list of file-diffs with - # their own diff_line_no range - diff = [unidiff.PatchSet(str(file))[0] for file in unidiff.PatchSet(diffs)] - return diff - - def get_pr_comments(self): - """Download the PR review comments using the comfort-fade preview headers""" - - def get_element( - requester: Requester, headers: dict, element: dict, completed: bool - ): - return element - - return PaginatedList( - get_element, - self._pull_request._requester, - f"{self.base_url}/comments", - None, - ) - - def post_lgtm_comment(self, body: str): - """Post a "LGTM" comment if everything's clean, making sure not to spam""" - - if not body: - return - - comments = self.get_pr_comments() - - for comment in comments: - if comment["body"] == body: - print("Already posted, no need to update") - return - - self._pull_request.create_issue_comment(body) - - def post_review(self, review): - """Submit a completed review""" - headers = { - "Accept": "application/vnd.github.comfort-fade-preview+json", - "Authorization": f"token {self.token}", - } - url = f"{self.base_url}/reviews" - - post_review_response = requests.post(url, json=review, headers=headers) - print(post_review_response.text) - post_review_response.raise_for_status() - - -@contextlib.contextmanager -def message_group(title: str): - print(f"::group::{title}", flush=True) - try: - yield - finally: - print("::endgroup::", flush=True) - - -def make_file_line_lookup(diff): - """Get a lookup table for each file in diff, to convert between source - line number to line number in the diff - - """ - lookup = {} - for file in diff: - filename = file.target_file[2:] - lookup[filename] = {} - for hunk in file: - for line in hunk: - if line.diff_line_no is None: - continue - if not line.is_removed: - lookup[filename][line.target_line_no] = ( - line.diff_line_no - DIFF_HEADER_LINE_LENGTH - ) - return lookup - - -def make_file_offset_lookup(filenames): - """Create a lookup table to convert between character offset and line - number for the list of files in `filenames`. - - This is a dict of the cumulative sum of the line lengths for each file. - - """ - lookup = {} - - for filename in filenames: - with open(filename, "r") as file: - lines = file.readlines() - # Length of each line - line_lengths = map(len, lines) - # Cumulative sum of line lengths => offset at end of each line - lookup[os.path.abspath(filename)] = [0] + list( - itertools.accumulate(line_lengths) - ) - - return lookup - -def get_diagnostic_file_path(clang_tidy_diagnostic, build_dir): - - # Sometimes, clang-tidy gives us an absolute path, so everything is fine. - # Sometimes however it gives us a relative path that is realtive to the - # build directory, so we prepend that. - - # Modern clang-tidy - if ("DiagnosticMessage" in clang_tidy_diagnostic) and ("FilePath" in clang_tidy_diagnostic["DiagnosticMessage"]): - file_path = clang_tidy_diagnostic["DiagnosticMessage"]["FilePath"] - if file_path == "": - return "" - elif os.path.isabs(file_path): - return os.path.normpath(os.path.abspath(file_path)) - else: - # Make the relative path absolute with the build dir - if "BuildDirectory" in clang_tidy_diagnostic: - return os.path.normpath(os.path.abspath(os.path.join(clang_tidy_diagnostic["BuildDirectory"], file_path))) - else: - return os.path.normpath(os.path.abspath(file_path)) - - # Pre-clang-tidy-9 format - elif "FilePath" in clang_tidy_diagnostic: - file_path = clang_tidy_diagnostic["FilePath"] - if file_path == "": - return "" - else: - return os.path.normpath(os.path.abspath(os.path.join(build_dir, file_path))) - - else: - return "" - - -def find_line_number_from_offset(offset_lookup, filename, offset): - """Work out which line number `offset` corresponds to using `offset_lookup`. - - The line number (0-indexed) is the index of the first line offset - which is larger than `offset`. - - """ - name = str(pathlib.Path(filename).resolve().absolute()) - - if name not in offset_lookup: - # Let's make sure we've the file offsets for this other file - offset_lookup.update(make_file_offset_lookup([name])) - - for line_num, line_offset in enumerate(offset_lookup[name]): - if line_offset > offset: - return line_num - 1 - return -1 - - -def read_one_line(filename, line_offset): - """Read a single line from a source file""" - # Could cache the files instead of opening them each time? - with open(filename, "r") as file: - file.seek(line_offset) - return file.readline().rstrip("\n") - - -def collate_replacement_sets(diagnostic, offset_lookup): - """Return a dict of replacements on the same or consecutive lines, indexed by line number - - We need this as we have to apply all the replacements on one line at the same time - - This could break if there are replacements in with the same line - number but in different files. - - """ - - # First, make sure each replacement contains "LineNumber", and - # "EndLineNumber" in case it spans multiple lines - for replacement in diagnostic["Replacements"]: - # It's possible the replacement is needed in another file? - # Not really sure how that could come about, but let's - # cover our behinds in case it does happen: - if replacement["FilePath"] not in offset_lookup: - # Let's make sure we've the file offsets for this other file - offset_lookup.update(make_file_offset_lookup([replacement["FilePath"]])) - - replacement["LineNumber"] = find_line_number_from_offset( - offset_lookup, replacement["FilePath"], replacement["Offset"] - ) - replacement["EndLineNumber"] = find_line_number_from_offset( - offset_lookup, - replacement["FilePath"], - replacement["Offset"] + replacement["Length"], - ) - - # Now we can group them into consecutive lines - groups = [] - for index, replacement in enumerate(diagnostic["Replacements"]): - if index == 0: - # First one starts a new group, always - groups.append([replacement]) - elif ( - replacement["LineNumber"] == groups[-1][-1]["LineNumber"] - or replacement["LineNumber"] - 1 == groups[-1][-1]["LineNumber"] - ): - # Same or adjacent line to the last line in the last group - # goes in the same group - groups[-1].append(replacement) - else: - # Otherwise, start a new group - groups.append([replacement]) - - # Turn the list into a dict - return {g[0]["LineNumber"]: g for g in groups} - - -def replace_one_line(replacement_set, line_num, offset_lookup): - """Apply all the replacements in replacement_set at the same time""" - - filename = replacement_set[0]["FilePath"] - # File offset at the start of the first line - line_offset = offset_lookup[filename][line_num] - - # List of (start, end) offsets from line_offset - insert_offsets = [(0, 0)] - # Read all the source lines into a dict so we only get one copy of - # each line, though we might read the same line in multiple times - source_lines = {} - for replacement in replacement_set: - start = replacement["Offset"] - line_offset - end = start + replacement["Length"] - insert_offsets.append((start, end)) - - # Make sure to read any extra lines we need too - for replacement_line_num in range( - replacement["LineNumber"], replacement["EndLineNumber"] + 1 - ): - replacement_line_offset = offset_lookup[filename][replacement_line_num] - source_lines[replacement_line_num] = ( - read_one_line(filename, replacement_line_offset) + "\n" - ) - - # Replacements might cross multiple lines, so squash them all together - source_line = "".join(source_lines.values()).rstrip("\n") - - insert_offsets.append((None, None)) - - fragments = [] - for (_, start), (end, _) in zip(insert_offsets[:-1], insert_offsets[1:]): - fragments.append(source_line[start:end]) - - new_line = "" - for fragment, replacement in zip(fragments, replacement_set): - new_line += fragment + replacement["ReplacementText"] - - return source_line, new_line + fragments[-1] - - -def format_ordinary_line(source_line, line_offset): - """Format a single C++ line with a diagnostic indicator""" - - return textwrap.dedent( - f"""\ - ```cpp - {source_line} - {line_offset * " " + "^"} - ``` - """ - ) - - -def format_diff_line(diagnostic, offset_lookup, source_line, line_offset, line_num): - """Format a replacement as a Github suggestion or diff block""" - - end_line = line_num - - # We're going to be appending to this - code_blocks = "" - - replacement_sets = collate_replacement_sets(diagnostic, offset_lookup) - - for replacement_line_num, replacement_set in replacement_sets.items(): - old_line, new_line = replace_one_line( - replacement_set, replacement_line_num, offset_lookup - ) - - print(f"----------\n{old_line=}\n{new_line=}\n----------") - - # If the replacement is for the same line as the - # diagnostic (which is where the comment will be), then - # format the replacement as a suggestion. Otherwise, - # format it as a diff - if replacement_line_num == line_num: - code_blocks += f""" -```suggestion -{new_line} -``` -""" - end_line = replacement_set[-1]["EndLineNumber"] - else: - # Prepend each line in the replacement line with "+ " - # in order to make a nice diff block. The extra - # whitespace is so the multiline dedent-ed block below - # doesn't come out weird. - whitespace = "\n " - new_line = whitespace.join([f"+ {line}" for line in new_line.splitlines()]) - old_line = whitespace.join([f"- {line}" for line in old_line.splitlines()]) - - rel_path = try_relative(replacement_set[0]["FilePath"]) - code_blocks += textwrap.dedent( - f"""\ - - {rel_path}:{replacement_line_num}: - ```diff - {old_line} - {new_line} - ``` - """ - ) - return code_blocks, end_line - - -def try_relative(path): - """Try making `path` relative to current directory, otherwise make it an absolute path""" - try: - here = pathlib.Path.cwd() - return pathlib.Path(path).relative_to(here) - except ValueError: - return pathlib.Path(path).resolve() - - -def format_notes(notes, offset_lookup): - """Format an array of notes into a single string""" - - code_blocks = "" - - for note in notes: - filename = note["FilePath"] - - if filename == "": - return note["Message"] - - resolved_path = str(pathlib.Path(filename).resolve().absolute()) - - line_num = find_line_number_from_offset( - offset_lookup, resolved_path, note["FileOffset"] - ) - line_offset = note["FileOffset"] - offset_lookup[resolved_path][line_num] - source_line = read_one_line( - resolved_path, offset_lookup[resolved_path][line_num] +from post.clang_tidy_review import ( + PullRequest, + get_line_ranges, + get_clang_tidy_warnings, + make_file_line_lookup, + make_file_offset_lookup, + message_group, + make_review, + cull_comments, + strip_enclosing_quotes, ) - path = try_relative(resolved_path) - message = f"**{path}:{line_num}:** {note['Message']}" - code = format_ordinary_line(source_line, line_offset) - code_blocks += f"{message}\n{code}" - return code_blocks - - -def make_comment_from_diagnostic(diagnostic_name, diagnostic, filename ,offset_lookup, notes): - """Create a comment from a diagnostic - - Comment contains the diagnostic message, plus its name, along with - code block(s) containing either the exact location of the - diagnostic, or suggested fix(es). - - """ - - line_num = find_line_number_from_offset( - offset_lookup, filename, diagnostic["FileOffset"] - ) - line_offset = diagnostic["FileOffset"] - offset_lookup[filename][line_num] - - source_line = read_one_line(filename, offset_lookup[filename][line_num]) - end_line = line_num - - print( - f"""{diagnostic} - {line_num=}; {line_offset=}; {source_line=} - """ - ) - - if diagnostic["Replacements"]: - code_blocks, end_line = format_diff_line( - diagnostic, offset_lookup, source_line, line_offset, line_num - ) - else: - # No fixit, so just point at the problem - code_blocks = format_ordinary_line(source_line, line_offset) - - code_blocks += format_notes(notes, offset_lookup) - - comment_body = ( - f"warning: {diagnostic['Message']} [{diagnostic_name}]\n{code_blocks}" - ) - - return comment_body, end_line + 1 - - -def make_review(diagnostics, diff_lookup, offset_lookup, build_dir): - """Create a Github review from a set of clang-tidy diagnostics""" - - comments = [] - - for diagnostic in diagnostics: - try: - diagnostic_message = diagnostic["DiagnosticMessage"] - except KeyError: - # Pre-clang-tidy-9 format - diagnostic_message = diagnostic - - if diagnostic_message["FilePath"] == "": - continue - - comment_body, end_line = make_comment_from_diagnostic( - diagnostic["DiagnosticName"], - diagnostic_message, - get_diagnostic_file_path(diagnostic, build_dir), - offset_lookup, - notes=diagnostic.get("Notes", []), - ) - - rel_path = str(try_relative(get_diagnostic_file_path(diagnostic, build_dir))) - # diff lines are 1-indexed - source_line = 1 + find_line_number_from_offset( - offset_lookup, - get_diagnostic_file_path(diagnostic, build_dir), - diagnostic_message["FileOffset"], - ) - - if rel_path not in diff_lookup or end_line not in diff_lookup[rel_path]: - print( - f"WARNING: Skipping comment for file '{rel_path}' not in PR changeset. Comment body is:\n{comment_body}" - ) - continue - - comments.append( - { - "path": rel_path, - "body": comment_body, - "side": "RIGHT", - "line": end_line, - } - ) - # If this is a multiline comment, we need a couple more bits: - if end_line != source_line: - comments[-1].update( - { - "start_side": "RIGHT", - "start_line": source_line, - } - ) - - review = { - "body": "clang-tidy made some suggestions", - "event": "COMMENT", - "comments": comments, - } - return review - - -def get_line_ranges(diff, files): - """Return the line ranges of added lines in diff, suitable for the - line-filter argument of clang-tidy - - """ - - lines_by_file = {} - for filename in diff: - if filename.target_file[2:] not in files: - continue - added_lines = [] - for hunk in filename: - for line in hunk: - if line.is_added: - added_lines.append(line.target_line_no) - - for _, group in itertools.groupby( - enumerate(added_lines), lambda ix: ix[0] - ix[1] - ): - groups = list(map(itemgetter(1), group)) - lines_by_file.setdefault(filename.target_file[2:], []).append( - [groups[0], groups[-1]] - ) - - line_filter_json = [] - for name, lines in lines_by_file.items(): - line_filter_json.append(str({"name": name, "lines": lines})) - return json.dumps(line_filter_json, separators=(",", ":")) - - -def get_clang_tidy_warnings( - line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files -): - """Get the clang-tidy warnings""" - - if config_file != "": - config = f"-config-file=\"{config_file}\"" - else: - config = f"-checks={clang_tidy_checks}" - - print(f"Using config: {config}") - - command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}" - - start = datetime.datetime.now() - try: - with message_group(f"Running:\n\t{command}"): - output = subprocess.run( - command, capture_output=True, shell=True, check=True, encoding="utf-8" - ) - except subprocess.CalledProcessError as e: - print( - f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}" - ) - end = datetime.datetime.now() - - print(f"Took: {end - start}") - - try: - with open(FIXES_FILE, "r") as fixes_file: - return yaml.safe_load(fixes_file) - except FileNotFoundError: - return {} - - -def cull_comments(pull_request: PullRequest, review, max_comments): - """Remove comments from review that have already been posted, and keep - only the first max_comments - - """ - - comments = pull_request.get_pr_comments() - - for comment in comments: - review["comments"] = list( - filter( - lambda review_comment: not ( - review_comment["path"] == comment["path"] - and review_comment["line"] == comment["line"] - and review_comment["body"] == comment["body"] - ), - review["comments"], - ) - ) - - if len(review["comments"]) > max_comments: - review["body"] += ( - "\n\nThere were too many comments to post at once. " - f"Showing the first {max_comments} out of {len(review['comments'])}. " - "Check the log or trigger a new build to see more." - ) - review["comments"] = review["comments"][:max_comments] - - return review +BAD_CHARS_APT_PACKAGES_PATTERN = "[;&|($]" def main( @@ -709,18 +122,6 @@ def main( pull_request.post_review(trimmed_review) -def strip_enclosing_quotes(string: str) -> str: - """Strip leading/trailing whitespace and remove any enclosing quotes""" - stripped = string.strip() - - # Need to check double quotes again in case they're nested inside - # single quotes - for quote in ['"', "'", '"']: - if stripped.startswith(quote) and stripped.endswith(quote): - stripped = stripped[1:-1] - return stripped - - def fix_absolute_paths(build_compile_commands, base_dir): """Update absolute paths in compile_commands.json to new location, if compile_commands.json was created outside the Actions container From 59fea60857e13c2098933676dc3bdde9b566652d Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 20 Aug 2022 16:33:04 +0200 Subject: [PATCH 04/10] review Dockerfile: Include post_comments library rm -rf apt cache dir to make container slightly smaller Add workdir to help debugging locally --- Dockerfile | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index b36dfa8..daf6369 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,8 +16,15 @@ RUN apt update && \ clang-tidy-12 \ python3 python3-pip && \ pip3 install --upgrade pip && \ - pip3 install -r requirements.txt + pip3 install -r requirements.txt && \ + rm -rf /var/lib/apt/lists/ -COPY review.py /review.py +WORKDIR /action -ENTRYPOINT ["/review.py"] +COPY review.py /action/review.py + +# Include the entirety of the post directory for simplicity's sake +# Technically we only need the clang_tidy_review directory but this keeps things consistent for running the command locally during development and in the docker image +COPY post /action/post + +ENTRYPOINT ["/action/review.py"] From 3291259067d71d818ba5ac2ea83a1af08d2b72ba Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 21 Aug 2022 13:57:03 +0200 Subject: [PATCH 05/10] Save metadata regarding the review action to a file --- post/clang_tidy_review/__init__.py | 31 +++++++++++++++++++++++++++++- review.py | 2 ++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py index 77c0a7e..59bbab2 100644 --- a/post/clang_tidy_review/__init__.py +++ b/post/clang_tidy_review/__init__.py @@ -21,10 +21,20 @@ from github import Github from github.Requester import Requester from github.PaginatedList import PaginatedList -from typing import Any, List, Optional +from typing import Any, List, Optional, TypedDict DIFF_HEADER_LINE_LENGTH = 5 FIXES_FILE = "clang_tidy_review.yaml" +METADATA_FILE = "clang-tidy-review-metadata.json" + + +class Metadata(TypedDict): + """Loaded from `METADATA_FILE` + Contains information necessary to post a review without pull request knowledge + + """ + + pr_number: int class PullRequest: @@ -526,6 +536,25 @@ def make_review(diagnostics, diff_lookup, offset_lookup, build_dir): return review +def load_metadata() -> Metadata: + """Load metadata from the METADATA_FILE path""" + + with open(METADATA_FILE, "r") as metadata_file: + x = json.load(metadata_file) + print(f"x: {x}") + return x + +def save_metadata(pr_number: int) -> None: + """Save metadata to the METADATA_FILE path""" + + metadata: Metadata = { + "pr_number": pr_number + } + + with open(METADATA_FILE, "w") as metadata_file: + json.dump(metadata, metadata_file) + + def get_line_ranges(diff, files): """Return the line ranges of added lines in diff, suitable for the line-filter argument of clang-tidy diff --git a/review.py b/review.py index 318c038..75d5de9 100755 --- a/review.py +++ b/review.py @@ -121,6 +121,8 @@ def main( print("Posting the review:\n", pprint.pformat(trimmed_review), flush=True) pull_request.post_review(trimmed_review) + with message_group("Saving metadata"): + save_metadata(pr_number) def fix_absolute_paths(build_compile_commands, base_dir): """Update absolute paths in compile_commands.json to new location, if From 12e86cd81d6fefa06c2a00ece58966fac6b40e25 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 20 Aug 2022 17:13:58 +0200 Subject: [PATCH 06/10] review action: Add split_workflow argument --- action.yml | 5 +++++ review.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/action.yml b/action.yml index b41208c..5a4a2d3 100644 --- a/action.yml +++ b/action.yml @@ -53,6 +53,10 @@ inputs: description: 'Message to post on PR if no issues are found. An empty string will post no LGTM comment.' required: false default: 'clang-tidy review says "All clean, LGTM! :+1:"' + split_workflow: + description: "Only generate but don't post the review, leaving it for the second workflow. Relevant when receiving PRs from forks that don't have the required permissions to post reviews." + required: false + default: false pr: default: ${{ github.event.pull_request.number }} repo: @@ -77,3 +81,4 @@ runs: - --apt-packages=${{ inputs.apt_packages }} - --cmake-command='${{ inputs.cmake_command }}' - --lgtm-comment-body='${{ inputs.lgtm_comment_body }}' + - --split_workflow=${{ inputs.split_workflow }} diff --git a/review.py b/review.py index 75d5de9..6402bfd 100755 --- a/review.py +++ b/review.py @@ -42,6 +42,7 @@ def main( exclude, max_comments, lgtm_comment_body, + split_workflow: bool, dry_run: bool = False, ): @@ -203,6 +204,15 @@ def fix_absolute_paths(build_compile_commands, base_dir): type=str, default="", ) + + def bool_argument(user_input): + user_input = str(user_input).upper() + if user_input == "TRUE": + return True + if user_input == "FALSE": + return False + raise ValueError("Invalid value passed to bool_argument") + parser.add_argument( "--max-comments", help="Maximum number of comments to post at once", @@ -215,6 +225,12 @@ def fix_absolute_paths(build_compile_commands, base_dir): type=str, default='clang-tidy review says "All clean, LGTM! :+1:"', ) + parser.add_argument( + "--split_workflow", + help="Only generate but don't post the review, leaving it for the second workflow. Relevant when receiving PRs from forks that don't have the required permissions to post reviews.", + type=bool_argument, + default=False, + ) parser.add_argument("--token", help="github auth token") parser.add_argument( "--dry-run", help="Run and generate review, but don't post", action="store_true" @@ -261,5 +277,6 @@ def fix_absolute_paths(build_compile_commands, base_dir): exclude=exclude, max_comments=args.max_comments, lgtm_comment_body=strip_enclosing_quotes(args.lgtm_comment_body), + split_workflow=args.split_workflow, dry_run=args.dry_run, ) From 8f1924edf5cf3f69d5b9b7898525628081a7ebec Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 13 Aug 2022 15:19:53 +0200 Subject: [PATCH 07/10] Post error annotation if standard workflow is used and PR comes from a fork --- post/clang_tidy_review/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py index 59bbab2..6bc67d4 100644 --- a/post/clang_tidy_review/__init__.py +++ b/post/clang_tidy_review/__init__.py @@ -119,7 +119,14 @@ def post_review(self, review): post_review_response = requests.post(url, json=review, headers=headers) print(post_review_response.text) - post_review_response.raise_for_status() + try: + post_review_response.raise_for_status() + except requests.exceptions.HTTPError as e: + if e.response.status_code == 403: + print("::error title=Missing permissions::This workflow does not have enough permissions to submit a review. This could be because the GitHub token specified for this workflow is invalid or missing permissions, or it could be because this pull request comes from a fork which reduces the default token permissions. To support forked workflows, see the https://github.com/ZedThree/clang-tidy-review#usage-in-fork-environments instructions") + + # Re-raise the exception, causing an error in the workflow + raise e @contextlib.contextmanager From 6fa82907d3d5febd709c2697c99a7e39412f330b Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sun, 21 Aug 2022 14:02:38 +0200 Subject: [PATCH 08/10] Move review creating & posting to two complete functions --- post/clang_tidy_review/__init__.py | 235 +++++++++++++++++++++++------ review.py | 89 ++--------- 2 files changed, 201 insertions(+), 123 deletions(-) diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py index 6bc67d4..fa56f24 100644 --- a/post/clang_tidy_review/__init__.py +++ b/post/clang_tidy_review/__init__.py @@ -4,6 +4,7 @@ # See LICENSE for more information import argparse +import fnmatch import itertools import json import os @@ -17,15 +18,15 @@ import yaml import contextlib import datetime -import subprocess from github import Github from github.Requester import Requester from github.PaginatedList import PaginatedList -from typing import Any, List, Optional, TypedDict +from typing import List, Optional, TypedDict DIFF_HEADER_LINE_LENGTH = 5 FIXES_FILE = "clang_tidy_review.yaml" METADATA_FILE = "clang-tidy-review-metadata.json" +REVIEW_FILE = "clang-tidy-review-output.json" class Metadata(TypedDict): @@ -37,6 +38,60 @@ class Metadata(TypedDict): pr_number: int +class PRReviewComment(TypedDict): + path: str + position: Optional[int] + body: str + line: Optional[int] + side: Optional[str] + start_line: Optional[int] + start_side: Optional[str] + + +class PRReview(TypedDict): + body: str + event: str + comments: List[PRReviewComment] + + +def build_clang_tidy_warnings( + line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files +) -> None: + """Run clang-tidy on the given files and save output into FIXES_FILE""" + + if config_file != "": + config = f'-config-file="{config_file}"' + else: + config = f"-checks={clang_tidy_checks}" + + print(f"Using config: {config}") + + command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}" + + start = datetime.datetime.now() + try: + with message_group(f"Running:\n\t{command}"): + subprocess.run( + command, capture_output=True, shell=True, check=True, encoding="utf-8" + ) + except subprocess.CalledProcessError as e: + print( + f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}" + ) + end = datetime.datetime.now() + + print(f"Took: {end - start}") + + +def load_clang_tidy_warnings(): + """Read clang-tidy warnings from FIXES_FILE. Can be produced by build_clang_tidy_warnings""" + try: + with open(FIXES_FILE, "r") as fixes_file: + return yaml.safe_load(fixes_file) + except FileNotFoundError: + return {} + + class PullRequest: """Add some convenience functions not in PyGithub""" @@ -481,12 +536,17 @@ def make_comment_from_diagnostic( return comment_body, end_line + 1 -def make_review(diagnostics, diff_lookup, offset_lookup, build_dir): +def create_review_file( + clang_tidy_warnings, diff_lookup, offset_lookup, build_dir +) -> Optional[PRReview]: """Create a Github review from a set of clang-tidy diagnostics""" - comments = [] + if "Diagnostics" not in clang_tidy_warnings: + return None + + comments: List[PRReviewComment] = [] - for diagnostic in diagnostics: + for diagnostic in clang_tidy_warnings["Diagnostics"]: try: diagnostic_message = diagnostic["DiagnosticMessage"] except KeyError: @@ -535,7 +595,7 @@ def make_review(diagnostics, diff_lookup, offset_lookup, build_dir): } ) - review = { + review: PRReview = { "body": "clang-tidy made some suggestions", "event": "COMMENT", "comments": comments, @@ -543,25 +603,103 @@ def make_review(diagnostics, diff_lookup, offset_lookup, build_dir): return review +def create_review( + pull_request: PullRequest, + build_dir: str, + clang_tidy_checks: str, + clang_tidy_binary: str, + config_file: str, + include: List[str], + exclude: List[str], +) -> Optional[PRReview]: + """Given the parameters, runs clang-tidy and creates a review. + If no files were changed, or no warnings could be found, None will be returned. + + """ + + diff = pull_request.get_pr_diff() + print(f"\nDiff from GitHub PR:\n{diff}\n") + + changed_files = [filename.target_file[2:] for filename in diff] + files = [] + for pattern in include: + files.extend(fnmatch.filter(changed_files, pattern)) + print(f"include: {pattern}, file list now: {files}") + for pattern in exclude: + files = [f for f in files if not fnmatch.fnmatch(f, pattern)] + print(f"exclude: {pattern}, file list now: {files}") + + if files == []: + print("No files to check!") + return None + + print(f"Checking these files: {files}", flush=True) + + line_ranges = get_line_ranges(diff, files) + if line_ranges == "[]": + print("No lines added in this PR!") + return None + + print(f"Line filter for clang-tidy:\n{line_ranges}\n") + + # Run clang-tidy with the configured parameters and produce the CLANG_TIDY_FIXES file + build_clang_tidy_warnings( + line_ranges, + build_dir, + clang_tidy_checks, + clang_tidy_binary, + config_file, + '"' + '" "'.join(files) + '"', + ) + + # Read and parse the CLANG_TIDY_FIXES file + clang_tidy_warnings = load_clang_tidy_warnings() + + print("clang-tidy had the following warnings:\n", clang_tidy_warnings, flush=True) + + diff_lookup = make_file_line_lookup(diff) + offset_lookup = make_file_offset_lookup(files) + + with message_group("Creating review from warnings"): + review = create_review_file( + clang_tidy_warnings, diff_lookup, offset_lookup, build_dir + ) + with open(REVIEW_FILE, "w") as review_file: + json.dump(review, review_file) + + return review + + def load_metadata() -> Metadata: """Load metadata from the METADATA_FILE path""" with open(METADATA_FILE, "r") as metadata_file: - x = json.load(metadata_file) - print(f"x: {x}") - return x + return json.load(metadata_file) + def save_metadata(pr_number: int) -> None: """Save metadata to the METADATA_FILE path""" - metadata: Metadata = { - "pr_number": pr_number - } + metadata: Metadata = {"pr_number": pr_number} with open(METADATA_FILE, "w") as metadata_file: json.dump(metadata, metadata_file) +def load_review() -> Optional[PRReview]: + """Load review output from the standard REVIEW_FILE path. + This file contains + + """ + + with open(REVIEW_FILE, "r") as review_file: + payload = json.load(review_file) + if payload: + return payload + + return None + + def get_line_ranges(diff, files): """Return the line ranges of added lines in diff, suitable for the line-filter argument of clang-tidy @@ -592,41 +730,6 @@ def get_line_ranges(diff, files): return json.dumps(line_filter_json, separators=(",", ":")) -def get_clang_tidy_warnings( - line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files - ): - """Run clang-tidy on the given files and save output into FIXES_FILE""" - - if config_file != "": - config = f'-config-file="{config_file}"' - else: - config = f"-checks={clang_tidy_checks}" - - print(f"Using config: {config}") - - command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}" - - start = datetime.datetime.now() - try: - with message_group(f"Running:\n\t{command}"): - output = subprocess.run( - command, capture_output=True, shell=True, check=True, encoding="utf-8" - ) - except subprocess.CalledProcessError as e: - print( - f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}" - ) - end = datetime.datetime.now() - - print(f"Took: {end - start}") - - try: - with open(FIXES_FILE, "r") as fixes_file: - return yaml.safe_load(fixes_file) - except FileNotFoundError: - return {} - - def cull_comments(pull_request: PullRequest, review, max_comments): """Remove comments from review that have already been posted, and keep only the first max_comments @@ -668,3 +771,41 @@ def strip_enclosing_quotes(string: str) -> str: if stripped.startswith(quote) and stripped.endswith(quote): stripped = stripped[1:-1] return stripped + + +def post_review( + pull_request: PullRequest, + review: Optional[PRReview], + max_comments: int, + lgtm_comment_body: str, + dry_run: bool, +) -> int: + print( + "Created the following review:\n", pprint.pformat(review, width=130), flush=True + ) + + if not review or review["comments"] == []: + print("No warnings to report, LGTM!") + if not dry_run: + pull_request.post_lgtm_comment(lgtm_comment_body) + return 0 + + print(f"::set-output name=total_comments::{len(review['comments'])}") + + total_comments = len(review["comments"]) + + print("Removing already posted or extra comments", flush=True) + trimmed_review = cull_comments(pull_request, review, max_comments) + + if trimmed_review["comments"] == []: + print("Everything already posted!") + return total_comments + + if dry_run: + pprint.pprint(review, width=130) + return total_comments + + print("Posting the review:\n", pprint.pformat(trimmed_review), flush=True) + pull_request.post_review(trimmed_review) + + return total_comments diff --git a/review.py b/review.py index 6402bfd..db4816f 100755 --- a/review.py +++ b/review.py @@ -6,24 +6,19 @@ # See LICENSE for more information import argparse -import fnmatch import json import os import pathlib -import pprint import re import subprocess from post.clang_tidy_review import ( PullRequest, - get_line_ranges, - get_clang_tidy_warnings, - make_file_line_lookup, - make_file_offset_lookup, message_group, - make_review, - cull_comments, strip_enclosing_quotes, + create_review, + save_metadata, + post_review, ) @@ -47,84 +42,26 @@ def main( ): pull_request = PullRequest(repo, pr_number, token) - diff = pull_request.get_pr_diff() - print(f"\nDiff from GitHub PR:\n{diff}\n") - changed_files = [filename.target_file[2:] for filename in diff] - files = [] - for pattern in include: - files.extend(fnmatch.filter(changed_files, pattern)) - print(f"include: {pattern}, file list now: {files}") - for pattern in exclude: - files = [f for f in files if not fnmatch.fnmatch(f, pattern)] - print(f"exclude: {pattern}, file list now: {files}") - - if files == []: - print("No files to check!") - return - - print(f"Checking these files: {files}", flush=True) - - line_ranges = get_line_ranges(diff, files) - if line_ranges == "[]": - print("No lines added in this PR!") - return - - print(f"Line filter for clang-tidy:\n{line_ranges}\n") - - clang_tidy_warnings = get_clang_tidy_warnings( - line_ranges, + review = create_review( + pull_request, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, - '"' + '" "'.join(files) + '"', - ) - print("clang-tidy had the following warnings:\n", clang_tidy_warnings, flush=True) - - if clang_tidy_warnings == {}: - print("No warnings, LGTM!") - if not dry_run: - pull_request.post_lgtm_comment(lgtm_comment_body) - return - - diff_lookup = make_file_line_lookup(diff) - offset_lookup = make_file_offset_lookup(files) - - with message_group("Creating review from warnings"): - review = make_review( - clang_tidy_warnings["Diagnostics"], diff_lookup, offset_lookup, build_dir - ) - - print( - "Created the following review:\n", pprint.pformat(review, width=130), flush=True + include, + exclude, ) - if review["comments"] == []: - print("No warnings to report, LGTM!") - if not dry_run: - pull_request.post_lgtm_comment(lgtm_comment_body) - return - - print(f"::set-output name=total_comments::{len(review['comments'])}") - - print("Removing already posted or extra comments", flush=True) - trimmed_review = cull_comments(pull_request, review, max_comments) - - if trimmed_review["comments"] == []: - print("Everything already posted!") - return review - - if dry_run: - pprint.pprint(review, width=130) - return - - print("Posting the review:\n", pprint.pformat(trimmed_review), flush=True) - pull_request.post_review(trimmed_review) - with message_group("Saving metadata"): save_metadata(pr_number) + if split_workflow: + print("split_workflow is enabled, not posting review") + else: + post_review(pull_request, review, max_comments, lgtm_comment_body, dry_run) + + def fix_absolute_paths(build_compile_commands, base_dir): """Update absolute paths in compile_commands.json to new location, if compile_commands.json was created outside the Actions container From 9222a9b17979cd45347cd23864fc62af65ab1ac0 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 27 Aug 2022 13:59:36 +0200 Subject: [PATCH 09/10] Add post action --- post/Dockerfile | 13 ++++++++ post/README.md | 3 ++ post/action.yml | 32 +++++++++++++++++++ post/post.py | 71 +++++++++++++++++++++++++++++++++++++++++++ post/requirements.txt | 4 +++ 5 files changed, 123 insertions(+) create mode 100644 post/Dockerfile create mode 100644 post/README.md create mode 100644 post/action.yml create mode 100755 post/post.py create mode 100644 post/requirements.txt diff --git a/post/Dockerfile b/post/Dockerfile new file mode 100644 index 0000000..4937e3b --- /dev/null +++ b/post/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3 + +COPY requirements.txt /requirements.txt + +RUN pip3 install --upgrade pip && \ + pip3 install -r requirements.txt + +WORKDIR /action + +COPY post.py /action/post.py +COPY clang_tidy_review /action/clang_tidy_review + +ENTRYPOINT ["/action/post.py"] diff --git a/post/README.md b/post/README.md new file mode 100644 index 0000000..11f8cc9 --- /dev/null +++ b/post/README.md @@ -0,0 +1,3 @@ +# Clang-Tidy Review - Post + +This is a child-action that only posts the review from the [parent action](../README.md). diff --git a/post/action.yml b/post/action.yml new file mode 100644 index 0000000..a3c55c2 --- /dev/null +++ b/post/action.yml @@ -0,0 +1,32 @@ +name: 'clang-tidy review - post comments' +author: 'Peter Hill' +description: 'Create a pull request review based on warnings produced by the parent action' +branding: + icon: 'book-open' + color: 'red' +inputs: + token: + description: 'Authentication token' + default: ${{ github.token }} + required: false + repo: + default: ${{ github.repository }} + max_comments: + description: 'Maximum number of comments to post at once' + required: false + default: '25' + lgtm_comment_body: + description: 'Message to post on PR if no issues are found. An empty string will post no LGTM comment.' + required: false + default: 'clang-tidy review says "All clean, LGTM! :+1:"' +outputs: + total_comments: + description: 'Total number of warnings from clang-tidy' +runs: + using: 'docker' + image: 'Dockerfile' + args: + - --token=${{ inputs.token }} + - --repo=${{ inputs.repo }} + - --max-comments=${{ inputs.max_comments }} + - --lgtm-comment-body='${{ inputs.lgtm_comment_body }}' diff --git a/post/post.py b/post/post.py new file mode 100755 index 0000000..79b022e --- /dev/null +++ b/post/post.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +# clang-tidy review - post comments +# Copyright (c) 2022 Peter Hill +# SPDX-License-Identifier: MIT +# See LICENSE for more information + +import argparse +import pprint + +from clang_tidy_review import ( + PullRequest, + load_review, + post_review, + load_metadata, + strip_enclosing_quotes, + ) + + +def main( + repo: str, + token: str, + max_comments: int, + lgtm_comment_body: str, + dry_run: bool, +) -> None: + metadata = load_metadata() + pull_request = PullRequest(repo, metadata["pr_number"], token) + + review = load_review() + print( + "clang-tidy-review generated the following review", + pprint.pformat(review, width=130), + flush=True, + ) + + post_review(pull_request, review, max_comments, lgtm_comment_body, dry_run) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Post a review based on feedback generated by the clang-tidy-review action" + ) + + parser.add_argument("--repo", help="Repo name in form 'owner/repo'") + parser.add_argument( + "--max-comments", + help="Maximum number of comments to post at once", + type=int, + default=25, + ) + parser.add_argument( + "--lgtm-comment-body", + help="Message to post on PR if no issues are found. An empty string will post no LGTM comment.", + type=str, + default='clang-tidy review says "All clean, LGTM! :+1:"', + ) + parser.add_argument("--token", help="github auth token") + parser.add_argument( + "--dry-run", help="Run and generate review, but don't post", action="store_true" + ) + + args = parser.parse_args() + + main( + repo=args.repo, + token=args.token, + max_comments=args.max_comments, + lgtm_comment_body=strip_enclosing_quotes(args.lgtm_comment_body), + dry_run=args.dry_run, + ) diff --git a/post/requirements.txt b/post/requirements.txt new file mode 100644 index 0000000..0a91033 --- /dev/null +++ b/post/requirements.txt @@ -0,0 +1,4 @@ +PyGithub~=1.51 +unidiff~=0.6.0 +requests~=2.23 +pyyaml~=5.4 From 39d926a87e83199419474f85cd7f85dea0e95d00 Mon Sep 17 00:00:00 2001 From: Rasmus Karlsson Date: Sat, 13 Aug 2022 15:08:55 +0200 Subject: [PATCH 10/10] README.md: Add split workflow example --- README.md | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/README.md b/README.md index 577af23..92bac69 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,8 @@ at once, so `clang-tidy-review` will only attempt to post the first - default: '25' - `lgtm_comment_body`: Message to post on PR if no issues are found. An empty string will post no LGTM comment. - default: 'clang-tidy review says "All clean, LGTM! :+1:"' +- `split_workflow`: Only generate but don't post the review, leaving it for the second workflow. Relevant when receiving PRs from forks that don't have the required permissions to post reviews. + - default: false ## Outputs @@ -188,6 +190,86 @@ jobs: base_dir: ${{ env.base_dir }} ``` +## Usage in fork environments (Split workflow) + +Actions from forks are limited in their permissions for your security. To support this use case, you can use the split workflow described below. + +Example lint workflow: + +```yaml +name: clang-tidy-review + +# You can be more specific, but it currently only works on pull requests +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + # Optionally generate compile_commands.json + + - uses: ZedThree/clang-tidy-review@v0.8.0 + with: + split_workflow: true + + - uses: actions/upload-artifact@v3 + with: + name: clang-tidy-review + path: | + clang-tidy-review-output.json + clang-tidy-review-metadata.json +``` + +Example post comments workflow: + +```yaml +name: Post clang-tidy review comments + +on: + workflow_run: + # The name field of the lint action + workflows: ["clang-tidy-review"] + types: + - completed + +jobs: + build: + runs-on: ubuntu-latest + + steps: + # Downloads the artifact uploaded by the lint action + - name: 'Download artifact' + uses: actions/github-script@v6 + with: + script: | + const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: ${{github.event.workflow_run.id }}, + }); + const matchArtifact = artifacts.data.artifacts.filter((artifact) => { + return artifact.name == "clang-tidy-review" + })[0]; + const download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: matchArtifact.id, + archive_format: 'zip', + }); + const fs = require('fs'); + fs.writeFileSync('${{github.workspace}}/clang-tidy-review.zip', Buffer.from(download.data)); + - name: 'Unzip artifact' + run: unzip clang-tidy-review.zip + + - uses: ZedThree/clang-tidy-review/post@v0.8.0 +``` + +The lint workflow runs with limited permissions, while the post comments workflow has the required permissions because it's triggered by the `workflow_run` event. +Read more about workflow security limitations [here](https://securitylab.github.com/research/github-actions-preventing-pwn-requests/). + ## Real world project samples |Project|Workflow| |----------|-------|