From 7089ca4deaf3a16e8e0f3a6622eb0f99869e49bb Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sat, 20 Aug 2022 16:26:32 +0200
Subject: [PATCH 01/10] Add .gitignore file

---
 .gitignore | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..aa74832
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,9 @@
+__pycache__/
+venv/
+
+# Generated by review action
+clang-tidy-review-output.json
+clang-tidy-review-metadata.json
+
+# Generated by clang-tidy
+clang_tidy_review.yaml

From bc228f35d090bcd5462964dce92eb6526ab6e0fd Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sat, 20 Aug 2022 16:26:42 +0200
Subject: [PATCH 02/10] Add .dockerignore file

---
 .dockerignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..9da3019
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+**/venv
+**/__pycache__
+.git

From 8097d6d5cb9f88da11bbb76ae41f954267cca22a Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sun, 21 Aug 2022 12:14:35 +0200
Subject: [PATCH 03/10] Move all shared logic from review.py into the
 clang_tidy_review library

---
 post/__init__.py                   |   0
 post/clang_tidy_review/__init__.py | 634 +++++++++++++++++++++++++++++
 review.py                          | 621 +---------------------------
 3 files changed, 645 insertions(+), 610 deletions(-)
 create mode 100644 post/__init__.py
 create mode 100644 post/clang_tidy_review/__init__.py

diff --git a/post/__init__.py b/post/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py
new file mode 100644
index 0000000..77c0a7e
--- /dev/null
+++ b/post/clang_tidy_review/__init__.py
@@ -0,0 +1,634 @@
+# clang-tidy review
+# Copyright (c) 2020 Peter Hill
+# SPDX-License-Identifier: MIT
+# See LICENSE for more information
+
+import argparse
+import itertools
+import json
+import os
+from operator import itemgetter
+import pprint
+import pathlib
+import requests
+import subprocess
+import textwrap
+import unidiff
+import yaml
+import contextlib
+import datetime
+import subprocess
+from github import Github
+from github.Requester import Requester
+from github.PaginatedList import PaginatedList
+from typing import Any, List, Optional
+
+DIFF_HEADER_LINE_LENGTH = 5
+FIXES_FILE = "clang_tidy_review.yaml"
+
+
+class PullRequest:
+    """Add some convenience functions not in PyGithub"""
+
+    def __init__(self, repo: str, pr_number: int, token: str) -> None:
+        self.repo = repo
+        self.pr_number = pr_number
+        self.token = token
+
+        github = Github(token)
+        repo_object = github.get_repo(f"{repo}")
+        self._pull_request = repo_object.get_pull(pr_number)
+
+    def headers(self, media_type: str):
+        return {
+            "Accept": f"application/vnd.github.{media_type}",
+            "Authorization": f"token {self.token}",
+        }
+
+    @property
+    def base_url(self):
+        return f"https://api.github.com/repos/{self.repo}/pulls/{self.pr_number}"
+
+    def get(self, media_type: str, extra: str = "") -> str:
+        url = f"{self.base_url}{extra}"
+        response = requests.get(url, headers=self.headers(media_type))
+        response.raise_for_status()
+        return response.text
+
+    def get_pr_diff(self) -> List[unidiff.PatchSet]:
+        """Download the PR diff, return a list of PatchedFile"""
+
+        diffs = self.get("v3.diff")
+
+        # PatchSet is the easiest way to construct what we want, but the
+        # diff_line_no property on lines is counted from the top of the
+        # whole PatchSet, whereas GitHub is expecting the "position"
+        # property to be line count within each file's diff. So we need to
+        # do this little bit of faff to get a list of file-diffs with
+        # their own diff_line_no range
+        diff = [unidiff.PatchSet(str(file))[0] for file in unidiff.PatchSet(diffs)]
+        return diff
+
+    def get_pr_comments(self):
+        """Download the PR review comments using the comfort-fade preview headers"""
+
+        def get_element(
+            requester: Requester, headers: dict, element: dict, completed: bool
+        ):
+            return element
+
+        return PaginatedList(
+            get_element,
+            self._pull_request._requester,
+            f"{self.base_url}/comments",
+            None,
+        )
+
+    def post_lgtm_comment(self, body: str):
+        """Post a "LGTM" comment if everything's clean, making sure not to spam"""
+
+        if not body:
+            return
+
+        comments = self.get_pr_comments()
+
+        for comment in comments:
+            if comment["body"] == body:
+                print("Already posted, no need to update")
+                return
+
+        self._pull_request.create_issue_comment(body)
+
+    def post_review(self, review):
+        """Submit a completed review"""
+        headers = {
+            "Accept": "application/vnd.github.comfort-fade-preview+json",
+            "Authorization": f"token {self.token}",
+        }
+        url = f"{self.base_url}/reviews"
+
+        post_review_response = requests.post(url, json=review, headers=headers)
+        print(post_review_response.text)
+        post_review_response.raise_for_status()
+
+
+@contextlib.contextmanager
+def message_group(title: str):
+    print(f"::group::{title}", flush=True)
+    try:
+        yield
+    finally:
+        print("::endgroup::", flush=True)
+
+
+def make_file_line_lookup(diff):
+    """Get a lookup table for each file in diff, to convert between source
+    line number to line number in the diff
+
+    """
+    lookup = {}
+    for file in diff:
+        filename = file.target_file[2:]
+        lookup[filename] = {}
+        for hunk in file:
+            for line in hunk:
+                if line.diff_line_no is None:
+                    continue
+                if not line.is_removed:
+                    lookup[filename][line.target_line_no] = (
+                        line.diff_line_no - DIFF_HEADER_LINE_LENGTH
+                    )
+    return lookup
+
+
+def make_file_offset_lookup(filenames):
+    """Create a lookup table to convert between character offset and line
+    number for the list of files in `filenames`.
+
+    This is a dict of the cumulative sum of the line lengths for each file.
+
+    """
+    lookup = {}
+
+    for filename in filenames:
+        with open(filename, "r") as file:
+            lines = file.readlines()
+        # Length of each line
+        line_lengths = map(len, lines)
+        # Cumulative sum of line lengths => offset at end of each line
+        lookup[os.path.abspath(filename)] = [0] + list(
+            itertools.accumulate(line_lengths)
+        )
+
+    return lookup
+
+
+def get_diagnostic_file_path(clang_tidy_diagnostic, build_dir):
+
+    # Sometimes, clang-tidy gives us an absolute path, so everything is fine.
+    # Sometimes however it gives us a relative path that is realtive to the
+    # build directory, so we prepend that.
+
+    # Modern clang-tidy
+    if ("DiagnosticMessage" in clang_tidy_diagnostic) and (
+        "FilePath" in clang_tidy_diagnostic["DiagnosticMessage"]
+    ):
+        file_path = clang_tidy_diagnostic["DiagnosticMessage"]["FilePath"]
+        if file_path == "":
+            return ""
+        elif os.path.isabs(file_path):
+            return os.path.normpath(os.path.abspath(file_path))
+        else:
+            # Make the relative path absolute with the build dir
+            if "BuildDirectory" in clang_tidy_diagnostic:
+                return os.path.normpath(
+                    os.path.abspath(
+                        os.path.join(clang_tidy_diagnostic["BuildDirectory"], file_path)
+                    )
+                )
+            else:
+                return os.path.normpath(os.path.abspath(file_path))
+
+    # Pre-clang-tidy-9 format
+    elif "FilePath" in clang_tidy_diagnostic:
+        file_path = clang_tidy_diagnostic["FilePath"]
+        if file_path == "":
+            return ""
+        else:
+            return os.path.normpath(os.path.abspath(os.path.join(build_dir, file_path)))
+
+    else:
+        return ""
+
+
+def find_line_number_from_offset(offset_lookup, filename, offset):
+    """Work out which line number `offset` corresponds to using `offset_lookup`.
+
+    The line number (0-indexed) is the index of the first line offset
+    which is larger than `offset`.
+
+    """
+    name = str(pathlib.Path(filename).resolve().absolute())
+
+    if name not in offset_lookup:
+        # Let's make sure we've the file offsets for this other file
+        offset_lookup.update(make_file_offset_lookup([name]))
+
+    for line_num, line_offset in enumerate(offset_lookup[name]):
+        if line_offset > offset:
+            return line_num - 1
+    return -1
+
+
+def read_one_line(filename, line_offset):
+    """Read a single line from a source file"""
+    # Could cache the files instead of opening them each time?
+    with open(filename, "r") as file:
+        file.seek(line_offset)
+        return file.readline().rstrip("\n")
+
+
+def collate_replacement_sets(diagnostic, offset_lookup):
+    """Return a dict of replacements on the same or consecutive lines, indexed by line number
+
+    We need this as we have to apply all the replacements on one line at the same time
+
+    This could break if there are replacements in with the same line
+    number but in different files.
+
+    """
+
+    # First, make sure each replacement contains "LineNumber", and
+    # "EndLineNumber" in case it spans multiple lines
+    for replacement in diagnostic["Replacements"]:
+        # It's possible the replacement is needed in another file?
+        # Not really sure how that could come about, but let's
+        # cover our behinds in case it does happen:
+        if replacement["FilePath"] not in offset_lookup:
+            # Let's make sure we've the file offsets for this other file
+            offset_lookup.update(make_file_offset_lookup([replacement["FilePath"]]))
+
+        replacement["LineNumber"] = find_line_number_from_offset(
+            offset_lookup, replacement["FilePath"], replacement["Offset"]
+        )
+        replacement["EndLineNumber"] = find_line_number_from_offset(
+            offset_lookup,
+            replacement["FilePath"],
+            replacement["Offset"] + replacement["Length"],
+        )
+
+    # Now we can group them into consecutive lines
+    groups = []
+    for index, replacement in enumerate(diagnostic["Replacements"]):
+        if index == 0:
+            # First one starts a new group, always
+            groups.append([replacement])
+        elif (
+            replacement["LineNumber"] == groups[-1][-1]["LineNumber"]
+            or replacement["LineNumber"] - 1 == groups[-1][-1]["LineNumber"]
+        ):
+            # Same or adjacent line to the last line in the last group
+            # goes in the same group
+            groups[-1].append(replacement)
+        else:
+            # Otherwise, start a new group
+            groups.append([replacement])
+
+    # Turn the list into a dict
+    return {g[0]["LineNumber"]: g for g in groups}
+
+
+def replace_one_line(replacement_set, line_num, offset_lookup):
+    """Apply all the replacements in replacement_set at the same time"""
+
+    filename = replacement_set[0]["FilePath"]
+    # File offset at the start of the first line
+    line_offset = offset_lookup[filename][line_num]
+
+    # List of (start, end) offsets from line_offset
+    insert_offsets = [(0, 0)]
+    # Read all the source lines into a dict so we only get one copy of
+    # each line, though we might read the same line in multiple times
+    source_lines = {}
+    for replacement in replacement_set:
+        start = replacement["Offset"] - line_offset
+        end = start + replacement["Length"]
+        insert_offsets.append((start, end))
+
+        # Make sure to read any extra lines we need too
+        for replacement_line_num in range(
+            replacement["LineNumber"], replacement["EndLineNumber"] + 1
+        ):
+            replacement_line_offset = offset_lookup[filename][replacement_line_num]
+            source_lines[replacement_line_num] = (
+                read_one_line(filename, replacement_line_offset) + "\n"
+            )
+
+    # Replacements might cross multiple lines, so squash them all together
+    source_line = "".join(source_lines.values()).rstrip("\n")
+
+    insert_offsets.append((None, None))
+
+    fragments = []
+    for (_, start), (end, _) in zip(insert_offsets[:-1], insert_offsets[1:]):
+        fragments.append(source_line[start:end])
+
+    new_line = ""
+    for fragment, replacement in zip(fragments, replacement_set):
+        new_line += fragment + replacement["ReplacementText"]
+
+    return source_line, new_line + fragments[-1]
+
+
+def format_ordinary_line(source_line, line_offset):
+    """Format a single C++ line with a diagnostic indicator"""
+
+    return textwrap.dedent(
+        f"""\
+         ```cpp
+         {source_line}
+         {line_offset * " " + "^"}
+         ```
+         """
+    )
+
+
+def format_diff_line(diagnostic, offset_lookup, source_line, line_offset, line_num):
+    """Format a replacement as a Github suggestion or diff block"""
+
+    end_line = line_num
+
+    # We're going to be appending to this
+    code_blocks = ""
+
+    replacement_sets = collate_replacement_sets(diagnostic, offset_lookup)
+
+    for replacement_line_num, replacement_set in replacement_sets.items():
+        old_line, new_line = replace_one_line(
+            replacement_set, replacement_line_num, offset_lookup
+        )
+
+        print(f"----------\n{old_line=}\n{new_line=}\n----------")
+
+        # If the replacement is for the same line as the
+        # diagnostic (which is where the comment will be), then
+        # format the replacement as a suggestion. Otherwise,
+        # format it as a diff
+        if replacement_line_num == line_num:
+            code_blocks += f"""
+```suggestion
+{new_line}
+```
+"""
+            end_line = replacement_set[-1]["EndLineNumber"]
+        else:
+            # Prepend each line in the replacement line with "+ "
+            # in order to make a nice diff block. The extra
+            # whitespace is so the multiline dedent-ed block below
+            # doesn't come out weird.
+            whitespace = "\n                "
+            new_line = whitespace.join([f"+ {line}" for line in new_line.splitlines()])
+            old_line = whitespace.join([f"- {line}" for line in old_line.splitlines()])
+
+            rel_path = try_relative(replacement_set[0]["FilePath"])
+            code_blocks += textwrap.dedent(
+                f"""\
+
+                {rel_path}:{replacement_line_num}:
+                ```diff
+                {old_line}
+                {new_line}
+                ```
+                """
+            )
+    return code_blocks, end_line
+
+
+def try_relative(path):
+    """Try making `path` relative to current directory, otherwise make it an absolute path"""
+    try:
+        here = pathlib.Path.cwd()
+        return pathlib.Path(path).relative_to(here)
+    except ValueError:
+        return pathlib.Path(path).resolve()
+
+
+def format_notes(notes, offset_lookup):
+    """Format an array of notes into a single string"""
+
+    code_blocks = ""
+
+    for note in notes:
+        filename = note["FilePath"]
+
+        if filename == "":
+            return note["Message"]
+
+        resolved_path = str(pathlib.Path(filename).resolve().absolute())
+
+        line_num = find_line_number_from_offset(
+            offset_lookup, resolved_path, note["FileOffset"]
+        )
+        line_offset = note["FileOffset"] - offset_lookup[resolved_path][line_num]
+        source_line = read_one_line(
+            resolved_path, offset_lookup[resolved_path][line_num]
+        )
+
+        path = try_relative(resolved_path)
+        message = f"**{path}:{line_num}:** {note['Message']}"
+        code = format_ordinary_line(source_line, line_offset)
+        code_blocks += f"{message}\n{code}"
+
+    return code_blocks
+
+
+def make_comment_from_diagnostic(
+    diagnostic_name, diagnostic, filename, offset_lookup, notes
+):
+    """Create a comment from a diagnostic
+
+    Comment contains the diagnostic message, plus its name, along with
+    code block(s) containing either the exact location of the
+    diagnostic, or suggested fix(es).
+
+    """
+
+    line_num = find_line_number_from_offset(
+        offset_lookup, filename, diagnostic["FileOffset"]
+    )
+    line_offset = diagnostic["FileOffset"] - offset_lookup[filename][line_num]
+
+    source_line = read_one_line(filename, offset_lookup[filename][line_num])
+    end_line = line_num
+
+    print(
+        f"""{diagnostic}
+    {line_num=};    {line_offset=};    {source_line=}
+    """
+    )
+
+    if diagnostic["Replacements"]:
+        code_blocks, end_line = format_diff_line(
+            diagnostic, offset_lookup, source_line, line_offset, line_num
+        )
+    else:
+        # No fixit, so just point at the problem
+        code_blocks = format_ordinary_line(source_line, line_offset)
+
+    code_blocks += format_notes(notes, offset_lookup)
+
+    comment_body = (
+        f"warning: {diagnostic['Message']} [{diagnostic_name}]\n{code_blocks}"
+    )
+
+    return comment_body, end_line + 1
+
+
+def make_review(diagnostics, diff_lookup, offset_lookup, build_dir):
+    """Create a Github review from a set of clang-tidy diagnostics"""
+
+    comments = []
+
+    for diagnostic in diagnostics:
+        try:
+            diagnostic_message = diagnostic["DiagnosticMessage"]
+        except KeyError:
+            # Pre-clang-tidy-9 format
+            diagnostic_message = diagnostic
+
+        if diagnostic_message["FilePath"] == "":
+            continue
+
+        comment_body, end_line = make_comment_from_diagnostic(
+            diagnostic["DiagnosticName"],
+            diagnostic_message,
+            get_diagnostic_file_path(diagnostic, build_dir),
+            offset_lookup,
+            notes=diagnostic.get("Notes", []),
+        )
+
+        rel_path = str(try_relative(get_diagnostic_file_path(diagnostic, build_dir)))
+        # diff lines are 1-indexed
+        source_line = 1 + find_line_number_from_offset(
+            offset_lookup,
+            get_diagnostic_file_path(diagnostic, build_dir),
+            diagnostic_message["FileOffset"],
+        )
+
+        if rel_path not in diff_lookup or end_line not in diff_lookup[rel_path]:
+            print(
+                f"WARNING: Skipping comment for file '{rel_path}' not in PR changeset. Comment body is:\n{comment_body}"
+            )
+            continue
+
+        comments.append(
+            {
+                "path": rel_path,
+                "body": comment_body,
+                "side": "RIGHT",
+                "line": end_line,
+            }
+        )
+        # If this is a multiline comment, we need a couple more bits:
+        if end_line != source_line:
+            comments[-1].update(
+                {
+                    "start_side": "RIGHT",
+                    "start_line": source_line,
+                }
+            )
+
+    review = {
+        "body": "clang-tidy made some suggestions",
+        "event": "COMMENT",
+        "comments": comments,
+    }
+    return review
+
+
+def get_line_ranges(diff, files):
+    """Return the line ranges of added lines in diff, suitable for the
+    line-filter argument of clang-tidy
+
+    """
+
+    lines_by_file = {}
+    for filename in diff:
+        if filename.target_file[2:] not in files:
+            continue
+        added_lines = []
+        for hunk in filename:
+            for line in hunk:
+                if line.is_added:
+                    added_lines.append(line.target_line_no)
+
+        for _, group in itertools.groupby(
+            enumerate(added_lines), lambda ix: ix[0] - ix[1]
+        ):
+            groups = list(map(itemgetter(1), group))
+            lines_by_file.setdefault(filename.target_file[2:], []).append(
+                [groups[0], groups[-1]]
+            )
+
+    line_filter_json = []
+    for name, lines in lines_by_file.items():
+        line_filter_json.append(str({"name": name, "lines": lines}))
+    return json.dumps(line_filter_json, separators=(",", ":"))
+
+
+def get_clang_tidy_warnings(
+    line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files
+    ):
+    """Run clang-tidy on the given files and save output into FIXES_FILE"""
+
+    if config_file != "":
+        config = f'-config-file="{config_file}"'
+    else:
+        config = f"-checks={clang_tidy_checks}"
+
+    print(f"Using config: {config}")
+
+    command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}"
+
+    start = datetime.datetime.now()
+    try:
+        with message_group(f"Running:\n\t{command}"):
+            output = subprocess.run(
+                command, capture_output=True, shell=True, check=True, encoding="utf-8"
+            )
+    except subprocess.CalledProcessError as e:
+        print(
+            f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}"
+        )
+    end = datetime.datetime.now()
+
+    print(f"Took: {end - start}")
+
+    try:
+        with open(FIXES_FILE, "r") as fixes_file:
+            return yaml.safe_load(fixes_file)
+    except FileNotFoundError:
+        return {}
+
+
+def cull_comments(pull_request: PullRequest, review, max_comments):
+    """Remove comments from review that have already been posted, and keep
+    only the first max_comments
+
+    """
+
+    comments = pull_request.get_pr_comments()
+
+    for comment in comments:
+        review["comments"] = list(
+            filter(
+                lambda review_comment: not (
+                    review_comment["path"] == comment["path"]
+                    and review_comment["line"] == comment["line"]
+                    and review_comment["body"] == comment["body"]
+                ),
+                review["comments"],
+            )
+        )
+
+    if len(review["comments"]) > max_comments:
+        review["body"] += (
+            "\n\nThere were too many comments to post at once. "
+            f"Showing the first {max_comments} out of {len(review['comments'])}. "
+            "Check the log or trigger a new build to see more."
+        )
+        review["comments"] = review["comments"][:max_comments]
+
+    return review
+
+
+def strip_enclosing_quotes(string: str) -> str:
+    """Strip leading/trailing whitespace and remove any enclosing quotes"""
+    stripped = string.strip()
+
+    # Need to check double quotes again in case they're nested inside
+    # single quotes
+    for quote in ['"', "'", '"']:
+        if stripped.startswith(quote) and stripped.endswith(quote):
+            stripped = stripped[1:-1]
+    return stripped
diff --git a/review.py b/review.py
index d07288f..318c038 100755
--- a/review.py
+++ b/review.py
@@ -6,615 +6,28 @@
 # See LICENSE for more information
 
 import argparse
-import contextlib
-import datetime
-import itertools
 import fnmatch
 import json
 import os
-from operator import itemgetter
 import pathlib
 import pprint
 import re
-import requests
 import subprocess
-import textwrap
-import unidiff
-import yaml
-from github import Github
-from github.Requester import Requester
-from github.PaginatedList import PaginatedList
-from typing import List
 
-BAD_CHARS_APT_PACKAGES_PATTERN = "[;&|($]"
-DIFF_HEADER_LINE_LENGTH = 5
-FIXES_FILE = "clang_tidy_review.yaml"
-
-
-class PullRequest:
-    """Add some convenience functions not in PyGithub"""
-
-    def __init__(self, repo: str, pr_number: int, token: str):
-        self.repo = repo
-        self.pr_number = pr_number
-        self.token = token
-
-        github = Github(token)
-        repo_object = github.get_repo(f"{repo}")
-        self._pull_request = repo_object.get_pull(pr_number)
-
-    def headers(self, media_type: str):
-        return {
-            "Accept": f"application/vnd.github.{media_type}",
-            "Authorization": f"token {self.token}",
-        }
-
-    @property
-    def base_url(self):
-        return f"https://api.github.com/repos/{self.repo}/pulls/{self.pr_number}"
-
-    def get(self, media_type: str, extra: str = "") -> str:
-        url = f"{self.base_url}{extra}"
-        response = requests.get(url, headers=self.headers(media_type))
-        response.raise_for_status()
-        return response.text
-
-    def get_pr_diff(self) -> List[unidiff.PatchSet]:
-        """Download the PR diff, return a list of PatchedFile"""
-
-        diffs = self.get("v3.diff")
-
-        # PatchSet is the easiest way to construct what we want, but the
-        # diff_line_no property on lines is counted from the top of the
-        # whole PatchSet, whereas GitHub is expecting the "position"
-        # property to be line count within each file's diff. So we need to
-        # do this little bit of faff to get a list of file-diffs with
-        # their own diff_line_no range
-        diff = [unidiff.PatchSet(str(file))[0] for file in unidiff.PatchSet(diffs)]
-        return diff
-
-    def get_pr_comments(self):
-        """Download the PR review comments using the comfort-fade preview headers"""
-
-        def get_element(
-            requester: Requester, headers: dict, element: dict, completed: bool
-        ):
-            return element
-
-        return PaginatedList(
-            get_element,
-            self._pull_request._requester,
-            f"{self.base_url}/comments",
-            None,
-        )
-
-    def post_lgtm_comment(self, body: str):
-        """Post a "LGTM" comment if everything's clean, making sure not to spam"""
-
-        if not body:
-            return
-
-        comments = self.get_pr_comments()
-
-        for comment in comments:
-            if comment["body"] == body:
-                print("Already posted, no need to update")
-                return
-
-        self._pull_request.create_issue_comment(body)
-
-    def post_review(self, review):
-        """Submit a completed review"""
-        headers = {
-            "Accept": "application/vnd.github.comfort-fade-preview+json",
-            "Authorization": f"token {self.token}",
-        }
-        url = f"{self.base_url}/reviews"
-
-        post_review_response = requests.post(url, json=review, headers=headers)
-        print(post_review_response.text)
-        post_review_response.raise_for_status()
-
-
-@contextlib.contextmanager
-def message_group(title: str):
-    print(f"::group::{title}", flush=True)
-    try:
-        yield
-    finally:
-        print("::endgroup::", flush=True)
-
-
-def make_file_line_lookup(diff):
-    """Get a lookup table for each file in diff, to convert between source
-    line number to line number in the diff
-
-    """
-    lookup = {}
-    for file in diff:
-        filename = file.target_file[2:]
-        lookup[filename] = {}
-        for hunk in file:
-            for line in hunk:
-                if line.diff_line_no is None:
-                    continue
-                if not line.is_removed:
-                    lookup[filename][line.target_line_no] = (
-                        line.diff_line_no - DIFF_HEADER_LINE_LENGTH
-                    )
-    return lookup
-
-
-def make_file_offset_lookup(filenames):
-    """Create a lookup table to convert between character offset and line
-    number for the list of files in `filenames`.
-
-    This is a dict of the cumulative sum of the line lengths for each file.
-
-    """
-    lookup = {}
-
-    for filename in filenames:
-        with open(filename, "r") as file:
-            lines = file.readlines()
-        # Length of each line
-        line_lengths = map(len, lines)
-        # Cumulative sum of line lengths => offset at end of each line
-        lookup[os.path.abspath(filename)] = [0] + list(
-            itertools.accumulate(line_lengths)
-        )
-
-    return lookup
-
-def get_diagnostic_file_path(clang_tidy_diagnostic, build_dir):
-    
-    # Sometimes, clang-tidy gives us an absolute path, so everything is fine.
-    # Sometimes however it gives us a relative path that is realtive to the
-    # build directory, so we prepend that.
-    
-    # Modern clang-tidy
-    if ("DiagnosticMessage" in clang_tidy_diagnostic) and ("FilePath" in clang_tidy_diagnostic["DiagnosticMessage"]):
-        file_path = clang_tidy_diagnostic["DiagnosticMessage"]["FilePath"]
-        if file_path == "":
-            return ""
-        elif os.path.isabs(file_path):
-            return os.path.normpath(os.path.abspath(file_path))
-        else:
-            # Make the relative path absolute with the build dir
-            if "BuildDirectory" in clang_tidy_diagnostic:
-                return os.path.normpath(os.path.abspath(os.path.join(clang_tidy_diagnostic["BuildDirectory"], file_path)))
-            else:
-                return os.path.normpath(os.path.abspath(file_path))
-      
-    # Pre-clang-tidy-9 format
-    elif "FilePath" in clang_tidy_diagnostic:
-        file_path = clang_tidy_diagnostic["FilePath"]
-        if file_path == "":
-            return ""
-        else:
-            return os.path.normpath(os.path.abspath(os.path.join(build_dir, file_path)))
-        
-    else:
-        return ""
-
-
-def find_line_number_from_offset(offset_lookup, filename, offset):
-    """Work out which line number `offset` corresponds to using `offset_lookup`.
-
-    The line number (0-indexed) is the index of the first line offset
-    which is larger than `offset`.
-
-    """
-    name = str(pathlib.Path(filename).resolve().absolute())
-
-    if name not in offset_lookup:
-        # Let's make sure we've the file offsets for this other file
-        offset_lookup.update(make_file_offset_lookup([name]))
-
-    for line_num, line_offset in enumerate(offset_lookup[name]):
-        if line_offset > offset:
-            return line_num - 1
-    return -1
-
-
-def read_one_line(filename, line_offset):
-    """Read a single line from a source file"""
-    # Could cache the files instead of opening them each time?
-    with open(filename, "r") as file:
-        file.seek(line_offset)
-        return file.readline().rstrip("\n")
-
-
-def collate_replacement_sets(diagnostic, offset_lookup):
-    """Return a dict of replacements on the same or consecutive lines, indexed by line number
-
-    We need this as we have to apply all the replacements on one line at the same time
-
-    This could break if there are replacements in with the same line
-    number but in different files.
-
-    """
-
-    # First, make sure each replacement contains "LineNumber", and
-    # "EndLineNumber" in case it spans multiple lines
-    for replacement in diagnostic["Replacements"]:
-        # It's possible the replacement is needed in another file?
-        # Not really sure how that could come about, but let's
-        # cover our behinds in case it does happen:
-        if replacement["FilePath"] not in offset_lookup:
-            # Let's make sure we've the file offsets for this other file
-            offset_lookup.update(make_file_offset_lookup([replacement["FilePath"]]))
-
-        replacement["LineNumber"] = find_line_number_from_offset(
-            offset_lookup, replacement["FilePath"], replacement["Offset"]
-        )
-        replacement["EndLineNumber"] = find_line_number_from_offset(
-            offset_lookup,
-            replacement["FilePath"],
-            replacement["Offset"] + replacement["Length"],
-        )
-
-    # Now we can group them into consecutive lines
-    groups = []
-    for index, replacement in enumerate(diagnostic["Replacements"]):
-        if index == 0:
-            # First one starts a new group, always
-            groups.append([replacement])
-        elif (
-            replacement["LineNumber"] == groups[-1][-1]["LineNumber"]
-            or replacement["LineNumber"] - 1 == groups[-1][-1]["LineNumber"]
-        ):
-            # Same or adjacent line to the last line in the last group
-            # goes in the same group
-            groups[-1].append(replacement)
-        else:
-            # Otherwise, start a new group
-            groups.append([replacement])
-
-    # Turn the list into a dict
-    return {g[0]["LineNumber"]: g for g in groups}
-
-
-def replace_one_line(replacement_set, line_num, offset_lookup):
-    """Apply all the replacements in replacement_set at the same time"""
-
-    filename = replacement_set[0]["FilePath"]
-    # File offset at the start of the first line
-    line_offset = offset_lookup[filename][line_num]
-
-    # List of (start, end) offsets from line_offset
-    insert_offsets = [(0, 0)]
-    # Read all the source lines into a dict so we only get one copy of
-    # each line, though we might read the same line in multiple times
-    source_lines = {}
-    for replacement in replacement_set:
-        start = replacement["Offset"] - line_offset
-        end = start + replacement["Length"]
-        insert_offsets.append((start, end))
-
-        # Make sure to read any extra lines we need too
-        for replacement_line_num in range(
-            replacement["LineNumber"], replacement["EndLineNumber"] + 1
-        ):
-            replacement_line_offset = offset_lookup[filename][replacement_line_num]
-            source_lines[replacement_line_num] = (
-                read_one_line(filename, replacement_line_offset) + "\n"
-            )
-
-    # Replacements might cross multiple lines, so squash them all together
-    source_line = "".join(source_lines.values()).rstrip("\n")
-
-    insert_offsets.append((None, None))
-
-    fragments = []
-    for (_, start), (end, _) in zip(insert_offsets[:-1], insert_offsets[1:]):
-        fragments.append(source_line[start:end])
-
-    new_line = ""
-    for fragment, replacement in zip(fragments, replacement_set):
-        new_line += fragment + replacement["ReplacementText"]
-
-    return source_line, new_line + fragments[-1]
-
-
-def format_ordinary_line(source_line, line_offset):
-    """Format a single C++ line with a diagnostic indicator"""
-
-    return textwrap.dedent(
-        f"""\
-         ```cpp
-         {source_line}
-         {line_offset * " " + "^"}
-         ```
-         """
-    )
-
-
-def format_diff_line(diagnostic, offset_lookup, source_line, line_offset, line_num):
-    """Format a replacement as a Github suggestion or diff block"""
-
-    end_line = line_num
-
-    # We're going to be appending to this
-    code_blocks = ""
-
-    replacement_sets = collate_replacement_sets(diagnostic, offset_lookup)
-
-    for replacement_line_num, replacement_set in replacement_sets.items():
-        old_line, new_line = replace_one_line(
-            replacement_set, replacement_line_num, offset_lookup
-        )
-
-        print(f"----------\n{old_line=}\n{new_line=}\n----------")
-
-        # If the replacement is for the same line as the
-        # diagnostic (which is where the comment will be), then
-        # format the replacement as a suggestion. Otherwise,
-        # format it as a diff
-        if replacement_line_num == line_num:
-            code_blocks += f"""
-```suggestion
-{new_line}
-```
-"""
-            end_line = replacement_set[-1]["EndLineNumber"]
-        else:
-            # Prepend each line in the replacement line with "+ "
-            # in order to make a nice diff block. The extra
-            # whitespace is so the multiline dedent-ed block below
-            # doesn't come out weird.
-            whitespace = "\n                "
-            new_line = whitespace.join([f"+ {line}" for line in new_line.splitlines()])
-            old_line = whitespace.join([f"- {line}" for line in old_line.splitlines()])
-
-            rel_path = try_relative(replacement_set[0]["FilePath"])
-            code_blocks += textwrap.dedent(
-                f"""\
-
-                {rel_path}:{replacement_line_num}:
-                ```diff
-                {old_line}
-                {new_line}
-                ```
-                """
-            )
-    return code_blocks, end_line
-
-
-def try_relative(path):
-    """Try making `path` relative to current directory, otherwise make it an absolute path"""
-    try:
-        here = pathlib.Path.cwd()
-        return pathlib.Path(path).relative_to(here)
-    except ValueError:
-        return pathlib.Path(path).resolve()
-
-
-def format_notes(notes, offset_lookup):
-    """Format an array of notes into a single string"""
-
-    code_blocks = ""
-
-    for note in notes:
-        filename = note["FilePath"]
-
-        if filename == "":
-            return note["Message"]
-
-        resolved_path = str(pathlib.Path(filename).resolve().absolute())
-
-        line_num = find_line_number_from_offset(
-            offset_lookup, resolved_path, note["FileOffset"]
-        )
-        line_offset = note["FileOffset"] - offset_lookup[resolved_path][line_num]
-        source_line = read_one_line(
-            resolved_path, offset_lookup[resolved_path][line_num]
+from post.clang_tidy_review import (
+        PullRequest,
+        get_line_ranges,
+        get_clang_tidy_warnings,
+        make_file_line_lookup,
+        make_file_offset_lookup,
+        message_group,
+        make_review,
+        cull_comments,
+        strip_enclosing_quotes,
         )
 
-        path = try_relative(resolved_path)
-        message = f"**{path}:{line_num}:** {note['Message']}"
-        code = format_ordinary_line(source_line, line_offset)
-        code_blocks += f"{message}\n{code}"
 
-    return code_blocks
-
-
-def make_comment_from_diagnostic(diagnostic_name, diagnostic, filename ,offset_lookup, notes):
-    """Create a comment from a diagnostic
-
-    Comment contains the diagnostic message, plus its name, along with
-    code block(s) containing either the exact location of the
-    diagnostic, or suggested fix(es).
-
-    """
-
-    line_num = find_line_number_from_offset(
-        offset_lookup, filename, diagnostic["FileOffset"]
-    )
-    line_offset = diagnostic["FileOffset"] - offset_lookup[filename][line_num]
-
-    source_line = read_one_line(filename, offset_lookup[filename][line_num])
-    end_line = line_num
-
-    print(
-        f"""{diagnostic}
-    {line_num=};    {line_offset=};    {source_line=}
-    """
-    )
-
-    if diagnostic["Replacements"]:
-        code_blocks, end_line = format_diff_line(
-            diagnostic, offset_lookup, source_line, line_offset, line_num
-        )
-    else:
-        # No fixit, so just point at the problem
-        code_blocks = format_ordinary_line(source_line, line_offset)
-
-    code_blocks += format_notes(notes, offset_lookup)
-
-    comment_body = (
-        f"warning: {diagnostic['Message']} [{diagnostic_name}]\n{code_blocks}"
-    )
-
-    return comment_body, end_line + 1
-
-
-def make_review(diagnostics, diff_lookup, offset_lookup, build_dir):
-    """Create a Github review from a set of clang-tidy diagnostics"""
-
-    comments = []
-
-    for diagnostic in diagnostics:
-        try:
-            diagnostic_message = diagnostic["DiagnosticMessage"]
-        except KeyError:
-            # Pre-clang-tidy-9 format
-            diagnostic_message = diagnostic
-            
-        if diagnostic_message["FilePath"] == "":
-            continue
-
-        comment_body, end_line = make_comment_from_diagnostic(
-            diagnostic["DiagnosticName"],
-            diagnostic_message,
-            get_diagnostic_file_path(diagnostic, build_dir),
-            offset_lookup,
-            notes=diagnostic.get("Notes", []),
-        )
-
-        rel_path = str(try_relative(get_diagnostic_file_path(diagnostic, build_dir)))
-        # diff lines are 1-indexed
-        source_line = 1 + find_line_number_from_offset(
-            offset_lookup,
-            get_diagnostic_file_path(diagnostic, build_dir),
-            diagnostic_message["FileOffset"],
-        )
-
-        if rel_path not in diff_lookup or end_line not in diff_lookup[rel_path]:
-            print(
-                f"WARNING: Skipping comment for file '{rel_path}' not in PR changeset. Comment body is:\n{comment_body}"
-            )
-            continue
-
-        comments.append(
-            {
-                "path": rel_path,
-                "body": comment_body,
-                "side": "RIGHT",
-                "line": end_line,
-            }
-        )
-        # If this is a multiline comment, we need a couple more bits:
-        if end_line != source_line:
-            comments[-1].update(
-                {
-                    "start_side": "RIGHT",
-                    "start_line": source_line,
-                }
-            )
-
-    review = {
-        "body": "clang-tidy made some suggestions",
-        "event": "COMMENT",
-        "comments": comments,
-    }
-    return review
-
-
-def get_line_ranges(diff, files):
-    """Return the line ranges of added lines in diff, suitable for the
-    line-filter argument of clang-tidy
-
-    """
-
-    lines_by_file = {}
-    for filename in diff:
-        if filename.target_file[2:] not in files:
-            continue
-        added_lines = []
-        for hunk in filename:
-            for line in hunk:
-                if line.is_added:
-                    added_lines.append(line.target_line_no)
-
-        for _, group in itertools.groupby(
-            enumerate(added_lines), lambda ix: ix[0] - ix[1]
-        ):
-            groups = list(map(itemgetter(1), group))
-            lines_by_file.setdefault(filename.target_file[2:], []).append(
-                [groups[0], groups[-1]]
-            )
-
-    line_filter_json = []
-    for name, lines in lines_by_file.items():
-        line_filter_json.append(str({"name": name, "lines": lines}))
-    return json.dumps(line_filter_json, separators=(",", ":"))
-
-
-def get_clang_tidy_warnings(
-    line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files
-):
-    """Get the clang-tidy warnings"""
-
-    if config_file != "":
-        config = f"-config-file=\"{config_file}\""
-    else:
-        config = f"-checks={clang_tidy_checks}"
-
-    print(f"Using config: {config}")
-
-    command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}"
-
-    start = datetime.datetime.now()
-    try:
-        with message_group(f"Running:\n\t{command}"):
-            output = subprocess.run(
-                command, capture_output=True, shell=True, check=True, encoding="utf-8"
-            )
-    except subprocess.CalledProcessError as e:
-        print(
-            f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}"
-        )
-    end = datetime.datetime.now()
-
-    print(f"Took: {end - start}")
-
-    try:
-        with open(FIXES_FILE, "r") as fixes_file:
-            return yaml.safe_load(fixes_file)
-    except FileNotFoundError:
-        return {}
-
-
-def cull_comments(pull_request: PullRequest, review, max_comments):
-    """Remove comments from review that have already been posted, and keep
-    only the first max_comments
-
-    """
-
-    comments = pull_request.get_pr_comments()
-
-    for comment in comments:
-        review["comments"] = list(
-            filter(
-                lambda review_comment: not (
-                    review_comment["path"] == comment["path"]
-                    and review_comment["line"] == comment["line"]
-                    and review_comment["body"] == comment["body"]
-                ),
-                review["comments"],
-            )
-        )
-
-    if len(review["comments"]) > max_comments:
-        review["body"] += (
-            "\n\nThere were too many comments to post at once. "
-            f"Showing the first {max_comments} out of {len(review['comments'])}. "
-            "Check the log or trigger a new build to see more."
-        )
-        review["comments"] = review["comments"][:max_comments]
-
-    return review
+BAD_CHARS_APT_PACKAGES_PATTERN = "[;&|($]"
 
 
 def main(
@@ -709,18 +122,6 @@ def main(
     pull_request.post_review(trimmed_review)
 
 
-def strip_enclosing_quotes(string: str) -> str:
-    """Strip leading/trailing whitespace and remove any enclosing quotes"""
-    stripped = string.strip()
-
-    # Need to check double quotes again in case they're nested inside
-    # single quotes
-    for quote in ['"', "'", '"']:
-        if stripped.startswith(quote) and stripped.endswith(quote):
-            stripped = stripped[1:-1]
-    return stripped
-
-
 def fix_absolute_paths(build_compile_commands, base_dir):
     """Update absolute paths in compile_commands.json to new location, if
     compile_commands.json was created outside the Actions container

From 59fea60857e13c2098933676dc3bdde9b566652d Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sat, 20 Aug 2022 16:33:04 +0200
Subject: [PATCH 04/10] review Dockerfile: Include post_comments library

rm -rf apt cache dir to make container slightly smaller
Add workdir to help debugging locally
---
 Dockerfile | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index b36dfa8..daf6369 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,8 +16,15 @@ RUN apt update && \
     clang-tidy-12 \
     python3 python3-pip && \
     pip3 install --upgrade pip && \
-    pip3 install -r requirements.txt
+    pip3 install -r requirements.txt && \
+    rm -rf /var/lib/apt/lists/
 
-COPY review.py /review.py
+WORKDIR /action
 
-ENTRYPOINT ["/review.py"]
+COPY review.py /action/review.py
+
+# Include the entirety of the post directory for simplicity's sake
+# Technically we only need the clang_tidy_review directory but this keeps things consistent for running the command locally during development and in the docker image
+COPY post /action/post
+
+ENTRYPOINT ["/action/review.py"]

From 3291259067d71d818ba5ac2ea83a1af08d2b72ba Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sun, 21 Aug 2022 13:57:03 +0200
Subject: [PATCH 05/10] Save metadata regarding the review action to a file

---
 post/clang_tidy_review/__init__.py | 31 +++++++++++++++++++++++++++++-
 review.py                          |  2 ++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py
index 77c0a7e..59bbab2 100644
--- a/post/clang_tidy_review/__init__.py
+++ b/post/clang_tidy_review/__init__.py
@@ -21,10 +21,20 @@
 from github import Github
 from github.Requester import Requester
 from github.PaginatedList import PaginatedList
-from typing import Any, List, Optional
+from typing import Any, List, Optional, TypedDict
 
 DIFF_HEADER_LINE_LENGTH = 5
 FIXES_FILE = "clang_tidy_review.yaml"
+METADATA_FILE = "clang-tidy-review-metadata.json"
+
+
+class Metadata(TypedDict):
+    """Loaded from `METADATA_FILE`
+    Contains information necessary to post a review without pull request knowledge
+
+    """
+
+    pr_number: int
 
 
 class PullRequest:
@@ -526,6 +536,25 @@ def make_review(diagnostics, diff_lookup, offset_lookup, build_dir):
     return review
 
 
+def load_metadata() -> Metadata:
+    """Load metadata from the METADATA_FILE path"""
+
+    with open(METADATA_FILE, "r") as metadata_file:
+        x = json.load(metadata_file)
+        print(f"x: {x}")
+        return x
+
+def save_metadata(pr_number: int) -> None:
+    """Save metadata to the METADATA_FILE path"""
+
+    metadata: Metadata = {
+            "pr_number": pr_number
+            }
+
+    with open(METADATA_FILE, "w") as metadata_file:
+        json.dump(metadata, metadata_file)
+
+
 def get_line_ranges(diff, files):
     """Return the line ranges of added lines in diff, suitable for the
     line-filter argument of clang-tidy
diff --git a/review.py b/review.py
index 318c038..75d5de9 100755
--- a/review.py
+++ b/review.py
@@ -121,6 +121,8 @@ def main(
     print("Posting the review:\n", pprint.pformat(trimmed_review), flush=True)
     pull_request.post_review(trimmed_review)
 
+    with message_group("Saving metadata"):
+        save_metadata(pr_number)
 
 def fix_absolute_paths(build_compile_commands, base_dir):
     """Update absolute paths in compile_commands.json to new location, if

From 12e86cd81d6fefa06c2a00ece58966fac6b40e25 Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sat, 20 Aug 2022 17:13:58 +0200
Subject: [PATCH 06/10] review action: Add split_workflow argument

---
 action.yml |  5 +++++
 review.py  | 17 +++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/action.yml b/action.yml
index b41208c..5a4a2d3 100644
--- a/action.yml
+++ b/action.yml
@@ -53,6 +53,10 @@ inputs:
     description: 'Message to post on PR if no issues are found. An empty string will post no LGTM comment.'
     required: false
     default: 'clang-tidy review says "All clean, LGTM! :+1:"'
+  split_workflow:
+    description: "Only generate but don't post the review, leaving it for the second workflow. Relevant when receiving PRs from forks that don't have the required permissions to post reviews."
+    required: false
+    default: false
   pr:
     default: ${{ github.event.pull_request.number }}
   repo:
@@ -77,3 +81,4 @@ runs:
     - --apt-packages=${{ inputs.apt_packages }}
     - --cmake-command='${{ inputs.cmake_command }}'
     - --lgtm-comment-body='${{ inputs.lgtm_comment_body }}'
+    - --split_workflow=${{ inputs.split_workflow }}
diff --git a/review.py b/review.py
index 75d5de9..6402bfd 100755
--- a/review.py
+++ b/review.py
@@ -42,6 +42,7 @@ def main(
     exclude,
     max_comments,
     lgtm_comment_body,
+    split_workflow: bool,
     dry_run: bool = False,
 ):
 
@@ -203,6 +204,15 @@ def fix_absolute_paths(build_compile_commands, base_dir):
         type=str,
         default="",
     )
+
+    def bool_argument(user_input):
+        user_input = str(user_input).upper()
+        if user_input == "TRUE":
+            return True
+        if user_input == "FALSE":
+            return False
+        raise ValueError("Invalid value passed to bool_argument")
+
     parser.add_argument(
         "--max-comments",
         help="Maximum number of comments to post at once",
@@ -215,6 +225,12 @@ def fix_absolute_paths(build_compile_commands, base_dir):
         type=str,
         default='clang-tidy review says "All clean, LGTM! :+1:"',
     )
+    parser.add_argument(
+        "--split_workflow",
+        help="Only generate but don't post the review, leaving it for the second workflow. Relevant when receiving PRs from forks that don't have the required permissions to post reviews.",
+        type=bool_argument,
+        default=False,
+    )
     parser.add_argument("--token", help="github auth token")
     parser.add_argument(
         "--dry-run", help="Run and generate review, but don't post", action="store_true"
@@ -261,5 +277,6 @@ def fix_absolute_paths(build_compile_commands, base_dir):
         exclude=exclude,
         max_comments=args.max_comments,
         lgtm_comment_body=strip_enclosing_quotes(args.lgtm_comment_body),
+        split_workflow=args.split_workflow,
         dry_run=args.dry_run,
     )

From 8f1924edf5cf3f69d5b9b7898525628081a7ebec Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sat, 13 Aug 2022 15:19:53 +0200
Subject: [PATCH 07/10] Post error annotation if standard workflow is used and
 PR comes from a fork

---
 post/clang_tidy_review/__init__.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py
index 59bbab2..6bc67d4 100644
--- a/post/clang_tidy_review/__init__.py
+++ b/post/clang_tidy_review/__init__.py
@@ -119,7 +119,14 @@ def post_review(self, review):
 
         post_review_response = requests.post(url, json=review, headers=headers)
         print(post_review_response.text)
-        post_review_response.raise_for_status()
+        try:
+            post_review_response.raise_for_status()
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 403:
+                print("::error title=Missing permissions::This workflow does not have enough permissions to submit a review. This could be because the GitHub token specified for this workflow is invalid or missing permissions, or it could be because this pull request comes from a fork which reduces the default token permissions. To support forked workflows, see the https://github.com/ZedThree/clang-tidy-review#usage-in-fork-environments instructions")
+
+            # Re-raise the exception, causing an error in the workflow
+            raise e
 
 
 @contextlib.contextmanager

From 6fa82907d3d5febd709c2697c99a7e39412f330b Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sun, 21 Aug 2022 14:02:38 +0200
Subject: [PATCH 08/10] Move review creating & posting to two complete
 functions

---
 post/clang_tidy_review/__init__.py | 235 +++++++++++++++++++++++------
 review.py                          |  89 ++---------
 2 files changed, 201 insertions(+), 123 deletions(-)

diff --git a/post/clang_tidy_review/__init__.py b/post/clang_tidy_review/__init__.py
index 6bc67d4..fa56f24 100644
--- a/post/clang_tidy_review/__init__.py
+++ b/post/clang_tidy_review/__init__.py
@@ -4,6 +4,7 @@
 # See LICENSE for more information
 
 import argparse
+import fnmatch
 import itertools
 import json
 import os
@@ -17,15 +18,15 @@
 import yaml
 import contextlib
 import datetime
-import subprocess
 from github import Github
 from github.Requester import Requester
 from github.PaginatedList import PaginatedList
-from typing import Any, List, Optional, TypedDict
+from typing import List, Optional, TypedDict
 
 DIFF_HEADER_LINE_LENGTH = 5
 FIXES_FILE = "clang_tidy_review.yaml"
 METADATA_FILE = "clang-tidy-review-metadata.json"
+REVIEW_FILE = "clang-tidy-review-output.json"
 
 
 class Metadata(TypedDict):
@@ -37,6 +38,60 @@ class Metadata(TypedDict):
     pr_number: int
 
 
+class PRReviewComment(TypedDict):
+    path: str
+    position: Optional[int]
+    body: str
+    line: Optional[int]
+    side: Optional[str]
+    start_line: Optional[int]
+    start_side: Optional[str]
+
+
+class PRReview(TypedDict):
+    body: str
+    event: str
+    comments: List[PRReviewComment]
+
+
+def build_clang_tidy_warnings(
+    line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files
+) -> None:
+    """Run clang-tidy on the given files and save output into FIXES_FILE"""
+
+    if config_file != "":
+        config = f'-config-file="{config_file}"'
+    else:
+        config = f"-checks={clang_tidy_checks}"
+
+    print(f"Using config: {config}")
+
+    command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}"
+
+    start = datetime.datetime.now()
+    try:
+        with message_group(f"Running:\n\t{command}"):
+            subprocess.run(
+                command, capture_output=True, shell=True, check=True, encoding="utf-8"
+            )
+    except subprocess.CalledProcessError as e:
+        print(
+            f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}"
+        )
+    end = datetime.datetime.now()
+
+    print(f"Took: {end - start}")
+
+
+def load_clang_tidy_warnings():
+    """Read clang-tidy warnings from FIXES_FILE. Can be produced by build_clang_tidy_warnings"""
+    try:
+        with open(FIXES_FILE, "r") as fixes_file:
+            return yaml.safe_load(fixes_file)
+    except FileNotFoundError:
+        return {}
+
+
 class PullRequest:
     """Add some convenience functions not in PyGithub"""
 
@@ -481,12 +536,17 @@ def make_comment_from_diagnostic(
     return comment_body, end_line + 1
 
 
-def make_review(diagnostics, diff_lookup, offset_lookup, build_dir):
+def create_review_file(
+    clang_tidy_warnings, diff_lookup, offset_lookup, build_dir
+) -> Optional[PRReview]:
     """Create a Github review from a set of clang-tidy diagnostics"""
 
-    comments = []
+    if "Diagnostics" not in clang_tidy_warnings:
+        return None
+
+    comments: List[PRReviewComment] = []
 
-    for diagnostic in diagnostics:
+    for diagnostic in clang_tidy_warnings["Diagnostics"]:
         try:
             diagnostic_message = diagnostic["DiagnosticMessage"]
         except KeyError:
@@ -535,7 +595,7 @@ def make_review(diagnostics, diff_lookup, offset_lookup, build_dir):
                 }
             )
 
-    review = {
+    review: PRReview = {
         "body": "clang-tidy made some suggestions",
         "event": "COMMENT",
         "comments": comments,
@@ -543,25 +603,103 @@ def make_review(diagnostics, diff_lookup, offset_lookup, build_dir):
     return review
 
 
+def create_review(
+    pull_request: PullRequest,
+    build_dir: str,
+    clang_tidy_checks: str,
+    clang_tidy_binary: str,
+    config_file: str,
+    include: List[str],
+    exclude: List[str],
+) -> Optional[PRReview]:
+    """Given the parameters, runs clang-tidy and creates a review.
+    If no files were changed, or no warnings could be found, None will be returned.
+
+    """
+
+    diff = pull_request.get_pr_diff()
+    print(f"\nDiff from GitHub PR:\n{diff}\n")
+
+    changed_files = [filename.target_file[2:] for filename in diff]
+    files = []
+    for pattern in include:
+        files.extend(fnmatch.filter(changed_files, pattern))
+        print(f"include: {pattern}, file list now: {files}")
+    for pattern in exclude:
+        files = [f for f in files if not fnmatch.fnmatch(f, pattern)]
+        print(f"exclude: {pattern}, file list now: {files}")
+
+    if files == []:
+        print("No files to check!")
+        return None
+
+    print(f"Checking these files: {files}", flush=True)
+
+    line_ranges = get_line_ranges(diff, files)
+    if line_ranges == "[]":
+        print("No lines added in this PR!")
+        return None
+
+    print(f"Line filter for clang-tidy:\n{line_ranges}\n")
+
+    # Run clang-tidy with the configured parameters and produce the CLANG_TIDY_FIXES file
+    build_clang_tidy_warnings(
+        line_ranges,
+        build_dir,
+        clang_tidy_checks,
+        clang_tidy_binary,
+        config_file,
+        '"' + '" "'.join(files) + '"',
+    )
+
+    # Read and parse the CLANG_TIDY_FIXES file
+    clang_tidy_warnings = load_clang_tidy_warnings()
+
+    print("clang-tidy had the following warnings:\n", clang_tidy_warnings, flush=True)
+
+    diff_lookup = make_file_line_lookup(diff)
+    offset_lookup = make_file_offset_lookup(files)
+
+    with message_group("Creating review from warnings"):
+        review = create_review_file(
+            clang_tidy_warnings, diff_lookup, offset_lookup, build_dir
+        )
+        with open(REVIEW_FILE, "w") as review_file:
+            json.dump(review, review_file)
+
+        return review
+
+
 def load_metadata() -> Metadata:
     """Load metadata from the METADATA_FILE path"""
 
     with open(METADATA_FILE, "r") as metadata_file:
-        x = json.load(metadata_file)
-        print(f"x: {x}")
-        return x
+        return json.load(metadata_file)
+
 
 def save_metadata(pr_number: int) -> None:
     """Save metadata to the METADATA_FILE path"""
 
-    metadata: Metadata = {
-            "pr_number": pr_number
-            }
+    metadata: Metadata = {"pr_number": pr_number}
 
     with open(METADATA_FILE, "w") as metadata_file:
         json.dump(metadata, metadata_file)
 
 
+def load_review() -> Optional[PRReview]:
+    """Load review output from the standard REVIEW_FILE path.
+    This file contains
+
+    """
+
+    with open(REVIEW_FILE, "r") as review_file:
+        payload = json.load(review_file)
+        if payload:
+            return payload
+
+        return None
+
+
 def get_line_ranges(diff, files):
     """Return the line ranges of added lines in diff, suitable for the
     line-filter argument of clang-tidy
@@ -592,41 +730,6 @@ def get_line_ranges(diff, files):
     return json.dumps(line_filter_json, separators=(",", ":"))
 
 
-def get_clang_tidy_warnings(
-    line_filter, build_dir, clang_tidy_checks, clang_tidy_binary, config_file, files
-    ):
-    """Run clang-tidy on the given files and save output into FIXES_FILE"""
-
-    if config_file != "":
-        config = f'-config-file="{config_file}"'
-    else:
-        config = f"-checks={clang_tidy_checks}"
-
-    print(f"Using config: {config}")
-
-    command = f"{clang_tidy_binary} -p={build_dir} {config} -line-filter={line_filter} {files} --export-fixes={FIXES_FILE}"
-
-    start = datetime.datetime.now()
-    try:
-        with message_group(f"Running:\n\t{command}"):
-            output = subprocess.run(
-                command, capture_output=True, shell=True, check=True, encoding="utf-8"
-            )
-    except subprocess.CalledProcessError as e:
-        print(
-            f"\n\nclang-tidy failed with return code {e.returncode} and error:\n{e.stderr}\nOutput was:\n{e.stdout}"
-        )
-    end = datetime.datetime.now()
-
-    print(f"Took: {end - start}")
-
-    try:
-        with open(FIXES_FILE, "r") as fixes_file:
-            return yaml.safe_load(fixes_file)
-    except FileNotFoundError:
-        return {}
-
-
 def cull_comments(pull_request: PullRequest, review, max_comments):
     """Remove comments from review that have already been posted, and keep
     only the first max_comments
@@ -668,3 +771,41 @@ def strip_enclosing_quotes(string: str) -> str:
         if stripped.startswith(quote) and stripped.endswith(quote):
             stripped = stripped[1:-1]
     return stripped
+
+
+def post_review(
+    pull_request: PullRequest,
+    review: Optional[PRReview],
+    max_comments: int,
+    lgtm_comment_body: str,
+    dry_run: bool,
+) -> int:
+    print(
+        "Created the following review:\n", pprint.pformat(review, width=130), flush=True
+    )
+
+    if not review or review["comments"] == []:
+        print("No warnings to report, LGTM!")
+        if not dry_run:
+            pull_request.post_lgtm_comment(lgtm_comment_body)
+        return 0
+
+    print(f"::set-output name=total_comments::{len(review['comments'])}")
+
+    total_comments = len(review["comments"])
+
+    print("Removing already posted or extra comments", flush=True)
+    trimmed_review = cull_comments(pull_request, review, max_comments)
+
+    if trimmed_review["comments"] == []:
+        print("Everything already posted!")
+        return total_comments
+
+    if dry_run:
+        pprint.pprint(review, width=130)
+        return total_comments
+
+    print("Posting the review:\n", pprint.pformat(trimmed_review), flush=True)
+    pull_request.post_review(trimmed_review)
+
+    return total_comments
diff --git a/review.py b/review.py
index 6402bfd..db4816f 100755
--- a/review.py
+++ b/review.py
@@ -6,24 +6,19 @@
 # See LICENSE for more information
 
 import argparse
-import fnmatch
 import json
 import os
 import pathlib
-import pprint
 import re
 import subprocess
 
 from post.clang_tidy_review import (
         PullRequest,
-        get_line_ranges,
-        get_clang_tidy_warnings,
-        make_file_line_lookup,
-        make_file_offset_lookup,
         message_group,
-        make_review,
-        cull_comments,
         strip_enclosing_quotes,
+        create_review,
+        save_metadata,
+        post_review,
         )
 
 
@@ -47,84 +42,26 @@ def main(
 ):
 
     pull_request = PullRequest(repo, pr_number, token)
-    diff = pull_request.get_pr_diff()
-    print(f"\nDiff from GitHub PR:\n{diff}\n")
 
-    changed_files = [filename.target_file[2:] for filename in diff]
-    files = []
-    for pattern in include:
-        files.extend(fnmatch.filter(changed_files, pattern))
-        print(f"include: {pattern}, file list now: {files}")
-    for pattern in exclude:
-        files = [f for f in files if not fnmatch.fnmatch(f, pattern)]
-        print(f"exclude: {pattern}, file list now: {files}")
-
-    if files == []:
-        print("No files to check!")
-        return
-
-    print(f"Checking these files: {files}", flush=True)
-
-    line_ranges = get_line_ranges(diff, files)
-    if line_ranges == "[]":
-        print("No lines added in this PR!")
-        return
-
-    print(f"Line filter for clang-tidy:\n{line_ranges}\n")
-
-    clang_tidy_warnings = get_clang_tidy_warnings(
-        line_ranges,
+    review = create_review(
+        pull_request,
         build_dir,
         clang_tidy_checks,
         clang_tidy_binary,
         config_file,
-        '"' + '" "'.join(files) + '"',
-    )
-    print("clang-tidy had the following warnings:\n", clang_tidy_warnings, flush=True)
-
-    if clang_tidy_warnings == {}:
-        print("No warnings, LGTM!")
-        if not dry_run:
-            pull_request.post_lgtm_comment(lgtm_comment_body)
-        return
-
-    diff_lookup = make_file_line_lookup(diff)
-    offset_lookup = make_file_offset_lookup(files)
-
-    with message_group("Creating review from warnings"):
-        review = make_review(
-            clang_tidy_warnings["Diagnostics"], diff_lookup, offset_lookup, build_dir
-        )
-
-    print(
-        "Created the following review:\n", pprint.pformat(review, width=130), flush=True
+        include,
+        exclude,
     )
 
-    if review["comments"] == []:
-        print("No warnings to report, LGTM!")
-        if not dry_run:
-            pull_request.post_lgtm_comment(lgtm_comment_body)
-        return
-
-    print(f"::set-output name=total_comments::{len(review['comments'])}")
-
-    print("Removing already posted or extra comments", flush=True)
-    trimmed_review = cull_comments(pull_request, review, max_comments)
-
-    if trimmed_review["comments"] == []:
-        print("Everything already posted!")
-        return review
-
-    if dry_run:
-        pprint.pprint(review, width=130)
-        return
-
-    print("Posting the review:\n", pprint.pformat(trimmed_review), flush=True)
-    pull_request.post_review(trimmed_review)
-
     with message_group("Saving metadata"):
         save_metadata(pr_number)
 
+    if split_workflow:
+        print("split_workflow is enabled, not posting review")
+    else:
+        post_review(pull_request, review, max_comments, lgtm_comment_body, dry_run)
+
+
 def fix_absolute_paths(build_compile_commands, base_dir):
     """Update absolute paths in compile_commands.json to new location, if
     compile_commands.json was created outside the Actions container

From 9222a9b17979cd45347cd23864fc62af65ab1ac0 Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sat, 27 Aug 2022 13:59:36 +0200
Subject: [PATCH 09/10] Add post action

---
 post/Dockerfile       | 13 ++++++++
 post/README.md        |  3 ++
 post/action.yml       | 32 +++++++++++++++++++
 post/post.py          | 71 +++++++++++++++++++++++++++++++++++++++++++
 post/requirements.txt |  4 +++
 5 files changed, 123 insertions(+)
 create mode 100644 post/Dockerfile
 create mode 100644 post/README.md
 create mode 100644 post/action.yml
 create mode 100755 post/post.py
 create mode 100644 post/requirements.txt

diff --git a/post/Dockerfile b/post/Dockerfile
new file mode 100644
index 0000000..4937e3b
--- /dev/null
+++ b/post/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3
+
+COPY requirements.txt /requirements.txt
+
+RUN pip3 install --upgrade pip && \
+    pip3 install -r requirements.txt
+
+WORKDIR /action
+
+COPY post.py /action/post.py
+COPY clang_tidy_review /action/clang_tidy_review
+
+ENTRYPOINT ["/action/post.py"]
diff --git a/post/README.md b/post/README.md
new file mode 100644
index 0000000..11f8cc9
--- /dev/null
+++ b/post/README.md
@@ -0,0 +1,3 @@
+# Clang-Tidy Review - Post
+
+This is a child-action that only posts the review from the [parent action](../README.md).
diff --git a/post/action.yml b/post/action.yml
new file mode 100644
index 0000000..a3c55c2
--- /dev/null
+++ b/post/action.yml
@@ -0,0 +1,32 @@
+name: 'clang-tidy review - post comments'
+author: 'Peter Hill'
+description: 'Create a pull request review based on warnings produced by the parent action'
+branding:
+  icon: 'book-open'
+  color: 'red'
+inputs:
+  token:
+    description: 'Authentication token'
+    default: ${{ github.token }}
+    required: false
+  repo:
+    default: ${{ github.repository }}
+  max_comments:
+    description: 'Maximum number of comments to post at once'
+    required: false
+    default: '25'
+  lgtm_comment_body:
+    description: 'Message to post on PR if no issues are found. An empty string will post no LGTM comment.'
+    required: false
+    default: 'clang-tidy review says "All clean, LGTM! :+1:"'
+outputs:
+  total_comments:
+    description: 'Total number of warnings from clang-tidy'
+runs:
+  using: 'docker'
+  image: 'Dockerfile'
+  args:
+    - --token=${{ inputs.token }}
+    - --repo=${{ inputs.repo }}
+    - --max-comments=${{ inputs.max_comments }}
+    - --lgtm-comment-body='${{ inputs.lgtm_comment_body }}'
diff --git a/post/post.py b/post/post.py
new file mode 100755
index 0000000..79b022e
--- /dev/null
+++ b/post/post.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+# clang-tidy review - post comments
+# Copyright (c) 2022 Peter Hill
+# SPDX-License-Identifier: MIT
+# See LICENSE for more information
+
+import argparse
+import pprint
+
+from clang_tidy_review import (
+        PullRequest,
+        load_review,
+        post_review,
+        load_metadata,
+        strip_enclosing_quotes,
+        )
+
+
+def main(
+    repo: str,
+    token: str,
+    max_comments: int,
+    lgtm_comment_body: str,
+    dry_run: bool,
+) -> None:
+    metadata = load_metadata()
+    pull_request = PullRequest(repo, metadata["pr_number"], token)
+
+    review = load_review()
+    print(
+        "clang-tidy-review generated the following review",
+        pprint.pformat(review, width=130),
+        flush=True,
+    )
+
+    post_review(pull_request, review, max_comments, lgtm_comment_body, dry_run)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Post a review based on feedback generated by the clang-tidy-review action"
+    )
+
+    parser.add_argument("--repo", help="Repo name in form 'owner/repo'")
+    parser.add_argument(
+        "--max-comments",
+        help="Maximum number of comments to post at once",
+        type=int,
+        default=25,
+    )
+    parser.add_argument(
+        "--lgtm-comment-body",
+        help="Message to post on PR if no issues are found. An empty string will post no LGTM comment.",
+        type=str,
+        default='clang-tidy review says "All clean, LGTM! :+1:"',
+    )
+    parser.add_argument("--token", help="github auth token")
+    parser.add_argument(
+        "--dry-run", help="Run and generate review, but don't post", action="store_true"
+    )
+
+    args = parser.parse_args()
+
+    main(
+        repo=args.repo,
+        token=args.token,
+        max_comments=args.max_comments,
+        lgtm_comment_body=strip_enclosing_quotes(args.lgtm_comment_body),
+        dry_run=args.dry_run,
+    )
diff --git a/post/requirements.txt b/post/requirements.txt
new file mode 100644
index 0000000..0a91033
--- /dev/null
+++ b/post/requirements.txt
@@ -0,0 +1,4 @@
+PyGithub~=1.51
+unidiff~=0.6.0
+requests~=2.23
+pyyaml~=5.4

From 39d926a87e83199419474f85cd7f85dea0e95d00 Mon Sep 17 00:00:00 2001
From: Rasmus Karlsson <rasmus.karlsson@pajlada.com>
Date: Sat, 13 Aug 2022 15:08:55 +0200
Subject: [PATCH 10/10] README.md: Add split workflow example

---
 README.md | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/README.md b/README.md
index 577af23..92bac69 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,8 @@ at once, so `clang-tidy-review` will only attempt to post the first
   - default: '25'
 - `lgtm_comment_body`: Message to post on PR if no issues are found. An empty string will post no LGTM comment.
   - default: 'clang-tidy review says "All clean, LGTM! :+1:"'
+- `split_workflow`: Only generate but don't post the review, leaving it for the second workflow. Relevant when receiving PRs from forks that don't have the required permissions to post reviews.
+  - default: false
 
 ## Outputs
 
@@ -188,6 +190,86 @@ jobs:
         base_dir: ${{ env.base_dir }}
 ```
 
+## Usage in fork environments (Split workflow)
+
+Actions from forks are limited in their permissions for your security. To support this use case, you can use the split workflow described below.
+
+Example lint workflow:
+
+```yaml
+name: clang-tidy-review
+
+# You can be more specific, but it currently only works on pull requests
+on: [pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+
+    # Optionally generate compile_commands.json
+
+    - uses: ZedThree/clang-tidy-review@v0.8.0
+      with:
+        split_workflow: true
+
+    - uses: actions/upload-artifact@v3
+      with:
+        name: clang-tidy-review
+        path: |
+          clang-tidy-review-output.json
+          clang-tidy-review-metadata.json
+```
+
+Example post comments workflow:
+
+```yaml
+name: Post clang-tidy review comments
+
+on:
+  workflow_run:
+    # The name field of the lint action
+    workflows: ["clang-tidy-review"]
+    types:
+      - completed
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      # Downloads the artifact uploaded by the lint action
+      - name: 'Download artifact'
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              run_id: ${{github.event.workflow_run.id }},
+            });
+            const matchArtifact = artifacts.data.artifacts.filter((artifact) => {
+              return artifact.name == "clang-tidy-review"
+            })[0];
+            const download = await github.rest.actions.downloadArtifact({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              artifact_id: matchArtifact.id,
+              archive_format: 'zip',
+            });
+            const fs = require('fs');
+            fs.writeFileSync('${{github.workspace}}/clang-tidy-review.zip', Buffer.from(download.data));
+      - name: 'Unzip artifact'
+        run: unzip clang-tidy-review.zip
+
+      - uses: ZedThree/clang-tidy-review/post@v0.8.0
+```
+
+The lint workflow runs with limited permissions, while the post comments workflow has the required permissions because it's triggered by the `workflow_run` event.  
+Read more about workflow security limitations [here](https://securitylab.github.com/research/github-actions-preventing-pwn-requests/).
+
 ## Real world project samples
 |Project|Workflow|
 |----------|-------|