diff --git a/.github/CodeQL-README.md b/.github/CodeQL-README.md index bc9c4990f..f3871f7d0 100644 --- a/.github/CodeQL-README.md +++ b/.github/CodeQL-README.md @@ -95,6 +95,34 @@ Suggested immediate next steps 3. If you want more thorough coverage, enable the experimental packs temporarily in a non-blocking run, review the alerts, then decide whether to include them in CI permanently. 4. If you want, I can run a short search and report back exact pack names for GitHub Actions and experimental packs — tell me if you want me to search the public CodeQL repo for matching pack names and list them with links. -Contact / Ownership +## Viewing CodeQL Alerts + +### For Pull Requests from the Same Repository + +When a PR is opened from a branch in the same repository (not a fork), the CodeQL workflow will automatically post a comment on the PR with details about any new or resolved alerts. + +### For Pull Requests from Forks + +When a PR is opened from a fork, the `GITHUB_TOKEN` does not have permission to post comments on the PR due to GitHub's security model. In this case: + +1. **The CodeQL analysis workflow uploads artifacts instead of commenting directly**: The `codeql-analysis.yaml` workflow runs the scan and uploads one or more artifacts that contain the alert information for the fork PR. + +2. **Artifacts are processed by a separate comment workflow**: The `codeql-comment.yaml` workflow consumes the uploaded artifacts and is responsible for generating any PR comments or additional reporting. You can also download these artifacts from the workflow run's artifacts section for manual inspection. + +3. **Check the Security tab**: CodeQL alerts from fork PRs are still uploaded to the Code Scanning alerts in the base repository. Repository maintainers with appropriate permissions can view these alerts by: + - Navigating to the repository's Security tab + - Clicking on "Code scanning alerts" + - Filtering by branch or PR number to find alerts specific to the fork PR + +4. 
**Ask a maintainer**: If you're the fork PR author and need details about the alerts, you can ask a repository maintainer to review the Code Scanning alerts for your PR. + +### Understanding the Workflow Behavior + +- **New alerts detected**: The workflow will fail to prevent merging code with new security issues. +- **Fixed alerts only**: The workflow will succeed and provide informational notices. +- **No permission to comment**: This is expected GitHub behavior for fork PRs and is not a bug. + +## Contact / Ownership + - Consider adding a CODEOWNERS file for the phlex-src tree so triage notifications reach the most appropriate maintainers. ``` diff --git a/.github/actions/generate-build-matrix/generate_matrix.py b/.github/actions/generate-build-matrix/generate_matrix.py index b6d5932ea..a7d7e9f7d 100644 --- a/.github/actions/generate-build-matrix/generate_matrix.py +++ b/.github/actions/generate-build-matrix/generate_matrix.py @@ -1,4 +1,5 @@ """Generates a build matrix for CI based on the trigger event and user input.""" + import json import os import re diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml index 1ffec06e2..53deab9b2 100644 --- a/.github/workflows/codeql-analysis.yaml +++ b/.github/workflows/codeql-analysis.yaml @@ -132,108 +132,29 @@ jobs: path: ${{ steps.set_log_path.outputs.path }} retention-days: 3 - - name: "Debug: PR head and repo info (no-op for non-PR runs)" - if: github.event_name == 'pull_request' + - name: Prepare PR comment data + if: >- + github.event_name == 'pull_request' && + steps.check_codeql.outputs.comment_path != '' run: | - echo "github.repository: ${{ github.repository }}" - echo "pr.head.repo.full_name: ${{ github.event.pull_request.head.repo.full_name }}" - echo "pr.head.repo.owner.login: ${{ github.event.pull_request.head.repo.owner.login }}" - echo "pr.base.repo.full_name: ${{ github.event.pull_request.base.repo.full_name }}" - echo "actor: ${{ github.actor }}" - echo "Available 
step outputs from check_codeql:" - echo " new_alerts=${{ steps.check_codeql.outputs.new_alerts }}" - echo " fixed_alerts=${{ steps.check_codeql.outputs.fixed_alerts }}" - echo "Event payload head/type (first 200 chars):" - if [ -n "$GITHUB_EVENT_PATH" ]; then - jq -c . < "$GITHUB_EVENT_PATH" | cut -c-200 || true - fi + mkdir -p pr_comment_data + echo "${{ github.event.pull_request.number }}" > pr_comment_data/pr_number.txt + cp "${{ steps.check_codeql.outputs.comment_path }}" pr_comment_data/comment_body.md - - name: Comment on PR with CodeQL alert changes - # Only attempt to post comments on pull requests that originate from - # the same repository. GitHub's `GITHUB_TOKEN` cannot create comments - # on pull requests originating from forks (Resource not accessible - # by integration). For forked PRs, maintainers can inspect the - # uploaded log/artifact instead. + - name: Upload PR comment data if: >- github.event_name == 'pull_request' && - github.event.pull_request.head.repo.full_name == github.repository && - (steps.check_codeql.outputs.new_alerts == 'true' || - steps.check_codeql.outputs.fixed_alerts == 'true') - uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + steps.check_codeql.outputs.comment_path != '' + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 with: - script: | - const fs = require('fs'); - const marker = ''; - const legacyMarker = ''; - const commentPath = '${{ steps.check_codeql.outputs.comment_path }}'; - - if (!commentPath) { - core.setFailed('check_codeql did not emit a comment_path output.'); - return; - } - - const body = fs.readFileSync(commentPath, 'utf8'); - const finalBody = body.includes(marker) ? 
body : `${body}\n${marker}`; - - const { data: comments } = await github.rest.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - per_page: 100, - }); - - const existing = comments.find(comment => comment.body.includes(marker) || comment.body.includes(legacyMarker)); - - if (existing) { - await github.rest.issues.updateComment({ - owner: context.repo.owner, - repo: context.repo.repo, - comment_id: existing.id, - body: finalBody, - }); - } else { - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - body: finalBody, - }); - } - - - name: Log CodeQL alert changes for forked PR - if: >- - github.event_name == 'pull_request' && - github.event.pull_request.head.repo.full_name != github.repository && - (steps.check_codeql.outputs.new_alerts == 'true' || - steps.check_codeql.outputs.fixed_alerts == 'true') - run: | - set -euo pipefail - comment_file="${{ steps.check_codeql.outputs.comment_path }}" - if [ "${{ steps.check_codeql.outputs.new_alerts }}" = "true" ]; then - prefix="::error::" - should_fail=true - else - prefix="::notice::" - should_fail=false - fi - echo "${prefix}This PR is from a fork. CodeQL comments cannot be posted directly." - echo "${prefix}Displaying comment content in the log:" - # Read the file line by line and prepend the prefix to avoid issues - # with special characters in the comment body. - while IFS= read -r line; do - echo "${prefix}${line}" - done < "$comment_file" - if [ "$should_fail" = true ]; then - exit 1 - fi + name: codeql-pr-data + path: pr_comment_data/ + retention-days: 1 - name: Fail workflow due to new CodeQL alerts - # Only fail the job for PRs from the same repository where the - # action has permission to comment / act. Forked PR runs cannot - # reliably perform repo-write actions with `GITHUB_TOKEN`. 
+ # Fails the check on the PR so the user sees red X if: >- github.event_name == 'pull_request' && - github.event.pull_request.head.repo.full_name == github.repository && steps.check_codeql.outputs.new_alerts == 'true' run: | echo "New CodeQL alerts detected; failing job." diff --git a/.github/workflows/codeql-comment.yaml b/.github/workflows/codeql-comment.yaml new file mode 100644 index 000000000..afdca4164 --- /dev/null +++ b/.github/workflows/codeql-comment.yaml @@ -0,0 +1,79 @@ +name: "CodeQL - Post Comment" + +on: + workflow_run: + workflows: ["CodeQL - Analyze (C++, Python, Actions)"] + types: + - completed + +permissions: + pull-requests: write + issues: write + +jobs: + post-comment: + runs-on: ubuntu-latest + if: github.event.workflow_run.event == 'pull_request' && github.event.workflow_run.conclusion != 'cancelled' + steps: + - name: Download PR comment data + uses: actions/download-artifact@fa0a91b85d4f404e8442c7c958156baef1102941 # v4.1.8 + with: + name: codeql-pr-data + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.WORKFLOW_PAT }} # Needed to download artifacts from other runs if private/internal + path: pr_comment_data + continue-on-error: true + + - name: Post Comment + # Only run if the artifact was found (meaning alerts exist) + if: success() && hashFiles('pr_comment_data/pr_number.txt') != '' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + github-token: ${{ secrets.WORKFLOW_PAT }} + script: | + const fs = require('fs'); + const path = require('path'); + + const prNumberPath = path.join('pr_comment_data', 'pr_number.txt'); + const commentBodyPath = path.join('pr_comment_data', 'comment_body.md'); + + if (!fs.existsSync(prNumberPath) || !fs.existsSync(commentBodyPath)) { + console.log('Artifact data missing, skipping comment.'); + return; + } + + const prNumber = Number(fs.readFileSync(prNumberPath, 'utf8').trim()); + const body = fs.readFileSync(commentBodyPath, 'utf8'); + const 
marker = '<!-- codeql-alert-comment -->'; // NOTE(review): marker text was lost in extraction (HTML comment stripped) — confirm against the analysis workflow + const legacyMarker = '<!-- codeql-alerts -->'; // NOTE(review): must match the marker used by the old inline comment step; an empty string here matches EVERY comment and clobbers unrelated ones + + // Append marker if missing + const finalBody = body.includes(marker) ? body : `${body}\n${marker}`; + + console.log(`Processing PR #${prNumber}`); + + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + }); + + const existing = comments.find(c => c.body.includes(marker) || c.body.includes(legacyMarker)); + + if (existing) { + console.log(`Updating existing comment ${existing.id}`); + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body: finalBody + }); + } else { + console.log(`Creating new comment`); + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: finalBody + }); + } diff --git a/scripts/check_codeql_alerts.py b/scripts/check_codeql_alerts.py index 0df23570f..285fadc78 100644 --- a/scripts/check_codeql_alerts.py +++ b/scripts/check_codeql_alerts.py @@ -287,11 +287,26 @@ def _to_alert_api(raw: dict) -> Alert: instance = raw.get("most_recent_instance") or {} loc = instance.get("location") or {} location = "(location unavailable)" + if loc: - phys = loc.get("physicalLocation") or {} - formatted = _format_physical_location(phys) - if formatted: - location = formatted + # Check for nested SARIF format (physicalLocation) + phys = loc.get("physicalLocation") + if phys: + formatted = _format_physical_location(phys) + if formatted: + location = formatted + # Check for flat API format (path, start_line, etc.)
+ elif "path" in loc: + path = loc.get("path") + start_line = loc.get("start_line") + start_col = loc.get("start_column") + if start_line: + location = f"{path}:{start_line}" + if start_col: + location = f"{location}:{start_col}" + elif path is not None: + location = str(path) + else: # If the API instance has no physical location, try to locate using other instances # (some alerts expose locations in 'instances' or other fields) @@ -640,7 +655,7 @@ def collect_alerts( f"Failed to write SARIF unknown-location snippet: {exc}", file=sys.stderr, ) - buckets[baseline_state].append(alert) + buckets[baseline_state].append(alert) return buckets diff --git a/scripts/sarif-alerts.py b/scripts/sarif-alerts.py index f421598c4..37173c1e8 100644 --- a/scripts/sarif-alerts.py +++ b/scripts/sarif-alerts.py @@ -1,4 +1,5 @@ """A simple tool to print SARIF results from one or more SARIF files.""" + import argparse import json from pathlib import Path diff --git a/test/python/adder.py b/test/python/adder.py index d4889447f..549dcdab9 100644 --- a/test/python/adder.py +++ b/test/python/adder.py @@ -4,6 +4,7 @@ real. It serves as a "Hello, World" equivalent for running Python code. """ + def add(i: int, j: int) -> int: """Add the inputs together and return the sum total. @@ -39,7 +40,4 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): Returns: None """ - m.transform(add, - input_family = config["input"], - output_products = config["output"]) - + m.transform(add, input_family=config["input"], output_products=config["output"]) diff --git a/test/python/all_config.py b/test/python/all_config.py index 10b4f5df1..9e6c00d6a 100644 --- a/test/python/all_config.py +++ b/test/python/all_config.py @@ -5,6 +5,7 @@ to Python. The actual run is a noop. """ + class ConfigConsumer: """A callable class that "needs" every configuration type. @@ -12,7 +13,7 @@ class ConfigConsumer: __name__ (str): Identifier for Phlex. 
""" - __name__ = 'config_consumer' + __name__ = "config_consumer" def __init__(self, config): """Create a config consumer object. @@ -28,18 +29,18 @@ def __init__(self, config): None """ # builtin types - assert config['a_bool'] == False # noqa: E712 # we really want to check False - assert config['an_int'] == -37 - assert config['a_uint'] == 18446744073709551616 - assert config['a_float'] == 3.1415 - assert config['a_string'] == 'foo' + assert config["a_bool"] == False # noqa: E712 # we really want to check False + assert config["an_int"] == -37 + assert config["a_uint"] == 18446744073709551616 + assert config["a_float"] == 3.1415 + assert config["a_string"] == "foo" # collection types - assert config['some_bools'] == (False, True) - assert config['some_ints'] == (-1, 42, -55) - assert config['some_uints'] == (18446744073709551616, 29, 137) - assert config['some_floats'] == (3.1415, 2.71828) - assert config['some_strings'] == ('aap', 'noot', 'mies') + assert config["some_bools"] == (False, True) + assert config["some_ints"] == (-1, 42, -55) + assert config["some_uints"] == (18446744073709551616, 29, 137) + assert config["some_floats"] == (3.1415, 2.71828) + assert config["some_strings"] == ("aap", "noot", "mies") def __call__(self, i: int, j: int) -> None: """Dummy routine to do something. @@ -71,5 +72,4 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): None """ config_consumer = ConfigConsumer(config) - m.observe(config_consumer, input_family = config["input"]) - + m.observe(config_consumer, input_family=config["input"]) diff --git a/test/python/reducer.py b/test/python/reducer.py index 6abc55f2b..2ced48de7 100644 --- a/test/python/reducer.py +++ b/test/python/reducer.py @@ -11,6 +11,7 @@ lifetime issues (if they are not). """ + def add(i: int, j: int) -> int: """Add the inputs together and return the sum total. 
@@ -49,24 +50,13 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): """ # first recieve the same input x4 but return "different" output for i in range(4): - m.transform(add, - name = "reduce%d" % i, - input_family = config["input"], - output_products = ["sum%d" % i]) + m.transform( + add, name="reduce%d" % i, input_family=config["input"], output_products=["sum%d" % i] + ) # now reduce them pair-wise - m.transform(add, - name = "reduce01", - input_family = ["sum0", "sum1"], - output_products = ["sum01"]) - m.transform(add, - name = "reduce23", - input_family = ["sum2", "sum3"], - output_products = ["sum23"]) + m.transform(add, name="reduce01", input_family=["sum0", "sum1"], output_products=["sum01"]) + m.transform(add, name="reduce23", input_family=["sum2", "sum3"], output_products=["sum23"]) # once more (and the configuration will add a verifier) - m.transform(add, - name = "reduce", - input_family = ["sum01", "sum23"], - output_products = ["sum"]) - + m.transform(add, name="reduce", input_family=["sum01", "sum23"], output_products=["sum"]) diff --git a/test/python/sumit.py b/test/python/sumit.py index a64c4f837..be4940141 100644 --- a/test/python/sumit.py +++ b/test/python/sumit.py @@ -27,6 +27,7 @@ def collectify(i: int, j: int) -> npt.NDArray[np.int32]: """ return np.array([i, j], dtype=np.int32) + def sum_array(coll: npt.NDArray[np.int32]) -> int: """Add the elements of the input collection and return the sum total. 
@@ -61,10 +62,5 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): Returns: None """ - m.transform(collectify, - input_family = config["input"], - output_products = ["my_pyarray"]) - m.transform(sum_array, - input_family = ["my_pyarray"], - output_products = config["output"]) - + m.transform(collectify, input_family=config["input"], output_products=["my_pyarray"]) + m.transform(sum_array, input_family=["my_pyarray"], output_products=config["output"]) diff --git a/test/python/verify.py b/test/python/verify.py index f75bb4fce..936f5a81d 100644 --- a/test/python/verify.py +++ b/test/python/verify.py @@ -4,6 +4,7 @@ this observer verifies its result against the expected value. """ + class Verifier: """A callable class that can assert an expected value. @@ -22,7 +23,7 @@ class Verifier: AssertionError """ - __name__ = 'verifier' + __name__ = "verifier" def __init__(self, sum_total: int): """Create a verifier object. @@ -68,5 +69,4 @@ def PHLEX_REGISTER_ALGORITHMS(m, config): None """ assert_sum = Verifier(config["sum_total"]) - m.observe(assert_sum, input_family = config["input"]) - + m.observe(assert_sum, input_family=config["input"])