Skip to content

Commit

Permalink
ci: get a list of changed files using git diff instead of GitHub APIv4
Browse files Browse the repository at this point in the history
  • Loading branch information
jiridanek committed Jun 29, 2024
1 parent 3f93529 commit 9a9a4a0
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 79 deletions.
15 changes: 9 additions & 6 deletions .github/workflows/build-notebooks-pr.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
"name": "Build Notebooks"
"name": "Build Notebooks (pr)"
"on":
"pull_request":

Expand All @@ -18,15 +18,18 @@ jobs:
steps:
- uses: actions/checkout@v4

- run: |
- name: Determine targets to build based on changed files
run: |
set -x
git fetch --no-tags origin 'pull/${{ github.event.pull_request.number }}/head:${{ github.event.pull_request.head.ref }}'
git fetch --no-tags origin '+refs/heads/${{ github.event.pull_request.base.ref }}:refs/remotes/origin/${{ github.event.pull_request.base.ref }}'
python3 ci/cached-builds/gen_gha_matrix_jobs.py \
--owner=${{ github.repository_owner }} \
--repo=${{ github.event.pull_request.base.repo.name }} \
--pr-number=${{ github.event.pull_request.number }} \
--skip-unchanged
--from-ref 'origin/${{ github.event.pull_request.base.ref }}' \
--to-ref '${{ github.event.pull_request.head.ref }}'
id: gen
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
shell: bash

build:
needs: ["gen"]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-notebooks.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
# This file is autogenerated by ci/cached-builds/gen_gha_matrix_jobs.py
{
"name": "Build Notebooks",
"name": "Build Notebooks (push)",
"permissions": {
"packages": "write"
},
Expand Down
24 changes: 11 additions & 13 deletions ci/cached-builds/gen_gha_matrix_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def write_github_workflow_file(tree: dict[str, list[str]], path: pathlib.Path) -
}

workflow = {
"name": "Build Notebooks",
"name": "Build Notebooks (push)",
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
"permissions": {
"packages": "write",
Expand Down Expand Up @@ -152,14 +152,10 @@ def main() -> None:
logging.basicConfig(level=logging.DEBUG, stream=sys.stderr)

argparser = argparse.ArgumentParser()
argparser.add_argument("--owner", type=str, required=False,
help="GitHub repo owner/org (for the --skip-unchanged feature)")
argparser.add_argument("--repo", type=str, required=False,
help="GitHub repo name (for the --skip-unchanged feature)")
argparser.add_argument("--pr-number", type=int, required=False,
help="PR number under owner/repo (for the --skip-unchanged feature)")
argparser.add_argument("--skip-unchanged", type=bool, required=False, default=False,
action=argparse.BooleanOptionalAction)
argparser.add_argument("--from-ref", type=str, required=False,
help="Git ref of the base branch (to determine changed files)")
argparser.add_argument("--to-ref", type=str, required=False,
help="Git ref of the PR branch (to determine changed files)")
args = argparser.parse_args()

# https://www.gnu.org/software/make/manual/make.html#Reading-Makefiles
Expand All @@ -170,9 +166,9 @@ def main() -> None:
write_github_workflow_file(tree, project_dir / ".github" / "workflows" / "build-notebooks.yaml")

leafs = compute_leafs_in_dependency_tree(tree)
if args.skip_unchanged:
logging.info(f"Skipping targets not modified in PR #{args.pr_number}")
changed_files = gha_pr_changed_files.list_changed_files(args.owner, args.repo, args.pr_number)
if args.from_ref:
logging.info(f"Skipping targets not modified in the PR")
changed_files = gha_pr_changed_files.list_changed_files(args.from_ref, args.to_ref)
leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
output = print_github_actions_pr_matrix(tree, leafs)

Expand All @@ -197,6 +193,8 @@ def test_select_changed_targets(self):
changed_files = ["jupyter/datascience/ubi9-python-3.9/Dockerfile"]

leafs = gha_pr_changed_files.filter_out_unchanged(leafs, changed_files)
assert set(leafs) == {'cuda-jupyter-tensorflow-ubi9-python-3.9',
assert set(leafs) == {'amd-jupyter-pytorch-c9s-python-3.9',
'amd-jupyter-tensorflow-c9s-python-3.9',
'cuda-jupyter-tensorflow-ubi9-python-3.9',
'jupyter-trustyai-ubi9-python-3.9',
'jupyter-pytorch-ubi9-python-3.9'}
69 changes: 10 additions & 59 deletions ci/cached-builds/gha_pr_changed_files.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import json
import logging
import os
import pathlib
import re
import subprocess
import unittest
import urllib.request

PROJECT_ROOT = pathlib.Path(__file__).parent.parent.parent.resolve()

Expand All @@ -15,57 +13,13 @@ def get_github_token() -> str:
return github_token


# https://docs.github.com/en/graphql/guides/forming-calls-with-graphql
def compose_gh_api_request(pull_number: int, owner="opendatahub-io", repo="notebooks", per_page=100,
cursor="") -> urllib.request.Request:
github_token = get_github_token()

return urllib.request.Request(
url="https://api.github.com/graphql",
method="POST",
headers={
"Authorization": f"bearer {github_token}",
},
# https://docs.github.com/en/graphql/guides/using-the-explorer
data=json.dumps({"query": f"""
{{
repository(owner:"{owner}", name:"{repo}") {{
pullRequest(number:{pull_number}) {{
files(first:{per_page}, after:"{cursor}") {{
edges {{
node {{
path
}}
cursor
}}
}}
}}
}}
}}
"""}).encode("utf-8"),
)


def list_changed_files(owner: str, repo: str, pr_number: int, per_page=100) -> list[str]:
files = []

logging.debug("Getting list of changed files from GitHub API")

CURSOR = ""
while CURSOR is not None:
request = compose_gh_api_request(pull_number=pr_number, owner=owner, repo=repo, per_page=per_page,
cursor=CURSOR)
response = urllib.request.urlopen(request)
data = json.loads(response.read().decode("utf-8"))
response.close()
edges = data["data"]["repository"]["pullRequest"]["files"]["edges"]

CURSOR = None
for edge in edges:
files.append(edge["node"]["path"])
CURSOR = edge["cursor"]

logging.debug(f"Determined {len(files)} changed files: {files[:5]} (..., printing up to 5)")
def list_changed_files(from_ref: str, to_ref: str) -> list[str]:
    """Return the paths that differ between two git revisions.

    Runs ``git diff --name-only <from_ref> <to_ref> --`` in the current working
    directory and returns one entry per line of its output.

    Args:
        from_ref: Git revision to diff from (e.g. the base branch of the PR).
        to_ref: Git revision to diff to (e.g. the head branch of the PR).

    Returns:
        The changed file paths, in the order git printed them.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` exits with a non-zero status
            (for example when one of the refs cannot be resolved).
    """
    logging.debug("Getting list of changed files from git diff")

    # The trailing "--" tells git that everything before it is a revision. Without it,
    # git aborts with "ambiguous argument" whenever a ref name (e.g. a PR branch name
    # chosen by a contributor) also matches a path in the working tree.
    files = subprocess.check_output(
        ["git", "diff", "--name-only", from_ref, to_ref, "--"],
        encoding="utf-8",
    ).splitlines()

    logging.debug(f"Determined {len(files)} changed files: {files[:100]} (..., printing up to 100 files)")
    return files


Expand Down Expand Up @@ -110,12 +64,9 @@ def filter_out_unchanged(targets: list[str], changed_files: list[str]) -> list[s


class SelfTests(unittest.TestCase):
def test_compose_gh_api_request__call_without_asserting(self):
request = compose_gh_api_request(pull_number=556, per_page=100, cursor="")
print(request.data)

def test_list_changed_files__pagination_works(self):
changed_files = list_changed_files(owner="opendatahub-io", repo="notebooks", pr_number=556, per_page=1)
def test_list_changed_files(self):
"""This is PR #556 in opendatahub-io/notebooks"""
changed_files = list_changed_files(from_ref="4d4841f", to_ref="2c36c11")
assert set(changed_files) == {'codeserver/ubi9-python-3.9/Dockerfile',
'codeserver/ubi9-python-3.9/run-code-server.sh'}

Expand Down

0 comments on commit 9a9a4a0

Please sign in to comment.