Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions .github/workflows/update-python-pkg-index.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
name: Updates the Python package index upon an event.

on:
  repository_dispatch:
    types: [update_package_index]

  # According to the [documentation](https://docs.github.com/en/actions/how-tos/manage-workflow-runs/manually-run-a-workflow#configuring-a-workflow-to-run-manually)
  # it is only possible to trigger a workflow manually, if it is located in the default branch.
  workflow_dispatch:
    inputs:
      source_repo:
        description: 'Name of the repo that contains the dependency.'
        required: true
        type: string
      source_org:
        description: 'Name of the organization/user that owns the dependency repo.'
        required: true
        type: string
      dependency_ref:
        description: 'Reference that in the dependency repo that should be checked out and turned into a dependency.'
        required: true
        type: string

  # We need this as long as this file is not in `main`, without it the web interface will not pick it up.
  # See https://stackoverflow.com/a/71057825
  # pull_request:

# The last step pushes a commit to `main`, which needs write access to the repository contents.
permissions:
  contents: write

# Every run commits to `main`; run them one after another so that two updates cannot race on the push.
concurrency:
  group: update-python-pkg-index
  cancel-in-progress: false

# The parameters come from the manual inputs (`workflow_dispatch`) or, if those are empty, from the
# client payload (`repository_dispatch`). They are resolved once, here, and are only ever handed to
# the scripts as environment variables. Expanding `${{ ... }}` directly inside a `run:` script would
# paste externally supplied text into the shell source (script injection).
env:
  SOURCE_ORG: ${{ inputs.source_org || github.event.client_payload.source_org }}
  SOURCE_REPO: ${{ inputs.source_repo || github.event.client_payload.source_repo }}
  DEPENDENCY_REF: ${{ inputs.dependency_ref || github.event.client_payload.dependency_ref }}

jobs:
  update-index:
    runs-on: ubuntu-latest
    steps:
      - name: Print all variables
        shell: bash
        env:
          # The JSON contains double quotes, so it must not be expanded inside the quoted `echo` argument.
          CLIENT_PAYLOAD: ${{ toJson(github.event.client_payload) }}
        run: |
          echo "source_repo: ${SOURCE_REPO}"
          echo "source_org: ${SOURCE_ORG}"
          echo "dep_ref: ${DEPENDENCY_REF}"
          echo "payload: ${CLIENT_PAYLOAD}"

      - name: Checkout the `main` branch of the Python package index.
        uses: actions/checkout@v4
        with:
          path: index_repo
          ref: main  # We always work on main!

      - name: Checkout the repo of the dependency that should be added to the index.
        uses: actions/checkout@v4
        with:
          repository: ${{ env.SOURCE_ORG }}/${{ env.SOURCE_REPO }}
          path: ${{ env.SOURCE_REPO }}
          submodules: 'recursive'
          ref: ${{ env.DEPENDENCY_REF }}

      - name: Build the distribution file.
        shell: bash
        run: |
          DEPENDENCY_REPO="${PWD}/${SOURCE_REPO}"
          PACKAGE_BUILD_FOLDER="${PWD}/index_repo/build"

          cd "${DEPENDENCY_REPO}"
          python -m pip install build --user
          python -m build --wheel --outdir "${PACKAGE_BUILD_FOLDER}"

      - name: Test the distribution file.
        shell: bash
        run: |
          PACKAGE_BUILD_FOLDER="${PWD}/index_repo/build"
          DESTINATION_FOLDER="${PWD}/index_repo/${SOURCE_REPO}"
          mkdir -p "${DESTINATION_FOLDER}"

          readarray -t -d "" PACKAGE_FILES < <(find "${PACKAGE_BUILD_FOLDER}" -type f -print0)
          for PACKAGE_FILE in "${PACKAGE_FILES[@]}"
          do
            # `shell: bash` runs the script with `-e`, so a failing command has to be tested
            # directly in the `if`; a separate `$?` check afterwards would never be reached.
            if ! pip install --force-reinstall --upgrade --no-deps "${PACKAGE_FILE}"
            then
              echo "Failed to install package '${PACKAGE_FILE}'"
              exit 3
            fi
            echo "Successfully tested '${PACKAGE_FILE}'"
            cp -t "${DESTINATION_FOLDER}" "${PACKAGE_FILE}"
          done

      - name: Rescan the package index and update the static `index.html` files.
        shell: bash
        env:
          CI_COMMIT_MESSAGE: updated dependency "${{ env.SOURCE_ORG }}/${{ env.SOURCE_REPO }}"
          CI_COMMIT_AUTHOR: github-actions[bot]
          # No-reply address of the `github-actions[bot]` account. It has to be quoted, because a
          # plain scalar that starts with `[` or a digit-only prefix is easily mis-typed by YAML.
          CI_COMMIT_EMAIL: '41898282+github-actions[bot]@users.noreply.github.com'
        run: |
          cd ./index_repo

          # Update all the packages.
          python generator.py

          git config --global user.name "${CI_COMMIT_AUTHOR}"
          git config --global user.email "${CI_COMMIT_EMAIL}"
          git add .

          # `git status` always exits with 0, thus it can not be used as a condition. Instead we
          # ask whether anything is staged, which also covers the newly added (untracked) wheel.
          # Note that building a wheel is not reproducible, i.e. rebuilding the same commit yields
          # a file with a different hash, so this branch is only taken in rare cases.
          if git diff --cached --quiet
          then
            echo "There were no changes!"
            exit 0
          fi

          # We directly push to main!
          git commit --no-verify -m "${CI_COMMIT_MESSAGE}"
          git push origin main
20 changes: 2 additions & 18 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,18 +1,2 @@
# This .gitignore is appropriate for repositories deployed to GitHub Pages and using
# a Gemfile as specified at https://github.com/github/pages-gem#conventional

# Basic Jekyll gitignores (synchronize to Jekyll.gitignore)
_site/
.sass-cache/
.jekyll-cache/
.jekyll-metadata

# Additional Ruby/bundler ignore for when you run: bundle install
/vendor

# Specific ignore for GitHub Pages
# GitHub Pages will always use its own deployed version of pages-gem
# This means GitHub Pages will NOT use your Gemfile.lock and therefore it is
# counterproductive to check this file into the repository.
# Details at https://github.com/github/pages-gem/issues/768
Gemfile.lock
.token*
build/
60 changes: 58 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,58 @@
# pypi-index
Python packages index
# Custom GT4Py Python Package Index Server
This repo hosts the custom packages that are needed to use GT4Py. These currently include:
- [GridTools/dace](https://github.com/GridTools/dace), currently only for the `next`.
- [ghex-org/GHEX](https://github.com/ghex-org/GHEX)

# Usage
The repo is intended to work fully automatically, orchestrated by GitHub actions.

## Workflow `update-python-pkg-index.yml`
This is the main workflow, in short it does:
- Pulls the repo, whose package should be updated.
- Creates a wheel from the repo that has been pulled.
- Tests if the wheel can be installed.
- Updates the package index, i.e. regenerates the `index.html` files; `generator.py` is used for this.
- Creates a commit containing the updated indexes and the generated wheel.
- Pushes the new commit directly to `main`.

The workflow can be started manually, either through the GitHub web interface or through the `issue_update.sh` script.
In either case, the following information has to be provided:
- The name of the repo on GitHub, generally referred to as "source repo".
- The owner (user or organization) that owns the repo, generally referred to as "source owner".
- The branch of the repo from which a Python package should be created, generally referred to as "dependency ref".

> According to the [documentation](https://docs.github.com/en/actions/reference/workflows-and-actions/events-that-trigger-workflows#repository_dispatch) the
> `repository_dispatch` trigger (the one that is used so that _other_ repos can start the update) only works when
> the workflow file is located on the default branch!

## `generator.py`
Script for updating the static pages.
It works by scanning subfolders, currently `dace` and `ghex`, and creating an index based on all Python packages it finds in them.
It is usually run automatically by the workflow.

## `issue_update.sh`
A simple script that allows to issue a manual remote update of the index.
For more information please see its help output.

## `update_workflows`
This folder contains the workflows that must be installed into the repos containing the dependency.

### DaCe
For DaCe the `dace-updater.yml` must be added to the DaCe repo.
It listens for pushes to tags `__gt4py-next-integration_*`, i.e. the ones that we use to tag our releases.
There is an [_experimental_ branch](https://github.com/GridTools/dace/pull/12) that tests the workflow using the [development index](https://github.com/philip-paul-mueller/test_package_index).
It kind of works, however, currently only pushes related to the branch itself are detected, i.e. the branch that contains the workflow file.
This means, that the workflow file must be included inside `gt4py-next-integration` branch, that is used to deploy the thing, which is not so nice.
As an experiment, I changed the default branch from `main` to the experimental one, without success, but this might be due to the "unintended side effects" that a popup warned me about.


## Token
In order for the _depending_ repo to issue an update request an access token is needed.
This can either be a normal (classic) access token, that needs to grant read access to the repository.
The other possibility is to use a fine grained access token, in which case only the '"Contents" repository permissions (write)' permission has to be granted.

# TODO:
- Install in DaCe
- Install in GHEX
- Configure the page to use `main` as source.

124 changes: 124 additions & 0 deletions generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Regenerates the index based on the specified folders.
"""
from typing import Final, Sequence

import hashlib
import pathlib
import re
import sys

HTML_HEADER: Final[str] = """\
<!DOCTYPE HTML>
<html>
<head>
<title>{Title}</title>
<meta charset="UTF-8" />
</head>

<body>
<h1>{Title}</h1>
"""
"""The header for a html page.

It contains the opening `<body>` tag and has the `Title` interpolation.
"""

HTML_FOOTER: Final[str] = """\
</body>
</html>
"""
"""Contains the footer of an html page.

This includes the closing `</body>` tag.
"""

def normalize_name(name: str) -> str:
    """Normalize the project name according to the rules in PEP503.

    Every run of `-`, `_` and `.` is collapsed into a single `-` and the
    result is converted to lower case.

    Args:
        name: The project name to normalize.

    Returns:
        The normalized project name.
    """
    return re.sub(r"[-_.]+", "-", name).lower()


def write_project_index(
    base_folder: pathlib.Path,
    project_name: str,
) -> int:
    """Write the `index.html` of a single project.

    The page is written to `base_folder / project_name / "index.html"` and
    contains one link, annotated with its SHA256 digest, for every file in
    that folder that ends in `.zip`, `.tar.gz` or `.whl`.

    Args:
        base_folder: Folder that contains the project folders.
        project_name: Name of the project, which is also the name of its folder.

    Returns:
        The number of package files that were listed in the index.

    Raises:
        NotADirectoryError: If the project folder does not exist.
    """
    # Project folder must exist because we assume that the files are located inside.
    project_folder = base_folder / project_name
    if not project_folder.is_dir():
        raise NotADirectoryError(
            f"Expected that the project folder `{project_folder}` for project `{project_name}` exists."
        )

    found_packages = 0
    normalized_project_name = normalize_name(project_name)

    # `iterdir()` yields the entries in arbitrary order; sort them such that the
    # generated page does not change between runs without a reason.
    candidates = sorted(project_folder.iterdir())

    # The page declares `charset="UTF-8"`, so it must also be written as such.
    with open(project_folder / "index.html", "wt", encoding="utf-8") as index:
        index.write(HTML_HEADER.format(Title=f"Custom Package for '{project_name}'"))

        for file in candidates:
            filename = file.name
            if filename == "index.html":
                # This is the page we are writing right now, not a stray file.
                continue
            if filename.startswith(".") or not any(filename.endswith(ext) for ext in [".zip", ".tar.gz", ".whl"]):
                print(
                    f"While building the index for project '{project_name}' found non Python package file '{filename}', which will be ignored.",
                    file=sys.stderr,
                    flush=True,
                )
                continue
            # Wheel file names spell the distribution name with `_` instead of `-`,
            # thus the comparison is done on the normalized form of the file name.
            assert normalize_name(filename).startswith(normalized_project_name + "-")

            # Compute the hash such that we can append it to the link.
            with open(file, "rb") as F:
                digest = hashlib.file_digest(F, "sha256")

            # PEP503 says that the text of the anchor element must be the filename, so there
            # is no need for fancy processing of the file name. Furthermore, we assume that
            # the file names have the correct normalized name and version.
            index.write(
                f'\t\t<a href="{filename}#sha256={digest.hexdigest()}">{filename}</a> <br/>\n'.replace("\t", " ")
            )
            found_packages += 1
        index.write(HTML_FOOTER)

    return found_packages


def write_package_index(
    base_folder: pathlib.Path,
    packages: Sequence[str],
) -> None:
    """Write the root `index.html` and the index of every listed project.

    For every name in `packages` that has a folder inside `base_folder` the
    project index is regenerated through `write_project_index()`. A project is
    only linked from the root page if at least one package file was found for it.
    Projects without a folder or without packages are reported on `stderr`
    and skipped.

    Args:
        base_folder: Folder in which the root `index.html` is written and that
            contains the project folders.
        packages: Names of the projects, i.e. of their folders.
    """

    # The page declares `charset="UTF-8"`, so it must also be written as such.
    with open(base_folder / "index.html", "wt", encoding="utf-8") as index:
        index.write(HTML_HEADER.format(Title="Custom Package Index for GT4Py"))

        for project_name in packages:
            project_folder = base_folder / project_name
            normalized_project_name = normalize_name(project_name)
            if not project_folder.is_dir():
                print(
                    f"There is not folder associated to the project `{project_name}`, skipping it.",
                    flush=True,
                    file=sys.stderr,
                )
                continue

            # Now generate the index for that project.
            found_packages = write_project_index(base_folder, project_name)

            if found_packages == 0:
                # Consider no packages not as an error, only output a warning.
                # TODO: Consider removing the folder.
                print(
                    f"No packages for project `{project_name}` could be located.",
                    flush=True,
                    file=sys.stderr,
                )
                continue

            # PEP503 requires that URLs which respond with an HTML page end with a `/`.
            index.write(f'\t\t<a href="{project_name}/">{normalized_project_name}</a>\n'.replace("\t", " "))

        index.write(HTML_FOOTER)


if __name__ == "__main__":
    # Regenerate the indexes of the hosted projects; their folders are
    # located next to this script.
    script_folder = pathlib.Path(__file__).parent
    hosted_projects = ["dace", "ghex"]
    write_package_index(base_folder=script_folder, packages=hosted_projects)
Loading