diff --git a/.github/workflows/build-gfx90a.yml b/.github/workflows/build-gfx90a.yml
new file mode 100644
index 0000000..2278ad7
--- /dev/null
+++ b/.github/workflows/build-gfx90a.yml
@@ -0,0 +1,65 @@
+name: Build gfx90a Docker image
+
+# Builds envs/x86/gfx90a/Dockerfile. Pull requests only verify that the image
+# builds; pushes to main and manual runs also publish it to Docker Hub.
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - envs/x86/gfx90a/**
+      - .github/workflows/build-gfx90a.yml
+      - AGENTS.md
+      - README.md
+  pull_request:
+    paths:
+      - envs/x86/gfx90a/**
+      - .github/workflows/build-gfx90a.yml
+      - AGENTS.md
+      - README.md
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build gfx90a image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+      # Secrets are not exposed to pull requests from forks, so only log in
+      # when the image will actually be pushed.
+      - name: Log in to Docker Hub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Cache Docker layers
+        uses: actions/cache@v4
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-gfx90a-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-gfx90a-
+      - name: Build docker image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: envs/x86/gfx90a/Dockerfile
+          # Never publish unreviewed pull-request builds over the shared tags.
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: |
+            higherordermethods/selfish:gfx90a
+            higherordermethods/selfish:gfx90a-${{ github.sha }}
+          cache-from: type=local,src=/tmp/.buildx-cache
+          cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
+      # cache-to writes to a separate directory; swap it in so the next run
+      # restores only the freshly exported layers.
+      - name: Move cache
+        run: |
+          rm -rf /tmp/.buildx-cache
+          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..73620c0
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,22 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+Source files live under `envs/<cpu>/<accelerator>/`, where each leaf directory owns a `spack.yaml` manifest and (optionally) a generated `Dockerfile`. 
Keep CPU targets (`x86`, …) and accelerator targets (`gfx90a`, `sm72`, `none`) granular so images stay purpose-built, and limit the root `README.md` to high-level context. + +## Build, Test, and Development Commands +- `spack spec -e envs/x86/gfx90a/spack.yaml` — concretizes the manifest locally; run this before opening a PR so dependency drift is caught early. +- `spack containerize envs/x86/gfx90a/spack.yaml > envs/x86/gfx90a/Dockerfile` — regenerates the Dockerfile after manifest edits (avoid hand-tuning output). +- `docker build -f envs/x86/gfx90a/Dockerfile -t selfish:gfx90a .` — builds the shareable runtime image; tag images `<cpu>-<accelerator>` for clarity. +- `docker run --rm selfish:gfx90a spack find hdf5` — smoke-tests that the expected view was installed inside the image. + +## Coding Style & Naming Conventions +Spack YAML uses 2-space indentation, lowercase keys, and quoted constraint strings (`"target=x86_64_v3"`). Group `specs` alphabetically, keep `packages` overrides sorted by scope, and rely on multiline `RUN` blocks with trailing `\` alignment plus brief comments for non-obvious workarounds. Name new environments after the hardware tuple (`x86/gfx942`, `x86/none`) so downstream scripts can glob predictably. + +## Testing Guidelines +For each environment change, run `spack spec` followed by `spack install --fail-fast` inside a disposable builder container to verify concretization. Container builds must pass `docker build` locally before review; capture the last ~20 lines for the PR description. When adding MPI/HDF5 variants, run `docker run --rm <image> mpichversion` (or another representative binary) to prove runtime availability. There is no coverage gate, but every new spec should ship with at least one build log, and GitHub Actions now double-checks gfx90a builds and publishes them to `higherordermethods/selfish`. 
+ +## Commit & Pull Request Guidelines +Existing history uses short, imperative subject lines (“Initial commit”); follow the same format and include the touched environment in parentheses when practical, e.g., `Add feq-parse 2.2.2 to gfx90a`. One logical change per commit keeps bisects clean. PRs should describe the motivation, list updated directories, attach the relevant `spack spec` or `docker build` excerpt, and link any upstream SELF issues. Paste terminal snippets when reviewing GPU-specific behavior. + +## Security & Configuration Tips +Pin base images (`rockylinux:9`) and Spack refs in manifests, and run `dnf update -y` at build time to pick up CVE fixes. Never embed registry credentials or cluster hostnames in `spack.yaml`; rely on build-time secrets where required. Before publishing, scan the resulting image with `docker scout cves selfish:gfx90a` (or equivalent) to catch dependency vulnerabilities. diff --git a/README.md b/README.md index d4c97f7..e35ad4f 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ While SELF does support bare-metal builds and those are regularly tested, the co The core SELF team at Fluid Numerics has adopted enroot+pyxis with Slurm for our deployment model due to positive experience with this approach. +See [Repository Guidelines](AGENTS.md) for contributor expectations, build commands, and review checklists. 
+ More docs coming soon diff --git a/envs/x86/gfx90a/Dockerfile b/envs/x86/gfx90a/Dockerfile index fe8347c..7e67cf6 100644 --- a/envs/x86/gfx90a/Dockerfile +++ b/envs/x86/gfx90a/Dockerfile @@ -33,6 +33,13 @@ RUN dnf update -y \ && rm -rf /var/cache/dnf \ && dnf clean all +# Install HIP # +COPY envs/x86/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo + +RUN dnf clean all && \ + dnf update -y && \ + dnf install -y rocm-hip-libraries rocm-hip-runtime hip-devel hsa-rocr-devel rocm-llvm-devel + RUN mkdir $SPACK_ROOT && cd $SPACK_ROOT && \ git init --quiet && git remote add origin https://github.com/spack/spack.git && git fetch --depth=1 origin v1.0.2 && git checkout --detach FETCH_HEAD && \ mkdir -p $SPACK_ROOT/opt/spack @@ -81,12 +88,29 @@ set -o noclobber \ && echo ' - feq-parse@2.2.2' \ && echo ' - mpich@4.2.3 +rocm' \ && echo ' - hdf5@1.14.5 +fortran +mpi' \ && echo ' packages:' \ && echo ' all:' \ +&& echo ' providers:' \ +&& echo ' mpi: [mpich]' \ && echo ' require:' \ && echo ' - target=x86_64_v3' \ && echo ' prefer:' \ && echo ' - amdgpu_target=gfx942' \ +&& echo ' hip:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' - spec: "hip@6.4.1"' \ +&& echo ' prefix: "/opt/rocm"' \ +&& echo ' hsa-rocr-dev:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' - spec: "hsa-rocr-dev@6.4.1"' \ +&& echo ' prefix: "/opt/rocm"' \ +&& echo ' llvm-amdgpu:' \ +&& echo ' buildable: false' \ +&& echo ' externals:' \ +&& echo ' - spec: "llvm-amdgpu@6.4.1"' \ +&& echo ' prefix: "/opt/rocm"' \ && echo '' \ && echo ' concretizer:' \ && echo ' unify: true' \ @@ -115,6 +139,14 @@ FROM docker.io/rockylinux:9 COPY --from=builder /opt/spack-environment /opt/spack-environment COPY --from=builder /opt/software /opt/software +# Install the HIP packages from the ROCm repository; dnf caches are removed in the same layer so they are not baked into the final image. +COPY envs/x86/gfx90a/rocm.repo /etc/yum.repos.d/rocm.repo +RUN dnf clean all && \ + dnf update -y && \ + dnf install -y rocm-hip-libraries rocm-hip-runtime hip-devel hsa-rocr-devel rocm-llvm-devel && \ + dnf clean all && \ + rm -rf /var/cache/dnf + # paths.view is a symlink, so copy the parent to avoid dereferencing and duplicating it COPY --from=builder /opt/views /opt/views diff 
--git a/envs/x86/gfx90a/rocm.repo b/envs/x86/gfx90a/rocm.repo new file mode 100644 index 0000000..b56cd1d --- /dev/null +++ b/envs/x86/gfx90a/rocm.repo @@ -0,0 +1,7 @@ +[rocm] +name=ROCm 6.4.1 repository +baseurl=https://repo.radeon.com/rocm/el9/6.4.1/main +enabled=1 +priority=50 +gpgcheck=1 +gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key diff --git a/envs/x86/gfx90a/spack.yaml b/envs/x86/gfx90a/spack.yaml index a4397d4..9f12803 100644 --- a/envs/x86/gfx90a/spack.yaml +++ b/envs/x86/gfx90a/spack.yaml @@ -6,10 +6,28 @@ spack: packages: all: + providers: + mpi: [mpich] require: - "target=x86_64_v3" prefer: - "amdgpu_target=gfx942" + # ROCm components come from the rocm.repo RPMs installed in the Dockerfile; keep this list in sync with the manifest embedded there. + hip: + buildable: false + externals: + - spec: "hip@6.4.1" + prefix: "/opt/rocm" + hsa-rocr-dev: + buildable: false + externals: + - spec: "hsa-rocr-dev@6.4.1" + prefix: "/opt/rocm" + llvm-amdgpu: + buildable: false + externals: + - spec: "llvm-amdgpu@6.4.1" + prefix: "/opt/rocm" container: format: docker