diff --git a/.common-ci.yml b/.common-ci.yml
index 35351153f..6a37d7989 100644
--- a/.common-ci.yml
+++ b/.common-ci.yml
@@ -38,13 +38,13 @@ workflow:
- if: $CI_COMMIT_BRANCH
- if: $CI_COMMIT_TAG
- if: $CI_PIPELINE_SOURCE == "web"
- - if: $CI_COMMIT_BRANCH == "master"
+ - if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
.main-or-manual:
rules:
- - if: $CI_COMMIT_BRANCH == "master"
+ - if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_BRANCH =~ /^release-.*$/
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
- if: $CI_PIPELINE_SOURCE == "schedule"
@@ -71,7 +71,7 @@ trigger-pipeline:
.buildx-setup:
before_script:
- - export BUILDX_VERSION=v0.6.3
+ - export BUILDX_VERSION=v0.15.1
- apk add --no-cache curl
- mkdir -p ~/.docker/cli-plugins
- curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64"
@@ -82,9 +82,9 @@ trigger-pipeline:
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
# Define targets for the gpu-operator and gpu-operator-validator images
-.dist-ubi8:
+.dist-ubi9:
variables:
- DIST: ubi8
+ DIST: ubi9
CVE_UPDATES: "cyrus-sasl-lib"
.target-gpu-operator:
@@ -99,6 +99,7 @@ trigger-pipeline:
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/gpu-operator-validator"
IN_IMAGE_NAME: "gpu-operator-validator"
IMAGE_ARCHIVE: "gpu-operator-validator.tar"
+ IN_REGISTRY: "${STAGING_REGISTRY}/gpu-operator"
# .release forms the base of the deployment jobs which push images to the CI registry.
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
@@ -149,7 +150,7 @@ trigger-pipeline:
# Download the regctl binary for use in the release steps
.regctl-setup:
before_script:
- - export REGCTL_VERSION=v0.3.10
+ - export REGCTL_VERSION=v0.7.2
- apk add --no-cache curl
- mkdir -p bin
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
@@ -181,7 +182,7 @@ trigger-pipeline:
release:staging-gpu-operator:
extends:
- .release:staging
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator
variables:
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/gpu-operator"
@@ -189,7 +190,7 @@ release:staging-gpu-operator:
release:staging-gpu-operator-validator:
extends:
- .release:staging
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator-validator
variables:
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/gpu-operator-validator"
@@ -197,7 +198,7 @@ release:staging-gpu-operator-validator:
release:staging-latest-gpu-operator:
extends:
- .release:staging
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator
variables:
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/gpu-operator"
@@ -208,7 +209,7 @@ release:staging-latest-gpu-operator:
release:staging-latest-gpu-operator-validator:
extends:
- .release:staging
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator-validator
variables:
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/gpu-operator-validator"
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 41dc2bb8a..268e75212 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -39,10 +39,10 @@ _Detailed steps to reproduce the issue._
Collecting full debug bundle (optional):
```
-curl -o must-gather.sh -L https://raw.githubusercontent.com/NVIDIA/gpu-operator/master/hack/must-gather.sh
+curl -o must-gather.sh -L https://raw.githubusercontent.com/NVIDIA/gpu-operator/main/hack/must-gather.sh
chmod +x must-gather.sh
./must-gather.sh
```
-**NOTE**: please refer to the [must-gather](https://raw.githubusercontent.com/NVIDIA/gpu-operator/master/hack/must-gather.sh) script for debug data collected.
+**NOTE**: please refer to the [must-gather](https://raw.githubusercontent.com/NVIDIA/gpu-operator/main/hack/must-gather.sh) script for debug data collected.
This bundle can be submitted to us via email: **operator_feedback@nvidia.com**
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
new file mode 100644
index 000000000..c61a81b13
--- /dev/null
+++ b/.github/copy-pr-bot.yaml
@@ -0,0 +1,3 @@
+# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/#configuration
+
+enabled: true
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 000000000..7ba9bbfe8
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,43 @@
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+  # Go modules of the root module; k8s.io/* updates are grouped into a single
+  # pull request, except k8s.io/klog/*.
+  - package-ecosystem: 'gomod'
+    directory: '/'
+    target-branch: 'main'
+    schedule:
+      interval: 'daily'
+    labels:
+      - 'dependencies'
+    groups:
+      k8sio:
+        patterns:
+          - 'k8s.io/*'
+        exclude-patterns:
+          - 'k8s.io/klog/*'
+
+  # Go modules declared under /tools.
+  - package-ecosystem: 'gomod'
+    directory: '/tools'
+    target-branch: 'main'
+    schedule:
+      interval: 'daily'
+    labels:
+      - 'dependencies'
+
+  # Update GPU Operator base images.
+  - package-ecosystem: 'docker'
+    directory: '/docker'
+    schedule:
+      interval: 'daily'
+
+  # Update GPU Operator Validator base images.
+  - package-ecosystem: 'docker'
+    directory: '/validator'
+    schedule:
+      interval: 'daily'
+
+  # Actions referenced by the workflows in this repository.
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      interval: 'daily'
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
deleted file mode 100644
index e8e3d86dc..000000000
--- a/.github/pull_request_template.md
+++ /dev/null
@@ -1,5 +0,0 @@
-Hello!
-
-Thanks for making this contribution! When contributing to this repository please keep in mind the following:
-- [You should sign your work](https://github.com/NVIDIA/gpu-operator/blob/master/CONTRIBUTING.md).
-- You should be making your contribution against the [gitlab.com repository](https://gitlab.com/nvidia/kubernetes/gpu-operator) as github.com is just a mirror.
diff --git a/.github/workflows/blossom-ci.yml b/.github/workflows/blossom-ci.yml
deleted file mode 100644
index a5b37eb4f..000000000
--- a/.github/workflows/blossom-ci.yml
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# A workflow to trigger ci on hybrid infra (github + self hosted runner)
-name: Blossom-CI
-on:
- issue_comment:
- types: [created]
- workflow_dispatch:
- inputs:
- platform:
- description: 'runs-on argument'
- required: false
- args:
- description: 'argument'
- required: false
-jobs:
- Authorization:
- name: Authorization
- runs-on: blossom
- outputs:
- args: ${{ env.args }}
-
- # This job only runs for pull request comments
- if: |
- contains( '\
- anstockatnv,\
- rorajani,\
- cdesiniotis,\
- shivamerla,\
- ArangoGutierrez,\
- elezar,\
- klueska,\
- zvonkok,\
- ', format('{0},', github.actor)) &&
- github.event.comment.body == '/blossom-ci'
- steps:
- - name: Check if comment is issued by authorized person
- run: blossom-ci
- env:
- OPERATION: 'AUTH'
- REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
-
- Vulnerability-scan:
- name: Vulnerability scan
- needs: [Authorization]
- runs-on: ubuntu-latest
- steps:
- - name: Checkout code
- uses: actions/checkout@v2
- with:
- repository: ${{ fromJson(needs.Authorization.outputs.args).repo }}
- ref: ${{ fromJson(needs.Authorization.outputs.args).ref }}
- lfs: 'true'
-
- # repo specific steps
- #- name: Setup java
- # uses: actions/setup-java@v1
- # with:
- # java-version: 1.8
-
- # add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file
- #- name: Setup blackduck properties
- # run: |
- # PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g')
- # echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties
- # echo detect.maven.included.scopes=compile >> application.properties
-
- - name: Run blossom action
- uses: NVIDIA/blossom-action@main
- env:
- REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }}
- with:
- args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }}
- args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }}
- args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }}
-
- Job-trigger:
- name: Start ci job
- needs: [Vulnerability-scan]
- runs-on: blossom
- steps:
- - name: Start ci job
- run: blossom-ci
- env:
- OPERATION: 'START-CI-JOB'
- CI_SERVER: ${{ secrets.CI_SERVER }}
- REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
- Upload-Log:
- name: Upload log
- runs-on: blossom
- if : github.event_name == 'workflow_dispatch'
- steps:
- - name: Jenkins log for pull request ${{ fromJson(github.event.inputs.args).pr }} (click here)
- run: blossom-ci
- env:
- OPERATION: 'POST-PROCESSING'
- CI_SERVER: ${{ secrets.CI_SERVER }}
- REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 000000000..79f3e17ac
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,312 @@
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Workflow display name.
+name: 'CI'
+
+# Run only for pushes to branches matching one of the patterns below.
+on:
+  push:
+    branches:
+      - 'pull-request/[0-9]+'
+      - 'main'
+      - 'release-*'
+
+jobs:
+ ### Configuration checks ###
+ # Lints the Helm chart located in deployments/gpu-operator/.
+ helm-lint:
+   runs-on: 'ubuntu-latest'
+   steps:
+     - uses: 'actions/checkout@v4'
+       name: 'Checkout code'
+     - id: 'install'
+       uses: 'azure/setup-helm@v4.2.0'
+       name: 'Install Helm'
+     - run: 'helm lint deployments/gpu-operator/'
+ # Runs `make validate-csv` using the Go version declared in versions.mk.
+ validate-csv:
+   runs-on: 'ubuntu-latest'
+   steps:
+     - uses: 'actions/checkout@v4'
+       name: 'Checkout code'
+     - id: 'vars'
+       name: 'Get Golang version'
+       # Take the "GOLANG_VERSION ?= <value>" line from versions.mk, strip the
+       # prefix, and hand the value to later steps through GITHUB_ENV.
+       run: |
+         version_line=$( grep "GOLANG_VERSION ?=" versions.mk )
+         echo "GOLANG_VERSION=${version_line##GOLANG_VERSION ?= }" >> $GITHUB_ENV
+     - uses: 'actions/setup-go@v5'
+       name: 'Install Go'
+       with:
+         go-version: '${{ env.GOLANG_VERSION }}'
+     - run: 'make validate-csv'
+ # Runs `make validate-helm-values` using the Go version declared in versions.mk.
+ validate-helm-values:
+   runs-on: 'ubuntu-latest'
+   steps:
+     - uses: 'actions/checkout@v4'
+       name: 'Checkout code'
+     - id: 'vars'
+       name: 'Get Golang version'
+       # Read the "GOLANG_VERSION ?= <value>" line from versions.mk and export
+       # only the value part via GITHUB_ENV.
+       run: |
+         go_version_line=$( grep "GOLANG_VERSION ?=" versions.mk )
+         echo "GOLANG_VERSION=${go_version_line##GOLANG_VERSION ?= }" >> $GITHUB_ENV
+     - uses: 'actions/setup-go@v5'
+       name: 'Install Go'
+       with:
+         go-version: '${{ env.GOLANG_VERSION }}'
+     - run: 'make validate-helm-values'
+
+ ### Golang checks and build ###
+ # Lints the Go sources with golangci-lint, then runs `make check`. Waits for
+ # the jobs listed under `needs`.
+ go-check:
+   runs-on: 'ubuntu-latest'
+   needs:
+     - helm-lint
+     - validate-csv
+     - validate-helm-values
+   steps:
+     - name: 'Checkout code'
+       uses: 'actions/checkout@v4'
+     - id: 'vars'
+       name: 'Get Golang version'
+       # Export the Go and golangci-lint versions declared in versions.mk.
+       run: |
+         go_line=$( grep "GOLANG_VERSION ?=" versions.mk )
+         lint_line=$( grep "GOLANGCI_LINT_VERSION ?=" versions.mk )
+         echo "GOLANG_VERSION=${go_line##GOLANG_VERSION ?= }" >> $GITHUB_ENV
+         echo "GOLANGCI_LINT_VERSION=${lint_line##GOLANGCI_LINT_VERSION ?= }" >> $GITHUB_ENV
+     - name: 'Install Go'
+       uses: 'actions/setup-go@v5'
+       with:
+         go-version: '${{ env.GOLANG_VERSION }}'
+     - name: 'Lint'
+       uses: 'golangci/golangci-lint-action@v6'
+       with:
+         version: '${{ env.GOLANGCI_LINT_VERSION }}'
+         args: '-v --timeout 5m'
+         skip-cache: true
+     - run: 'make check'
+ # Runs `make coverage` (displayed as "unit tests") with the Go version
+ # declared in versions.mk. Waits for the jobs listed under `needs`.
+ go-test:
+   name: 'unit tests'
+   runs-on: 'ubuntu-latest'
+   needs:
+     - helm-lint
+     - validate-csv
+     - validate-helm-values
+   steps:
+     - uses: 'actions/checkout@v4'
+       name: 'Checkout code'
+     - id: 'vars'
+       name: 'Get Golang version'
+       # Export the value of the "GOLANG_VERSION ?= <value>" line in versions.mk.
+       run: |
+         matched_line=$( grep "GOLANG_VERSION ?=" versions.mk )
+         echo "GOLANG_VERSION=${matched_line##GOLANG_VERSION ?= }" >> $GITHUB_ENV
+     - uses: 'actions/setup-go@v5'
+       name: 'Install Go'
+       with:
+         go-version: '${{ env.GOLANG_VERSION }}'
+     - run: 'make coverage'
+ # Runs `make docker-build` once the jobs listed under `needs` have finished.
+ go-build:
+   runs-on: 'ubuntu-latest'
+   needs:
+     - helm-lint
+     - validate-csv
+     - validate-helm-values
+   steps:
+     - name: 'Checkout code'
+       uses: 'actions/checkout@v4'
+     - run: 'make docker-build'
+
+ ### Image builds ###
+ # Builds the gpu-operator image for every dist in the matrix via
+ # `make build-<dist>`. PUSH_ON_BUILD and BUILD_MULTI_ARCH_IMAGES are set to
+ # "true" for push events unless the actor is dependabot.
+ build-gpu-operator:
+   needs: [go-check, go-test, go-build]
+   runs-on: ubuntu-latest
+   strategy:
+     matrix:
+       dist: [ubi9]
+   steps:
+     - uses: actions/checkout@v4
+       name: Check out code
+     - name: Calculate build vars
+       id: vars
+       run: |
+         echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
+         echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
+         # This workflow is triggered by `push` only, so the pull_request
+         # payload is empty here; label the image with the repository that
+         # received the push instead of producing "https://github.com/".
+         REPO_FULL_NAME="${{ github.repository }}"
+         echo "${REPO_FULL_NAME}"
+         echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
+
+         # Artifacts are never generated for dependabot; otherwise they are
+         # generated for same-repository pull requests and for pushes.
+         GENERATE_ARTIFACTS="false"
+         if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
+           GENERATE_ARTIFACTS="false"
+         elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
+           GENERATE_ARTIFACTS="true"
+         elif [[ "${{ github.event_name }}" == "push" ]]; then
+           GENERATE_ARTIFACTS="true"
+         fi
+         echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
+         echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
+     - name: Set up QEMU
+       uses: docker/setup-qemu-action@v3
+     - name: Set up Docker Buildx
+       uses: docker/setup-buildx-action@v3
+     - name: Login to GitHub Container Registry
+       uses: docker/login-action@v3
+       with:
+         registry: ghcr.io
+         username: ${{ github.actor }}
+         password: ${{ secrets.GITHUB_TOKEN }}
+     - name: Build image
+       env:
+         # Shell-style ${VAR} is not interpolated inside `env:` values, so the
+         # values exported through GITHUB_ENV are read from the `env` context.
+         IMAGE_NAME: "ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator"
+         VERSION: "${{ env.COMMIT_SHORT_SHA }}"
+       run: |
+         echo "${VERSION}"
+         make build-${{ matrix.dist }}
+ # Builds the gpu-operator-validator image (SUBCOMPONENT=validator) for every
+ # dist in the matrix via `make build-<dist>`. PUSH_ON_BUILD and
+ # BUILD_MULTI_ARCH_IMAGES are set to "true" for push events unless the actor
+ # is dependabot.
+ build-gpu-operator-validator:
+   needs: [go-check, go-test, go-build]
+   runs-on: ubuntu-latest
+   strategy:
+     matrix:
+       dist: [ubi9]
+   steps:
+     - uses: actions/checkout@v4
+       name: Check out code
+     - name: Calculate build vars
+       id: vars
+       run: |
+         echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
+         echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
+         # This workflow is triggered by `push` only, so the pull_request
+         # payload is empty here; label the image with the repository that
+         # received the push instead of producing "https://github.com/".
+         REPO_FULL_NAME="${{ github.repository }}"
+         echo "${REPO_FULL_NAME}"
+         echo "LABEL_IMAGE_SOURCE=https://github.com/${REPO_FULL_NAME}" >> $GITHUB_ENV
+
+         # Artifacts are never generated for dependabot; otherwise they are
+         # generated for same-repository pull requests and for pushes.
+         GENERATE_ARTIFACTS="false"
+         if [[ "${{ github.actor }}" == "dependabot[bot]" ]]; then
+           GENERATE_ARTIFACTS="false"
+         elif [[ "${{ github.event_name }}" == "pull_request" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
+           GENERATE_ARTIFACTS="true"
+         elif [[ "${{ github.event_name }}" == "push" ]]; then
+           GENERATE_ARTIFACTS="true"
+         fi
+         echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
+         echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
+     - name: Set up QEMU
+       uses: docker/setup-qemu-action@v3
+     - name: Set up Docker Buildx
+       uses: docker/setup-buildx-action@v3
+     - name: Login to GitHub Container Registry
+       uses: docker/login-action@v3
+       with:
+         registry: ghcr.io
+         username: ${{ github.actor }}
+         password: ${{ secrets.GITHUB_TOKEN }}
+     - name: Build image
+       env:
+         # Shell-style ${VAR} is not interpolated inside `env:` values, so the
+         # values exported through GITHUB_ENV are read from the `env` context.
+         IMAGE_NAME: "ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/gpu-operator/gpu-operator-validator"
+         VERSION: "${{ env.COMMIT_SHORT_SHA }}"
+         SUBCOMPONENT: validator
+       run: |
+         echo "${VERSION}"
+         make build-${{ matrix.dist }}
+
+ ### e2e tests ###
+ # Runs the default e2e test case (tests/cases/defaults.sh) against the images
+ # tagged with this commit's short SHA, on an instance set up by the Holodeck
+ # action. Logs are pulled after the run and uploaded only when the job fails.
+ e2e-tests-containerd:
+   needs: [build-gpu-operator, build-gpu-operator-validator]
+   runs-on: linux-amd64-cpu4
+   steps:
+     - uses: actions/checkout@v4
+       name: Check out code
+     - name: Set up Holodeck
+       uses: NVIDIA/holodeck@v0.2.1
+       with:
+         aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+         aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+         aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
+         holodeck_config: "tests/holodeck.yaml"
+     - name: Get public dns name
+       id: get_public_dns_name
+       # NOTE(review): this action tracks the moving `master` ref; consider
+       # pinning it to a release tag or commit SHA.
+       uses: mikefarah/yq@master
+       with:
+         cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
+     - name: Calculate test vars
+       id: vars
+       run: |
+         # Compute each value once, then export it for the following steps.
+         COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}
+         echo "COMMIT_SHORT_SHA=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
+         LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')
+         echo "LOWERCASE_REPO_OWNER=${LOWERCASE_REPO_OWNER}" >> $GITHUB_ENV
+
+         echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
+         echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator" >> $GITHUB_ENV
+         echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
+         echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator" >> $GITHUB_ENV
+
+         echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV
+         echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
+     - name: Run e2e tests
+       env:
+         GPU_PRODUCT_NAME: "Tesla-T4"
+         SKIP_LAUNCH: "true"
+         CONTAINER_RUNTIME: "containerd"
+         TEST_CASE: "./tests/cases/defaults.sh"
+       run: |
+         echo "${{ secrets.AWS_SSH_KEY }}" > "${private_key}" && chmod 400 "${private_key}"
+         # Record the test status without aborting, so the logs are still
+         # pulled; rc starts at 0 so the final exit is explicit on success.
+         rc=0
+         ./tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${VALIDATOR_IMAGE} ${VALIDATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
+         ./tests/scripts/pull.sh /tmp/logs logs
+         exit "${rc}"
+     - name: Archive test logs
+       if: ${{ failure() }}
+       uses: actions/upload-artifact@v4
+       with:
+         name: containerd-e2e-test-logs
+         path: ./logs/
+         retention-days: 15
+
+ # Runs the nvidia-driver e2e test case (tests/cases/nvidia-driver.sh) against
+ # the images tagged with this commit's short SHA, on an instance set up by
+ # the Holodeck action. Logs are pulled after the run and uploaded only when
+ # the job fails.
+ e2e-tests-nvidiadriver:
+   needs: [build-gpu-operator, build-gpu-operator-validator]
+   runs-on: linux-amd64-cpu4
+   steps:
+     - uses: actions/checkout@v4
+       name: Check out code
+     - name: Set up Holodeck
+       uses: NVIDIA/holodeck@v0.2.1
+       with:
+         aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+         aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+         aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
+         holodeck_config: "tests/holodeck.yaml"
+     - name: Get public dns name
+       id: get_public_dns_name
+       # NOTE(review): this action tracks the moving `master` ref; consider
+       # pinning it to a release tag or commit SHA.
+       uses: mikefarah/yq@master
+       with:
+         cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
+     - name: Calculate test vars
+       id: vars
+       run: |
+         # Compute each value once, then export it for the following steps.
+         COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}
+         echo "COMMIT_SHORT_SHA=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
+         LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')
+         echo "LOWERCASE_REPO_OWNER=${LOWERCASE_REPO_OWNER}" >> $GITHUB_ENV
+
+         echo "OPERATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
+         echo "OPERATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator" >> $GITHUB_ENV
+         echo "VALIDATOR_VERSION=${COMMIT_SHORT_SHA}" >> $GITHUB_ENV
+         echo "VALIDATOR_IMAGE=ghcr.io/${LOWERCASE_REPO_OWNER}/gpu-operator/gpu-operator-validator" >> $GITHUB_ENV
+
+         echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV
+         echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
+     - name: Run e2e tests
+       env:
+         GPU_PRODUCT_NAME: "Tesla-T4"
+         SKIP_LAUNCH: "true"
+         CONTAINER_RUNTIME: "containerd"
+         TEST_CASE: "./tests/cases/nvidia-driver.sh"
+       run: |
+         echo "${{ secrets.AWS_SSH_KEY }}" > "${private_key}" && chmod 400 "${private_key}"
+         # Record the test status without aborting, so the logs are still
+         # pulled; rc starts at 0 so the final exit is explicit on success.
+         rc=0
+         ./tests/ci-run-e2e.sh ${OPERATOR_IMAGE} ${OPERATOR_VERSION} ${VALIDATOR_IMAGE} ${VALIDATOR_VERSION} ${GPU_PRODUCT_NAME} ${TEST_CASE} || rc=$?
+         ./tests/scripts/pull.sh /tmp/logs logs
+         exit "${rc}"
+     - name: Archive test logs
+       if: ${{ failure() }}
+       uses: actions/upload-artifact@v4
+       with:
+         name: nvidiadriver-e2e-test-logs
+         path: ./logs/
+         retention-days: 15
diff --git a/.github/workflows/pre-sanity.yml b/.github/workflows/pre-sanity.yml
deleted file mode 100644
index 7e2ef5822..000000000
--- a/.github/workflows/pre-sanity.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-name: Run pre sanity
-
-# run this workflow for each commit
-on: [pull_request]
-
-jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
-
- - name: Build dev image
- run: make .build-image
-
- - name: Build
- run: make docker-build
-
- - name: Tests
- run: make docker-coverage
-
- - name: Checks
- run: make docker-check
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 898348232..c4b2969c9 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -95,13 +95,13 @@ unit-tests:
build:gpu-operator:
extends:
- .image-build
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator
build:gpu-operator-validator:
extends:
- .image-build
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator-validator
.e2e_defaults:
@@ -111,13 +111,13 @@ build:gpu-operator-validator:
TF_VAR_additional_ingress_ip_ranges: '["216.228.112.0/26", "217.111.27.192/26"]'
# These should match the images generated by the deploy step.
# TODO: Should these use the staging release instead?
- OPERATOR_VERSION: "${CI_COMMIT_SHORT_SHA}-${DIST}"
+ OPERATOR_VERSION: "${CI_COMMIT_SHORT_SHA}"
OPERATOR_IMAGE: "${CI_REGISTRY_IMAGE}"
- VALIDATOR_VERSION: "${CI_COMMIT_SHORT_SHA}-${DIST}"
+ VALIDATOR_VERSION: "${CI_COMMIT_SHORT_SHA}"
VALIDATOR_IMAGE: "${CI_REGISTRY_IMAGE}/gpu-operator-validator"
GPU_PRODUCT_NAME: "Tesla-T4"
extends:
- - .dist-ubi8
+ - .dist-ubi9
except:
variables:
- $CI_COMMIT_MESSAGE =~ /skip-end-to-end-tests/
diff --git a/.golangci.yml b/.golangci.yml
index f8bc00608..3ba1bf0b3 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -22,3 +22,4 @@ linters-settings:
excludes:
- G101
- G404
+ - G115
diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml
index 5fd0a5924..bbf7392cc 100644
--- a/.nvidia-ci.yml
+++ b/.nvidia-ci.yml
@@ -14,13 +14,13 @@ include:
ref: "2023.10.09"
variables:
- # Release "devel"-tagged images off the master branch
- RELEASE_DEVEL_BRANCH: "master"
+ # Release "devel"-tagged images off the main branch
+ RELEASE_DEVEL_BRANCH: "main"
DEVEL_RELEASE_IMAGE_VERSION: "devel"
# On the multi-arch builder we don't need the qemu setup.
SKIP_QEMU_SETUP: "1"
# Define the public staging registry
- STAGING_REGISTRY: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging
+ STAGING_REGISTRY: ghcr.io/nvidia
STAGING_VERSION: ${CI_COMMIT_SHORT_SHA}
GIT_SUBMODULE_PATHS: cnt-ci
@@ -47,21 +47,21 @@ variables:
- !reference [.regctl-setup, before_script]
- apk add --no-cache make bash
- >
- regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
+ regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- - make IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
+ - make IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA} push-${DIST}
image:gpu-operator:
extends:
- .image-pull
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator
image:gpu-operator-validator:
extends:
- .image-pull
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator-validator
variables:
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/gpu-operator-validator"
@@ -80,7 +80,7 @@ image:gpu-operator-validator:
stage: scan
image: "${PULSE_IMAGE}"
variables:
- IMAGE: "${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}"
+ IMAGE: "${IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}"
IMAGE_ARCHIVE: "gpu-operator.tar"
except:
variables:
@@ -109,7 +109,7 @@ image:gpu-operator-validator:
.scan:gpu-operator:
extends:
- .scan
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator
needs:
- image:gpu-operator
@@ -129,7 +129,7 @@ scan:gpu-operator-arm64:
.scan:gpu-operator-validator:
extends:
- .scan
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator-validator
needs:
- image:gpu-operator-validator
@@ -158,13 +158,13 @@ scan:gpu-operator-validator-arm64:
release:ngc-gpu-operator:
extends:
- .release:ngc
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator
release:ngc-gpu-operator-validator:
extends:
- .release:ngc
- - .dist-ubi8
+ - .dist-ubi9
- .target-gpu-operator-validator
variables:
IN_IMAGE_NAME: "gpu-operator-validator"
@@ -174,53 +174,56 @@ release:ngc-gpu-operator-validator:
# Download the ngc cli binary for use in the sign steps
.ngccli-setup:
before_script:
- - export NGCLI_VERSION=3.31.0
- - apk add --no-cache curl
- - curl -sSLo ngccli_linux.zip https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/${NGCLI_VERSION}/files/ngccli_linux.zip
+ - apt-get update && apt-get install -y curl unzip jq
+ - |
+ if [ -z "${NGCCLI_VERSION}" ]; then
+ NGC_VERSION_URL="https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions"
+ # Extract the latest version from the JSON data using jq
+ export NGCCLI_VERSION=$(curl -s $NGC_VERSION_URL | jq -r '.recipe.latestVersionIdStr')
+ fi
+ echo "NGCCLI_VERSION ${NGCCLI_VERSION}"
+ - curl -sSLo ngccli_linux.zip https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/${NGCCLI_VERSION}/files/ngccli_linux.zip
- unzip ngccli_linux.zip
- chmod u+x ngc-cli/ngc
- - export PATH=$(pwd)/ngc-cli:${PATH}
- - ngc config set --api_key=${NGC_REGISTRY_TOKEN} --org=nvidia
# .sign forms the base of the deployment jobs which signs images in the CI registry.
# This is extended with the image name and version to be deployed.
.sign:ngc:
+ image: ubuntu:latest
stage: sign
+ rules:
+ - if: $CI_COMMIT_TAG
variables:
- IMAGE_NAME: "${OUT_IMAGE_NAME}"
- VERSION: "${OUT_IMAGE_VERSION}"
+ NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
+ IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
+ IMAGE_TAG: "${CI_COMMIT_TAG}"
retry:
max: 2
before_script:
- !reference [.ngccli-setup, before_script]
- # We ensure that the OUT_IMAGE_VERSION is set
- - 'echo Version: ${IMAGE_NAME} ; [[ -n "${VERSION}" ]] || exit 1'
- - apk add --no-cache bash
+ # Ensure that both IMAGE_NAME and IMAGE_TAG are set
+ - 'echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1'
+ - 'echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1'
script:
- - 'echo "Signing the image ${IMAGE_NAME}:${VERSION}"'
- - 'echo "ngc registry image publish --source ${IMAGE_NAME}:${VERSION} ${IMAGE_NAME}:${VERSION} --public --discoverable --allow-guest --sign"'
+ - 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
+ - ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
sign:ngc-gpu-operator:
extends:
- .sign:ngc
needs:
- release:ngc-gpu-operator
- rules:
- - if: $CI_COMMIT_TAG
variables:
- OUT_IMAGE_VERSION: "${CI_COMMIT_TAG}"
- OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}" # This needs to change for the gpu-operator and gpu-operator-validator
+ IMAGE_TAG: "${CI_COMMIT_TAG}"
sign:ngc-gpu-operator-validator:
extends:
- .sign:ngc
needs:
- release:ngc-gpu-operator-validator
- rules:
- - if: $CI_COMMIT_TAG
variables:
- OUT_IMAGE_VERSION: "${CI_COMMIT_TAG}"
- OUT_IMAGE_NAME: "${NGC_PROD_VALIDATOR_IMAGE}"
+ IMAGE_NAME: "${NGC_PROD_VALIDATOR_IMAGE}"
+ IMAGE_TAG: "${CI_COMMIT_TAG}"
.schedule_defaults:
rules:
@@ -234,7 +237,7 @@ sign:ngc-gpu-operator-validator:
OPERATOR_IMAGE: "${STAGING_REGISTRY}/gpu-operator"
VALIDATOR_VERSION: "${CI_COMMIT_SHORT_SHA}"
VALIDATOR_IMAGE: "${STAGING_REGISTRY}/gpu-operator-validator"
- TARGET_DRIVER_VERSION: "525.147.05"
+ TARGET_DRIVER_VERSION: "565.57.01"
.e2e_tests:
extends:
@@ -284,7 +287,7 @@ e2e_tests_containerd_k8s1_27:
- .e2e_defaults
- .infra_setup_defaults
- .schedule_defaults
-
+
.clean_infra:
extends:
- .cnt_kube_clean
@@ -308,7 +311,7 @@ cnt_kube_setup_containerd_k8s1_25:
variables:
TF_VAR_kubernetes_version: "1.25.11"
TF_VAR_gpu_device_name: "NVIDIA-A100-PCIE-40GB"
-
+
cnt_kube_clean_containerd_k8s1_25:
extends:
- .clean_infra
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c1b5ac802..5c05a316b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,34 +15,41 @@ The NVIDIA GPU Operator is an open-source product built and maintained by NVIDIA
## Architecture
The GPU Operator is made up of the following software components - each of the components runs as a container, including NVIDIA drivers. The associated code is linked to each of the components below:
-* [gpu-operator](https://gitlab.com/nvidia/kubernetes/gpu-operator)
+* [gpu-operator](https://github.com/NVIDIA/gpu-operator)
* [k8s-device-plugin](https://github.com/NVIDIA/k8s-device-plugin)
-* [driver](https://gitlab.com/nvidia/container-images/driver)
-* [container-toolkit](https://gitlab.com/nvidia/container-toolkit/container-config)
+* [driver](https://github.com/NVIDIA/gpu-driver-container)
+* [container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit)
* [dcgm-exporter](https://github.com/NVIDIA/dcgm-exporter)
-* [gpu-feature-discovery](https://gitlab.com/nvidia/kubernetes/gpu-feature-discovery)
-* [mig-manager](https://gitlab.com/nvidia/cloud-native/mig-parted)
-* [samples](https://gitlab.com/nvidia/container-images/samples/-/tree/main/cuda/archive/rhel-ubi8/vector-add)
+* [gpu-feature-discovery](https://github.com/NVIDIA/k8s-device-plugin)
+* [mig-manager](https://github.com/NVIDIA/mig-parted)
+* [sandbox-device-plugin](https://github.com/NVIDIA/kubevirt-gpu-device-plugin)
+* [vgpu-device-manager](https://github.com/NVIDIA/vgpu-device-manager)
+* [kata-manager](https://github.com/NVIDIA/k8s-kata-manager)
+* [samples](https://github.com/NVIDIA/k8s-samples)
```
-gitlab.com/
-├── nvidia/
-│ ├── gpu-operator (CRD and controller logic that implements the reconciliation)
-│ ├── k8s-device-plugin (NVIDIA Device Plugin for Kubernetes)
-│ ├── driver (NVIDIA Driver qualified for data center GPUs)
-│ ├── container-toolkit (NVIDIA Container Toolkit, runtime for Docker)
-│ ├── dcgm-exporter (NVIDIA DCGM for monitoring and telemetry)
-│ ├── gpu-feature-discovery (NVIDIA GPU Feature Discovery for Kubernetes)
-│ ├── mig-manager (NVIDIA Multi-Instance GPU Manager for Kubernetes)
-│ ├── samples (CUDA VectorAdd sample used for validation steps)
+github.com/
+├── NVIDIA/
+│ ├── gpu-operator (CRD and controller logic that implements the reconciliation)
+│ ├── k8s-device-plugin (NVIDIA Device Plugin for Kubernetes)
+│ ├── gpu-driver-container (NVIDIA Driver qualified for data center GPUs)
+│ ├── nvidia-container-toolkit (NVIDIA Container Toolkit, runtime for Docker)
+│ ├── dcgm-exporter (NVIDIA DCGM for monitoring and telemetry)
+│ ├── gpu-feature-discovery (NVIDIA GPU Feature Discovery for Kubernetes)
+│ ├── mig-manager (NVIDIA Multi-Instance GPU Manager for Kubernetes)
+│ ├── sandbox-device-plugin (NVIDIA Device Plugin for sandboxed environments)
+│ ├── vgpu-device-manager (NVIDIA vGPU Device Manager for Kubernetes)
+│ ├── kata-manager (NVIDIA Kata Manager for Kubernetes)
+│ ├── samples (CUDA VectorAdd sample used for validation steps)
```
## License
The NVIDIA GPU Operator is open-source and its components are licensed under the permissive Apache 2.0 license.
## Artifacts
-The NVIDIA GPU Operator has three artifacts as part of the product release:
+The NVIDIA GPU Operator has the following artifacts as part of the product release:
1. [Source Code](#source-code)
+1. [Documentation](#documentation)
1. [Container Images](#container-images)
1. [Helm Charts](#helm-charts)
@@ -50,53 +57,27 @@ The GPU Operator releases follow [calendar versioning](https://calver.org/).
### Source Code
-The NVIDIA GPU Operator is available on two external source code repositories:
-* GitHub: https://github.com/NVIDIA/gpu-operator
-* GitLab: https://gitlab.com/nvidia/kubernetes/gpu-operator
+The NVIDIA GPU Operator source code is available on GitHub at https://github.com/NVIDIA/gpu-operator
-The product page of the GPU Operator is available on NVIDIA’s official repository on GitHub. GitHub is where we interact primarily with users for issues related to the operator. GitHub is a mirror of the source code repository on GitLab - no development happens on GitHub.
+### Documentation
-GitLab is where the GPU Operator is actively developed - we leverage GitLab’s CI/CD infrastructure for build, test, package and release of the Operator. GitLab is where we expect users and partners to contribute patches (“Merge Requests” or “MRs”) against the source code repository. MRs do not require explicit contributor license agreements (CLA), but we expect contributors to sign their work.
+The official NVIDIA GPU Operator documentation is available at https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html
### Container Images
-Releases of the GPU Operator include container images that are currently available on NVIDIA’s Docker Hub [repository](https://hub.docker.com/u/nvidia). In the future, the operator will be available on [NVIDIA NGC Catalog](https://ngc.nvidia.com/).
-
-The following are the container images (and tag format) that are released:
-```
-├── nvidia/
-│ ├── gpu-operator ()
-│ ├── k8s-device-plugin ()
-│ ├── driver ()
-│ ├── container-toolkit ()
-│ ├── dcgm-exporter ()
-│ ├── gpu-feature-discovery ()
-│ ├── mig-manager ()
-│ ├── samples ()
-```
+Releases of the GPU Operator include container images that are available on the [NVIDIA NGC Catalog](https://ngc.nvidia.com/).
### Helm Charts
To simplify the deployment, the Operator can be installed using a Helm chart (note only Helm v3 is supported). The documentation for helm installation
can be viewed [here](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/getting-started.html#install-helm).
-Continuous (‘nightly’) releases of the operator are available. Release milestones are available under ‘stable’.
-```
-├── nightly/index.yaml
-├── stable/index.yaml (default when installing the operator)
-```
+
## Contributions
NVIDIA is willing to work with partners for adding platform support for the GPU Operator. The GPU Operator is open-source and permissively licensed under the Apache 2.0 license with only minimal requirements for source code [contributions](#signing).
-To get started with building the GPU Operator, follow these steps:
-
-```shell
-$ git clone https://gitlab.com/nvidia/kubernetes/gpu-operator.git
-$ cd gpu-operator
-$ make .build-image
-```
-We also use a CI infrastructure on AWS for nightly and per-change testing on the GPU Operator. This infrastructure is available here: https://gitlab.com/nvidia/container-infrastructure/aws-kube-ci
+To request a feature, report a bug, or ask a question, open an issue at https://github.com/NVIDIA/gpu-operator/issues
-To ensure that the GPU Operator releases can be effectively validated on new platforms, it would be ideal for contributions to make available CI infrastructure (e.g. runners) and associated changes to the CI scripts.
+To contribute to the project, file a Pull Request at https://github.com/NVIDIA/gpu-operator/pulls. Contributions do not require explicit contributor license agreements (CLA), but we expect contributors to sign their work.
## Signing your work
diff --git a/Makefile b/Makefile
index 7743795bb..ee7eaabcf 100644
--- a/Makefile
+++ b/Makefile
@@ -25,23 +25,20 @@ endif
include $(CURDIR)/versions.mk
MODULE := github.com/NVIDIA/gpu-operator
-CUDA_IMAGE ?= nvcr.io/nvidia/cuda
BUILDER_IMAGE ?= golang:$(GOLANG_VERSION)
-DIST ?= ubi8
ifeq ($(IMAGE_NAME),)
REGISTRY ?= nvcr.io/nvidia/cloud-native
IMAGE_NAME := $(REGISTRY)/gpu-operator
endif
-IMAGE_VERSION := $(VERSION)
-IMAGE_TAG ?= $(IMAGE_VERSION)-$(DIST)
+IMAGE_TAG ?= $(VERSION)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
BUILDIMAGE ?= $(IMAGE_NAME):$(IMAGE_TAG)-build
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
OUT_IMAGE_VERSION ?= $(VERSION)
-OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
+OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
# CHANNELS define the bundle channels used in the bundle.
@@ -76,16 +73,14 @@ endif
all: gpu-operator
-# Run tests
-ENVTEST_ASSETS_DIR=$(shell pwd)/testbin
-test: generate check manifests
- mkdir -p ${ENVTEST_ASSETS_DIR}
- test -f ${ENVTEST_ASSETS_DIR}/setup-envtest.sh || curl -sSLo ${ENVTEST_ASSETS_DIR}/setup-envtest.sh https://raw.githubusercontent.com/kubernetes-sigs/controller-runtime/v0.7.0/hack/setup-envtest.sh
- source ${ENVTEST_ASSETS_DIR}/setup-envtest.sh; fetch_envtest_tools $(ENVTEST_ASSETS_DIR); setup_envtest_env $(ENVTEST_ASSETS_DIR); go test ./... -coverprofile cover.out
-
GOOS ?= linux
VERSION_PKG = github.com/NVIDIA/gpu-operator/internal/info
+PWD = $(shell pwd)
+CLIENT_GEN = $(PROJECT_DIR)/bin/client-gen
+CONTROLLER_GEN = $(PROJECT_DIR)/bin/controller-gen
+KUSTOMIZE = $(PROJECT_DIR)/bin/kustomize
+
# Build gpu-operator binary
gpu-operator:
CGO_ENABLED=0 GOOS=$(GOOS) \
@@ -96,15 +91,15 @@ run: generate check manifests
go run ./cmd/gpu-operator/...
# Install CRDs into a cluster
-install: manifests kustomize
+install: manifests install-tools
$(KUSTOMIZE) build config/crd | kubectl apply -f -
# Uninstall CRDs from a cluster
-uninstall: manifests kustomize
+uninstall: manifests install-tools
$(KUSTOMIZE) build config/crd | kubectl delete -f -
# Deploy gpu-operator in the configured Kubernetes cluster in ~/.kube/config
-deploy: manifests generate-env kustomize
+deploy: manifests generate-env install-tools
cd config/manager && $(KUSTOMIZE) edit set image gpu-operator=${IMAGE}
$(KUSTOMIZE) build config/default | kubectl apply -f -
@@ -116,26 +111,25 @@ undeploy:
$(KUSTOMIZE) build config/default | kubectl delete -f -
# Generate manifests e.g. CRD, RBAC etc.
-manifests: controller-gen
+manifests: install-tools
+ @echo "- Generating CRDs from the codebase"
$(CONTROLLER_GEN) rbac:roleName=gpu-operator-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases
# Generate code
-generate: controller-gen
+generate: install-tools
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
-# Download controller-gen locally if necessary
-CONTROLLER_GEN = $(shell pwd)/bin/controller-gen
-controller-gen:
- @GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on $(GO_CMD) install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.14.0
-
-# Download kustomize locally if necessary
-KUSTOMIZE = $(shell pwd)/bin/kustomize
-kustomize:
- @GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on $(GO_CMD) install sigs.k8s.io/kustomize/kustomize/v4@v5.1.1
+generate-clientset: install-tools
+ $(CLIENT_GEN) --go-header-file=$(CURDIR)/hack/boilerplate.go.txt \
+ --clientset-name "versioned" \
+ --output-dir $(CURDIR)/api \
+ --output-pkg $(MODULE)/api \
+ --input-base $(CURDIR)/api \
+ --input nvidia/v1,nvidia/v1alpha1
# Generate bundle manifests and metadata, then validate generated files.
.PHONY: bundle
-bundle: manifests kustomize
+bundle: manifests install-tools
operator-sdk generate kustomize manifests -q
cd config/manager && $(KUSTOMIZE) edit set image gpu-operator=$(IMAGE)
$(KUSTOMIZE) build config/manifests | operator-sdk generate bundle -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS)
@@ -158,7 +152,7 @@ push-bundle-image: build-bundle-image
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
-CHECK_TARGETS := lint license-check validate-modules
+CHECK_TARGETS := lint license-check validate-modules validate-generated-assets
MAKE_TARGETS := build check coverage cmds $(CMD_TARGETS) $(CHECK_TARGETS)
DOCKER_TARGETS := $(patsubst %,docker-%, $(MAKE_TARGETS))
.PHONY: $(MAKE_TARGETS) $(DOCKER_TARGETS)
@@ -226,6 +220,11 @@ $(CMD_TARGETS): cmd-%:
build:
go build ./...
+sync-crds:
+ @echo "- Syncing CRDs into Helm and OLM packages..."
+ cp $(PROJECT_DIR)/config/crd/bases/* $(PROJECT_DIR)/deployments/gpu-operator/crds
+ cp $(PROJECT_DIR)/config/crd/bases/* $(PROJECT_DIR)/bundle/manifests
+
validate-modules:
@echo "- Verifying that the dependencies have expected content..."
go mod verify
@@ -244,6 +243,10 @@ validate-helm-values: cmds
sed '/^--/d' | \
./gpuop-cfg validate clusterpolicy --input="-"
+validate-generated-assets: manifests generate generate-clientset sync-crds
+	@echo "- Verifying that the generated code, manifests, and synced CRDs are in-sync..."
+	@git diff --exit-code -- api config deployments/gpu-operator/crds bundle/manifests
+
COVERAGE_FILE := coverage.out
unit-test: build
go list -f {{.Dir}} $(MODULE)/... | grep -v /tests/e2e \
@@ -254,8 +257,8 @@ coverage: unit-test
go tool cover -func=$(COVERAGE_FILE).no-mocks
##### Public rules #####
-DISTRIBUTIONS := ubi8
-DEFAULT_PUSH_TARGET := ubi8
+DISTRIBUTIONS := ubi9
+DEFAULT_PUSH_TARGET := ubi9
PUSH_TARGETS := $(patsubst %,push-%, $(DISTRIBUTIONS))
BUILD_TARGETS := $(patsubst %,build-%, $(DISTRIBUTIONS))
@@ -276,17 +279,6 @@ $(ALL_TARGETS): %:
make -C $(SUBCOMPONENT) $(*)
else
-# For the default push target we also push a short tag equal to the version.
-# We skip this for the development release
-DEVEL_RELEASE_IMAGE_VERSION ?= devel
-ifneq ($(strip $(VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
-push-$(DEFAULT_PUSH_TARGET): push-short
-endif
-
-push-%: DIST = $(*)
-push-short: DIST = $(DEFAULT_PUSH_TARGET)
-
-build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/docker/Dockerfile
$(DISTRIBUTIONS): %: build-%
@@ -296,9 +288,6 @@ $(BUILD_TARGETS): build-%:
$(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
--tag $(IMAGE) \
- --build-arg BASE_DIST="$(DIST)" \
- --build-arg CUDA_IMAGE="$(CUDA_IMAGE)" \
- --build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg VERSION="$(VERSION)" \
--build-arg BUILDER_IMAGE="$(BUILDER_IMAGE)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
@@ -311,3 +300,8 @@ $(BUILD_TARGETS): build-%:
docker-image: OUT_IMAGE ?= $(IMAGE_NAME):$(IMAGE_TAG)
docker-image: ${DEFAULT_PUSH_TARGET}
endif
+
+install-tools:
+ @echo Installing tools from tools.go
+ export GOBIN=$(PROJECT_DIR)/bin && \
+ grep '^\s*_' tools/tools.go | awk '{print $$2}' | xargs -tI % $(GO_CMD) install -mod=readonly -modfile=tools/go.mod %
diff --git a/RELEASE.md b/RELEASE.md
deleted file mode 100644
index 84a0fbf19..000000000
--- a/RELEASE.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# Artifacts
-
-This repository outputs two artifacts:
-- The GPU Operator container.
-- The GPU Operator helm chart.
-
-# Versioning
-
-This repository follows Semantic Versioning 2.0.0
-The artifacts will be versioned as follows:
-- **nightly**: 1.0.0-nightly-shortSHA
- - The version names contain "nightly".
- - Leading number of pre-release version tracked in master.
- - build meta data of SHA hash is appended to version string.
- - May be buggy
- - Features may be removed at any time.
- - The API may change in incompatible ways in a later software release without notice.
- - Recommended for use in short-lived clusters
- - when Docker supports it, we'll use +shortSHA in SemVer 2.0 fashion
-- **alpha**: 1.0.0-alpha.N
- - The version names contain "alpha".
- - May be buggy, enabling features may expose bugs.
- - Features may be removed at any time.
- - The API may change in incompatible ways in a later software release without notice.
- - Recommended for use in short-lived clusters and tech previews
-- **beta**: 1.0.0-rc.N
- - The version names contain "rc".
- - Code is well tested. Using the feature is considered safe.
- - Features will not be dropped.
- - The API may change in incompatible ways but when this happens we will provided instructions for migrating to the next version.
- - Recommended for only non-business-critical uses.
-- **stable**: 1.X.Y
- - The version follows [SemVer 2.0.0](http://semver.org/)
- - Stable versions of features will appear in released software for many subsequent versions.
-
-*Note: Some of the items were copied from Kubernetes' own API versioning policy: [https://kubernetes.io/docs/concepts/overview/kubernetes-api/](https://kubernetes.io/docs/concepts/overview/kubernetes-api/)*
-
-**The GPU Operator helm chart MUST be the same as the GPU Operator container.**
-
-# Nightly Release Process
-
-After every commit that successfully passes all tests, the following actions are performed:
-- The GPU Operator container is persisted on the dockerhub registry (e.g: 1.X.Y-nightly-shortSHA)
-- The GPU Operator helm chart is pushed on the repository's github pages (e.g: 1.X.Y-nightly-shortSHA)
-
-# Release Process
-
-After a commit that successfully passes all tests, a maintainer tags that commit with the release version (e.g: `1.0.0-alpha.1`):
-- The GPU Operator container is persisted on the dockerhub and NGC registry
- - The tag for that container is the commit tag
-- The GPU Operator helm chart is pushed on the repository's github pages and NGC registry
- - The tag for that container is the commit tag
-- The Readme should be updated with the changelog
-- The helm chart values.yaml and Chart.yaml should be updated with the newer version
diff --git a/api/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go
similarity index 98%
rename from api/v1/clusterpolicy_types.go
rename to api/nvidia/v1/clusterpolicy_types.go
index 44d6c95f3..07e424761 100644
--- a/api/v1/clusterpolicy_types.go
+++ b/api/nvidia/v1/clusterpolicy_types.go
@@ -34,6 +34,10 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
+const (
+ ClusterPolicyCRDName = "ClusterPolicy"
+)
+
// ClusterPolicySpec defines the desired state of ClusterPolicy
type ClusterPolicySpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
@@ -88,6 +92,8 @@ type ClusterPolicySpec struct {
KataManager KataManagerSpec `json:"kataManager,omitempty"`
// CCManager component spec
CCManager CCManagerSpec `json:"ccManager,omitempty"`
+ // HostPaths defines various paths on the host needed by GPU Operator components
+ HostPaths HostPathsSpec `json:"hostPaths,omitempty"`
}
// Runtime defines container runtime type
@@ -144,6 +150,20 @@ type OperatorSpec struct {
UseOpenShiftDriverToolkit *bool `json:"use_ocp_driver_toolkit,omitempty"`
}
+// HostPathsSpec defines various paths on the host needed by GPU Operator components
+type HostPathsSpec struct {
+ // RootFS represents the path to the root filesystem of the host.
+ // This is used by components that need to interact with the host filesystem
+ // and as such this must be a chroot-able filesystem.
+ // Examples include the MIG Manager and Toolkit Container which may need to
+ // stop, start, or restart systemd services.
+ RootFS string `json:"rootFS,omitempty"`
+
+ // DriverInstallDir represents the root at which driver files including libraries,
+ // config files, and executables can be found.
+ DriverInstallDir string `json:"driverInstallDir,omitempty"`
+}
+
// EnvVar represents an environment variable present in a Container.
type EnvVar struct {
// Name of the environment variable.
@@ -734,6 +754,11 @@ type DevicePluginSpec struct {
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Configuration for the NVIDIA Device Plugin via the ConfigMap"
Config *DevicePluginConfig `json:"config,omitempty"`
+
+ // Optional: MPS related configuration for the NVIDIA Device Plugin
+ // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
+ // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="MPS related configuration for the NVIDIA Device Plugin"
+ MPS *MPSConfig `json:"mps,omitempty"`
}
// DevicePluginConfig defines ConfigMap name for NVIDIA Device Plugin config
@@ -752,6 +777,17 @@ type DevicePluginConfig struct {
Default string `json:"default,omitempty"`
}
+// MPSConfig defines MPS related configuration for the NVIDIA Device Plugin
+type MPSConfig struct {
+ // Root defines the MPS root path on the host
+ // +kubebuilder:validation:Optional
+ // +kubebuilder:default=/run/nvidia/mps
+ // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
+ // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="MPS root path on the host"
+ // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:text"
+ Root string `json:"root,omitempty"`
+}
+
// SandboxDevicePluginSpec defines the properties for the NVIDIA Sandbox Device Plugin deployment
type SandboxDevicePluginSpec struct {
// Enabled indicates if deployment of NVIDIA Sandbox Device Plugin through operator is enabled
@@ -964,7 +1000,7 @@ type DCGMSpec struct {
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:advanced,urn:alm:descriptor:com.tectonic.ui:text"
Env []EnvVar `json:"env,omitempty"`
- // HostPort represents host port that needs to be bound for DCGM engine (Default: 5555)
+ // Deprecated: HostPort represents host port that needs to be bound for DCGM engine (Default: 5555)
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Host port to bind for DCGM engine"
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:number"
HostPort int32 `json:"hostPort,omitempty"`
@@ -1643,6 +1679,8 @@ type ClusterPolicyStatus struct {
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
+// +genclient
+// +genclient:nonNamespaced
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:scope=Cluster
diff --git a/api/nvidia/v1/groupversion_info.go b/api/nvidia/v1/groupversion_info.go
new file mode 100644
index 000000000..84e81998c
--- /dev/null
+++ b/api/nvidia/v1/groupversion_info.go
@@ -0,0 +1,36 @@
+/*
+Copyright 2021.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package v1 contains API Schema definitions for the clusterpolicy v1 API group
+// +kubebuilder:object:generate=true
+// +groupName=nvidia.com
+package v1
+
+import (
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ "sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+ // SchemeGroupVersion is group version used to register these objects
+ SchemeGroupVersion = schema.GroupVersion{Group: "nvidia.com", Version: "v1"}
+
+ // SchemeBuilder is used to add go types to the GroupVersionKind scheme
+ SchemeBuilder = &scheme.Builder{GroupVersion: SchemeGroupVersion}
+
+ // AddToScheme adds the types in this group-version to the given scheme.
+ AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go
new file mode 100644
index 000000000..6d876f675
--- /dev/null
+++ b/api/nvidia/v1/zz_generated.deepcopy.go
@@ -0,0 +1,1627 @@
+//go:build !ignore_autogenerated
+
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by controller-gen. DO NOT EDIT.
+
+package v1
+
+import (
+ "github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config"
+ "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1"
+ monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ runtime "k8s.io/apimachinery/pkg/runtime"
+)
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CCManagerSpec) DeepCopyInto(out *CCManagerSpec) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+ if in.ImagePullSecrets != nil {
+ in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ if in.Resources != nil {
+ in, out := &in.Resources, &out.Resources
+ *out = new(ResourceRequirements)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.Args != nil {
+ in, out := &in.Args, &out.Args
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ if in.Env != nil {
+ in, out := &in.Env, &out.Env
+ *out = make([]EnvVar, len(*in))
+ copy(*out, *in)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CCManagerSpec.
+func (in *CCManagerSpec) DeepCopy() *CCManagerSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(CCManagerSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CDIConfigSpec) DeepCopyInto(out *CDIConfigSpec) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+ if in.Default != nil {
+ in, out := &in.Default, &out.Default
+ *out = new(bool)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CDIConfigSpec.
+func (in *CDIConfigSpec) DeepCopy() *CDIConfigSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(CDIConfigSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CUDAValidatorSpec) DeepCopyInto(out *CUDAValidatorSpec) {
+ *out = *in
+ if in.Env != nil {
+ in, out := &in.Env, &out.Env
+ *out = make([]EnvVar, len(*in))
+ copy(*out, *in)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CUDAValidatorSpec.
+func (in *CUDAValidatorSpec) DeepCopy() *CUDAValidatorSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(CUDAValidatorSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterPolicy) DeepCopyInto(out *ClusterPolicy) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicy.
+func (in *ClusterPolicy) DeepCopy() *ClusterPolicy {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterPolicy)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *ClusterPolicy) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterPolicyList) DeepCopyInto(out *ClusterPolicyList) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ListMeta.DeepCopyInto(&out.ListMeta)
+ if in.Items != nil {
+ in, out := &in.Items, &out.Items
+ *out = make([]ClusterPolicy, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicyList.
+func (in *ClusterPolicyList) DeepCopy() *ClusterPolicyList {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterPolicyList)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *ClusterPolicyList) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterPolicySpec) DeepCopyInto(out *ClusterPolicySpec) {
+ *out = *in
+ in.Operator.DeepCopyInto(&out.Operator)
+ in.Daemonsets.DeepCopyInto(&out.Daemonsets)
+ in.Driver.DeepCopyInto(&out.Driver)
+ in.Toolkit.DeepCopyInto(&out.Toolkit)
+ in.DevicePlugin.DeepCopyInto(&out.DevicePlugin)
+ in.DCGMExporter.DeepCopyInto(&out.DCGMExporter)
+ in.DCGM.DeepCopyInto(&out.DCGM)
+ in.NodeStatusExporter.DeepCopyInto(&out.NodeStatusExporter)
+ in.GPUFeatureDiscovery.DeepCopyInto(&out.GPUFeatureDiscovery)
+ out.MIG = in.MIG
+ in.MIGManager.DeepCopyInto(&out.MIGManager)
+ in.PSP.DeepCopyInto(&out.PSP)
+ in.PSA.DeepCopyInto(&out.PSA)
+ in.Validator.DeepCopyInto(&out.Validator)
+ if in.GPUDirectStorage != nil {
+ in, out := &in.GPUDirectStorage, &out.GPUDirectStorage
+ *out = new(GPUDirectStorageSpec)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.GDRCopy != nil {
+ in, out := &in.GDRCopy, &out.GDRCopy
+ *out = new(GDRCopySpec)
+ (*in).DeepCopyInto(*out)
+ }
+ in.SandboxWorkloads.DeepCopyInto(&out.SandboxWorkloads)
+ in.VFIOManager.DeepCopyInto(&out.VFIOManager)
+ in.SandboxDevicePlugin.DeepCopyInto(&out.SandboxDevicePlugin)
+ in.VGPUManager.DeepCopyInto(&out.VGPUManager)
+ in.VGPUDeviceManager.DeepCopyInto(&out.VGPUDeviceManager)
+ in.CDI.DeepCopyInto(&out.CDI)
+ in.KataManager.DeepCopyInto(&out.KataManager)
+ in.CCManager.DeepCopyInto(&out.CCManager)
+ out.HostPaths = in.HostPaths
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicySpec.
+func (in *ClusterPolicySpec) DeepCopy() *ClusterPolicySpec {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterPolicySpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ClusterPolicyStatus) DeepCopyInto(out *ClusterPolicyStatus) {
+ *out = *in
+ if in.Conditions != nil {
+ in, out := &in.Conditions, &out.Conditions
+ *out = make([]metav1.Condition, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicyStatus.
+func (in *ClusterPolicyStatus) DeepCopy() *ClusterPolicyStatus {
+ if in == nil {
+ return nil
+ }
+ out := new(ClusterPolicyStatus)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ContainerProbeSpec) DeepCopyInto(out *ContainerProbeSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerProbeSpec.
+func (in *ContainerProbeSpec) DeepCopy() *ContainerProbeSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(ContainerProbeSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DCGMExporterMetricsConfig) DeepCopyInto(out *DCGMExporterMetricsConfig) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMExporterMetricsConfig.
+func (in *DCGMExporterMetricsConfig) DeepCopy() *DCGMExporterMetricsConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(DCGMExporterMetricsConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DCGMExporterServiceMonitorConfig) DeepCopyInto(out *DCGMExporterServiceMonitorConfig) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+ if in.HonorLabels != nil {
+ in, out := &in.HonorLabels, &out.HonorLabels
+ *out = new(bool)
+ **out = **in
+ }
+ if in.AdditionalLabels != nil {
+ in, out := &in.AdditionalLabels, &out.AdditionalLabels
+ *out = make(map[string]string, len(*in))
+ for key, val := range *in {
+ (*out)[key] = val
+ }
+ }
+ if in.Relabelings != nil {
+ in, out := &in.Relabelings, &out.Relabelings
+ *out = make([]*monitoringv1.RelabelConfig, len(*in))
+ for i := range *in {
+ if (*in)[i] != nil {
+ in, out := &(*in)[i], &(*out)[i]
+ *out = new(monitoringv1.RelabelConfig)
+ (*in).DeepCopyInto(*out)
+ }
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMExporterServiceMonitorConfig.
+func (in *DCGMExporterServiceMonitorConfig) DeepCopy() *DCGMExporterServiceMonitorConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(DCGMExporterServiceMonitorConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DCGMExporterSpec) DeepCopyInto(out *DCGMExporterSpec) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+ if in.ImagePullSecrets != nil {
+ in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ if in.Resources != nil {
+ in, out := &in.Resources, &out.Resources
+ *out = new(ResourceRequirements)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.Args != nil {
+ in, out := &in.Args, &out.Args
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ if in.Env != nil {
+ in, out := &in.Env, &out.Env
+ *out = make([]EnvVar, len(*in))
+ copy(*out, *in)
+ }
+ if in.MetricsConfig != nil {
+ in, out := &in.MetricsConfig, &out.MetricsConfig
+ *out = new(DCGMExporterMetricsConfig)
+ **out = **in
+ }
+ if in.ServiceMonitor != nil {
+ in, out := &in.ServiceMonitor, &out.ServiceMonitor
+ *out = new(DCGMExporterServiceMonitorConfig)
+ (*in).DeepCopyInto(*out)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMExporterSpec.
+func (in *DCGMExporterSpec) DeepCopy() *DCGMExporterSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(DCGMExporterSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DCGMSpec) DeepCopyInto(out *DCGMSpec) {
+ *out = *in
+ if in.Enabled != nil {
+ in, out := &in.Enabled, &out.Enabled
+ *out = new(bool)
+ **out = **in
+ }
+ if in.ImagePullSecrets != nil {
+ in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ if in.Resources != nil {
+ in, out := &in.Resources, &out.Resources
+ *out = new(ResourceRequirements)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.Args != nil {
+ in, out := &in.Args, &out.Args
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ if in.Env != nil {
+ in, out := &in.Env, &out.Env
+ *out = make([]EnvVar, len(*in))
+ copy(*out, *in)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMSpec.
+func (in *DCGMSpec) DeepCopy() *DCGMSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(DCGMSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil map, slice and pointer field handled below its own storage. in must be non-nil.
+func (in *DaemonsetsSpec) DeepCopyInto(out *DaemonsetsSpec) {
+	*out = *in
+	// Labels: fresh map with every entry copied over.
+	if in.Labels != nil {
+		out.Labels = make(map[string]string, len(in.Labels))
+		for k, v := range in.Labels {
+			out.Labels[k] = v
+		}
+	}
+	// Annotations: fresh map with every entry copied over.
+	if in.Annotations != nil {
+		out.Annotations = make(map[string]string, len(in.Annotations))
+		for k, v := range in.Annotations {
+			out.Annotations[k] = v
+		}
+	}
+	// Tolerations: fresh slice with each element deep-copied in place.
+	if in.Tolerations != nil {
+		out.Tolerations = make([]corev1.Toleration, len(in.Tolerations))
+		for idx := range in.Tolerations {
+			in.Tolerations[idx].DeepCopyInto(&out.Tolerations[idx])
+		}
+	}
+	// RollingUpdate: fresh RollingUpdateSpec holding a copy of the pointed-to value.
+	if in.RollingUpdate != nil {
+		out.RollingUpdate = new(RollingUpdateSpec)
+		*out.RollingUpdate = *in.RollingUpdate
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DaemonsetsSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DaemonsetsSpec) DeepCopy() *DaemonsetsSpec {
+	var clone *DaemonsetsSpec
+	if in != nil {
+		clone = new(DaemonsetsSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *DevicePluginConfig) DeepCopyInto(out *DevicePluginConfig) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DevicePluginConfig filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DevicePluginConfig) DeepCopy() *DevicePluginConfig {
+	var clone *DevicePluginConfig
+	if in != nil {
+		clone = new(DevicePluginConfig)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *DevicePluginSpec) DeepCopyInto(out *DevicePluginSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+	// Config: fresh DevicePluginConfig holding a copy of the pointed-to value.
+	if in.Config != nil {
+		out.Config = new(DevicePluginConfig)
+		*out.Config = *in.Config
+	}
+	// MPS: fresh MPSConfig holding a copy of the pointed-to value.
+	if in.MPS != nil {
+		out.MPS = new(MPSConfig)
+		*out.MPS = *in.MPS
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DevicePluginSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DevicePluginSpec) DeepCopy() *DevicePluginSpec {
+	var clone *DevicePluginSpec
+	if in != nil {
+		clone = new(DevicePluginSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *DriverCertConfigSpec) DeepCopyInto(out *DriverCertConfigSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DriverCertConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DriverCertConfigSpec) DeepCopy() *DriverCertConfigSpec {
+	var clone *DriverCertConfigSpec
+	if in != nil {
+		clone = new(DriverCertConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the NLSEnabled pointer its own storage when set. in must be non-nil.
+func (in *DriverLicensingConfigSpec) DeepCopyInto(out *DriverLicensingConfigSpec) {
+	*out = *in
+	// NLSEnabled: fresh bool holding a copy of the pointed-to value.
+	if in.NLSEnabled != nil {
+		out.NLSEnabled = new(bool)
+		*out.NLSEnabled = *in.NLSEnabled
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DriverLicensingConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DriverLicensingConfigSpec) DeepCopy() *DriverLicensingConfigSpec {
+	var clone *DriverLicensingConfigSpec
+	if in != nil {
+		clone = new(DriverLicensingConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives each non-nil slice field handled below its own storage. in must be non-nil.
+func (in *DriverManagerSpec) DeepCopyInto(out *DriverManagerSpec) {
+	*out = *in
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DriverManagerSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DriverManagerSpec) DeepCopy() *DriverManagerSpec {
+	var clone *DriverManagerSpec
+	if in != nil {
+		clone = new(DriverManagerSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *DriverRepoConfigSpec) DeepCopyInto(out *DriverRepoConfigSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DriverRepoConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DriverRepoConfigSpec) DeepCopy() *DriverRepoConfigSpec {
+	var clone *DriverRepoConfigSpec
+	if in != nil {
+		clone = new(DriverRepoConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out, gives every non-nil reference field handled below its own storage, and delegates the embedded Manager value to its own DeepCopyInto. in must be non-nil.
+func (in *DriverSpec) DeepCopyInto(out *DriverSpec) {
+	*out = *in
+	// UseNvidiaDriverCRD: fresh bool holding a copy of the pointed-to value.
+	if in.UseNvidiaDriverCRD != nil {
+		out.UseNvidiaDriverCRD = new(bool)
+		*out.UseNvidiaDriverCRD = *in.UseNvidiaDriverCRD
+	}
+	// UsePrecompiled: fresh bool holding a copy of the pointed-to value.
+	if in.UsePrecompiled != nil {
+		out.UsePrecompiled = new(bool)
+		*out.UsePrecompiled = *in.UsePrecompiled
+	}
+	// UseOpenKernelModules: fresh bool holding a copy of the pointed-to value.
+	if in.UseOpenKernelModules != nil {
+		out.UseOpenKernelModules = new(bool)
+		*out.UseOpenKernelModules = *in.UseOpenKernelModules
+	}
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// StartupProbe: fresh ContainerProbeSpec holding a copy of the pointed-to value.
+	if in.StartupProbe != nil {
+		out.StartupProbe = new(ContainerProbeSpec)
+		*out.StartupProbe = *in.StartupProbe
+	}
+	// LivenessProbe: fresh ContainerProbeSpec holding a copy of the pointed-to value.
+	if in.LivenessProbe != nil {
+		out.LivenessProbe = new(ContainerProbeSpec)
+		*out.LivenessProbe = *in.LivenessProbe
+	}
+	// ReadinessProbe: fresh ContainerProbeSpec holding a copy of the pointed-to value.
+	if in.ReadinessProbe != nil {
+		out.ReadinessProbe = new(ContainerProbeSpec)
+		*out.ReadinessProbe = *in.ReadinessProbe
+	}
+	// GPUDirectRDMA: fresh GPUDirectRDMASpec populated through its own DeepCopyInto.
+	if in.GPUDirectRDMA != nil {
+		out.GPUDirectRDMA = new(GPUDirectRDMASpec)
+		in.GPUDirectRDMA.DeepCopyInto(out.GPUDirectRDMA)
+	}
+	// UpgradePolicy: fresh DriverUpgradePolicySpec populated through its own DeepCopyInto.
+	if in.UpgradePolicy != nil {
+		out.UpgradePolicy = new(v1alpha1.DriverUpgradePolicySpec)
+		in.UpgradePolicy.DeepCopyInto(out.UpgradePolicy)
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	in.Manager.DeepCopyInto(&out.Manager)
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+	// RepoConfig: fresh DriverRepoConfigSpec holding a copy of the pointed-to value.
+	if in.RepoConfig != nil {
+		out.RepoConfig = new(DriverRepoConfigSpec)
+		*out.RepoConfig = *in.RepoConfig
+	}
+	// CertConfig: fresh DriverCertConfigSpec holding a copy of the pointed-to value.
+	if in.CertConfig != nil {
+		out.CertConfig = new(DriverCertConfigSpec)
+		*out.CertConfig = *in.CertConfig
+	}
+	// LicensingConfig: fresh DriverLicensingConfigSpec populated through its own DeepCopyInto.
+	if in.LicensingConfig != nil {
+		out.LicensingConfig = new(DriverLicensingConfigSpec)
+		in.LicensingConfig.DeepCopyInto(out.LicensingConfig)
+	}
+	// VirtualTopology: fresh VirtualTopologyConfigSpec holding a copy of the pointed-to value.
+	if in.VirtualTopology != nil {
+		out.VirtualTopology = new(VirtualTopologyConfigSpec)
+		*out.VirtualTopology = *in.VirtualTopology
+	}
+	// KernelModuleConfig: fresh KernelModuleConfigSpec holding a copy of the pointed-to value.
+	if in.KernelModuleConfig != nil {
+		out.KernelModuleConfig = new(KernelModuleConfigSpec)
+		*out.KernelModuleConfig = *in.KernelModuleConfig
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DriverSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DriverSpec) DeepCopy() *DriverSpec {
+	var clone *DriverSpec
+	if in != nil {
+		clone = new(DriverSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Env slice its own storage when set. in must be non-nil.
+func (in *DriverValidatorSpec) DeepCopyInto(out *DriverValidatorSpec) {
+	*out = *in
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated DriverValidatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *DriverValidatorSpec) DeepCopy() *DriverValidatorSpec {
+	var clone *DriverValidatorSpec
+	if in != nil {
+		clone = new(DriverValidatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *EnvVar) DeepCopyInto(out *EnvVar) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated EnvVar filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *EnvVar) DeepCopy() *EnvVar {
+	var clone *EnvVar
+	if in != nil {
+		clone = new(EnvVar)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *GDRCopySpec) DeepCopyInto(out *GDRCopySpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated GDRCopySpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *GDRCopySpec) DeepCopy() *GDRCopySpec {
+	var clone *GDRCopySpec
+	if in != nil {
+		clone = new(GDRCopySpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives each non-nil bool pointer handled below its own storage. in must be non-nil.
+func (in *GPUDirectRDMASpec) DeepCopyInto(out *GPUDirectRDMASpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// UseHostMOFED: fresh bool holding a copy of the pointed-to value.
+	if in.UseHostMOFED != nil {
+		out.UseHostMOFED = new(bool)
+		*out.UseHostMOFED = *in.UseHostMOFED
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated GPUDirectRDMASpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *GPUDirectRDMASpec) DeepCopy() *GPUDirectRDMASpec {
+	var clone *GPUDirectRDMASpec
+	if in != nil {
+		clone = new(GPUDirectRDMASpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *GPUDirectStorageSpec) DeepCopyInto(out *GPUDirectStorageSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated GPUDirectStorageSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *GPUDirectStorageSpec) DeepCopy() *GPUDirectStorageSpec {
+	var clone *GPUDirectStorageSpec
+	if in != nil {
+		clone = new(GPUDirectStorageSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *GPUFeatureDiscoverySpec) DeepCopyInto(out *GPUFeatureDiscoverySpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated GPUFeatureDiscoverySpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *GPUFeatureDiscoverySpec) DeepCopy() *GPUFeatureDiscoverySpec {
+	var clone *GPUFeatureDiscoverySpec
+	if in != nil {
+		clone = new(GPUFeatureDiscoverySpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *HostPathsSpec) DeepCopyInto(out *HostPathsSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated HostPathsSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *HostPathsSpec) DeepCopy() *HostPathsSpec {
+	var clone *HostPathsSpec
+	if in != nil {
+		clone = new(HostPathsSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the ImagePullSecrets slice its own storage when set. in must be non-nil.
+func (in *InitContainerSpec) DeepCopyInto(out *InitContainerSpec) {
+	*out = *in
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated InitContainerSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *InitContainerSpec) DeepCopy() *InitContainerSpec {
+	var clone *InitContainerSpec
+	if in != nil {
+		clone = new(InitContainerSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *KataManagerSpec) DeepCopyInto(out *KataManagerSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// Config: fresh config.Config populated through its own DeepCopyInto.
+	if in.Config != nil {
+		out.Config = new(config.Config)
+		in.Config.DeepCopyInto(out.Config)
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated KataManagerSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *KataManagerSpec) DeepCopy() *KataManagerSpec {
+	var clone *KataManagerSpec
+	if in != nil {
+		clone = new(KataManagerSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *KernelModuleConfigSpec) DeepCopyInto(out *KernelModuleConfigSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated KernelModuleConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *KernelModuleConfigSpec) DeepCopy() *KernelModuleConfigSpec {
+	var clone *KernelModuleConfigSpec
+	if in != nil {
+		clone = new(KernelModuleConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *MIGGPUClientsConfigSpec) DeepCopyInto(out *MIGGPUClientsConfigSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated MIGGPUClientsConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *MIGGPUClientsConfigSpec) DeepCopy() *MIGGPUClientsConfigSpec {
+	var clone *MIGGPUClientsConfigSpec
+	if in != nil {
+		clone = new(MIGGPUClientsConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *MIGManagerSpec) DeepCopyInto(out *MIGManagerSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+	// Config: fresh MIGPartedConfigSpec holding a copy of the pointed-to value.
+	if in.Config != nil {
+		out.Config = new(MIGPartedConfigSpec)
+		*out.Config = *in.Config
+	}
+	// GPUClientsConfig: fresh MIGGPUClientsConfigSpec holding a copy of the pointed-to value.
+	if in.GPUClientsConfig != nil {
+		out.GPUClientsConfig = new(MIGGPUClientsConfigSpec)
+		*out.GPUClientsConfig = *in.GPUClientsConfig
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated MIGManagerSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *MIGManagerSpec) DeepCopy() *MIGManagerSpec {
+	var clone *MIGManagerSpec
+	if in != nil {
+		clone = new(MIGManagerSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *MIGPartedConfigSpec) DeepCopyInto(out *MIGPartedConfigSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated MIGPartedConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *MIGPartedConfigSpec) DeepCopy() *MIGPartedConfigSpec {
+	var clone *MIGPartedConfigSpec
+	if in != nil {
+		clone = new(MIGPartedConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *MIGSpec) DeepCopyInto(out *MIGSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated MIGSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *MIGSpec) DeepCopy() *MIGSpec {
+	var clone *MIGSpec
+	if in != nil {
+		clone = new(MIGSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *MPSConfig) DeepCopyInto(out *MPSConfig) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated MPSConfig filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *MPSConfig) DeepCopy() *MPSConfig {
+	var clone *MPSConfig
+	if in != nil {
+		clone = new(MPSConfig)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *NodeStatusExporterSpec) DeepCopyInto(out *NodeStatusExporterSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated NodeStatusExporterSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *NodeStatusExporterSpec) DeepCopy() *NodeStatusExporterSpec {
+	var clone *NodeStatusExporterSpec
+	if in != nil {
+		clone = new(NodeStatusExporterSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out, delegates InitContainer to its own DeepCopyInto, and gives every non-nil map and pointer field handled below its own storage. in must be non-nil.
+func (in *OperatorSpec) DeepCopyInto(out *OperatorSpec) {
+	*out = *in
+	in.InitContainer.DeepCopyInto(&out.InitContainer)
+	// Labels: fresh map with every entry copied over.
+	if in.Labels != nil {
+		out.Labels = make(map[string]string, len(in.Labels))
+		for k, v := range in.Labels {
+			out.Labels[k] = v
+		}
+	}
+	// Annotations: fresh map with every entry copied over.
+	if in.Annotations != nil {
+		out.Annotations = make(map[string]string, len(in.Annotations))
+		for k, v := range in.Annotations {
+			out.Annotations[k] = v
+		}
+	}
+	// UseOpenShiftDriverToolkit: fresh bool holding a copy of the pointed-to value.
+	if in.UseOpenShiftDriverToolkit != nil {
+		out.UseOpenShiftDriverToolkit = new(bool)
+		*out.UseOpenShiftDriverToolkit = *in.UseOpenShiftDriverToolkit
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated OperatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *OperatorSpec) DeepCopy() *OperatorSpec {
+	var clone *OperatorSpec
+	if in != nil {
+		clone = new(OperatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Enabled pointer its own storage when set. in must be non-nil.
+func (in *PSASpec) DeepCopyInto(out *PSASpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated PSASpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *PSASpec) DeepCopy() *PSASpec {
+	var clone *PSASpec
+	if in != nil {
+		clone = new(PSASpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Enabled pointer its own storage when set. in must be non-nil.
+func (in *PSPSpec) DeepCopyInto(out *PSPSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated PSPSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *PSPSpec) DeepCopy() *PSPSpec {
+	var clone *PSPSpec
+	if in != nil {
+		clone = new(PSPSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Env slice its own storage when set. in must be non-nil.
+func (in *PluginValidatorSpec) DeepCopyInto(out *PluginValidatorSpec) {
+	*out = *in
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated PluginValidatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *PluginValidatorSpec) DeepCopy() *PluginValidatorSpec {
+	var clone *PluginValidatorSpec
+	if in != nil {
+		clone = new(PluginValidatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and rebuilds the Limits and Requests maps, duplicating each value with DeepCopy. in must be non-nil.
+func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements) {
+	*out = *in
+	// Limits: fresh map with each value duplicated via DeepCopy.
+	if in.Limits != nil {
+		out.Limits = make(corev1.ResourceList, len(in.Limits))
+		for name, qty := range in.Limits {
+			out.Limits[name] = qty.DeepCopy()
+		}
+	}
+	// Requests: fresh map with each value duplicated via DeepCopy.
+	if in.Requests != nil {
+		out.Requests = make(corev1.ResourceList, len(in.Requests))
+		for name, qty := range in.Requests {
+			out.Requests[name] = qty.DeepCopy()
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated ResourceRequirements filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *ResourceRequirements) DeepCopy() *ResourceRequirements {
+	var clone *ResourceRequirements
+	if in != nil {
+		clone = new(ResourceRequirements)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *RollingUpdateSpec) DeepCopyInto(out *RollingUpdateSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated RollingUpdateSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *RollingUpdateSpec) DeepCopy() *RollingUpdateSpec {
+	var clone *RollingUpdateSpec
+	if in != nil {
+		clone = new(RollingUpdateSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *SandboxDevicePluginSpec) DeepCopyInto(out *SandboxDevicePluginSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated SandboxDevicePluginSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *SandboxDevicePluginSpec) DeepCopy() *SandboxDevicePluginSpec {
+	var clone *SandboxDevicePluginSpec
+	if in != nil {
+		clone = new(SandboxDevicePluginSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Enabled pointer its own storage when set. in must be non-nil.
+func (in *SandboxWorkloadsSpec) DeepCopyInto(out *SandboxWorkloadsSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated SandboxWorkloadsSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *SandboxWorkloadsSpec) DeepCopy() *SandboxWorkloadsSpec {
+	var clone *SandboxWorkloadsSpec
+	if in != nil {
+		clone = new(SandboxWorkloadsSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *ToolkitSpec) DeepCopyInto(out *ToolkitSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated ToolkitSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *ToolkitSpec) DeepCopy() *ToolkitSpec {
+	var clone *ToolkitSpec
+	if in != nil {
+		clone = new(ToolkitSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Env slice its own storage when set. in must be non-nil.
+func (in *ToolkitValidatorSpec) DeepCopyInto(out *ToolkitValidatorSpec) {
+	*out = *in
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated ToolkitValidatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *ToolkitValidatorSpec) DeepCopy() *ToolkitValidatorSpec {
+	var clone *ToolkitValidatorSpec
+	if in != nil {
+		clone = new(ToolkitValidatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out, gives every non-nil reference field handled below its own storage, and delegates DriverManager to its own DeepCopyInto. in must be non-nil.
+func (in *VFIOManagerSpec) DeepCopyInto(out *VFIOManagerSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+	in.DriverManager.DeepCopyInto(&out.DriverManager)
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VFIOManagerSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VFIOManagerSpec) DeepCopy() *VFIOManagerSpec {
+	var clone *VFIOManagerSpec
+	if in != nil {
+		clone = new(VFIOManagerSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Env slice its own storage when set. in must be non-nil.
+func (in *VFIOPCIValidatorSpec) DeepCopyInto(out *VFIOPCIValidatorSpec) {
+	*out = *in
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VFIOPCIValidatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VFIOPCIValidatorSpec) DeepCopy() *VFIOPCIValidatorSpec {
+	var clone *VFIOPCIValidatorSpec
+	if in != nil {
+		clone = new(VFIOPCIValidatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *VGPUDeviceManagerSpec) DeepCopyInto(out *VGPUDeviceManagerSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+	// Config: fresh VGPUDevicesConfigSpec holding a copy of the pointed-to value.
+	if in.Config != nil {
+		out.Config = new(VGPUDevicesConfigSpec)
+		*out.Config = *in.Config
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VGPUDeviceManagerSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VGPUDeviceManagerSpec) DeepCopy() *VGPUDeviceManagerSpec {
+	var clone *VGPUDeviceManagerSpec
+	if in != nil {
+		clone = new(VGPUDeviceManagerSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *VGPUDevicesConfigSpec) DeepCopyInto(out *VGPUDevicesConfigSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VGPUDevicesConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VGPUDevicesConfigSpec) DeepCopy() *VGPUDevicesConfigSpec {
+	var clone *VGPUDevicesConfigSpec
+	if in != nil {
+		clone = new(VGPUDevicesConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Env slice its own storage when set. in must be non-nil.
+func (in *VGPUDevicesValidatorSpec) DeepCopyInto(out *VGPUDevicesValidatorSpec) {
+	*out = *in
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VGPUDevicesValidatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VGPUDevicesValidatorSpec) DeepCopy() *VGPUDevicesValidatorSpec {
+	var clone *VGPUDevicesValidatorSpec
+	if in != nil {
+		clone = new(VGPUDevicesValidatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out, gives every non-nil reference field handled below its own storage, and delegates DriverManager to its own DeepCopyInto. in must be non-nil.
+func (in *VGPUManagerSpec) DeepCopyInto(out *VGPUManagerSpec) {
+	*out = *in
+	// Enabled: fresh bool holding a copy of the pointed-to value.
+	if in.Enabled != nil {
+		out.Enabled = new(bool)
+		*out.Enabled = *in.Enabled
+	}
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+	in.DriverManager.DeepCopyInto(&out.DriverManager)
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VGPUManagerSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VGPUManagerSpec) DeepCopy() *VGPUManagerSpec {
+	var clone *VGPUManagerSpec
+	if in != nil {
+		clone = new(VGPUManagerSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out and gives the Env slice its own storage when set. in must be non-nil.
+func (in *VGPUManagerValidatorSpec) DeepCopyInto(out *VGPUManagerValidatorSpec) {
+	*out = *in
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VGPUManagerValidatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VGPUManagerValidatorSpec) DeepCopy() *VGPUManagerValidatorSpec {
+	var clone *VGPUManagerValidatorSpec
+	if in != nil {
+		clone = new(VGPUManagerValidatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it copies the receiver into out, delegates each nested validator struct to its own DeepCopyInto, and gives every non-nil reference field handled below its own storage. in must be non-nil.
+func (in *ValidatorSpec) DeepCopyInto(out *ValidatorSpec) {
+	*out = *in
+	in.Plugin.DeepCopyInto(&out.Plugin)
+	in.Toolkit.DeepCopyInto(&out.Toolkit)
+	in.Driver.DeepCopyInto(&out.Driver)
+	in.CUDA.DeepCopyInto(&out.CUDA)
+	in.VFIOPCI.DeepCopyInto(&out.VFIOPCI)
+	in.VGPUManager.DeepCopyInto(&out.VGPUManager)
+	in.VGPUDevices.DeepCopyInto(&out.VGPUDevices)
+	// ImagePullSecrets: fresh slice with the elements copied over.
+	if in.ImagePullSecrets != nil {
+		out.ImagePullSecrets = make([]string, len(in.ImagePullSecrets))
+		copy(out.ImagePullSecrets, in.ImagePullSecrets)
+	}
+	// Resources: fresh ResourceRequirements populated through its own DeepCopyInto.
+	if in.Resources != nil {
+		out.Resources = new(ResourceRequirements)
+		in.Resources.DeepCopyInto(out.Resources)
+	}
+	// Args: fresh slice with the elements copied over.
+	if in.Args != nil {
+		out.Args = make([]string, len(in.Args))
+		copy(out.Args, in.Args)
+	}
+	// Env: fresh slice with the elements copied over.
+	if in.Env != nil {
+		out.Env = make([]EnvVar, len(in.Env))
+		copy(out.Env, in.Env)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated ValidatorSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *ValidatorSpec) DeepCopy() *ValidatorSpec {
+	var clone *ValidatorSpec
+	if in != nil {
+		clone = new(ValidatorSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
+
+// DeepCopyInto is an autogenerated deepcopy function: it overwrites *out with a plain value copy of *in, which is the entire copy for this type. in must be non-nil.
+func (in *VirtualTopologyConfigSpec) DeepCopyInto(out *VirtualTopologyConfigSpec) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function: it returns a newly allocated VirtualTopologyConfigSpec filled in by DeepCopyInto, or nil when the receiver is nil.
+func (in *VirtualTopologyConfigSpec) DeepCopy() *VirtualTopologyConfigSpec {
+	var clone *VirtualTopologyConfigSpec
+	if in != nil {
+		clone = new(VirtualTopologyConfigSpec)
+		in.DeepCopyInto(clone)
+	}
+	return clone
+}
diff --git a/api/nvidia/v1alpha1/groupversion_info.go b/api/nvidia/v1alpha1/groupversion_info.go
new file mode 100644
index 000000000..f9e561289
--- /dev/null
+++ b/api/nvidia/v1alpha1/groupversion_info.go
@@ -0,0 +1,36 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Package v1alpha1 contains API Schema definitions for the nvidia v1alpha1 API group
+// +kubebuilder:object:generate=true
+// +groupName=nvidia.com
+package v1alpha1
+
+import (
+ "k8s.io/apimachinery/pkg/runtime/schema"
+ "sigs.k8s.io/controller-runtime/pkg/scheme"
+)
+
+var (
+ // SchemeGroupVersion is the group/version (Group "nvidia.com", Version "v1alpha1") used to register the objects in this package.
+ SchemeGroupVersion = schema.GroupVersion{Group: "nvidia.com", Version: "v1alpha1"}
+
+ // SchemeBuilder is a controller-runtime scheme.Builder bound to SchemeGroupVersion; it is used to add Go types to a scheme under that group/version.
+ SchemeBuilder = &scheme.Builder{GroupVersion: SchemeGroupVersion}
+
+ // AddToScheme adds the types in this group-version to the given scheme (it is SchemeBuilder's AddToScheme method value).
+ AddToScheme = SchemeBuilder.AddToScheme
+)
diff --git a/api/v1alpha1/nvidiadriver_types.go b/api/nvidia/v1alpha1/nvidiadriver_types.go
similarity index 99%
rename from api/v1alpha1/nvidiadriver_types.go
rename to api/nvidia/v1alpha1/nvidiadriver_types.go
index 489c3394f..86bae0b48 100644
--- a/api/v1alpha1/nvidiadriver_types.go
+++ b/api/nvidia/v1alpha1/nvidiadriver_types.go
@@ -462,6 +462,8 @@ type NVIDIADriverStatus struct {
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
+// +genclient
+// +genclient:nonNamespaced
//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:resource:scope=Cluster,shortName={"nvd","nvdriver","nvdrivers"}
diff --git a/api/v1alpha1/nvidiadriver_types_test.go b/api/nvidia/v1alpha1/nvidiadriver_types_test.go
similarity index 100%
rename from api/v1alpha1/nvidiadriver_types_test.go
rename to api/nvidia/v1alpha1/nvidiadriver_types_test.go
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/nvidia/v1alpha1/zz_generated.deepcopy.go
similarity index 100%
rename from api/v1alpha1/zz_generated.deepcopy.go
rename to api/nvidia/v1alpha1/zz_generated.deepcopy.go
diff --git a/api/v1/groupversion_info.go b/api/v1/groupversion_info.go
deleted file mode 100644
index 781d71c53..000000000
--- a/api/v1/groupversion_info.go
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
-Copyright 2021.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-// Package v1 contains API Schema definitions for the clusterpolicy v1 API group
-// +kubebuilder:object:generate=true
-// +groupName=nvidia.com
-package v1
-
-import (
- "k8s.io/apimachinery/pkg/runtime/schema"
- "sigs.k8s.io/controller-runtime/pkg/scheme"
-)
-
-var (
- // GroupVersion is group version used to register these objects
- GroupVersion = schema.GroupVersion{Group: "nvidia.com", Version: "v1"}
-
- // SchemeBuilder is used to add go types to the GroupVersionKind scheme
- SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
-
- // AddToScheme adds the types in this group-version to the given scheme.
- AddToScheme = SchemeBuilder.AddToScheme
-)
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
deleted file mode 100644
index d80b36109..000000000
--- a/api/v1/zz_generated.deepcopy.go
+++ /dev/null
@@ -1,1591 +0,0 @@
-//go:build !ignore_autogenerated
-
-/**
-# Copyright (c) NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-// Code generated by controller-gen. DO NOT EDIT.
-
-package v1
-
-import (
- "github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config"
- "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1"
- monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
- corev1 "k8s.io/api/core/v1"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- runtime "k8s.io/apimachinery/pkg/runtime"
-)
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *CCManagerSpec) DeepCopyInto(out *CCManagerSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CCManagerSpec.
-func (in *CCManagerSpec) DeepCopy() *CCManagerSpec {
- if in == nil {
- return nil
- }
- out := new(CCManagerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *CDIConfigSpec) DeepCopyInto(out *CDIConfigSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.Default != nil {
- in, out := &in.Default, &out.Default
- *out = new(bool)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CDIConfigSpec.
-func (in *CDIConfigSpec) DeepCopy() *CDIConfigSpec {
- if in == nil {
- return nil
- }
- out := new(CDIConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *CUDAValidatorSpec) DeepCopyInto(out *CUDAValidatorSpec) {
- *out = *in
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CUDAValidatorSpec.
-func (in *CUDAValidatorSpec) DeepCopy() *CUDAValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(CUDAValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ClusterPolicy) DeepCopyInto(out *ClusterPolicy) {
- *out = *in
- out.TypeMeta = in.TypeMeta
- in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
- in.Spec.DeepCopyInto(&out.Spec)
- in.Status.DeepCopyInto(&out.Status)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicy.
-func (in *ClusterPolicy) DeepCopy() *ClusterPolicy {
- if in == nil {
- return nil
- }
- out := new(ClusterPolicy)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *ClusterPolicy) DeepCopyObject() runtime.Object {
- if c := in.DeepCopy(); c != nil {
- return c
- }
- return nil
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ClusterPolicyList) DeepCopyInto(out *ClusterPolicyList) {
- *out = *in
- out.TypeMeta = in.TypeMeta
- in.ListMeta.DeepCopyInto(&out.ListMeta)
- if in.Items != nil {
- in, out := &in.Items, &out.Items
- *out = make([]ClusterPolicy, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicyList.
-func (in *ClusterPolicyList) DeepCopy() *ClusterPolicyList {
- if in == nil {
- return nil
- }
- out := new(ClusterPolicyList)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *ClusterPolicyList) DeepCopyObject() runtime.Object {
- if c := in.DeepCopy(); c != nil {
- return c
- }
- return nil
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ClusterPolicySpec) DeepCopyInto(out *ClusterPolicySpec) {
- *out = *in
- in.Operator.DeepCopyInto(&out.Operator)
- in.Daemonsets.DeepCopyInto(&out.Daemonsets)
- in.Driver.DeepCopyInto(&out.Driver)
- in.Toolkit.DeepCopyInto(&out.Toolkit)
- in.DevicePlugin.DeepCopyInto(&out.DevicePlugin)
- in.DCGMExporter.DeepCopyInto(&out.DCGMExporter)
- in.DCGM.DeepCopyInto(&out.DCGM)
- in.NodeStatusExporter.DeepCopyInto(&out.NodeStatusExporter)
- in.GPUFeatureDiscovery.DeepCopyInto(&out.GPUFeatureDiscovery)
- out.MIG = in.MIG
- in.MIGManager.DeepCopyInto(&out.MIGManager)
- in.PSP.DeepCopyInto(&out.PSP)
- in.PSA.DeepCopyInto(&out.PSA)
- in.Validator.DeepCopyInto(&out.Validator)
- if in.GPUDirectStorage != nil {
- in, out := &in.GPUDirectStorage, &out.GPUDirectStorage
- *out = new(GPUDirectStorageSpec)
- (*in).DeepCopyInto(*out)
- }
- if in.GDRCopy != nil {
- in, out := &in.GDRCopy, &out.GDRCopy
- *out = new(GDRCopySpec)
- (*in).DeepCopyInto(*out)
- }
- in.SandboxWorkloads.DeepCopyInto(&out.SandboxWorkloads)
- in.VFIOManager.DeepCopyInto(&out.VFIOManager)
- in.SandboxDevicePlugin.DeepCopyInto(&out.SandboxDevicePlugin)
- in.VGPUManager.DeepCopyInto(&out.VGPUManager)
- in.VGPUDeviceManager.DeepCopyInto(&out.VGPUDeviceManager)
- in.CDI.DeepCopyInto(&out.CDI)
- in.KataManager.DeepCopyInto(&out.KataManager)
- in.CCManager.DeepCopyInto(&out.CCManager)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicySpec.
-func (in *ClusterPolicySpec) DeepCopy() *ClusterPolicySpec {
- if in == nil {
- return nil
- }
- out := new(ClusterPolicySpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ClusterPolicyStatus) DeepCopyInto(out *ClusterPolicyStatus) {
- *out = *in
- if in.Conditions != nil {
- in, out := &in.Conditions, &out.Conditions
- *out = make([]metav1.Condition, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterPolicyStatus.
-func (in *ClusterPolicyStatus) DeepCopy() *ClusterPolicyStatus {
- if in == nil {
- return nil
- }
- out := new(ClusterPolicyStatus)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ContainerProbeSpec) DeepCopyInto(out *ContainerProbeSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerProbeSpec.
-func (in *ContainerProbeSpec) DeepCopy() *ContainerProbeSpec {
- if in == nil {
- return nil
- }
- out := new(ContainerProbeSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DCGMExporterMetricsConfig) DeepCopyInto(out *DCGMExporterMetricsConfig) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMExporterMetricsConfig.
-func (in *DCGMExporterMetricsConfig) DeepCopy() *DCGMExporterMetricsConfig {
- if in == nil {
- return nil
- }
- out := new(DCGMExporterMetricsConfig)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DCGMExporterServiceMonitorConfig) DeepCopyInto(out *DCGMExporterServiceMonitorConfig) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.HonorLabels != nil {
- in, out := &in.HonorLabels, &out.HonorLabels
- *out = new(bool)
- **out = **in
- }
- if in.AdditionalLabels != nil {
- in, out := &in.AdditionalLabels, &out.AdditionalLabels
- *out = make(map[string]string, len(*in))
- for key, val := range *in {
- (*out)[key] = val
- }
- }
- if in.Relabelings != nil {
- in, out := &in.Relabelings, &out.Relabelings
- *out = make([]*monitoringv1.RelabelConfig, len(*in))
- for i := range *in {
- if (*in)[i] != nil {
- in, out := &(*in)[i], &(*out)[i]
- *out = new(monitoringv1.RelabelConfig)
- (*in).DeepCopyInto(*out)
- }
- }
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMExporterServiceMonitorConfig.
-func (in *DCGMExporterServiceMonitorConfig) DeepCopy() *DCGMExporterServiceMonitorConfig {
- if in == nil {
- return nil
- }
- out := new(DCGMExporterServiceMonitorConfig)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DCGMExporterSpec) DeepCopyInto(out *DCGMExporterSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
- if in.MetricsConfig != nil {
- in, out := &in.MetricsConfig, &out.MetricsConfig
- *out = new(DCGMExporterMetricsConfig)
- **out = **in
- }
- if in.ServiceMonitor != nil {
- in, out := &in.ServiceMonitor, &out.ServiceMonitor
- *out = new(DCGMExporterServiceMonitorConfig)
- (*in).DeepCopyInto(*out)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMExporterSpec.
-func (in *DCGMExporterSpec) DeepCopy() *DCGMExporterSpec {
- if in == nil {
- return nil
- }
- out := new(DCGMExporterSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DCGMSpec) DeepCopyInto(out *DCGMSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DCGMSpec.
-func (in *DCGMSpec) DeepCopy() *DCGMSpec {
- if in == nil {
- return nil
- }
- out := new(DCGMSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DaemonsetsSpec) DeepCopyInto(out *DaemonsetsSpec) {
- *out = *in
- if in.Labels != nil {
- in, out := &in.Labels, &out.Labels
- *out = make(map[string]string, len(*in))
- for key, val := range *in {
- (*out)[key] = val
- }
- }
- if in.Annotations != nil {
- in, out := &in.Annotations, &out.Annotations
- *out = make(map[string]string, len(*in))
- for key, val := range *in {
- (*out)[key] = val
- }
- }
- if in.Tolerations != nil {
- in, out := &in.Tolerations, &out.Tolerations
- *out = make([]corev1.Toleration, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
- }
- if in.RollingUpdate != nil {
- in, out := &in.RollingUpdate, &out.RollingUpdate
- *out = new(RollingUpdateSpec)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DaemonsetsSpec.
-func (in *DaemonsetsSpec) DeepCopy() *DaemonsetsSpec {
- if in == nil {
- return nil
- }
- out := new(DaemonsetsSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DevicePluginConfig) DeepCopyInto(out *DevicePluginConfig) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DevicePluginConfig.
-func (in *DevicePluginConfig) DeepCopy() *DevicePluginConfig {
- if in == nil {
- return nil
- }
- out := new(DevicePluginConfig)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DevicePluginSpec) DeepCopyInto(out *DevicePluginSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
- if in.Config != nil {
- in, out := &in.Config, &out.Config
- *out = new(DevicePluginConfig)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DevicePluginSpec.
-func (in *DevicePluginSpec) DeepCopy() *DevicePluginSpec {
- if in == nil {
- return nil
- }
- out := new(DevicePluginSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DriverCertConfigSpec) DeepCopyInto(out *DriverCertConfigSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverCertConfigSpec.
-func (in *DriverCertConfigSpec) DeepCopy() *DriverCertConfigSpec {
- if in == nil {
- return nil
- }
- out := new(DriverCertConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DriverLicensingConfigSpec) DeepCopyInto(out *DriverLicensingConfigSpec) {
- *out = *in
- if in.NLSEnabled != nil {
- in, out := &in.NLSEnabled, &out.NLSEnabled
- *out = new(bool)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverLicensingConfigSpec.
-func (in *DriverLicensingConfigSpec) DeepCopy() *DriverLicensingConfigSpec {
- if in == nil {
- return nil
- }
- out := new(DriverLicensingConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DriverManagerSpec) DeepCopyInto(out *DriverManagerSpec) {
- *out = *in
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverManagerSpec.
-func (in *DriverManagerSpec) DeepCopy() *DriverManagerSpec {
- if in == nil {
- return nil
- }
- out := new(DriverManagerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DriverRepoConfigSpec) DeepCopyInto(out *DriverRepoConfigSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverRepoConfigSpec.
-func (in *DriverRepoConfigSpec) DeepCopy() *DriverRepoConfigSpec {
- if in == nil {
- return nil
- }
- out := new(DriverRepoConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DriverSpec) DeepCopyInto(out *DriverSpec) {
- *out = *in
- if in.UseNvidiaDriverCRD != nil {
- in, out := &in.UseNvidiaDriverCRD, &out.UseNvidiaDriverCRD
- *out = new(bool)
- **out = **in
- }
- if in.UsePrecompiled != nil {
- in, out := &in.UsePrecompiled, &out.UsePrecompiled
- *out = new(bool)
- **out = **in
- }
- if in.UseOpenKernelModules != nil {
- in, out := &in.UseOpenKernelModules, &out.UseOpenKernelModules
- *out = new(bool)
- **out = **in
- }
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.StartupProbe != nil {
- in, out := &in.StartupProbe, &out.StartupProbe
- *out = new(ContainerProbeSpec)
- **out = **in
- }
- if in.LivenessProbe != nil {
- in, out := &in.LivenessProbe, &out.LivenessProbe
- *out = new(ContainerProbeSpec)
- **out = **in
- }
- if in.ReadinessProbe != nil {
- in, out := &in.ReadinessProbe, &out.ReadinessProbe
- *out = new(ContainerProbeSpec)
- **out = **in
- }
- if in.GPUDirectRDMA != nil {
- in, out := &in.GPUDirectRDMA, &out.GPUDirectRDMA
- *out = new(GPUDirectRDMASpec)
- (*in).DeepCopyInto(*out)
- }
- if in.UpgradePolicy != nil {
- in, out := &in.UpgradePolicy, &out.UpgradePolicy
- *out = new(v1alpha1.DriverUpgradePolicySpec)
- (*in).DeepCopyInto(*out)
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- in.Manager.DeepCopyInto(&out.Manager)
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
- if in.RepoConfig != nil {
- in, out := &in.RepoConfig, &out.RepoConfig
- *out = new(DriverRepoConfigSpec)
- **out = **in
- }
- if in.CertConfig != nil {
- in, out := &in.CertConfig, &out.CertConfig
- *out = new(DriverCertConfigSpec)
- **out = **in
- }
- if in.LicensingConfig != nil {
- in, out := &in.LicensingConfig, &out.LicensingConfig
- *out = new(DriverLicensingConfigSpec)
- (*in).DeepCopyInto(*out)
- }
- if in.VirtualTopology != nil {
- in, out := &in.VirtualTopology, &out.VirtualTopology
- *out = new(VirtualTopologyConfigSpec)
- **out = **in
- }
- if in.KernelModuleConfig != nil {
- in, out := &in.KernelModuleConfig, &out.KernelModuleConfig
- *out = new(KernelModuleConfigSpec)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverSpec.
-func (in *DriverSpec) DeepCopy() *DriverSpec {
- if in == nil {
- return nil
- }
- out := new(DriverSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *DriverValidatorSpec) DeepCopyInto(out *DriverValidatorSpec) {
- *out = *in
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DriverValidatorSpec.
-func (in *DriverValidatorSpec) DeepCopy() *DriverValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(DriverValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *EnvVar) DeepCopyInto(out *EnvVar) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvVar.
-func (in *EnvVar) DeepCopy() *EnvVar {
- if in == nil {
- return nil
- }
- out := new(EnvVar)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *GDRCopySpec) DeepCopyInto(out *GDRCopySpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GDRCopySpec.
-func (in *GDRCopySpec) DeepCopy() *GDRCopySpec {
- if in == nil {
- return nil
- }
- out := new(GDRCopySpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *GPUDirectRDMASpec) DeepCopyInto(out *GPUDirectRDMASpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.UseHostMOFED != nil {
- in, out := &in.UseHostMOFED, &out.UseHostMOFED
- *out = new(bool)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUDirectRDMASpec.
-func (in *GPUDirectRDMASpec) DeepCopy() *GPUDirectRDMASpec {
- if in == nil {
- return nil
- }
- out := new(GPUDirectRDMASpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *GPUDirectStorageSpec) DeepCopyInto(out *GPUDirectStorageSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUDirectStorageSpec.
-func (in *GPUDirectStorageSpec) DeepCopy() *GPUDirectStorageSpec {
- if in == nil {
- return nil
- }
- out := new(GPUDirectStorageSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *GPUFeatureDiscoverySpec) DeepCopyInto(out *GPUFeatureDiscoverySpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUFeatureDiscoverySpec.
-func (in *GPUFeatureDiscoverySpec) DeepCopy() *GPUFeatureDiscoverySpec {
- if in == nil {
- return nil
- }
- out := new(GPUFeatureDiscoverySpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *InitContainerSpec) DeepCopyInto(out *InitContainerSpec) {
- *out = *in
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InitContainerSpec.
-func (in *InitContainerSpec) DeepCopy() *InitContainerSpec {
- if in == nil {
- return nil
- }
- out := new(InitContainerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *KataManagerSpec) DeepCopyInto(out *KataManagerSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.Config != nil {
- in, out := &in.Config, &out.Config
- *out = new(config.Config)
- (*in).DeepCopyInto(*out)
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KataManagerSpec.
-func (in *KataManagerSpec) DeepCopy() *KataManagerSpec {
- if in == nil {
- return nil
- }
- out := new(KataManagerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *KernelModuleConfigSpec) DeepCopyInto(out *KernelModuleConfigSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KernelModuleConfigSpec.
-func (in *KernelModuleConfigSpec) DeepCopy() *KernelModuleConfigSpec {
- if in == nil {
- return nil
- }
- out := new(KernelModuleConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *MIGGPUClientsConfigSpec) DeepCopyInto(out *MIGGPUClientsConfigSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MIGGPUClientsConfigSpec.
-func (in *MIGGPUClientsConfigSpec) DeepCopy() *MIGGPUClientsConfigSpec {
- if in == nil {
- return nil
- }
- out := new(MIGGPUClientsConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *MIGManagerSpec) DeepCopyInto(out *MIGManagerSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
- if in.Config != nil {
- in, out := &in.Config, &out.Config
- *out = new(MIGPartedConfigSpec)
- **out = **in
- }
- if in.GPUClientsConfig != nil {
- in, out := &in.GPUClientsConfig, &out.GPUClientsConfig
- *out = new(MIGGPUClientsConfigSpec)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MIGManagerSpec.
-func (in *MIGManagerSpec) DeepCopy() *MIGManagerSpec {
- if in == nil {
- return nil
- }
- out := new(MIGManagerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *MIGPartedConfigSpec) DeepCopyInto(out *MIGPartedConfigSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MIGPartedConfigSpec.
-func (in *MIGPartedConfigSpec) DeepCopy() *MIGPartedConfigSpec {
- if in == nil {
- return nil
- }
- out := new(MIGPartedConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *MIGSpec) DeepCopyInto(out *MIGSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MIGSpec.
-func (in *MIGSpec) DeepCopy() *MIGSpec {
- if in == nil {
- return nil
- }
- out := new(MIGSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *NodeStatusExporterSpec) DeepCopyInto(out *NodeStatusExporterSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeStatusExporterSpec.
-func (in *NodeStatusExporterSpec) DeepCopy() *NodeStatusExporterSpec {
- if in == nil {
- return nil
- }
- out := new(NodeStatusExporterSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *OperatorSpec) DeepCopyInto(out *OperatorSpec) {
- *out = *in
- in.InitContainer.DeepCopyInto(&out.InitContainer)
- if in.Labels != nil {
- in, out := &in.Labels, &out.Labels
- *out = make(map[string]string, len(*in))
- for key, val := range *in {
- (*out)[key] = val
- }
- }
- if in.Annotations != nil {
- in, out := &in.Annotations, &out.Annotations
- *out = make(map[string]string, len(*in))
- for key, val := range *in {
- (*out)[key] = val
- }
- }
- if in.UseOpenShiftDriverToolkit != nil {
- in, out := &in.UseOpenShiftDriverToolkit, &out.UseOpenShiftDriverToolkit
- *out = new(bool)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OperatorSpec.
-func (in *OperatorSpec) DeepCopy() *OperatorSpec {
- if in == nil {
- return nil
- }
- out := new(OperatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *PSASpec) DeepCopyInto(out *PSASpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PSASpec.
-func (in *PSASpec) DeepCopy() *PSASpec {
- if in == nil {
- return nil
- }
- out := new(PSASpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *PSPSpec) DeepCopyInto(out *PSPSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PSPSpec.
-func (in *PSPSpec) DeepCopy() *PSPSpec {
- if in == nil {
- return nil
- }
- out := new(PSPSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *PluginValidatorSpec) DeepCopyInto(out *PluginValidatorSpec) {
- *out = *in
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PluginValidatorSpec.
-func (in *PluginValidatorSpec) DeepCopy() *PluginValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(PluginValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements) {
- *out = *in
- if in.Limits != nil {
- in, out := &in.Limits, &out.Limits
- *out = make(corev1.ResourceList, len(*in))
- for key, val := range *in {
- (*out)[key] = val.DeepCopy()
- }
- }
- if in.Requests != nil {
- in, out := &in.Requests, &out.Requests
- *out = make(corev1.ResourceList, len(*in))
- for key, val := range *in {
- (*out)[key] = val.DeepCopy()
- }
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceRequirements.
-func (in *ResourceRequirements) DeepCopy() *ResourceRequirements {
- if in == nil {
- return nil
- }
- out := new(ResourceRequirements)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *RollingUpdateSpec) DeepCopyInto(out *RollingUpdateSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RollingUpdateSpec.
-func (in *RollingUpdateSpec) DeepCopy() *RollingUpdateSpec {
- if in == nil {
- return nil
- }
- out := new(RollingUpdateSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SandboxDevicePluginSpec) DeepCopyInto(out *SandboxDevicePluginSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SandboxDevicePluginSpec.
-func (in *SandboxDevicePluginSpec) DeepCopy() *SandboxDevicePluginSpec {
- if in == nil {
- return nil
- }
- out := new(SandboxDevicePluginSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *SandboxWorkloadsSpec) DeepCopyInto(out *SandboxWorkloadsSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SandboxWorkloadsSpec.
-func (in *SandboxWorkloadsSpec) DeepCopy() *SandboxWorkloadsSpec {
- if in == nil {
- return nil
- }
- out := new(SandboxWorkloadsSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ToolkitSpec) DeepCopyInto(out *ToolkitSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolkitSpec.
-func (in *ToolkitSpec) DeepCopy() *ToolkitSpec {
- if in == nil {
- return nil
- }
- out := new(ToolkitSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ToolkitValidatorSpec) DeepCopyInto(out *ToolkitValidatorSpec) {
- *out = *in
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolkitValidatorSpec.
-func (in *ToolkitValidatorSpec) DeepCopy() *ToolkitValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(ToolkitValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VFIOManagerSpec) DeepCopyInto(out *VFIOManagerSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
- in.DriverManager.DeepCopyInto(&out.DriverManager)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VFIOManagerSpec.
-func (in *VFIOManagerSpec) DeepCopy() *VFIOManagerSpec {
- if in == nil {
- return nil
- }
- out := new(VFIOManagerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VFIOPCIValidatorSpec) DeepCopyInto(out *VFIOPCIValidatorSpec) {
- *out = *in
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VFIOPCIValidatorSpec.
-func (in *VFIOPCIValidatorSpec) DeepCopy() *VFIOPCIValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(VFIOPCIValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VGPUDeviceManagerSpec) DeepCopyInto(out *VGPUDeviceManagerSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
- if in.Config != nil {
- in, out := &in.Config, &out.Config
- *out = new(VGPUDevicesConfigSpec)
- **out = **in
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUDeviceManagerSpec.
-func (in *VGPUDeviceManagerSpec) DeepCopy() *VGPUDeviceManagerSpec {
- if in == nil {
- return nil
- }
- out := new(VGPUDeviceManagerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VGPUDevicesConfigSpec) DeepCopyInto(out *VGPUDevicesConfigSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUDevicesConfigSpec.
-func (in *VGPUDevicesConfigSpec) DeepCopy() *VGPUDevicesConfigSpec {
- if in == nil {
- return nil
- }
- out := new(VGPUDevicesConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VGPUDevicesValidatorSpec) DeepCopyInto(out *VGPUDevicesValidatorSpec) {
- *out = *in
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUDevicesValidatorSpec.
-func (in *VGPUDevicesValidatorSpec) DeepCopy() *VGPUDevicesValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(VGPUDevicesValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VGPUManagerSpec) DeepCopyInto(out *VGPUManagerSpec) {
- *out = *in
- if in.Enabled != nil {
- in, out := &in.Enabled, &out.Enabled
- *out = new(bool)
- **out = **in
- }
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
- in.DriverManager.DeepCopyInto(&out.DriverManager)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUManagerSpec.
-func (in *VGPUManagerSpec) DeepCopy() *VGPUManagerSpec {
- if in == nil {
- return nil
- }
- out := new(VGPUManagerSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VGPUManagerValidatorSpec) DeepCopyInto(out *VGPUManagerValidatorSpec) {
- *out = *in
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VGPUManagerValidatorSpec.
-func (in *VGPUManagerValidatorSpec) DeepCopy() *VGPUManagerValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(VGPUManagerValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ValidatorSpec) DeepCopyInto(out *ValidatorSpec) {
- *out = *in
- in.Plugin.DeepCopyInto(&out.Plugin)
- in.Toolkit.DeepCopyInto(&out.Toolkit)
- in.Driver.DeepCopyInto(&out.Driver)
- in.CUDA.DeepCopyInto(&out.CUDA)
- in.VFIOPCI.DeepCopyInto(&out.VFIOPCI)
- in.VGPUManager.DeepCopyInto(&out.VGPUManager)
- in.VGPUDevices.DeepCopyInto(&out.VGPUDevices)
- if in.ImagePullSecrets != nil {
- in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Resources != nil {
- in, out := &in.Resources, &out.Resources
- *out = new(ResourceRequirements)
- (*in).DeepCopyInto(*out)
- }
- if in.Args != nil {
- in, out := &in.Args, &out.Args
- *out = make([]string, len(*in))
- copy(*out, *in)
- }
- if in.Env != nil {
- in, out := &in.Env, &out.Env
- *out = make([]EnvVar, len(*in))
- copy(*out, *in)
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ValidatorSpec.
-func (in *ValidatorSpec) DeepCopy() *ValidatorSpec {
- if in == nil {
- return nil
- }
- out := new(ValidatorSpec)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *VirtualTopologyConfigSpec) DeepCopyInto(out *VirtualTopologyConfigSpec) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VirtualTopologyConfigSpec.
-func (in *VirtualTopologyConfigSpec) DeepCopy() *VirtualTopologyConfigSpec {
- if in == nil {
- return nil
- }
- out := new(VirtualTopologyConfigSpec)
- in.DeepCopyInto(out)
- return out
-}
diff --git a/api/v1alpha1/groupversion_info.go b/api/v1alpha1/groupversion_info.go
deleted file mode 100644
index 9c70d751f..000000000
--- a/api/v1alpha1/groupversion_info.go
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
-# Copyright (c) NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-**/
-
-// Package v1alpha1 contains API Schema definitions for the nvidia v1alpha1 API group
-// +kubebuilder:object:generate=true
-// +groupName=nvidia.com
-package v1alpha1
-
-import (
- "k8s.io/apimachinery/pkg/runtime/schema"
- "sigs.k8s.io/controller-runtime/pkg/scheme"
-)
-
-var (
- // GroupVersion is group version used to register these objects
- GroupVersion = schema.GroupVersion{Group: "nvidia.com", Version: "v1alpha1"}
-
- // SchemeBuilder is used to add go types to the GroupVersionKind scheme
- SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
-
- // AddToScheme adds the types in this group-version to the given scheme.
- AddToScheme = SchemeBuilder.AddToScheme
-)
diff --git a/api/versioned/clientset.go b/api/versioned/clientset.go
new file mode 100644
index 000000000..539960f0f
--- /dev/null
+++ b/api/versioned/clientset.go
@@ -0,0 +1,133 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package versioned
+
+import (
+ "fmt"
+ "net/http"
+
+ nvidiav1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1alpha1"
+ discovery "k8s.io/client-go/discovery"
+ rest "k8s.io/client-go/rest"
+ flowcontrol "k8s.io/client-go/util/flowcontrol"
+)
+
+type Interface interface {
+ Discovery() discovery.DiscoveryInterface
+ NvidiaV1() nvidiav1.NvidiaV1Interface
+ NvidiaV1alpha1() nvidiav1alpha1.NvidiaV1alpha1Interface
+}
+
+// Clientset contains the clients for groups.
+type Clientset struct {
+ *discovery.DiscoveryClient
+ nvidiaV1 *nvidiav1.NvidiaV1Client
+ nvidiaV1alpha1 *nvidiav1alpha1.NvidiaV1alpha1Client
+}
+
+// NvidiaV1 retrieves the NvidiaV1Client
+func (c *Clientset) NvidiaV1() nvidiav1.NvidiaV1Interface {
+ return c.nvidiaV1
+}
+
+// NvidiaV1alpha1 retrieves the NvidiaV1alpha1Client
+func (c *Clientset) NvidiaV1alpha1() nvidiav1alpha1.NvidiaV1alpha1Interface {
+ return c.nvidiaV1alpha1
+}
+
+// Discovery retrieves the DiscoveryClient
+func (c *Clientset) Discovery() discovery.DiscoveryInterface {
+ if c == nil {
+ return nil
+ }
+ return c.DiscoveryClient
+}
+
+// NewForConfig creates a new Clientset for the given config.
+// If config's RateLimiter is not set and QPS and Burst are acceptable,
+// NewForConfig will generate a rate-limiter in configShallowCopy.
+// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient),
+// where httpClient was generated with rest.HTTPClientFor(c).
+func NewForConfig(c *rest.Config) (*Clientset, error) {
+ configShallowCopy := *c
+
+ if configShallowCopy.UserAgent == "" {
+ configShallowCopy.UserAgent = rest.DefaultKubernetesUserAgent()
+ }
+
+ // share the transport between all clients
+ httpClient, err := rest.HTTPClientFor(&configShallowCopy)
+ if err != nil {
+ return nil, err
+ }
+
+ return NewForConfigAndClient(&configShallowCopy, httpClient)
+}
+
+// NewForConfigAndClient creates a new Clientset for the given config and http client.
+// Note the http client provided takes precedence over the configured transport values.
+// If config's RateLimiter is not set and QPS and Burst are acceptable,
+// NewForConfigAndClient will generate a rate-limiter in configShallowCopy.
+func NewForConfigAndClient(c *rest.Config, httpClient *http.Client) (*Clientset, error) {
+ configShallowCopy := *c
+ if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 {
+ if configShallowCopy.Burst <= 0 {
+ return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0")
+ }
+ configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst)
+ }
+
+ var cs Clientset
+ var err error
+ cs.nvidiaV1, err = nvidiav1.NewForConfigAndClient(&configShallowCopy, httpClient)
+ if err != nil {
+ return nil, err
+ }
+ cs.nvidiaV1alpha1, err = nvidiav1alpha1.NewForConfigAndClient(&configShallowCopy, httpClient)
+ if err != nil {
+ return nil, err
+ }
+
+ cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfigAndClient(&configShallowCopy, httpClient)
+ if err != nil {
+ return nil, err
+ }
+ return &cs, nil
+}
+
+// NewForConfigOrDie creates a new Clientset for the given config and
+// panics if there is an error in the config.
+func NewForConfigOrDie(c *rest.Config) *Clientset {
+ cs, err := NewForConfig(c)
+ if err != nil {
+ panic(err)
+ }
+ return cs
+}
+
+// New creates a new Clientset for the given RESTClient.
+func New(c rest.Interface) *Clientset {
+ var cs Clientset
+ cs.nvidiaV1 = nvidiav1.New(c)
+ cs.nvidiaV1alpha1 = nvidiav1alpha1.New(c)
+
+ cs.DiscoveryClient = discovery.NewDiscoveryClient(c)
+ return &cs
+}
diff --git a/api/versioned/fake/clientset_generated.go b/api/versioned/fake/clientset_generated.go
new file mode 100644
index 000000000..ed3bde5a5
--- /dev/null
+++ b/api/versioned/fake/clientset_generated.go
@@ -0,0 +1,96 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+ clientset "github.com/NVIDIA/gpu-operator/api/versioned"
+ nvidiav1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1"
+ fakenvidiav1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1/fake"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1alpha1"
+ fakenvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1alpha1/fake"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/watch"
+ "k8s.io/client-go/discovery"
+ fakediscovery "k8s.io/client-go/discovery/fake"
+ "k8s.io/client-go/testing"
+)
+
+// NewSimpleClientset returns a clientset that will respond with the provided objects.
+// It's backed by a very simple object tracker that processes creates, updates and deletions as-is,
+// without applying any field management, validations and/or defaults. It shouldn't be considered a replacement
+// for a real clientset and is mostly useful in simple unit tests.
+//
+// DEPRECATED: NewClientset replaces this with support for field management, which significantly improves
+// server side apply testing. NewClientset is only available when apply configurations are generated (e.g.
+// via --with-applyconfig).
+func NewSimpleClientset(objects ...runtime.Object) *Clientset {
+ o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder())
+ for _, obj := range objects {
+ if err := o.Add(obj); err != nil {
+ panic(err)
+ }
+ }
+
+ cs := &Clientset{tracker: o}
+ cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake}
+ cs.AddReactor("*", "*", testing.ObjectReaction(o))
+ cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) {
+ gvr := action.GetResource()
+ ns := action.GetNamespace()
+ watch, err := o.Watch(gvr, ns)
+ if err != nil {
+ return false, nil, err
+ }
+ return true, watch, nil
+ })
+
+ return cs
+}
+
+// Clientset implements clientset.Interface. Meant to be embedded into a
+// struct to get a default implementation. This makes faking out just the method
+// you want to test easier.
+type Clientset struct {
+ testing.Fake
+ discovery *fakediscovery.FakeDiscovery
+ tracker testing.ObjectTracker
+}
+
+func (c *Clientset) Discovery() discovery.DiscoveryInterface {
+ return c.discovery
+}
+
+func (c *Clientset) Tracker() testing.ObjectTracker {
+ return c.tracker
+}
+
+var (
+ _ clientset.Interface = &Clientset{}
+ _ testing.FakeClient = &Clientset{}
+)
+
+// NvidiaV1 retrieves the NvidiaV1Client
+func (c *Clientset) NvidiaV1() nvidiav1.NvidiaV1Interface {
+ return &fakenvidiav1.FakeNvidiaV1{Fake: &c.Fake}
+}
+
+// NvidiaV1alpha1 retrieves the NvidiaV1alpha1Client
+func (c *Clientset) NvidiaV1alpha1() nvidiav1alpha1.NvidiaV1alpha1Interface {
+ return &fakenvidiav1alpha1.FakeNvidiaV1alpha1{Fake: &c.Fake}
+}
diff --git a/api/versioned/fake/doc.go b/api/versioned/fake/doc.go
new file mode 100644
index 000000000..75ffe4d8f
--- /dev/null
+++ b/api/versioned/fake/doc.go
@@ -0,0 +1,20 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+// This package has the automatically generated fake clientset.
+package fake
diff --git a/api/versioned/fake/register.go b/api/versioned/fake/register.go
new file mode 100644
index 000000000..d1afb9205
--- /dev/null
+++ b/api/versioned/fake/register.go
@@ -0,0 +1,58 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+ nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
+ v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ runtime "k8s.io/apimachinery/pkg/runtime"
+ schema "k8s.io/apimachinery/pkg/runtime/schema"
+ serializer "k8s.io/apimachinery/pkg/runtime/serializer"
+ utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+)
+
+var scheme = runtime.NewScheme()
+var codecs = serializer.NewCodecFactory(scheme)
+
+var localSchemeBuilder = runtime.SchemeBuilder{
+ nvidiav1.AddToScheme,
+ nvidiav1alpha1.AddToScheme,
+}
+
+// AddToScheme adds all types of this clientset into the given scheme. This allows composition
+// of clientsets, like in:
+//
+// import (
+// "k8s.io/client-go/kubernetes"
+// clientsetscheme "k8s.io/client-go/kubernetes/scheme"
+// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
+// )
+//
+// kclientset, _ := kubernetes.NewForConfig(c)
+// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
+//
+// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
+// correctly.
+var AddToScheme = localSchemeBuilder.AddToScheme
+
+func init() {
+ v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"})
+ utilruntime.Must(AddToScheme(scheme))
+}
diff --git a/api/versioned/scheme/doc.go b/api/versioned/scheme/doc.go
new file mode 100644
index 000000000..161d7caf5
--- /dev/null
+++ b/api/versioned/scheme/doc.go
@@ -0,0 +1,20 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+// This package contains the scheme of the automatically generated clientset.
+package scheme
diff --git a/api/versioned/scheme/register.go b/api/versioned/scheme/register.go
new file mode 100644
index 000000000..52289fe8c
--- /dev/null
+++ b/api/versioned/scheme/register.go
@@ -0,0 +1,58 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package scheme
+
+import (
+ nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
+ v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ runtime "k8s.io/apimachinery/pkg/runtime"
+ schema "k8s.io/apimachinery/pkg/runtime/schema"
+ serializer "k8s.io/apimachinery/pkg/runtime/serializer"
+ utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+)
+
+// Scheme, Codecs and ParameterCodec are the serialization objects shared by the typed clients; localSchemeBuilder backs AddToScheme.
+var (
+	Scheme             = runtime.NewScheme()
+	Codecs             = serializer.NewCodecFactory(Scheme)
+	ParameterCodec     = runtime.NewParameterCodec(Scheme)
+	localSchemeBuilder = runtime.SchemeBuilder{nvidiav1.AddToScheme, nvidiav1alpha1.AddToScheme}
+)
+
+// AddToScheme registers all types of this clientset into the given scheme, which makes
+// it possible to compose clientsets, for example:
+//
+//	import (
+//		"k8s.io/client-go/kubernetes"
+//		clientsetscheme "k8s.io/client-go/kubernetes/scheme"
+//		aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
+//	)
+//
+//	kclientset, _ := kubernetes.NewForConfig(c)
+//	_ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
+//
+// After the call above, RawExtensions embedded in Kubernetes types serialize the
+// kube-aggregator types correctly.
+var AddToScheme = localSchemeBuilder.AddToScheme
+
+func init() { // runs at package load and populates the exported Scheme
+ v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) // add the shared meta/v1 types under the group-less "v1" version
+ utilruntime.Must(AddToScheme(Scheme)) // register both nvidia API versions; Must panics on a non-nil error
+}
diff --git a/api/versioned/typed/nvidia/v1/clusterpolicy.go b/api/versioned/typed/nvidia/v1/clusterpolicy.go
new file mode 100644
index 000000000..29d5aa390
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1/clusterpolicy.go
@@ -0,0 +1,69 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1
+
+import (
+ "context"
+
+ v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ scheme "github.com/NVIDIA/gpu-operator/api/versioned/scheme"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ types "k8s.io/apimachinery/pkg/types"
+ watch "k8s.io/apimachinery/pkg/watch"
+ gentype "k8s.io/client-go/gentype"
+)
+
+// ClusterPoliciesGetter exposes ClusterPolicies, which returns a ClusterPolicyInterface.
+// It is embedded in NvidiaV1Interface, so a client for the group should implement it.
+type ClusterPoliciesGetter interface {
+ ClusterPolicies() ClusterPolicyInterface
+}
+
+// ClusterPolicyInterface declares the typed operations this clientset offers on ClusterPolicy resources.
+type ClusterPolicyInterface interface {
+ Create(ctx context.Context, clusterPolicy *v1.ClusterPolicy, opts metav1.CreateOptions) (*v1.ClusterPolicy, error)
+ Update(ctx context.Context, clusterPolicy *v1.ClusterPolicy, opts metav1.UpdateOptions) (*v1.ClusterPolicy, error)
+ // Generator note: add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+ UpdateStatus(ctx context.Context, clusterPolicy *v1.ClusterPolicy, opts metav1.UpdateOptions) (*v1.ClusterPolicy, error)
+ Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error
+ DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error
+ Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.ClusterPolicy, error)
+ List(ctx context.Context, opts metav1.ListOptions) (*v1.ClusterPolicyList, error)
+ Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error)
+ Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.ClusterPolicy, err error)
+ ClusterPolicyExpansion // declared in generated_expansion.go as an empty interface
+}
+
+// clusterPolicies implements ClusterPolicyInterface; its only member is the embedded generic gentype client.
+type clusterPolicies struct {
+ *gentype.ClientWithList[*v1.ClusterPolicy, *v1.ClusterPolicyList] // NOTE(review): the interface methods are presumably promoted from this embedded client — confirm against client-go gentype
+}
+
+// newClusterPolicies builds the typed ClusterPolicy client on top of the group client's
+// REST client. The resource name is "clusterpolicies" and the namespace argument is the
+// empty string; the two closures hand gentype fresh, empty ClusterPolicy and
+// ClusterPolicyList values.
+func newClusterPolicies(c *NvidiaV1Client) *clusterPolicies {
+	newItem := func() *v1.ClusterPolicy { return &v1.ClusterPolicy{} }
+	newList := func() *v1.ClusterPolicyList { return &v1.ClusterPolicyList{} }
+	client := gentype.NewClientWithList[*v1.ClusterPolicy, *v1.ClusterPolicyList](
+		"clusterpolicies", c.RESTClient(), scheme.ParameterCodec, "", newItem, newList,
+	)
+	return &clusterPolicies{client}
+}
diff --git a/api/versioned/typed/nvidia/v1/doc.go b/api/versioned/typed/nvidia/v1/doc.go
new file mode 100644
index 000000000..fb431d1b5
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1/doc.go
@@ -0,0 +1,20 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+// This package has the automatically generated typed clients.
+package v1
diff --git a/api/versioned/typed/nvidia/v1/fake/doc.go b/api/versioned/typed/nvidia/v1/fake/doc.go
new file mode 100644
index 000000000..a8f211f18
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1/fake/doc.go
@@ -0,0 +1,20 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+// Package fake has the automatically generated clients.
+package fake
diff --git a/api/versioned/typed/nvidia/v1/fake/fake_clusterpolicy.go b/api/versioned/typed/nvidia/v1/fake/fake_clusterpolicy.go
new file mode 100644
index 000000000..e7bb37f19
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1/fake/fake_clusterpolicy.go
@@ -0,0 +1,138 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+ "context"
+
+ v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ labels "k8s.io/apimachinery/pkg/labels"
+ types "k8s.io/apimachinery/pkg/types"
+ watch "k8s.io/apimachinery/pkg/watch"
+ testing "k8s.io/client-go/testing"
+)
+
+// FakeClusterPolicies implements ClusterPolicyInterface by routing every call through the fake group client it holds.
+type FakeClusterPolicies struct {
+ Fake *FakeNvidiaV1
+}
+
+var clusterpoliciesResource = v1.SchemeGroupVersion.WithResource("clusterpolicies") // resource identifier passed to every fake action in this file
+
+var clusterpoliciesKind = v1.SchemeGroupVersion.WithKind("ClusterPolicy") // kind passed to the list action
+
+// Get submits a root get action for the named ClusterPolicy to the fake; if no object comes back, an empty ClusterPolicy is returned with the error.
+func (c *FakeClusterPolicies) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.ClusterPolicy, err error) {
+	placeholder := &v1.ClusterPolicy{}
+	action := testing.NewRootGetActionWithOptions(clusterpoliciesResource, name, options)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1.ClusterPolicy), err
+}
+
+// List submits a root list action to the fake and returns the ClusterPolicies from the result whose labels match the label selector in opts.
+func (c *FakeClusterPolicies) List(ctx context.Context, opts metav1.ListOptions) (result *v1.ClusterPolicyList, err error) {
+	placeholder := &v1.ClusterPolicyList{}
+	action := testing.NewRootListActionWithOptions(clusterpoliciesResource, clusterpoliciesKind, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	all := tracked.(*v1.ClusterPolicyList)
+	selector, _, _ := testing.ExtractFromListOptions(opts)
+	if selector == nil {
+		selector = labels.Everything()
+	}
+	filtered := &v1.ClusterPolicyList{ListMeta: all.ListMeta}
+	for _, policy := range all.Items {
+		if selector.Matches(labels.Set(policy.Labels)) {
+			filtered.Items = append(filtered.Items, policy)
+		}
+	}
+	return filtered, err
+}
+
+// Watch submits a root watch action for clusterPolicies to the fake and returns the watch.Interface it produces.
+func (c *FakeClusterPolicies) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) {
+	action := testing.NewRootWatchActionWithOptions(clusterpoliciesResource, opts)
+	return c.Fake.InvokesWatch(action)
+}
+
+// Create submits a root create action carrying clusterPolicy to the fake; if no object comes back, an empty ClusterPolicy is returned with the error.
+func (c *FakeClusterPolicies) Create(ctx context.Context, clusterPolicy *v1.ClusterPolicy, opts metav1.CreateOptions) (result *v1.ClusterPolicy, err error) {
+	placeholder := &v1.ClusterPolicy{}
+	action := testing.NewRootCreateActionWithOptions(clusterpoliciesResource, clusterPolicy, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1.ClusterPolicy), err
+}
+
+// Update submits a root update action carrying clusterPolicy to the fake; if no object comes back, an empty ClusterPolicy is returned with the error.
+func (c *FakeClusterPolicies) Update(ctx context.Context, clusterPolicy *v1.ClusterPolicy, opts metav1.UpdateOptions) (result *v1.ClusterPolicy, err error) {
+	placeholder := &v1.ClusterPolicy{}
+	action := testing.NewRootUpdateActionWithOptions(clusterpoliciesResource, clusterPolicy, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1.ClusterPolicy), err
+}
+
+// UpdateStatus submits a root update action against the "status" subresource to the fake.
+// It was generated because the type has a Status member; a +genclient:noStatus comment above the type avoids generating it.
+func (c *FakeClusterPolicies) UpdateStatus(ctx context.Context, clusterPolicy *v1.ClusterPolicy, opts metav1.UpdateOptions) (result *v1.ClusterPolicy, err error) {
+	placeholder := &v1.ClusterPolicy{}
+	action := testing.NewRootUpdateSubresourceActionWithOptions(clusterpoliciesResource, "status", clusterPolicy, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1.ClusterPolicy), err
+}
+
+// Delete submits a root delete action for the named ClusterPolicy to the fake and returns only the resulting error.
+func (c *FakeClusterPolicies) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error {
+	action := testing.NewRootDeleteActionWithOptions(clusterpoliciesResource, name, opts)
+	_, err := c.Fake.Invokes(action, &v1.ClusterPolicy{})
+	return err
+}
+
+// DeleteCollection submits a root delete-collection action built from opts and listOpts
+// to the fake and returns only the resulting error.
+func (c *FakeClusterPolicies) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error {
+	_, err := c.Fake.
+		Invokes(testing.NewRootDeleteCollectionActionWithOptions(clusterpoliciesResource, opts, listOpts), &v1.ClusterPolicyList{})
+	return err
+}
+
+// Patch submits a root patch action (patch type, data and optional subresources) to the fake; if no object comes back, an empty ClusterPolicy is returned with the error.
+func (c *FakeClusterPolicies) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.ClusterPolicy, err error) {
+	placeholder := &v1.ClusterPolicy{}
+	action := testing.NewRootPatchSubresourceActionWithOptions(clusterpoliciesResource, name, pt, data, opts, subresources...)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1.ClusterPolicy), err
+}
diff --git a/api/versioned/typed/nvidia/v1/fake/fake_nvidia_client.go b/api/versioned/typed/nvidia/v1/fake/fake_nvidia_client.go
new file mode 100644
index 000000000..aca78400a
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1/fake/fake_nvidia_client.go
@@ -0,0 +1,40 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+ v1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1"
+ rest "k8s.io/client-go/rest"
+ testing "k8s.io/client-go/testing"
+)
+
+type FakeNvidiaV1 struct { // FakeNvidiaV1 is the fake counterpart of the nvidia v1 group client.
+ *testing.Fake // embedded so the resource fakes can call Invokes and InvokesWatch through it
+}
+
+func (c *FakeNvidiaV1) ClusterPolicies() v1.ClusterPolicyInterface {
+	return &FakeClusterPolicies{Fake: c} // the resource fake keeps a pointer back to this group fake
+}
+
+// RESTClient returns a RESTClient that is used to communicate
+// with API server by this client implementation.
+func (c *FakeNvidiaV1) RESTClient() rest.Interface {
+ var ret *rest.RESTClient
+ return ret
+}
diff --git a/api/versioned/typed/nvidia/v1/generated_expansion.go b/api/versioned/typed/nvidia/v1/generated_expansion.go
new file mode 100644
index 000000000..9727986c3
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1/generated_expansion.go
@@ -0,0 +1,21 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1
+
+type ClusterPolicyExpansion interface{} // empty interface embedded in ClusterPolicyInterface; it declares no methods
diff --git a/api/versioned/typed/nvidia/v1/nvidia_client.go b/api/versioned/typed/nvidia/v1/nvidia_client.go
new file mode 100644
index 000000000..15d235390
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1/nvidia_client.go
@@ -0,0 +1,107 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1
+
+import (
+ "net/http"
+
+ v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ "github.com/NVIDIA/gpu-operator/api/versioned/scheme"
+ rest "k8s.io/client-go/rest"
+)
+
+type NvidiaV1Interface interface { // NvidiaV1Interface is the method set of the nvidia v1 group client.
+ RESTClient() rest.Interface
+ ClusterPoliciesGetter
+}
+
+// NvidiaV1Client is used to interact with features provided by the nvidia group (v1 API version).
+type NvidiaV1Client struct {
+ restClient rest.Interface // returned by RESTClient
+}
+
+func (c *NvidiaV1Client) ClusterPolicies() ClusterPolicyInterface { // ClusterPolicies returns a ClusterPolicy client built from c.
+ return newClusterPolicies(c)
+}
+
+// NewForConfig builds an NvidiaV1Client from a copy of the given config after applying
+// this package's defaults to the copy. It is equivalent to NewForConfigAndClient(c, httpClient),
+// where httpClient was generated with rest.HTTPClientFor(c).
+func NewForConfig(c *rest.Config) (*NvidiaV1Client, error) {
+	cfg := *c
+	if defaultsErr := setConfigDefaults(&cfg); defaultsErr != nil {
+		return nil, defaultsErr
+	}
+	transport, transportErr := rest.HTTPClientFor(&cfg)
+	if transportErr != nil {
+		return nil, transportErr
+	}
+	return NewForConfigAndClient(&cfg, transport)
+}
+
+// NewForConfigAndClient builds an NvidiaV1Client from a copy of the given config and the given http client.
+// Note the http client provided takes precedence over the configured transport values.
+func NewForConfigAndClient(c *rest.Config, h *http.Client) (*NvidiaV1Client, error) {
+	cfg := *c
+	if defaultsErr := setConfigDefaults(&cfg); defaultsErr != nil {
+		return nil, defaultsErr
+	}
+	restClient, clientErr := rest.RESTClientForConfigAndClient(&cfg, h)
+	if clientErr != nil {
+		return nil, clientErr
+	}
+	return &NvidiaV1Client{restClient: restClient}, nil
+}
+
+// NewForConfigOrDie wraps NewForConfig: it returns the client on success and
+// panics with the error NewForConfig reported otherwise.
+func NewForConfigOrDie(c *rest.Config) *NvidiaV1Client {
+	built, buildErr := NewForConfig(c)
+	if buildErr == nil {
+		return built
+	}
+	panic(buildErr)
+}
+
+// New wraps the given rest.Interface in an NvidiaV1Client without modifying it.
+func New(c rest.Interface) *NvidiaV1Client {
+	return &NvidiaV1Client{restClient: c}
+}
+
+// setConfigDefaults points config at this package's group/version under the "/apis"
+// path and installs the clientset's serializer. It always returns nil.
+func setConfigDefaults(config *rest.Config) error {
+	groupVersion := v1.SchemeGroupVersion
+	config.GroupVersion = &groupVersion
+	config.APIPath = "/apis"
+	config.NegotiatedSerializer = scheme.Codecs.WithoutConversion()
+	// Keep a caller-supplied user agent; fill in the default only when none is set.
+	if len(config.UserAgent) == 0 {
+		config.UserAgent = rest.DefaultKubernetesUserAgent()
+	}
+	return nil
+}
+
+// RESTClient returns the rest.Interface this client uses to talk to the API server.
+// Calling it on a nil *NvidiaV1Client is safe and yields a nil interface.
+func (c *NvidiaV1Client) RESTClient() rest.Interface {
+	if c != nil {
+		return c.restClient
+	}
+	return nil
+}
diff --git a/api/versioned/typed/nvidia/v1alpha1/doc.go b/api/versioned/typed/nvidia/v1alpha1/doc.go
new file mode 100644
index 000000000..917274fbc
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1alpha1/doc.go
@@ -0,0 +1,20 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+// This package has the automatically generated typed clients.
+package v1alpha1
diff --git a/api/versioned/typed/nvidia/v1alpha1/fake/doc.go b/api/versioned/typed/nvidia/v1alpha1/fake/doc.go
new file mode 100644
index 000000000..a8f211f18
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1alpha1/fake/doc.go
@@ -0,0 +1,20 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+// Package fake has the automatically generated clients.
+package fake
diff --git a/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidia_client.go b/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidia_client.go
new file mode 100644
index 000000000..05c071875
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidia_client.go
@@ -0,0 +1,40 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+ v1alpha1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1alpha1"
+ rest "k8s.io/client-go/rest"
+ testing "k8s.io/client-go/testing"
+)
+
+type FakeNvidiaV1alpha1 struct { // FakeNvidiaV1alpha1 is the fake counterpart of the nvidia v1alpha1 group client.
+ *testing.Fake // embedded so the resource fakes can call Invokes and InvokesWatch through it
+}
+
+func (c *FakeNvidiaV1alpha1) NVIDIADrivers() v1alpha1.NVIDIADriverInterface {
+	return &FakeNVIDIADrivers{Fake: c} // the resource fake keeps a pointer back to this group fake
+}
+
+// RESTClient returns a RESTClient that is used to communicate
+// with API server by this client implementation.
+func (c *FakeNvidiaV1alpha1) RESTClient() rest.Interface {
+ var ret *rest.RESTClient
+ return ret
+}
diff --git a/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidiadriver.go b/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidiadriver.go
new file mode 100644
index 000000000..ef5fd04e6
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidiadriver.go
@@ -0,0 +1,138 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package fake
+
+import (
+ "context"
+
+ v1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
+ v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ labels "k8s.io/apimachinery/pkg/labels"
+ types "k8s.io/apimachinery/pkg/types"
+ watch "k8s.io/apimachinery/pkg/watch"
+ testing "k8s.io/client-go/testing"
+)
+
+// FakeNVIDIADrivers implements NVIDIADriverInterface by routing every call through the fake group client it holds.
+type FakeNVIDIADrivers struct {
+ Fake *FakeNvidiaV1alpha1
+}
+
+var nvidiadriversResource = v1alpha1.SchemeGroupVersion.WithResource("nvidiadrivers") // resource identifier passed to every fake action in this file
+
+var nvidiadriversKind = v1alpha1.SchemeGroupVersion.WithKind("NVIDIADriver") // kind passed to the list action
+
+// Get submits a root get action for the named NVIDIADriver to the fake; if no object comes back, an empty NVIDIADriver is returned with the error.
+func (c *FakeNVIDIADrivers) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.NVIDIADriver, err error) {
+	placeholder := &v1alpha1.NVIDIADriver{}
+	action := testing.NewRootGetActionWithOptions(nvidiadriversResource, name, options)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1alpha1.NVIDIADriver), err
+}
+
+// List submits a root list action to the fake and returns the NVIDIADrivers from the result whose labels match the label selector in opts.
+func (c *FakeNVIDIADrivers) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.NVIDIADriverList, err error) {
+	placeholder := &v1alpha1.NVIDIADriverList{}
+	action := testing.NewRootListActionWithOptions(nvidiadriversResource, nvidiadriversKind, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	all := tracked.(*v1alpha1.NVIDIADriverList)
+	selector, _, _ := testing.ExtractFromListOptions(opts)
+	if selector == nil {
+		selector = labels.Everything()
+	}
+	filtered := &v1alpha1.NVIDIADriverList{ListMeta: all.ListMeta}
+	for _, driver := range all.Items {
+		if selector.Matches(labels.Set(driver.Labels)) {
+			filtered.Items = append(filtered.Items, driver)
+		}
+	}
+	return filtered, err
+}
+
+// Watch submits a root watch action for NVIDIADrivers to the fake and returns the watch.Interface it produces.
+func (c *FakeNVIDIADrivers) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) {
+	action := testing.NewRootWatchActionWithOptions(nvidiadriversResource, opts)
+	return c.Fake.InvokesWatch(action)
+}
+
+// Create submits a root create action carrying the given NVIDIADriver to the fake; if no object comes back, an empty NVIDIADriver is returned with the error.
+func (c *FakeNVIDIADrivers) Create(ctx context.Context, nVIDIADriver *v1alpha1.NVIDIADriver, opts v1.CreateOptions) (result *v1alpha1.NVIDIADriver, err error) {
+	placeholder := &v1alpha1.NVIDIADriver{}
+	action := testing.NewRootCreateActionWithOptions(nvidiadriversResource, nVIDIADriver, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1alpha1.NVIDIADriver), err
+}
+
+// Update submits a root update action carrying the given NVIDIADriver to the fake; if no object comes back, an empty NVIDIADriver is returned with the error.
+func (c *FakeNVIDIADrivers) Update(ctx context.Context, nVIDIADriver *v1alpha1.NVIDIADriver, opts v1.UpdateOptions) (result *v1alpha1.NVIDIADriver, err error) {
+	placeholder := &v1alpha1.NVIDIADriver{}
+	action := testing.NewRootUpdateActionWithOptions(nvidiadriversResource, nVIDIADriver, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1alpha1.NVIDIADriver), err
+}
+
+// UpdateStatus submits a root update action against the "status" subresource to the fake.
+// It was generated because the type has a Status member; a +genclient:noStatus comment above the type avoids generating it.
+func (c *FakeNVIDIADrivers) UpdateStatus(ctx context.Context, nVIDIADriver *v1alpha1.NVIDIADriver, opts v1.UpdateOptions) (result *v1alpha1.NVIDIADriver, err error) {
+	placeholder := &v1alpha1.NVIDIADriver{}
+	action := testing.NewRootUpdateSubresourceActionWithOptions(nvidiadriversResource, "status", nVIDIADriver, opts)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1alpha1.NVIDIADriver), err
+}
+
+// Delete submits a root delete action for the named NVIDIADriver to the fake and returns only the resulting error.
+func (c *FakeNVIDIADrivers) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error {
+	action := testing.NewRootDeleteActionWithOptions(nvidiadriversResource, name, opts)
+	_, err := c.Fake.Invokes(action, &v1alpha1.NVIDIADriver{})
+	return err
+}
+
+// DeleteCollection submits a root delete-collection action built from opts and listOpts
+// to the fake and returns only the resulting error.
+func (c *FakeNVIDIADrivers) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error {
+	_, err := c.Fake.
+		Invokes(testing.NewRootDeleteCollectionActionWithOptions(nvidiadriversResource, opts, listOpts), &v1alpha1.NVIDIADriverList{})
+	return err
+}
+
+// Patch submits a root patch action (patch type, data and optional subresources) to the fake; if no object comes back, an empty NVIDIADriver is returned with the error.
+func (c *FakeNVIDIADrivers) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.NVIDIADriver, err error) {
+	placeholder := &v1alpha1.NVIDIADriver{}
+	action := testing.NewRootPatchSubresourceActionWithOptions(nvidiadriversResource, name, pt, data, opts, subresources...)
+	tracked, err := c.Fake.Invokes(action, placeholder)
+	if tracked == nil {
+		return placeholder, err
+	}
+	return tracked.(*v1alpha1.NVIDIADriver), err
+}
diff --git a/api/versioned/typed/nvidia/v1alpha1/generated_expansion.go b/api/versioned/typed/nvidia/v1alpha1/generated_expansion.go
new file mode 100644
index 000000000..f5e236215
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1alpha1/generated_expansion.go
@@ -0,0 +1,21 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha1
+
+type NVIDIADriverExpansion interface{} // empty interface; it declares no methods
diff --git a/api/versioned/typed/nvidia/v1alpha1/nvidia_client.go b/api/versioned/typed/nvidia/v1alpha1/nvidia_client.go
new file mode 100644
index 000000000..1fa172e04
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1alpha1/nvidia_client.go
@@ -0,0 +1,107 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+ "net/http"
+
+ v1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
+ "github.com/NVIDIA/gpu-operator/api/versioned/scheme"
+ rest "k8s.io/client-go/rest"
+)
+
+type NvidiaV1alpha1Interface interface { // method set implemented by *NvidiaV1alpha1Client
+	RESTClient() rest.Interface // accessor for the underlying REST client
+	NVIDIADriversGetter // embedded: contributes NVIDIADrivers()
+}
+
+// NvidiaV1alpha1Client is used to interact with features provided by the nvidia group.
+type NvidiaV1alpha1Client struct {
+	restClient rest.Interface // returned as-is by RESTClient(); may be any rest.Interface passed to New
+}
+
+func (c *NvidiaV1alpha1Client) NVIDIADrivers() NVIDIADriverInterface { // satisfies NVIDIADriversGetter
+	return newNVIDIADrivers(c) // a new typed client value is constructed on every call
+}
+
+// NewForConfig creates a new NvidiaV1alpha1Client for the given config.
+// NewForConfig is equivalent to NewForConfigAndClient(c, httpClient),
+// where httpClient was generated with rest.HTTPClientFor(c).
+func NewForConfig(c *rest.Config) (*NvidiaV1alpha1Client, error) {
+ config := *c
+ if err := setConfigDefaults(&config); err != nil {
+ return nil, err
+ }
+ httpClient, err := rest.HTTPClientFor(&config)
+ if err != nil {
+ return nil, err
+ }
+ return NewForConfigAndClient(&config, httpClient)
+}
+
+// NewForConfigAndClient builds an NvidiaV1alpha1Client from a copy of c and the supplied http client.
+// Note the http client provided takes precedence over the configured transport values.
+func NewForConfigAndClient(c *rest.Config, h *http.Client) (*NvidiaV1alpha1Client, error) {
+	cfg := *c
+	if defaultsErr := setConfigDefaults(&cfg); defaultsErr != nil {
+		return nil, defaultsErr
+	}
+	restClient, buildErr := rest.RESTClientForConfigAndClient(&cfg, h)
+	if buildErr != nil {
+		return nil, buildErr
+	}
+	return &NvidiaV1alpha1Client{restClient: restClient}, nil
+}
+
+// NewForConfigOrDie wraps NewForConfig: it returns the client on success and
+// panics with the error NewForConfig reported otherwise.
+func NewForConfigOrDie(c *rest.Config) *NvidiaV1alpha1Client {
+	clientset, failure := NewForConfig(c)
+	if failure == nil {
+		return clientset
+	}
+	panic(failure)
+}
+
+// New wraps an existing rest.Interface in an NvidiaV1alpha1Client without applying any config defaults.
+func New(c rest.Interface) *NvidiaV1alpha1Client {
+	return &NvidiaV1alpha1Client{restClient: c}
+}
+
+func setConfigDefaults(config *rest.Config) error {
+	// Target this package's group/version under the "/apis" path prefix.
+	groupVersion := v1alpha1.SchemeGroupVersion
+	config.APIPath = "/apis"
+	config.GroupVersion = &groupVersion
+	config.NegotiatedSerializer = scheme.Codecs.WithoutConversion()
+	// A caller-supplied user agent is kept; an empty one gets the client-go default.
+	if len(config.UserAgent) == 0 {
+		config.UserAgent = rest.DefaultKubernetesUserAgent()
+	}
+	return nil
+}
+
+// RESTClient exposes the REST client this client implementation uses to talk to
+// the API server. Calling it on a nil receiver is safe and yields nil.
+func (c *NvidiaV1alpha1Client) RESTClient() rest.Interface {
+	if c != nil {
+		return c.restClient
+	}
+	return nil
+}
diff --git a/api/versioned/typed/nvidia/v1alpha1/nvidiadriver.go b/api/versioned/typed/nvidia/v1alpha1/nvidiadriver.go
new file mode 100644
index 000000000..21ae61bf5
--- /dev/null
+++ b/api/versioned/typed/nvidia/v1alpha1/nvidiadriver.go
@@ -0,0 +1,69 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+// Code generated by client-gen. DO NOT EDIT.
+
+package v1alpha1
+
+import (
+ "context"
+
+ v1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
+ scheme "github.com/NVIDIA/gpu-operator/api/versioned/scheme"
+ v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ types "k8s.io/apimachinery/pkg/types"
+ watch "k8s.io/apimachinery/pkg/watch"
+ gentype "k8s.io/client-go/gentype"
+)
+
+// NVIDIADriversGetter has a method to return an NVIDIADriverInterface.
+// A group's client should implement this interface.
+type NVIDIADriversGetter interface {
+	NVIDIADrivers() NVIDIADriverInterface // takes no namespace argument
+}
+
+// NVIDIADriverInterface is the typed client contract for NVIDIADriver resources: create, update, status update, delete, get, list, watch and patch.
+type NVIDIADriverInterface interface {
+	Create(ctx context.Context, nVIDIADriver *v1alpha1.NVIDIADriver, opts v1.CreateOptions) (*v1alpha1.NVIDIADriver, error)
+	Update(ctx context.Context, nVIDIADriver *v1alpha1.NVIDIADriver, opts v1.UpdateOptions) (*v1alpha1.NVIDIADriver, error)
+	// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+	UpdateStatus(ctx context.Context, nVIDIADriver *v1alpha1.NVIDIADriver, opts v1.UpdateOptions) (*v1alpha1.NVIDIADriver, error)
+	Delete(ctx context.Context, name string, opts v1.DeleteOptions) error
+	DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error
+	Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.NVIDIADriver, error)
+	List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.NVIDIADriverList, error)
+	Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error)
+	Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.NVIDIADriver, err error)
+	NVIDIADriverExpansion // embedded expansion interface from this package
+}
+
+// nVIDIADrivers implements NVIDIADriverInterface
+type nVIDIADrivers struct {
+	*gentype.ClientWithList[*v1alpha1.NVIDIADriver, *v1alpha1.NVIDIADriverList] // embedded; the struct declares no methods of its own in this file
+}
+
+// newNVIDIADrivers returns the typed NVIDIADriver client backed by c's REST client.
+func newNVIDIADrivers(c *NvidiaV1alpha1Client) *nVIDIADrivers {
+	// Constructors for empty object and list values, as required by gentype.NewClientWithList.
+	newObject := func() *v1alpha1.NVIDIADriver { return new(v1alpha1.NVIDIADriver) }
+	newList := func() *v1alpha1.NVIDIADriverList { return new(v1alpha1.NVIDIADriverList) }
+
+	// Resource name "nvidiadrivers"; the fourth argument is passed as the empty string.
+	typedClient := gentype.NewClientWithList[*v1alpha1.NVIDIADriver, *v1alpha1.NVIDIADriverList](
+		"nvidiadrivers", c.RESTClient(), scheme.ParameterCodec, "", newObject, newList)
+
+	return &nVIDIADrivers{typedClient}
+}
diff --git a/assets/gpu-feature-discovery/0200_role.yaml b/assets/gpu-feature-discovery/0200_role.yaml
index 52d1b606f..201042082 100644
--- a/assets/gpu-feature-discovery/0200_role.yaml
+++ b/assets/gpu-feature-discovery/0200_role.yaml
@@ -12,11 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - nodes
- verbs:
- - get
- - list
- - watch
diff --git a/assets/gpu-feature-discovery/0500_daemonset.yaml b/assets/gpu-feature-discovery/0500_daemonset.yaml
index 3bae575a7..40df1c193 100644
--- a/assets/gpu-feature-discovery/0500_daemonset.yaml
+++ b/assets/gpu-feature-discovery/0500_daemonset.yaml
@@ -33,9 +33,51 @@ spec:
securityContext:
privileged: true
volumeMounts:
- - name: run-nvidia
- mountPath: /run/nvidia
- mountPropagation: Bidirectional
+ - name: run-nvidia-validations
+ mountPath: /run/nvidia/validations
+ mountPropagation: HostToContainer
+ - name: gpu-feature-discovery-imex-init
+ image: "FILLED BY THE OPERATOR"
+ command: ["/bin/bash", "-c"]
+ args:
+ - |
+ until [[ -f /run/nvidia/validations/driver-ready ]]
+ do
+ echo "waiting for the driver validations to be ready..."
+ sleep 5
+ done
+ set -o allexport
+ cat /run/nvidia/validations/driver-ready
+ . /run/nvidia/validations/driver-ready
+
+ IMEX_NODES_CONFIG_FILE=/etc/nvidia-imex/nodes_config.cfg
+ if [[ -f /config/${IMEX_NODES_CONFIG_FILE} ]]; then
+ echo "Removing cached IMEX nodes config"
+ rm -f /config/${IMEX_NODES_CONFIG_FILE}
+ fi
+ if [[ ! -f ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} ]]; then
+ echo "No IMEX nodes config path detected; Skipping"
+ exit 0
+ fi
+ echo "Copying IMEX nodes config"
+ mkdir -p $(dirname /config/${IMEX_NODES_CONFIG_FILE})
+ cp ${DRIVER_ROOT_CTR_PATH}/${IMEX_NODES_CONFIG_FILE} /config/${IMEX_NODES_CONFIG_FILE}
+ securityContext:
+ privileged: true
+ volumeMounts:
+ - name: config
+ mountPath: /config
+ - name: run-nvidia-validations
+ mountPath: /run/nvidia/validations
+ mountPropagation: HostToContainer
+ - name: host-root
+ mountPath: /host/etc
+ subPath: etc
+ readOnly: true
+ - name: driver-install-dir
+ mountPath: /driver-root/etc
+ subPath: etc
+ readOnly: true
- name: config-manager-init
image: "FILLED BY THE OPERATOR"
command: ["config-manager"]
@@ -62,6 +104,9 @@ spec:
value: ""
- name: PROCESS_TO_SIGNAL
value: ""
+ volumeMounts:
+ - name: config
+ mountPath: /config
containers:
- image: "FILLED BY THE OPERATOR"
name: gpu-feature-discovery
@@ -71,12 +116,22 @@ spec:
value: 60s
- name: GFD_FAIL_ON_INIT_ERROR
value: "true"
+ - name: NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
volumeMounts:
- name: output-dir
mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
- name: dmi-info-dir
mountPath: "/sys/class/dmi/id"
readOnly: true
+ - name: config
+ mountPath: /config
securityContext:
privileged: true
- image: "FILLED BY THE OPERATOR"
@@ -106,7 +161,10 @@ spec:
- name: SIGNAL
value: "1" # SIGHUP
- name: PROCESS_TO_SIGNAL
- value: "/usr/bin/gpu-feature-discovery"
+ value: "gpu-feature-discovery"
+ volumeMounts:
+ - name: config
+ mountPath: /config
volumes:
- name: output-dir
hostPath:
@@ -114,7 +172,16 @@ spec:
- name: dmi-info-dir
hostPath:
path: "/sys/class/dmi/id"
- - name: run-nvidia
+ - name: run-nvidia-validations
+ hostPath:
+ path: "/run/nvidia/validations"
+ type: DirectoryOrCreate
+ - name: host-root
+ hostPath:
+ path: /
+ - name: driver-install-dir
hostPath:
- path: "/run/nvidia"
- type: Directory
+ path: /run/nvidia/driver
+ type: DirectoryOrCreate
+ - name: config
+ emptyDir: {}
diff --git a/assets/state-cc-manager/0200_role.yaml b/assets/state-cc-manager/0200_role.yaml
index 795b9c234..0afa4f919 100644
--- a/assets/state-cc-manager/0200_role.yaml
+++ b/assets/state-cc-manager/0200_role.yaml
@@ -12,3 +12,11 @@ rules:
- use
resourceNames:
- privileged
+- apiGroups:
+ - ""
+ resources:
+ - pods
+ verbs:
+ - get
+ - list
+ - watch
diff --git a/assets/state-cc-manager/0210_clusterrole.yaml b/assets/state-cc-manager/0210_clusterrole.yaml
index 6ee96a97c..f6c2b3e88 100644
--- a/assets/state-cc-manager/0210_clusterrole.yaml
+++ b/assets/state-cc-manager/0210_clusterrole.yaml
@@ -7,7 +7,6 @@ rules:
- ""
resources:
- nodes
- - pods
verbs:
- get
- list
diff --git a/assets/state-container-toolkit/0200_role.yaml b/assets/state-container-toolkit/0200_role.yaml
index ad8f93a7a..22b86bea9 100644
--- a/assets/state-container-toolkit/0200_role.yaml
+++ b/assets/state-container-toolkit/0200_role.yaml
@@ -12,3 +12,9 @@ rules:
- use
resourceNames:
- privileged
+- apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - list
diff --git a/assets/state-container-toolkit/0400_configmap.yaml b/assets/state-container-toolkit/0400_configmap.yaml
index ada0872fe..cca4ee4e9 100644
--- a/assets/state-container-toolkit/0400_configmap.yaml
+++ b/assets/state-container-toolkit/0400_configmap.yaml
@@ -9,17 +9,15 @@ data:
entrypoint.sh: |-
#!/bin/bash
- set -e
+ until [[ -f /run/nvidia/validations/driver-ready ]]
+ do
+ echo "waiting for the driver validations to be ready..."
+ sleep 5
+ done
- driver_root=/run/nvidia/driver
- driver_root_ctr_path=$driver_root
- if [[ -f /run/nvidia/validations/host-driver-ready ]]; then
- driver_root=/
- driver_root_ctr_path=/host
- fi
-
- export NVIDIA_DRIVER_ROOT=$driver_root
- export DRIVER_ROOT_CTR_PATH=$driver_root_ctr_path
+ set -o allexport
+ cat /run/nvidia/validations/driver-ready
+ . /run/nvidia/validations/driver-ready
#
# The below delay is a workaround for an issue affecting some versions
diff --git a/assets/state-container-toolkit/0500_daemonset.yaml b/assets/state-container-toolkit/0500_daemonset.yaml
index 85f68869d..b10949460 100644
--- a/assets/state-container-toolkit/0500_daemonset.yaml
+++ b/assets/state-container-toolkit/0500_daemonset.yaml
@@ -36,12 +36,16 @@ spec:
value: "true"
- name: COMPONENT
value: driver
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
securityContext:
privileged: true
seLinuxOptions:
level: "s0"
volumeMounts:
- - name: driver-install-path
+ - name: driver-install-dir
mountPath: /run/nvidia/driver
mountPropagation: HostToContainer
- name: run-nvidia-validations
@@ -67,6 +71,8 @@ spec:
value: "management.nvidia.com/gpu"
- name: NVIDIA_VISIBLE_DEVICES
value: "void"
+ - name: TOOLKIT_PID_FILE
+ value: "/run/nvidia/toolkit/toolkit.pid"
imagePullPolicy: IfNotPresent
name: nvidia-container-toolkit-ctr
securityContext:
@@ -78,13 +84,17 @@ spec:
readOnly: true
mountPath: /bin/entrypoint.sh
subPath: entrypoint.sh
- - name: nvidia-run-path
- mountPath: /run/nvidia
- mountPropagation: Bidirectional
+ - name: toolkit-root
+ mountPath: /run/nvidia/toolkit
+ - name: run-nvidia-validations
+ mountPath: /run/nvidia/validations
- name: toolkit-install-dir
mountPath: /usr/local/nvidia
- name: crio-hooks
mountPath: /usr/share/containers/oci/hooks.d
+ - name: driver-install-dir
+ mountPath: /driver-root
+ mountPropagation: HostToContainer
- name: host-root
mountPath: /host
readOnly: true
@@ -96,17 +106,18 @@ spec:
configMap:
name: nvidia-container-toolkit-entrypoint
defaultMode: 448
- - name: nvidia-run-path
+ - name: toolkit-root
hostPath:
- path: /run/nvidia
+ path: /run/nvidia/toolkit
type: DirectoryOrCreate
- name: run-nvidia-validations
hostPath:
path: /run/nvidia/validations
type: DirectoryOrCreate
- - name: driver-install-path
+ - name: driver-install-dir
hostPath:
path: /run/nvidia/driver
+ type: DirectoryOrCreate
- name: host-root
hostPath:
path: /
diff --git a/assets/state-dcgm-exporter/0200_role.yaml b/assets/state-dcgm-exporter/0200_role.yaml
index 0e6d68768..f055a3b34 100644
--- a/assets/state-dcgm-exporter/0200_role.yaml
+++ b/assets/state-dcgm-exporter/0200_role.yaml
@@ -15,6 +15,7 @@ rules:
- apiGroups:
- ""
resources:
+ - configmaps
- pods
verbs:
- get
diff --git a/assets/state-dcgm/0400_dcgm.yml b/assets/state-dcgm/0400_dcgm.yml
index 3414e824f..14fea317a 100644
--- a/assets/state-dcgm/0400_dcgm.yml
+++ b/assets/state-dcgm/0400_dcgm.yml
@@ -35,7 +35,6 @@ spec:
- name: run-nvidia
mountPath: /run/nvidia
mountPropagation: HostToContainer
- hostNetwork: true
containers:
- image: "FILLED BY THE OPERATOR"
name: nvidia-dcgm-ctr
@@ -44,7 +43,6 @@ spec:
ports:
- name: "dcgm"
containerPort: 5555
- hostPort: 5555
volumes:
- name: run-nvidia
hostPath:
diff --git a/assets/state-dcgm/0500_service.yaml b/assets/state-dcgm/0500_service.yaml
new file mode 100644
index 000000000..5ea9ac10b
--- /dev/null
+++ b/assets/state-dcgm/0500_service.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: nvidia-dcgm
+  name: nvidia-dcgm
+  namespace: "FILLED BY THE OPERATOR"  # placeholder; presumably replaced by the operator before the object is created
+spec:
+  internalTrafficPolicy: Local  # in-cluster traffic is only routed to endpoints on the client's own node
+  ports:
+  - name: dcgm
+    port: 5555  # same number as the dcgm containerPort in 0400_dcgm.yml; targetPort is unset, so it defaults to port
+    protocol: TCP
+  selector:
+    app: nvidia-dcgm
+  type: ClusterIP
diff --git a/assets/state-device-plugin/0200_role.yaml b/assets/state-device-plugin/0200_role.yaml
index 8d9b6691a..e188d60b5 100644
--- a/assets/state-device-plugin/0200_role.yaml
+++ b/assets/state-device-plugin/0200_role.yaml
@@ -12,11 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - nodes
- verbs:
- - get
- - list
- - watch
diff --git a/assets/state-device-plugin/0400_configmap.yaml b/assets/state-device-plugin/0400_configmap.yaml
index 6bfba993b..651fd5421 100644
--- a/assets/state-device-plugin/0400_configmap.yaml
+++ b/assets/state-device-plugin/0400_configmap.yaml
@@ -9,28 +9,15 @@ data:
entrypoint.sh: |-
#!/bin/bash
- driver_root=""
- container_driver_root=""
- while true; do
- if [[ -f /run/nvidia/validations/host-driver-ready ]]; then
- driver_root=/
- container_driver_root=/host
- break
- elif [[ -f /run/nvidia/validations/driver-ready ]]; then
- driver_root=/run/nvidia/driver
- container_driver_root=$driver_root
- break
- else
- echo "waiting for the driver validations to be ready..."
- sleep 5
- fi
+ until [[ -f /run/nvidia/validations/driver-ready ]]
+ do
+ echo "waiting for the driver validations to be ready..."
+ sleep 5
done
-
- export NVIDIA_DRIVER_ROOT=$driver_root
- echo "NVIDIA_DRIVER_ROOT=$NVIDIA_DRIVER_ROOT"
-
- export CONTAINER_DRIVER_ROOT=$container_driver_root
- echo "CONTAINER_DRIVER_ROOT=$CONTAINER_DRIVER_ROOT"
+
+ set -o allexport
+ cat /run/nvidia/validations/driver-ready
+ . /run/nvidia/validations/driver-ready
echo "Starting nvidia-device-plugin"
exec nvidia-device-plugin
diff --git a/assets/state-device-plugin/0500_daemonset.yaml b/assets/state-device-plugin/0500_daemonset.yaml
index d82e590d2..e6a68bd16 100644
--- a/assets/state-device-plugin/0500_daemonset.yaml
+++ b/assets/state-device-plugin/0500_daemonset.yaml
@@ -32,8 +32,8 @@ spec:
securityContext:
privileged: true
volumeMounts:
- - name: run-nvidia
- mountPath: /run/nvidia
+ - name: run-nvidia-validations
+ mountPath: /run/nvidia/validations
mountPropagation: HostToContainer
- image: "FILLED BY THE OPERATOR"
name: config-manager-init
@@ -61,6 +61,9 @@ spec:
value: ""
- name: PROCESS_TO_SIGNAL
value: ""
+ volumeMounts:
+ - name: config
+ mountPath: /config
containers:
- image: "FILLED BY THE OPERATOR"
name: nvidia-device-plugin
@@ -91,8 +94,10 @@ spec:
subPath: entrypoint.sh
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
- - name: run-nvidia
- mountPath: /run/nvidia
+ - name: run-nvidia-validations
+ mountPath: /run/nvidia/validations
+ - name: driver-install-dir
+ mountPath: /driver-root
mountPropagation: HostToContainer
- name: host-root
mountPath: /host
@@ -105,6 +110,8 @@ spec:
mountPath: /dev/shm
- name: mps-root
mountPath: /mps
+ - name: config
+ mountPath: /config
- image: "FILLED BY THE OPERATOR"
name: config-manager
command: ["config-manager"]
@@ -133,6 +140,9 @@ spec:
value: "1" # SIGHUP
- name: PROCESS_TO_SIGNAL
value: "nvidia-device-plugin"
+ volumeMounts:
+ - name: config
+ mountPath: /config
volumes:
- name: nvidia-device-plugin-entrypoint
configMap:
@@ -141,10 +151,14 @@ spec:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- - name: run-nvidia
+ - name: run-nvidia-validations
hostPath:
- path: "/run/nvidia"
- type: Directory
+ path: "/run/nvidia/validations"
+ type: DirectoryOrCreate
+ - name: driver-install-dir
+ hostPath:
+ path: "/run/nvidia/driver"
+ type: DirectoryOrCreate
- name: host-root
hostPath:
path: /
@@ -159,3 +173,5 @@ spec:
- name: mps-shm
hostPath:
path: /run/nvidia/mps/shm
+ - name: config
+ emptyDir: {}
diff --git a/assets/state-driver/0200_role.yaml b/assets/state-driver/0200_role.yaml
index 604c2a3df..a2cb330e6 100644
--- a/assets/state-driver/0200_role.yaml
+++ b/assets/state-driver/0200_role.yaml
@@ -12,20 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
diff --git a/assets/state-driver/0210_clusterrole.yaml b/assets/state-driver/0210_clusterrole.yaml
index 6bdb2c527..e62fbf316 100644
--- a/assets/state-driver/0210_clusterrole.yaml
+++ b/assets/state-driver/0210_clusterrole.yaml
@@ -35,9 +35,6 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
diff --git a/assets/state-driver/0500_daemonset.yaml b/assets/state-driver/0500_daemonset.yaml
index 040043d43..13f83884a 100644
--- a/assets/state-driver/0500_daemonset.yaml
+++ b/assets/state-driver/0500_daemonset.yaml
@@ -94,6 +94,15 @@ spec:
name: nvidia-driver-ctr
command: ["nvidia-driver"]
args: ["init"]
+ env:
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
securityContext:
privileged: true
seLinuxOptions:
diff --git a/assets/state-mig-manager/0200_role.yaml b/assets/state-mig-manager/0200_role.yaml
index 0e9c291e8..5396cbeaa 100644
--- a/assets/state-mig-manager/0200_role.yaml
+++ b/assets/state-mig-manager/0200_role.yaml
@@ -16,6 +16,8 @@ rules:
- ""
resources:
- pods
- - nodes
verbs:
- - '*'
+ - get
+ - list
+ - watch
+ - delete
diff --git a/assets/state-mig-manager/0210_clusterrole.yaml b/assets/state-mig-manager/0210_clusterrole.yaml
index 37e115af9..2e9e9e8b9 100644
--- a/assets/state-mig-manager/0210_clusterrole.yaml
+++ b/assets/state-mig-manager/0210_clusterrole.yaml
@@ -8,4 +8,8 @@ rules:
resources:
- nodes
verbs:
- - '*'
+ - get
+ - list
+ - watch
+ - update
+ - patch
diff --git a/assets/state-mig-manager/0400_configmap.yaml b/assets/state-mig-manager/0400_configmap.yaml
index f81b66969..753aeb499 100644
--- a/assets/state-mig-manager/0400_configmap.yaml
+++ b/assets/state-mig-manager/0400_configmap.yaml
@@ -56,7 +56,7 @@ data:
# H100-80GB, H800-80GB, A100-80GB, A800-80GB, A100-40GB, A800-40GB
all-1g.10gb:
# H100-80GB, H800-80GB, A100-80GB, A800-80GB
- - device-filter: ["0x233010DE", "0x233110DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE"]
+ - device-filter: ["0x233010DE", "0x233110DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE", "0x232410DE"]
devices: all
mig-enabled: true
mig-devices:
@@ -201,20 +201,37 @@ data:
mig-devices:
"7g.96gb": 1
-
- # H100-96GB, GH200, H100 NVL, H800 NVL, H100-80GB, H800-80GB, A800-40GB, A800-80GB, A100-40GB, A100-80GB, A30-24GB, PG506-96GB
+ # GH200 144G HBM3e, H200-141GB, H200 NVL, H100-96GB, GH200, H100 NVL, H800 NVL, H100-80GB, H800-80GB, A800-40GB, A800-80GB, A100-40GB, A100-80GB, A30-24GB, PG506-96GB
all-balanced:
+ # GH200 144G HBM3e
+ - device-filter: ["0x234810DE"]
+ devices: all
+ mig-enabled: true
+ mig-devices:
+ "1g.18gb": 2
+ "2g.36gb": 1
+ "3g.72gb": 1
+
+ # H200 141GB, H200 NVL
+ - device-filter: ["0x233510DE", "0x233B10DE"]
+ devices: all
+ mig-enabled: true
+ mig-devices:
+ "1g.18gb": 2
+ "2g.35gb": 1
+ "3g.71gb": 1
+
# H100 NVL, H800 NVL
- device-filter: ["0x232110DE", "0x233A10DE"]
devices: all
mig-enabled: true
mig-devices:
- "1g.12gb": 1
+ "1g.12gb": 2
"2g.24gb": 1
"3g.47gb": 1
# H100-80GB, H800-80GB, A100-80GB, A800-80GB
- - device-filter: ["0x233010DE", "0x233110DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE"]
+ - device-filter: ["0x233010DE", "0x233110DE", "0x232210DE", "0x20B210DE", "0x20B510DE", "0x20F310DE", "0x20F510DE", "0x232410DE"]
devices: all
mig-enabled: true
mig-devices:
@@ -239,11 +256,86 @@ data:
"1g.6gb": 2
"2g.12gb": 1
- # H100-96GB, PG506-96GB, GH200
- - device-filter: ["0x234210DE", "0x233D10DE", "0x20B610DE"]
+ # H100-96GB, PG506-96GB, GH200, H20
+ - device-filter: ["0x234210DE", "0x233D10DE", "0x20B610DE", "0x232910DE"]
devices: all
mig-enabled: true
mig-devices:
"1g.12gb": 2
"2g.24gb": 1
"3g.48gb": 1
+
+ # H200-141GB, GH200 144G HBM3e
+ all-1g.18gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "1g.18gb": 7
+
+ all-1g.18gb.me:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "1g.18gb+me": 1
+
+ # H200-141GB
+ all-1g.35gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "1g.35gb": 4
+
+ all-2g.35gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "2g.35gb": 3
+
+ all-3g.71gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "3g.71gb": 2
+
+ all-4g.71gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "4g.71gb": 1
+
+ all-7g.141gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "7g.141gb": 1
+
+ # GH200 144G HBM3e
+ all-1g.36gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "1g.36gb": 4
+
+ all-2g.36gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "2g.36gb": 3
+
+ all-3g.72gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "3g.72gb": 2
+
+ all-4g.72gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "4g.72gb": 1
+
+ all-7g.144gb:
+ - devices: all
+ mig-enabled: true
+ mig-devices:
+ "7g.144gb": 1
diff --git a/assets/state-mig-manager/0420_configmap.yaml b/assets/state-mig-manager/0420_configmap.yaml
index 5c9e9f1ab..7fbfc0d78 100644
--- a/assets/state-mig-manager/0420_configmap.yaml
+++ b/assets/state-mig-manager/0420_configmap.yaml
@@ -9,34 +9,19 @@ data:
entrypoint.sh: |-
#!/bin/bash
- host_driver=""
- driver_root=""
- driver_root_ctr_path=""
- while true; do
- if [[ -f /run/nvidia/validations/host-driver-ready ]]; then
- host_driver=true
- driver_root="/"
- driver_root_ctr_path="/host"
- break
- elif [[ -f /run/nvidia/validations/driver-ready ]]; then
- host_driver=false
- driver_root="/run/nvidia/driver"
- driver_root_ctr_path="/run/nvidia/driver"
- break
- else
- echo "waiting for the driver validations to be ready..."
- sleep 5
- fi
+ until [[ -f /run/nvidia/validations/driver-ready ]]
+ do
+ echo "waiting for the driver validations to be ready..."
+ sleep 5
done
-
- export WITH_SHUTDOWN_HOST_GPU_CLIENTS=$host_driver
+
+ set -o allexport
+ cat /run/nvidia/validations/driver-ready
+ . /run/nvidia/validations/driver-ready
+
+ # manually export additional envs required by mig-manager
+ export WITH_SHUTDOWN_HOST_GPU_CLIENTS=$IS_HOST_DRIVER
echo "WITH_SHUTDOWN_HOST_GPU_CLIENTS=$WITH_SHUTDOWN_HOST_GPU_CLIENTS"
- export DRIVER_ROOT=$driver_root
- echo "DRIVER_ROOT=$DRIVER_ROOT"
-
- export DRIVER_ROOT_CTR_PATH=$driver_root_ctr_path
- echo "DRIVER_ROOT_CTR_PATH=$DRIVER_ROOT_CTR_PATH"
-
echo "Starting nvidia-mig-manager"
exec nvidia-mig-manager
diff --git a/assets/state-mig-manager/0600_daemonset.yaml b/assets/state-mig-manager/0600_daemonset.yaml
index 2aadec4d5..e8676b27b 100644
--- a/assets/state-mig-manager/0600_daemonset.yaml
+++ b/assets/state-mig-manager/0600_daemonset.yaml
@@ -32,8 +32,8 @@ spec:
securityContext:
privileged: true
volumeMounts:
- - name: run-nvidia
- mountPath: /run/nvidia
+ - name: run-nvidia-validations
+ mountPath: /run/nvidia/validations
mountPropagation: HostToContainer
containers:
- name: nvidia-mig-manager
@@ -62,6 +62,8 @@ spec:
readOnly: true
mountPath: /bin/entrypoint.sh
subPath: entrypoint.sh
+ - name: run-nvidia-validations
+ mountPath: /run/nvidia/validations
- mountPath: /sys
name: host-sys
- mountPath: /mig-parted-config
@@ -71,8 +73,8 @@ spec:
mountPropagation: HostToContainer
- mountPath: /gpu-clients
name: gpu-clients
- - name: run-nvidia
- mountPath: /run/nvidia
+ - name: driver-install-dir
+ mountPath: /driver-root
mountPropagation: HostToContainer
- name: cdi-root
mountPath: /var/run/cdi
@@ -88,10 +90,14 @@ spec:
- name: mig-parted-config
configMap:
name: "FILLED_BY_OPERATOR"
- - name: run-nvidia
+ - name: run-nvidia-validations
hostPath:
- path: "/run/nvidia"
- type: Directory
+ path: "/run/nvidia/validations"
+ type: DirectoryOrCreate
+ - name: driver-install-dir
+ hostPath:
+ path: "/run/nvidia/driver"
+ type: DirectoryOrCreate
- name: host-root
hostPath:
path: "/"
diff --git a/assets/state-mps-control-daemon/0200_role.yaml b/assets/state-mps-control-daemon/0200_role.yaml
index 808c51e7f..1152135fe 100644
--- a/assets/state-mps-control-daemon/0200_role.yaml
+++ b/assets/state-mps-control-daemon/0200_role.yaml
@@ -12,12 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - nodes
- verbs:
- - get
- - list
- - watch
-
diff --git a/assets/state-mps-control-daemon/0400_daemonset.yaml b/assets/state-mps-control-daemon/0400_daemonset.yaml
index 16fe81301..097ce8ca9 100644
--- a/assets/state-mps-control-daemon/0400_daemonset.yaml
+++ b/assets/state-mps-control-daemon/0400_daemonset.yaml
@@ -25,6 +25,7 @@ spec:
effect: NoSchedule
priorityClassName: system-node-critical
serviceAccountName: nvidia-device-plugin
+ hostPID: true
initContainers:
- image: "FILLED BY THE OPERATOR"
name: toolkit-validation
@@ -71,6 +72,9 @@ spec:
value: ""
- name: PROCESS_TO_SIGNAL
value: ""
+ volumeMounts:
+ - name: config
+ mountPath: /config
containers:
- image: "FILLED BY OPERATOR"
name: mps-control-daemon-ctr
@@ -92,6 +96,8 @@ spec:
mountPath: /dev/shm
- name: mps-root
mountPath: /mps
+ - name: config
+ mountPath: /config
- image: "FILLED BY THE OPERATOR"
name: config-manager
command: ["config-manager"]
@@ -120,6 +126,9 @@ spec:
value: "1" # SIGHUP
- name: PROCESS_TO_SIGNAL
value: "/usr/bin/mps-control-daemon"
+ volumeMounts:
+ - name: config
+ mountPath: /config
volumes:
- name: run-nvidia
hostPath:
@@ -132,3 +141,5 @@ spec:
- name: mps-shm
hostPath:
path: /run/nvidia/mps/shm
+ - name: config
+ emptyDir: {}
diff --git a/assets/state-node-status-exporter/0200_role.yaml b/assets/state-node-status-exporter/0200_role.yaml
index 52476b451..d74b46a94 100644
--- a/assets/state-node-status-exporter/0200_role.yaml
+++ b/assets/state-node-status-exporter/0200_role.yaml
@@ -16,7 +16,14 @@ rules:
- ""
resources:
- pods
- - nodes
verbs:
- get
- list
+- apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
+ - list
+ - watch
diff --git a/assets/state-node-status-exporter/0300_clusterrole.yaml b/assets/state-node-status-exporter/0300_clusterrole.yaml
index 245c84005..6f91fe237 100644
--- a/assets/state-node-status-exporter/0300_clusterrole.yaml
+++ b/assets/state-node-status-exporter/0300_clusterrole.yaml
@@ -8,4 +8,6 @@ rules:
resources:
- nodes
verbs:
- - '*'
+ - get
+ - list
+ - watch
diff --git a/assets/state-node-status-exporter/0700_daemonset.yaml b/assets/state-node-status-exporter/0700_daemonset.yaml
index 6ec4036bc..2d6d830ad 100644
--- a/assets/state-node-status-exporter/0700_daemonset.yaml
+++ b/assets/state-node-status-exporter/0700_daemonset.yaml
@@ -40,6 +40,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
ports:
- name: node-status
containerPort: 8000
diff --git a/assets/state-operator-validation/0200_role.yaml b/assets/state-operator-validation/0200_role.yaml
index ef07efc03..5c464f4e7 100644
--- a/assets/state-operator-validation/0200_role.yaml
+++ b/assets/state-operator-validation/0200_role.yaml
@@ -16,19 +16,19 @@ rules:
- ""
resources:
- pods
- - nodes
verbs:
- - '*'
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
verbs:
- - '*'
-- apiGroups:
- - nvidia.com
- resources:
- - clusterpolicies/finalizers
- verbs:
- - '*'
+ - get
+ - list
+ - watch
diff --git a/assets/state-operator-validation/0210_clusterrole.yaml b/assets/state-operator-validation/0210_clusterrole.yaml
index c7a13b95f..324fe47c7 100644
--- a/assets/state-operator-validation/0210_clusterrole.yaml
+++ b/assets/state-operator-validation/0210_clusterrole.yaml
@@ -8,10 +8,19 @@ rules:
resources:
- nodes
verbs:
- - '*'
+ - get
+ - list
+ - watch
- apiGroups:
- nvidia.com
resources:
- clusterpolicies/finalizers
+ - nvidiadrivers/finalizers
verbs:
- - '*'
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
diff --git a/assets/state-operator-validation/0500_daemonset.yaml b/assets/state-operator-validation/0500_daemonset.yaml
index e25f060bf..72a7d72a6 100644
--- a/assets/state-operator-validation/0500_daemonset.yaml
+++ b/assets/state-operator-validation/0500_daemonset.yaml
@@ -35,6 +35,10 @@ spec:
value: "true"
- name: COMPONENT
value: driver
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
securityContext:
privileged: true
seLinuxOptions:
@@ -44,7 +48,7 @@ spec:
mountPath: /host
readOnly: true
mountPropagation: HostToContainer
- - name: driver-install-path
+ - name: driver-install-dir
mountPath: /run/nvidia/driver
mountPropagation: HostToContainer
- name: run-nvidia-validations
@@ -160,7 +164,7 @@ spec:
hostPath:
path: /run/nvidia/validations
type: DirectoryOrCreate
- - name: driver-install-path
+ - name: driver-install-dir
hostPath:
path: /run/nvidia/driver
- name: host-root
diff --git a/assets/state-sandbox-device-plugin/0200_role.yaml b/assets/state-sandbox-device-plugin/0200_role.yaml
index 3e37487ce..2f5085e51 100644
--- a/assets/state-sandbox-device-plugin/0200_role.yaml
+++ b/assets/state-sandbox-device-plugin/0200_role.yaml
@@ -12,11 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - "get"
diff --git a/assets/state-sandbox-device-plugin/0500_daemonset.yaml b/assets/state-sandbox-device-plugin/0500_daemonset.yaml
index 6c9e2c24a..f99b6f075 100644
--- a/assets/state-sandbox-device-plugin/0500_daemonset.yaml
+++ b/assets/state-sandbox-device-plugin/0500_daemonset.yaml
@@ -67,6 +67,8 @@ spec:
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
+ - name: vfio
+ mountPath: /dev/vfio
volumes:
- name: device-plugin
hostPath:
@@ -75,3 +77,6 @@ spec:
hostPath:
path: /run/nvidia/validations
type: DirectoryOrCreate
+ - name: vfio
+ hostPath:
+ path: /dev/vfio
diff --git a/assets/state-sandbox-validation/0200_role.yaml b/assets/state-sandbox-validation/0200_role.yaml
index d27405101..79da66ff7 100644
--- a/assets/state-sandbox-validation/0200_role.yaml
+++ b/assets/state-sandbox-validation/0200_role.yaml
@@ -12,9 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - nodes
- verbs:
- - get
diff --git a/assets/state-vfio-manager/0400_configmap.yaml b/assets/state-vfio-manager/0400_configmap.yaml
index bf333bb92..85df8f8ca 100644
--- a/assets/state-vfio-manager/0400_configmap.yaml
+++ b/assets/state-vfio-manager/0400_configmap.yaml
@@ -94,6 +94,12 @@ data:
echo "unbinding device $gpu"
unbind_from_driver $gpu
+ #for graphics mode, we need to unbind the auxiliary device as well
+ aux_dev=$(get_graphics_aux_dev "$gpu")
+ if [ "$aux_dev" != "NONE" ]; then
+ echo "gpu $gpu is in graphics mode aux_dev $aux_dev"
+ unbind_from_driver "$aux_dev"
+ fi
}
unbind_all() {
@@ -106,13 +112,9 @@ data:
done
}
- bind_device() {
+ bind_pci_device() {
local gpu=$1
- if ! is_nvidia_gpu_device $gpu; then
- return 0
- fi
-
if ! is_bound_to_vfio $gpu; then
unbind_from_other_driver $gpu
echo "binding device $gpu"
@@ -123,6 +125,48 @@ data:
fi
}
+ get_graphics_aux_dev() {
+ local gpu=$1
+ device_class_file=$(readlink -f "/sys/bus/pci/devices/$gpu/class")
+ device_class=$(cat "$device_class_file")
+ if [ "$device_class" != "0x030000" ]; then
+ echo "NONE"
+ return
+ fi
+
+ if ls "/sys/bus/pci/devices/$gpu" | grep consumer >& /dev/null; then
+ aux_dev=$(ls "/sys/bus/pci/devices/$gpu" | grep consumer | awk -Fconsumer:pci: '{print $2}')
+ if [ "$aux_dev" == "" ]; then
+ echo "NONE"
+ return
+ fi
+
+ if ls "/sys/bus/pci/devices/$aux_dev/" >& /dev/null; then
+ echo "$aux_dev"
+ return
+ fi
+ fi
+
+ echo "NONE"
+ }
+
+ bind_device() {
+ local gpu=$1
+
+ if ! is_nvidia_gpu_device $gpu; then
+ echo "device $gpu is not a gpu!"
+ return 0
+ fi
+
+ bind_pci_device "$gpu"
+ #for graphics mode, we need to bind the auxiliary device as well
+ aux_dev=$(get_graphics_aux_dev "$gpu")
+ if [ "$aux_dev" != "NONE" ]; then
+ echo "gpu $gpu is in graphics mode aux_dev $aux_dev"
+ bind_pci_device "$aux_dev"
+ fi
+ }
+
bind_all() {
for dev in /sys/bus/pci/devices/*; do
read vendor < $dev/vendor
diff --git a/assets/state-vgpu-device-manager/0200_role.yaml b/assets/state-vgpu-device-manager/0200_role.yaml
new file mode 100644
index 000000000..9b420d6f4
--- /dev/null
+++ b/assets/state-vgpu-device-manager/0200_role.yaml
@@ -0,0 +1,22 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+ name: nvidia-vgpu-device-manager
+ namespace: "FILLED BY THE OPERATOR"
+rules:
+- apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ resourceNames:
+ - privileged
+- apiGroups:
+ - ""
+ resources:
+ - pods
+ verbs:
+ - get
+ - list
+ - watch
diff --git a/assets/state-vgpu-device-manager/0210_clusterrole.yaml b/assets/state-vgpu-device-manager/0210_clusterrole.yaml
index e3998da32..3d61f324b 100644
--- a/assets/state-vgpu-device-manager/0210_clusterrole.yaml
+++ b/assets/state-vgpu-device-manager/0210_clusterrole.yaml
@@ -14,8 +14,6 @@ rules:
- ""
resources:
- nodes
- - pods
- - pods/eviction
verbs:
- get
- list
diff --git a/assets/state-vgpu-device-manager/0300_rolebinding.yaml b/assets/state-vgpu-device-manager/0300_rolebinding.yaml
new file mode 100644
index 000000000..f50115a08
--- /dev/null
+++ b/assets/state-vgpu-device-manager/0300_rolebinding.yaml
@@ -0,0 +1,13 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+ name: nvidia-vgpu-device-manager
+ namespace: "FILLED BY THE OPERATOR"
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: Role
+ name: nvidia-vgpu-device-manager
+subjects:
+- kind: ServiceAccount
+ name: nvidia-vgpu-device-manager
+ namespace: "FILLED BY THE OPERATOR"
diff --git a/assets/state-vgpu-device-manager/0500_configmap.yaml b/assets/state-vgpu-device-manager/0500_configmap.yaml
index 684bbef6b..8490bbece 100644
--- a/assets/state-vgpu-device-manager/0500_configmap.yaml
+++ b/assets/state-vgpu-device-manager/0500_configmap.yaml
@@ -39,46 +39,6 @@ data:
- devices: all
vgpu-devices:
A2-16Q: 1
- A10M-1Q:
- - devices: all
- vgpu-devices:
- A10M-1Q: 20
- A10M-2Q:
- - devices: all
- vgpu-devices:
- A10M-2Q: 10
- A10M-4C:
- - devices: all
- vgpu-devices:
- A10M-4C: 5
- A10M-4Q:
- - devices: all
- vgpu-devices:
- A10M-4Q: 5
- A10M-5C:
- - devices: all
- vgpu-devices:
- A10M-5C: 4
- A10M-5Q:
- - devices: all
- vgpu-devices:
- A10M-5Q: 4
- A10M-10C:
- - devices: all
- vgpu-devices:
- A10M-10C: 2
- A10M-10Q:
- - devices: all
- vgpu-devices:
- A10M-10Q: 2
- A10M-20C:
- - devices: all
- vgpu-devices:
- A10M-20C: 1
- A10M-20Q:
- - devices: all
- vgpu-devices:
- A10M-20Q: 1
A10-1Q:
- devices: all
vgpu-devices:
@@ -175,6 +135,10 @@ data:
- devices: all
vgpu-devices:
A30-2-12C: 2
+ A30-2-12CME:
+ - devices: all
+ vgpu-devices:
+ A30-2-12CME: 1
A30-4C:
- devices: all
vgpu-devices:
@@ -275,6 +239,10 @@ data:
- devices: all
vgpu-devices:
A100D-1-10CME: 1
+ A100D-1-20C:
+ - devices: all
+ vgpu-devices:
+ A100D-1-20C: 4
A100D-2-20C:
- devices: all
vgpu-devices:
@@ -327,6 +295,10 @@ data:
- devices: all
vgpu-devices:
A100DX-1-10CME: 1
+ A100DX-1-20C:
+ - devices: all
+ vgpu-devices:
+ A100DX-1-20C: 4
A100DX-2-20C:
- devices: all
vgpu-devices:
@@ -379,6 +351,10 @@ data:
- devices: all
vgpu-devices:
A100X-1-5CME: 1
+ A100X-1-10C:
+ - devices: all
+ vgpu-devices:
+ A100X-1-10C: 4
A100X-2-10C:
- devices: all
vgpu-devices:
@@ -427,6 +403,10 @@ data:
- devices: all
vgpu-devices:
A100-1-5CME: 1
+ A100-1-10C:
+ - devices: all
+ vgpu-devices:
+ A100-1-10C: 4
A100-2-10C:
- devices: all
vgpu-devices:
@@ -475,6 +455,10 @@ data:
- devices: all
vgpu-devices:
A800D-1-10CME: 1
+ A800D-1-20C:
+ - devices: all
+ vgpu-devices:
+ A800D-1-20C: 4
A800D-2-20C:
- devices: all
vgpu-devices:
@@ -527,6 +511,10 @@ data:
- devices: all
vgpu-devices:
A800DX-1-10CME: 1
+ A800DX-1-20C:
+ - devices: all
+ vgpu-devices:
+ A800DX-1-20C: 4
A800DX-2-20C:
- devices: all
vgpu-devices:
@@ -571,6 +559,374 @@ data:
- devices: all
vgpu-devices:
A800DX-80C: 1
+ A800-1-5C:
+ - devices: all
+ vgpu-devices:
+ A800-1-5C: 7
+ A800-1-5CME:
+ - devices: all
+ vgpu-devices:
+ A800-1-5CME: 1
+ A800-1-10C:
+ - devices: all
+ vgpu-devices:
+ A800-1-10C: 4
+ A800-2-10C:
+ - devices: all
+ vgpu-devices:
+ A800-2-10C: 3
+ A800-3-20C:
+ - devices: all
+ vgpu-devices:
+ A800-3-20C: 2
+ A800-4C:
+ - devices: all
+ vgpu-devices:
+ A800-4C: 10
+ A800-4-20C:
+ - devices: all
+ vgpu-devices:
+ A800-4-20C: 1
+ A800-5C:
+ - devices: all
+ vgpu-devices:
+ A800-5C: 8
+ A800-7-40C:
+ - devices: all
+ vgpu-devices:
+ A800-7-40C: 1
+ A800-8C:
+ - devices: all
+ vgpu-devices:
+ A800-8C: 5
+ A800-10C:
+ - devices: all
+ vgpu-devices:
+ A800-10C: 4
+ A800-20C:
+ - devices: all
+ vgpu-devices:
+ A800-20C: 2
+ A800-40C:
+ - devices: all
+ vgpu-devices:
+ A800-40C: 1
+ GH200-1-12C:
+ - devices: all
+ vgpu-devices:
+ GH200-1-12C: 7
+ GH200-1-12CME:
+ - devices: all
+ vgpu-devices:
+ GH200-1-12CME: 1
+ GH200-1-24C:
+ - devices: all
+ vgpu-devices:
+ GH200-1-24C: 4
+ GH200-2-24C:
+ - devices: all
+ vgpu-devices:
+ GH200-2-24C: 3
+ GH200-3-48C:
+ - devices: all
+ vgpu-devices:
+ GH200-3-48C: 2
+ GH200-4-48C:
+ - devices: all
+ vgpu-devices:
+ GH200-4-48C: 1
+ GH200-7-96C:
+ - devices: all
+ vgpu-devices:
+ GH200-7-96C: 1
+ GH200-96C:
+ - devices: all
+ vgpu-devices:
+ GH200-96C: 1
+ H20-1-12C:
+ - devices: all
+ vgpu-devices:
+ H20-1-12C: 7
+ H20-1-12CME:
+ - devices: all
+ vgpu-devices:
+ H20-1-12CME: 1
+ H20-1-24C:
+ - devices: all
+ vgpu-devices:
+ H20-1-24C: 4
+ H20-2-24C:
+ - devices: all
+ vgpu-devices:
+ H20-2-24C: 3
+ H20-3-48C:
+ - devices: all
+ vgpu-devices:
+ H20-3-48C: 2
+ H20-4C:
+ - devices: all
+ vgpu-devices:
+ H20-4C: 24
+ H20-4-48C:
+ - devices: all
+ vgpu-devices:
+ H20-4-48C: 1
+ H20-6C:
+ - devices: all
+ vgpu-devices:
+ H20-6C: 16
+ H20-7-96C:
+ - devices: all
+ vgpu-devices:
+ H20-7-96C: 1
+ H20-8C:
+ - devices: all
+ vgpu-devices:
+ H20-8C: 12
+ H20-12C:
+ - devices: all
+ vgpu-devices:
+ H20-12C: 8
+ H20-16C:
+ - devices: all
+ vgpu-devices:
+ H20-16C: 6
+ H20-24C:
+ - devices: all
+ vgpu-devices:
+ H20-24C: 4
+ H20-32C:
+ - devices: all
+ vgpu-devices:
+ H20-32C: 3
+ H20-48C:
+ - devices: all
+ vgpu-devices:
+ H20-48C: 2
+ H20-96C:
+ - devices: all
+ vgpu-devices:
+ H20-96C: 1
+ H100L-1-12C:
+ - devices: all
+ vgpu-devices:
+ H100L-1-12C: 7
+ H100L-1-12CME:
+ - devices: all
+ vgpu-devices:
+ H100L-1-12CME: 1
+ H100L-1-24C:
+ - devices: all
+ vgpu-devices:
+ H100L-1-24C: 4
+ H100L-2-24C:
+ - devices: all
+ vgpu-devices:
+ H100L-2-24C: 3
+ H100L-3-47C:
+ - devices: all
+ vgpu-devices:
+ H100L-3-47C: 2
+ H100L-4C:
+ - devices: all
+ vgpu-devices:
+ H100L-4C: 23
+ H100L-4-47C:
+ - devices: all
+ vgpu-devices:
+ H100L-4-47C: 1
+ H100L-6C:
+ - devices: all
+ vgpu-devices:
+ H100L-6C: 15
+ H100L-7-94C:
+ - devices: all
+ vgpu-devices:
+ H100L-7-94C: 1
+ H100L-11C:
+ - devices: all
+ vgpu-devices:
+ H100L-11C: 8
+ H100L-15C:
+ - devices: all
+ vgpu-devices:
+ H100L-15C: 6
+ H100L-23C:
+ - devices: all
+ vgpu-devices:
+ H100L-23C: 4
+ H100L-47C:
+ - devices: all
+ vgpu-devices:
+ H100L-47C: 2
+ H100L-94C:
+ - devices: all
+ vgpu-devices:
+ H100L-94C: 1
+ H100XL-1-12C:
+ - devices: all
+ vgpu-devices:
+ H100XL-1-12C: 7
+ H100XL-1-12CME:
+ - devices: all
+ vgpu-devices:
+ H100XL-1-12CME: 1
+ H100XL-1-24C:
+ - devices: all
+ vgpu-devices:
+ H100XL-1-24C: 4
+ H100XL-2-24C:
+ - devices: all
+ vgpu-devices:
+ H100XL-2-24C: 3
+ H100XL-3-47C:
+ - devices: all
+ vgpu-devices:
+ H100XL-3-47C: 2
+ H100XL-4C:
+ - devices: all
+ vgpu-devices:
+ H100XL-4C: 23
+ H100XL-4-47C:
+ - devices: all
+ vgpu-devices:
+ H100XL-4-47C: 1
+ H100XL-6C:
+ - devices: all
+ vgpu-devices:
+ H100XL-6C: 15
+ H100XL-7-94C:
+ - devices: all
+ vgpu-devices:
+ H100XL-7-94C: 1
+ H100XL-11C:
+ - devices: all
+ vgpu-devices:
+ H100XL-11C: 8
+ H100XL-15C:
+ - devices: all
+ vgpu-devices:
+ H100XL-15C: 6
+ H100XL-23C:
+ - devices: all
+ vgpu-devices:
+ H100XL-23C: 4
+ H100XL-47C:
+ - devices: all
+ vgpu-devices:
+ H100XL-47C: 2
+ H100XL-94C:
+ - devices: all
+ vgpu-devices:
+ H100XL-94C: 1
+ H100XM-1-10C:
+ - devices: all
+ vgpu-devices:
+ H100XM-1-10C: 7
+ H100XM-1-10CME:
+ - devices: all
+ vgpu-devices:
+ H100XM-1-10CME: 1
+ H100XM-1-20C:
+ - devices: all
+ vgpu-devices:
+ H100XM-1-20C: 4
+ H100XM-2-20C:
+ - devices: all
+ vgpu-devices:
+ H100XM-2-20C: 3
+ H100XM-3-40C:
+ - devices: all
+ vgpu-devices:
+ H100XM-3-40C: 2
+ H100XM-4C:
+ - devices: all
+ vgpu-devices:
+ H100XM-4C: 20
+ H100XM-4-40C:
+ - devices: all
+ vgpu-devices:
+ H100XM-4-40C: 1
+ H100XM-5C:
+ - devices: all
+ vgpu-devices:
+ H100XM-5C: 16
+ H100XM-7-80C:
+ - devices: all
+ vgpu-devices:
+ H100XM-7-80C: 1
+ H100XM-8C:
+ - devices: all
+ vgpu-devices:
+ H100XM-8C: 10
+ H100XM-10C:
+ - devices: all
+ vgpu-devices:
+ H100XM-10C: 8
+ H100XM-16C:
+ - devices: all
+ vgpu-devices:
+ H100XM-16C: 5
+ H100XM-20C:
+ - devices: all
+ vgpu-devices:
+ H100XM-20C: 4
+ H100XM-40C:
+ - devices: all
+ vgpu-devices:
+ H100XM-40C: 2
+ H100XM-80C:
+ - devices: all
+ vgpu-devices:
+ H100XM-80C: 1
+ H100XS-1-8C:
+ - devices: all
+ vgpu-devices:
+ H100XS-1-8C: 7
+ H100XS-1-8CME:
+ - devices: all
+ vgpu-devices:
+ H100XS-1-8CME: 1
+ H100XS-1-16C:
+ - devices: all
+ vgpu-devices:
+ H100XS-1-16C: 4
+ H100XS-2-16C:
+ - devices: all
+ vgpu-devices:
+ H100XS-2-16C: 3
+ H100XS-3-32C:
+ - devices: all
+ vgpu-devices:
+ H100XS-3-32C: 2
+ H100XS-4C:
+ - devices: all
+ vgpu-devices:
+ H100XS-4C: 16
+ H100XS-4-32C:
+ - devices: all
+ vgpu-devices:
+ H100XS-4-32C: 1
+ H100XS-7-64C:
+ - devices: all
+ vgpu-devices:
+ H100XS-7-64C: 1
+ H100XS-8C:
+ - devices: all
+ vgpu-devices:
+ H100XS-8C: 8
+ H100XS-16C:
+ - devices: all
+ vgpu-devices:
+ H100XS-16C: 4
+ H100XS-32C:
+ - devices: all
+ vgpu-devices:
+ H100XS-32C: 2
+ H100XS-64C:
+ - devices: all
+ vgpu-devices:
+ H100XS-64C: 1
H100-1-10C:
- devices: all
vgpu-devices:
@@ -579,6 +935,10 @@ data:
- devices: all
vgpu-devices:
H100-1-10CME: 1
+ H100-1-20C:
+ - devices: all
+ vgpu-devices:
+ H100-1-20C: 4
H100-2-20C:
- devices: all
vgpu-devices:
@@ -627,6 +987,122 @@ data:
- devices: all
vgpu-devices:
H100-80C: 1
+ H800L-1-12C:
+ - devices: all
+ vgpu-devices:
+ H800L-1-12C: 7
+ H800L-1-12CME:
+ - devices: all
+ vgpu-devices:
+ H800L-1-12CME: 1
+ H800L-1-24C:
+ - devices: all
+ vgpu-devices:
+ H800L-1-24C: 4
+ H800L-2-24C:
+ - devices: all
+ vgpu-devices:
+ H800L-2-24C: 3
+ H800L-3-47C:
+ - devices: all
+ vgpu-devices:
+ H800L-3-47C: 2
+ H800L-4C:
+ - devices: all
+ vgpu-devices:
+ H800L-4C: 23
+ H800L-4-47C:
+ - devices: all
+ vgpu-devices:
+ H800L-4-47C: 1
+ H800L-6C:
+ - devices: all
+ vgpu-devices:
+ H800L-6C: 15
+ H800L-7-94C:
+ - devices: all
+ vgpu-devices:
+ H800L-7-94C: 1
+ H800L-11C:
+ - devices: all
+ vgpu-devices:
+ H800L-11C: 8
+ H800L-15C:
+ - devices: all
+ vgpu-devices:
+ H800L-15C: 6
+ H800L-23C:
+ - devices: all
+ vgpu-devices:
+ H800L-23C: 4
+ H800L-47C:
+ - devices: all
+ vgpu-devices:
+ H800L-47C: 2
+ H800L-94C:
+ - devices: all
+ vgpu-devices:
+ H800L-94C: 1
+ H800XM-1-10C:
+ - devices: all
+ vgpu-devices:
+ H800XM-1-10C: 7
+ H800XM-1-10CME:
+ - devices: all
+ vgpu-devices:
+ H800XM-1-10CME: 1
+ H800XM-1-20C:
+ - devices: all
+ vgpu-devices:
+ H800XM-1-20C: 4
+ H800XM-2-20C:
+ - devices: all
+ vgpu-devices:
+ H800XM-2-20C: 3
+ H800XM-3-40C:
+ - devices: all
+ vgpu-devices:
+ H800XM-3-40C: 2
+ H800XM-4C:
+ - devices: all
+ vgpu-devices:
+ H800XM-4C: 20
+ H800XM-4-40C:
+ - devices: all
+ vgpu-devices:
+ H800XM-4-40C: 1
+ H800XM-5C:
+ - devices: all
+ vgpu-devices:
+ H800XM-5C: 16
+ H800XM-7-80C:
+ - devices: all
+ vgpu-devices:
+ H800XM-7-80C: 1
+ H800XM-8C:
+ - devices: all
+ vgpu-devices:
+ H800XM-8C: 10
+ H800XM-10C:
+ - devices: all
+ vgpu-devices:
+ H800XM-10C: 8
+ H800XM-16C:
+ - devices: all
+ vgpu-devices:
+ H800XM-16C: 5
+ H800XM-20C:
+ - devices: all
+ vgpu-devices:
+ H800XM-20C: 4
+ H800XM-40C:
+ - devices: all
+ vgpu-devices:
+ H800XM-40C: 2
+ H800XM-80C:
+ - devices: all
+ vgpu-devices:
+ H800XM-80C: 1
H800-1-10C:
- devices: all
vgpu-devices:
@@ -635,6 +1111,10 @@ data:
- devices: all
vgpu-devices:
H800-1-10CME: 1
+ H800-1-20C:
+ - devices: all
+ vgpu-devices:
+ H800-1-20C: 4
H800-2-20C:
- devices: all
vgpu-devices:
@@ -683,6 +1163,58 @@ data:
- devices: all
vgpu-devices:
H800-80C: 1
+ L2-1Q:
+ - devices: all
+ vgpu-devices:
+ L2-1Q: 24
+ L2-2Q:
+ - devices: all
+ vgpu-devices:
+ L2-2Q: 12
+ L2-3Q:
+ - devices: all
+ vgpu-devices:
+ L2-3Q: 8
+ L2-4C:
+ - devices: all
+ vgpu-devices:
+ L2-4C: 6
+ L2-4Q:
+ - devices: all
+ vgpu-devices:
+ L2-4Q: 6
+ L2-6C:
+ - devices: all
+ vgpu-devices:
+ L2-6C: 4
+ L2-6Q:
+ - devices: all
+ vgpu-devices:
+ L2-6Q: 4
+ L2-8C:
+ - devices: all
+ vgpu-devices:
+ L2-8C: 3
+ L2-8Q:
+ - devices: all
+ vgpu-devices:
+ L2-8Q: 3
+ L2-12C:
+ - devices: all
+ vgpu-devices:
+ L2-12C: 2
+ L2-12Q:
+ - devices: all
+ vgpu-devices:
+ L2-12Q: 2
+ L2-24C:
+ - devices: all
+ vgpu-devices:
+ L2-24C: 1
+ L2-24Q:
+ - devices: all
+ vgpu-devices:
+ L2-24Q: 1
L4-1Q:
- devices: all
vgpu-devices:
@@ -735,338 +1267,318 @@ data:
- devices: all
vgpu-devices:
L4-24Q: 1
- L40-1Q:
+ L20-1Q:
- devices: all
vgpu-devices:
- L40-1Q: 32
- L40-2Q:
+ L20-1Q: 32
+ L20-2Q:
- devices: all
vgpu-devices:
- L40-2Q: 24
- L40-3Q:
+ L20-2Q: 24
+ L20-3Q:
- devices: all
vgpu-devices:
- L40-3Q: 16
- L40-4C:
+ L20-3Q: 16
+ L20-4C:
- devices: all
vgpu-devices:
- L40-4C: 12
- L40-4Q:
+ L20-4C: 12
+ L20-4Q:
- devices: all
vgpu-devices:
- L40-4Q: 12
- L40-6C:
+ L20-4Q: 12
+ L20-6C:
- devices: all
vgpu-devices:
- L40-6C: 8
- L40-6Q:
+ L20-6C: 8
+ L20-6Q:
- devices: all
vgpu-devices:
- L40-6Q: 8
- L40-8C:
+ L20-6Q: 8
+ L20-8C:
- devices: all
vgpu-devices:
- L40-8C: 6
- L40-8Q:
+ L20-8C: 6
+ L20-8Q:
- devices: all
vgpu-devices:
- L40-8Q: 6
- L40-12C:
+ L20-8Q: 6
+ L20-12C:
- devices: all
vgpu-devices:
- L40-12C: 4
- L40-12Q:
+ L20-12C: 4
+ L20-12Q:
- devices: all
vgpu-devices:
- L40-12Q: 4
- L40-16C:
+ L20-12Q: 4
+ L20-16C:
- devices: all
vgpu-devices:
- L40-16C: 3
- L40-16Q:
+ L20-16C: 3
+ L20-16Q:
- devices: all
vgpu-devices:
- L40-16Q: 3
- L40-24C:
+ L20-16Q: 3
+ L20-24C:
- devices: all
vgpu-devices:
- L40-24C: 2
- L40-24Q:
+ L20-24C: 2
+ L20-24Q:
- devices: all
vgpu-devices:
- L40-24Q: 2
- L40-48C:
+ L20-24Q: 2
+ L20-48C:
- devices: all
vgpu-devices:
- L40-48C: 1
- L40-48Q:
+ L20-48C: 1
+ L20-48Q:
- devices: all
vgpu-devices:
- L40-48Q: 1
- M6-0Q:
+ L20-48Q: 1
+ L40S-1Q:
- devices: all
vgpu-devices:
- M6-0Q: 16
- M6-1Q:
+ L40S-1Q: 32
+ L40S-2Q:
- devices: all
vgpu-devices:
- M6-1Q: 8
- M6-2Q:
+ L40S-2Q: 24
+ L40S-3Q:
- devices: all
vgpu-devices:
- M6-2Q: 4
- M6-4Q:
+ L40S-3Q: 16
+ L40S-4C:
- devices: all
vgpu-devices:
- M6-4Q: 2
- M6-8Q:
+ L40S-4C: 12
+ L40S-4Q:
- devices: all
vgpu-devices:
- M6-8Q: 1
- M10-0Q:
+ L40S-4Q: 12
+ L40S-6C:
- devices: all
vgpu-devices:
- M10-0Q: 16
- M10-1Q:
+ L40S-6C: 8
+ L40S-6Q:
- devices: all
vgpu-devices:
- M10-1Q: 8
- M10-2Q:
+ L40S-6Q: 8
+ L40S-8C:
- devices: all
vgpu-devices:
- M10-2Q: 4
- M10-4Q:
+ L40S-8C: 6
+ L40S-8Q:
- devices: all
vgpu-devices:
- M10-4Q: 2
- M10-8Q:
+ L40S-8Q: 6
+ L40S-12C:
- devices: all
vgpu-devices:
- M10-8Q: 1
- M60-0Q:
+ L40S-12C: 4
+ L40S-12Q:
- devices: all
vgpu-devices:
- M60-0Q: 16
- M60-1Q:
+ L40S-12Q: 4
+ L40S-16C:
- devices: all
vgpu-devices:
- M60-1Q: 8
- M60-2Q:
+ L40S-16C: 3
+ L40S-16Q:
- devices: all
vgpu-devices:
- M60-2Q: 4
- M60-4Q:
+ L40S-16Q: 3
+ L40S-24C:
- devices: all
vgpu-devices:
- M60-4Q: 2
- M60-8Q:
+ L40S-24C: 2
+ L40S-24Q:
- devices: all
vgpu-devices:
- M60-8Q: 1
- P4-1Q:
+ L40S-24Q: 2
+ L40S-48C:
- devices: all
vgpu-devices:
- P4-1Q: 8
- P4-2Q:
+ L40S-48C: 1
+ L40S-48Q:
- devices: all
vgpu-devices:
- P4-2Q: 4
- P4-4C:
- - devices: all
- vgpu-devices:
- P4-4C: 2
- P4-4Q:
- - devices: all
- vgpu-devices:
- P4-4Q: 2
- P4-8C:
- - devices: all
- vgpu-devices:
- P4-8C: 1
- P4-8Q:
- - devices: all
- vgpu-devices:
- P4-8Q: 1
- P6-1Q:
- - devices: all
- vgpu-devices:
- P6-1Q: 16
- P6-2Q:
+ L40S-48Q: 1
+ L40-1Q:
- devices: all
vgpu-devices:
- P6-2Q: 8
- P6-4C:
+ L40-1Q: 32
+ L40-2Q:
- devices: all
vgpu-devices:
- P6-4C: 4
- P6-4Q:
+ L40-2Q: 24
+ L40-3Q:
- devices: all
vgpu-devices:
- P6-4Q: 4
- P6-8C:
+ L40-3Q: 16
+ L40-4C:
- devices: all
vgpu-devices:
- P6-8C: 2
- P6-8Q:
+ L40-4C: 12
+ L40-4Q:
- devices: all
vgpu-devices:
- P6-8Q: 2
- P6-16C:
+ L40-4Q: 12
+ L40-6C:
- devices: all
vgpu-devices:
- P6-16C: 1
- P6-16Q:
+ L40-6C: 8
+ L40-6Q:
- devices: all
vgpu-devices:
- P6-16Q: 1
- P40-1Q:
+ L40-6Q: 8
+ L40-8C:
- devices: all
vgpu-devices:
- P40-1Q: 24
- P40-2Q:
+ L40-8C: 6
+ L40-8Q:
- devices: all
vgpu-devices:
- P40-2Q: 12
- P40-3Q:
+ L40-8Q: 6
+ L40-12C:
- devices: all
vgpu-devices:
- P40-3Q: 8
- P40-4C:
+ L40-12C: 4
+ L40-12Q:
- devices: all
vgpu-devices:
- P40-4C: 6
- P40-4Q:
+ L40-12Q: 4
+ L40-16C:
- devices: all
vgpu-devices:
- P40-4Q: 6
- P40-6C:
+ L40-16C: 3
+ L40-16Q:
- devices: all
vgpu-devices:
- P40-6C: 4
- P40-6Q:
+ L40-16Q: 3
+ L40-24C:
- devices: all
vgpu-devices:
- P40-6Q: 4
- P40-8C:
+ L40-24C: 2
+ L40-24Q:
- devices: all
vgpu-devices:
- P40-8C: 3
- P40-8Q:
+ L40-24Q: 2
+ L40-48C:
- devices: all
vgpu-devices:
- P40-8Q: 3
- P40-12C:
+ L40-48C: 1
+ L40-48Q:
- devices: all
vgpu-devices:
- P40-12C: 2
- P40-12Q:
+ L40-48Q: 1
+ RTX5000-Ada-1Q:
- devices: all
vgpu-devices:
- P40-12Q: 2
- P40-24C:
+ RTX5000-Ada-1Q: 32
+ RTX5000-Ada-2Q:
- devices: all
vgpu-devices:
- P40-24C: 1
- P40-24Q:
+ RTX5000-Ada-2Q: 16
+ RTX5000-Ada-4C:
- devices: all
vgpu-devices:
- P40-24Q: 1
- P100C-1Q:
+ RTX5000-Ada-4C: 8
+ RTX5000-Ada-4Q:
- devices: all
vgpu-devices:
- P100C-1Q: 12
- P100C-2Q:
+ RTX5000-Ada-4Q: 8
+ RTX5000-Ada-8C:
- devices: all
vgpu-devices:
- P100C-2Q: 6
- P100C-4C:
+ RTX5000-Ada-8C: 4
+ RTX5000-Ada-8Q:
- devices: all
vgpu-devices:
- P100C-4C: 3
- P100C-4Q:
+ RTX5000-Ada-8Q: 4
+ RTX5000-Ada-16C:
- devices: all
vgpu-devices:
- P100C-4Q: 3
- P100C-6C:
+ RTX5000-Ada-16C: 2
+ RTX5000-Ada-16Q:
- devices: all
vgpu-devices:
- P100C-6C: 2
- P100C-6Q:
+ RTX5000-Ada-16Q: 2
+ RTX5000-Ada-32C:
- devices: all
vgpu-devices:
- P100C-6Q: 2
- P100C-12C:
+ RTX5000-Ada-32C: 1
+ RTX5000-Ada-32Q:
- devices: all
vgpu-devices:
- P100C-12C: 1
- P100C-12Q:
+ RTX5000-Ada-32Q: 1
+ RTX5880-Ada-1Q:
- devices: all
vgpu-devices:
- P100C-12Q: 1
- P100X-1Q:
+ RTX5880-Ada-1Q: 32
+ RTX5880-Ada-2Q:
- devices: all
vgpu-devices:
- P100X-1Q: 16
- P100X-2Q:
+ RTX5880-Ada-2Q: 24
+ RTX5880-Ada-3Q:
- devices: all
vgpu-devices:
- P100X-2Q: 8
- P100X-4C:
+ RTX5880-Ada-3Q: 16
+ RTX5880-Ada-4C:
- devices: all
vgpu-devices:
- P100X-4C: 4
- P100X-4Q:
+ RTX5880-Ada-4C: 12
+ RTX5880-Ada-4Q:
- devices: all
vgpu-devices:
- P100X-4Q: 4
- P100X-8C:
+ RTX5880-Ada-4Q: 12
+ RTX5880-Ada-6C:
- devices: all
vgpu-devices:
- P100X-8C: 2
- P100X-8Q:
+ RTX5880-Ada-6C: 8
+ RTX5880-Ada-6Q:
- devices: all
vgpu-devices:
- P100X-8Q: 2
- P100X-16C:
+ RTX5880-Ada-6Q: 8
+ RTX5880-Ada-8C:
- devices: all
vgpu-devices:
- P100X-16C: 1
- P100X-16Q:
+ RTX5880-Ada-8C: 6
+ RTX5880-Ada-8Q:
- devices: all
vgpu-devices:
- P100X-16Q: 1
- P100-1Q:
+ RTX5880-Ada-8Q: 6
+ RTX5880-Ada-12C:
- devices: all
vgpu-devices:
- P100-1Q: 16
- P100-2Q:
+ RTX5880-Ada-12C: 4
+ RTX5880-Ada-12Q:
- devices: all
vgpu-devices:
- P100-2Q: 8
- P100-4C:
+ RTX5880-Ada-12Q: 4
+ RTX5880-Ada-16C:
- devices: all
vgpu-devices:
- P100-4C: 4
- P100-4Q:
+ RTX5880-Ada-16C: 3
+ RTX5880-Ada-16Q:
- devices: all
vgpu-devices:
- P100-4Q: 4
- P100-8C:
+ RTX5880-Ada-16Q: 3
+ RTX5880-Ada-24C:
- devices: all
vgpu-devices:
- P100-8C: 2
- P100-8Q:
+ RTX5880-Ada-24C: 2
+ RTX5880-Ada-24Q:
- devices: all
vgpu-devices:
- P100-8Q: 2
- P100-16C:
+ RTX5880-Ada-24Q: 2
+ RTX5880-Ada-48C:
- devices: all
vgpu-devices:
- P100-16C: 1
- P100-16Q:
+ RTX5880-Ada-48C: 1
+ RTX5880-Ada-48Q:
- devices: all
vgpu-devices:
- P100-16Q: 1
+ RTX5880-Ada-48Q: 1
RTX6000P-1Q:
- devices: all
vgpu-devices:
@@ -1119,58 +1631,74 @@ data:
- devices: all
vgpu-devices:
RTX6000P-24Q: 1
- RTX6000-1Q:
+ RTX6000-Ada-1Q:
+ - devices: all
+ vgpu-devices:
+ RTX6000-Ada-1Q: 32
+ RTX6000-Ada-2Q:
- devices: all
vgpu-devices:
- RTX6000-1Q: 24
- RTX6000-2Q:
+ RTX6000-Ada-2Q: 24
+ RTX6000-Ada-3Q:
- devices: all
vgpu-devices:
- RTX6000-2Q: 12
- RTX6000-3Q:
+ RTX6000-Ada-3Q: 16
+ RTX6000-Ada-4C:
- devices: all
vgpu-devices:
- RTX6000-3Q: 8
- RTX6000-4C:
+ RTX6000-Ada-4C: 12
+ RTX6000-Ada-4Q:
- devices: all
vgpu-devices:
- RTX6000-4C: 6
- RTX6000-4Q:
+ RTX6000-Ada-4Q: 12
+ RTX6000-Ada-6C:
- devices: all
vgpu-devices:
- RTX6000-4Q: 6
- RTX6000-6C:
+ RTX6000-Ada-6C: 8
+ RTX6000-Ada-6Q:
- devices: all
vgpu-devices:
- RTX6000-6C: 4
- RTX6000-6Q:
+ RTX6000-Ada-6Q: 8
+ RTX6000-Ada-8C:
- devices: all
vgpu-devices:
- RTX6000-6Q: 4
- RTX6000-8C:
+ RTX6000-Ada-8C: 6
+ RTX6000-Ada-8Q:
- devices: all
vgpu-devices:
- RTX6000-8C: 3
- RTX6000-8Q:
+ RTX6000-Ada-8Q: 6
+ RTX6000-Ada-12C:
- devices: all
vgpu-devices:
- RTX6000-8Q: 3
- RTX6000-12C:
+ RTX6000-Ada-12C: 4
+ RTX6000-Ada-12Q:
- devices: all
vgpu-devices:
- RTX6000-12C: 2
- RTX6000-12Q:
+ RTX6000-Ada-12Q: 4
+ RTX6000-Ada-16C:
- devices: all
vgpu-devices:
- RTX6000-12Q: 2
- RTX6000-24C:
+ RTX6000-Ada-16C: 3
+ RTX6000-Ada-16Q:
- devices: all
vgpu-devices:
- RTX6000-24C: 1
- RTX6000-24Q:
+ RTX6000-Ada-16Q: 3
+ RTX6000-Ada-24C:
- devices: all
vgpu-devices:
- RTX6000-24Q: 1
+ RTX6000-Ada-24C: 2
+ RTX6000-Ada-24Q:
+ - devices: all
+ vgpu-devices:
+ RTX6000-Ada-24Q: 2
+ RTX6000-Ada-48C:
+ - devices: all
+ vgpu-devices:
+ RTX6000-Ada-48C: 1
+ RTX6000-Ada-48Q:
+ - devices: all
+ vgpu-devices:
+ RTX6000-Ada-48Q: 1
RTX8000P-1Q:
- devices: all
vgpu-devices:
@@ -1239,74 +1767,6 @@ data:
- devices: all
vgpu-devices:
RTX8000P-48Q: 1
- RTX8000-1Q:
- - devices: all
- vgpu-devices:
- RTX8000-1Q: 32
- RTX8000-2Q:
- - devices: all
- vgpu-devices:
- RTX8000-2Q: 24
- RTX8000-3Q:
- - devices: all
- vgpu-devices:
- RTX8000-3Q: 16
- RTX8000-4C:
- - devices: all
- vgpu-devices:
- RTX8000-4C: 8
- RTX8000-4Q:
- - devices: all
- vgpu-devices:
- RTX8000-4Q: 12
- RTX8000-6C:
- - devices: all
- vgpu-devices:
- RTX8000-6C: 8
- RTX8000-6Q:
- - devices: all
- vgpu-devices:
- RTX8000-6Q: 8
- RTX8000-8C:
- - devices: all
- vgpu-devices:
- RTX8000-8C: 6
- RTX8000-8Q:
- - devices: all
- vgpu-devices:
- RTX8000-8Q: 6
- RTX8000-12C:
- - devices: all
- vgpu-devices:
- RTX8000-12C: 4
- RTX8000-12Q:
- - devices: all
- vgpu-devices:
- RTX8000-12Q: 4
- RTX8000-16C:
- - devices: all
- vgpu-devices:
- RTX8000-16C: 3
- RTX8000-16Q:
- - devices: all
- vgpu-devices:
- RTX8000-16Q: 3
- RTX8000-24C:
- - devices: all
- vgpu-devices:
- RTX8000-24C: 2
- RTX8000-24Q:
- - devices: all
- vgpu-devices:
- RTX8000-24Q: 2
- RTX8000-48C:
- - devices: all
- vgpu-devices:
- RTX8000-48C: 1
- RTX8000-48Q:
- - devices: all
- vgpu-devices:
- RTX8000-48Q: 1
RTXA5000-1Q:
- devices: all
vgpu-devices:
@@ -1728,42 +2188,6 @@ data:
vgpu-devices:
V100-16Q: 1
default:
- - device-filter: "0x13BD10DE"
- devices: all
- vgpu-devices:
- M10-4Q: 2
- - device-filter: "0x13F210DE"
- devices: all
- vgpu-devices:
- M60-4Q: 2
- - device-filter: "0x13F310DE"
- devices: all
- vgpu-devices:
- M6-4Q: 2
- - device-filter: "0x15F710DE"
- devices: all
- vgpu-devices:
- P100C-6Q: 2
- - device-filter: "0x15F810DE"
- devices: all
- vgpu-devices:
- P100-8Q: 2
- - device-filter: "0x15F910DE"
- devices: all
- vgpu-devices:
- P100X-8Q: 2
- - device-filter: "0x1B3810DE"
- devices: all
- vgpu-devices:
- P40-12Q: 2
- - device-filter: "0x1BB310DE"
- devices: all
- vgpu-devices:
- P4-4Q: 2
- - device-filter: "0x1BB410DE"
- devices: all
- vgpu-devices:
- P6-8Q: 2
- device-filter: "0x1DB110DE"
devices: all
vgpu-devices:
@@ -1788,14 +2212,6 @@ data:
devices: all
vgpu-devices:
V100S-16Q: 2
- - device-filter: "0x1E3010DE"
- devices: all
- vgpu-devices:
- RTX8000-24Q: 2
- - device-filter: "0x1E3010DE"
- devices: all
- vgpu-devices:
- RTX6000-12Q: 2
- device-filter: "0x1E7810DE"
devices: all
vgpu-devices:
@@ -1832,6 +2248,10 @@ data:
devices: all
vgpu-devices:
A30-12C: 2
+ - device-filter: "0x20B710DE"
+ devices: all
+ vgpu-devices:
+ A30-12C: 2
- device-filter: "0x20B910DE"
devices: all
vgpu-devices:
@@ -1848,6 +2268,14 @@ data:
devices: all
vgpu-devices:
A800D-40C: 2
+ - device-filter: "0x20FD10DE"
+ devices: all
+ vgpu-devices:
+ A800D-40C: 2
+ - device-filter: "0x20F610DE"
+ devices: all
+ vgpu-devices:
+ A800-20C: 2
- device-filter: "0x223010DE"
devices: all
vgpu-devices:
@@ -1868,18 +2296,62 @@ data:
devices: all
vgpu-devices:
A10-12Q: 2
- - device-filter: "0x223810DE"
+ - device-filter: "0x232110DE"
devices: all
vgpu-devices:
- A10M-10Q: 2
+ H100L-47C: 2
- device-filter: "0x232210DE"
devices: all
vgpu-devices:
H800-40C: 2
+ - device-filter: "0x232410DE"
+ devices: all
+ vgpu-devices:
+ H800XM-40C: 2
+ - device-filter: "0x232410DE"
+ devices: all
+ vgpu-devices:
+ H800XM-40C: 2
+ - device-filter: "0x232910DE"
+ devices: all
+ vgpu-devices:
+ H20-48C: 2
+ - device-filter: "0x232910DE"
+ devices: all
+ vgpu-devices:
+ H20-48C: 2
+ - device-filter: "0x233010DE"
+ devices: all
+ vgpu-devices:
+ H100XM-40C: 2
+ - device-filter: "0x233010DE"
+ devices: all
+ vgpu-devices:
+ H100XM-40C: 2
+ - device-filter: "0x233610DE"
+ devices: all
+ vgpu-devices:
+ H100XM-40C: 2
- device-filter: "0x233110DE"
devices: all
vgpu-devices:
H100-40C: 2
+ - device-filter: "0x233710DE"
+ devices: all
+ vgpu-devices:
+ H100XS-32C: 2
+ - device-filter: "0x233910DE"
+ devices: all
+ vgpu-devices:
+ H100XL-47C: 2
+ - device-filter: "0x233A10DE"
+ devices: all
+ vgpu-devices:
+ H800L-47C: 2
+ - device-filter: "0x234210DE"
+ devices: all
+ vgpu-devices:
+ GH200-96C: 1
- device-filter: "0x25B610DE"
devices: all
vgpu-devices:
@@ -1888,10 +2360,38 @@ data:
devices: all
vgpu-devices:
A2-8Q: 2
+ - device-filter: "0x26B110DE"
+ devices: all
+ vgpu-devices:
+ RTX6000-Ada-24Q: 2
+ - device-filter: "0x26B210DE"
+ devices: all
+ vgpu-devices:
+ RTX5000-Ada-16Q: 2
+ - device-filter: "0x26B310DE"
+ devices: all
+ vgpu-devices:
+ RTX5880-Ada-24Q: 2
- device-filter: "0x26B510DE"
devices: all
vgpu-devices:
L40-24Q: 2
+ - device-filter: "0x26B910DE"
+ devices: all
+ vgpu-devices:
+ L40S-24Q: 2
+ - device-filter: "0x26BA10DE"
+ devices: all
+ vgpu-devices:
+ L20-24Q: 2
+ - device-filter: "0x26BA10DE"
+ devices: all
+ vgpu-devices:
+ L20-24Q: 2
+ - device-filter: "0x27B610DE"
+ devices: all
+ vgpu-devices:
+ L2-12Q: 2
- device-filter: "0x27B810DE"
devices: all
vgpu-devices:
diff --git a/assets/state-vgpu-manager/0200_role.yaml b/assets/state-vgpu-manager/0200_role.yaml
index 834cf6d15..5b6818ca0 100644
--- a/assets/state-vgpu-manager/0200_role.yaml
+++ b/assets/state-vgpu-manager/0200_role.yaml
@@ -12,11 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - "get"
diff --git a/assets/state-vgpu-manager/0210_clusterrole.yaml b/assets/state-vgpu-manager/0210_clusterrole.yaml
index e088f3d4a..549734150 100644
--- a/assets/state-vgpu-manager/0210_clusterrole.yaml
+++ b/assets/state-vgpu-manager/0210_clusterrole.yaml
@@ -14,9 +14,26 @@ rules:
- ""
resources:
- nodes
- - pods
verbs:
- get
- list
- patch
- watch
+- apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+- apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
diff --git a/assets/state-vgpu-manager/0500_daemonset.yaml b/assets/state-vgpu-manager/0500_daemonset.yaml
index a46211b8c..5824c8810 100644
--- a/assets/state-vgpu-manager/0500_daemonset.yaml
+++ b/assets/state-vgpu-manager/0500_daemonset.yaml
@@ -84,6 +84,10 @@ spec:
mountPath: /var/log
- name: dev-log
mountPath: /dev/log
+ - name: firmware-search-path
+ mountPath: /sys/module/firmware_class/parameters/path
+ - name: nv-firmware
+ mountPath: /lib/firmware
# Only kept when OpenShift DriverToolkit side-car is enabled.
- image: "FILLED BY THE OPERATOR"
imagePullPolicy: IfNotPresent
@@ -114,6 +118,10 @@ spec:
mountPath: /var/log
- name: dev-log
mountPath: /dev/log
+ - name: firmware-search-path
+ mountPath: /sys/module/firmware_class/parameters/path
+ - name: nv-firmware
+ mountPath: /lib/firmware
volumes:
- name: run-nvidia
hostPath:
@@ -144,3 +152,10 @@ spec:
- name: dev-log
hostPath:
path: /dev/log
+ - name: firmware-search-path
+ hostPath:
+ path: /sys/module/firmware_class/parameters/path
+ - name: nv-firmware
+ hostPath:
+ path: /run/nvidia/driver/lib/firmware
+ type: DirectoryOrCreate
diff --git a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
index f99de06f5..4ab58349d 100644
--- a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
+++ b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml
@@ -18,7 +18,7 @@ metadata:
features.operators.openshift.io/cnf: "false"
features.operators.openshift.io/cni: "false"
features.operators.openshift.io/csi: "false"
- olm.skipRange: '>=1.9.0 <23.9.2'
+ olm.skipRange: '>=1.9.0 <24.9.0'
alm-examples: |-
[
{
@@ -101,6 +101,9 @@ metadata:
"config": {
"name": "",
"default": ""
+ },
+ "mps": {
+ "root": "/run/nvidia/mps"
}
},
"gfd": {
@@ -158,7 +161,7 @@ metadata:
"driverType": "gpu",
"repository": "nvcr.io/nvidia",
"image": "driver",
- "version": "sha256:7481a3697783dcdca9ae78e7b548a6900e86ea33ab49ec14f0ba55db2fdb1a2e",
+ "version": "sha256:78e00fc8aa72f5f4925d54b4f9fbb725ca2168e890d90eadd6a497b7f31ccc18",
"nodeSelector": {},
"manager": {},
"repoConfig": {
@@ -186,51 +189,53 @@ metadata:
capabilities: Deep Insights
categories: AI/Machine Learning, OpenShift Optional
certified: "true"
- containerImage: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator:master-latest-ubi8
+ containerImage: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator:main-latest
createdAt: "Thu Jul 27 13:57:56 PDT 2023"
description: Automate the management and monitoring of NVIDIA GPUs.
provider: NVIDIA
repository: http://github.com/NVIDIA/gpu-operator
support: NVIDIA
- name: gpu-operator-certified.v23.9.2
+ name: gpu-operator-certified.v24.9.0
namespace: placeholder
spec:
apiservicedefinitions: {}
relatedImages:
- name: gpu-operator-image
- image: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator:master-latest-ubi8
+ image: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator:main-latest
- name: dcgm-exporter-image
- image: nvcr.io/nvidia/k8s/dcgm-exporter@sha256:011fb450af3fa2e8fe5d28d590e4c653631447bc23d149591ced3d89089c4f2c
+ image: nvcr.io/nvidia/k8s/dcgm-exporter@sha256:857f7669ae5ce2a43a3b5691fa970085098321ef8e90a896fa8dda3dee5b1d2b
- name: dcgm-image
- image: nvcr.io/nvidia/cloud-native/dcgm@sha256:6a05d6a1923fda756aed0dddf7ed23a83c30cf1e6c519fc39dd70c0309ec8257
+ image: nvcr.io/nvidia/cloud-native/dcgm@sha256:e8398b2451996e5c64fc3855e7a3c93b667a85f69c0e259049d3b0012c21545d
- name: container-toolkit-image
- image: nvcr.io/nvidia/k8s/container-toolkit@sha256:59a3875e7a37eb370385e654184efa3a1b193c9ea352165818496b19cbe14aa4
+ image: nvcr.io/nvidia/k8s/container-toolkit@sha256:f9d5652cba9ff27f9c4d17f5d09b4a5e4aa631b5cabb9cedf4d3e8c43d6847a4
- name: driver-image
- image: nvcr.io/nvidia/driver@sha256:7481a3697783dcdca9ae78e7b548a6900e86ea33ab49ec14f0ba55db2fdb1a2e
+ image: nvcr.io/nvidia/driver@sha256:78e00fc8aa72f5f4925d54b4f9fbb725ca2168e890d90eadd6a497b7f31ccc18
- name: driver-image-535
- image: nvcr.io/nvidia/driver@sha256:abda1ac56371d55917b96ff330109980f468e133c9d5705da0ef87429f14ccd7
- - name: driver-image-470
- image: nvcr.io/nvidia/driver@sha256:56c79482582cdfbc58d3134e8672637c5bf05f328880f76898f526143d04c6af
+ image: nvcr.io/nvidia/driver@sha256:a64d182b9d8f024ee747710475867f938386f130db8e61227e6955343e815258
+ - name: driver-image-560
+ image: nvcr.io/nvidia/driver@sha256:38b66a8d44cab9e2c62da9e101f32cd9dbcb5e02d8e57b47671284d374ca3695
+ - name: driver-image-565
+ image: nvcr.io/nvidia/driver@sha256:d55b57938866e538acc3a71ca32f8cf87e71c591abd4a34695ee428e7ec2fa73
- name: device-plugin-image
- image: nvcr.io/nvidia/k8s-device-plugin@sha256:2a1baf95eb414e6a451c7da2ac6c2992ff81ac95e407a4b254367f18ca9aa320
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea
- name: gpu-feature-discovery-image
- image: nvcr.io/nvidia/k8s-device-plugin@sha256:2a1baf95eb414e6a451c7da2ac6c2992ff81ac95e407a4b254367f18ca9aa320
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea
- name: mig-manager-image
- image: nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:a67d8e92861a2dce5649105c07561e4422e9fe4ba81a6525dc0d70a7ef85f9c0
+ image: nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:40830d3561c14743f484d45b498141f9e86b1308e16fae3978110783927264ab
- name: init-container-image
- image: nvcr.io/nvidia/cuda@sha256:714547d54e5fe4191019a1e5f1daffc7fab7481b619b79c378541dafc76c9e5d
+ image: nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190
- name: gpu-operator-validator-image
- image: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator-validator:master-latest-ubi8
+ image: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator-validator:main-latest
- name: k8s-driver-manager-image
- image: nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:27c44f4720a4abf780217bd5e7903e4a008ebdbcf71238c4f106a0c22654776c
+ image: nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:b072c5793be65eee556eaff1b9cbbd115a1ef29982be95b2959adfcb4bc72382
- name: vfio-manager-image
- image: nvcr.io/nvidia/cuda@sha256:714547d54e5fe4191019a1e5f1daffc7fab7481b619b79c378541dafc76c9e5d
+ image: nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190
- name: sandbox-device-plugin-image
- image: nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:387021553dfb16aab633228d42f63f04fa932b4f46add07527f296dfe97e5148
+ image: nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:4ffa1cd2a6497eb647a89ed259dcfb007554737b9d80f69bc173a2c3cd72a1da
- name: vgpu-device-manager-image
- image: nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:925f4a47710e4318ed457930f5406174c1f6d28b1bf6b1bc310687fec0fde712
+ image: nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:7edd7a0413dcb39b6e3bcefaf06812f3293c8e480ca10783e821a561ed686200
- name: gdrcopy-image
- image: nvcr.io/nvidia/cloud-native/gdrdrv@sha256:1ae0b923bc57f47bab046b50c50110f6914bbaffbfef704df34b3fe332db2e31
+ image: nvcr.io/nvidia/cloud-native/gdrdrv@sha256:cf39d78ced7fb5727a9668ee2cd44b14bb7a23a95b83d5464b7d755740e02121
customresourcedefinitions:
owned:
- name: nvidiadrivers.nvidia.com
@@ -584,7 +589,12 @@ spec:
- apiGroups:
- nvidia.com
resources:
- - '*'
+ - clusterpolicies
+ - clusterpolicies/finalizers
+ - clusterpolicies/status
+ - nvidiadrivers
+ - nvidiadrivers/finalizers
+ - nvidiadrivers/status
verbs:
- create
- delete
@@ -593,6 +603,7 @@ spec:
- patch
- update
- watch
+ - deletecollection
- apiGroups:
- config.openshift.io
resources:
@@ -607,7 +618,14 @@ spec:
resources:
- securitycontextconstraints
verbs:
- - '*'
+ - use
+ - create
+ - get
+ - list
+ - watch
+ - patch
+ - update
+ - delete
- apiGroups:
- security.openshift.io
resources:
@@ -617,105 +635,92 @@ spec:
resourceNames:
- hostmount-anyuid
- apiGroups:
- - rbac.authorization.k8s.io
+ - image.openshift.io
resources:
- - clusterroles
- - clusterrolebindings
- - roles
- - rolebindings
+ - imagestreams
verbs:
- - '*'
+ - get
+ - list
+ - watch
- apiGroups:
- - ""
+ - rbac.authorization.k8s.io
resources:
- - pods
- - pods/eviction
- - services
- - services/finalizers
- - endpoints
- - persistentvolumeclaims
- - events
- - configmaps
- - secrets
- - nodes
- - namespaces
- - serviceaccounts
+ - clusterroles
+ - clusterrolebindings
verbs:
- create
- - delete
- get
- list
- - patch
- - update
- watch
+ - update
+ - patch
+ - delete
- apiGroups:
- - apps
+ - ""
resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
+ - namespaces
verbs:
- - create
- - delete
- get
- list
- - patch
- - update
+ - create
- watch
+ - update
+ - patch
- apiGroups:
- - apps
+ - ""
resources:
- - controllerrevisions
+ - nodes
verbs:
- get
- list
- watch
+ - update
+ - patch
- apiGroups:
- - node.k8s.io
+ - ""
resources:
- - runtimeclasses
+ - events
+ - pods
+ - pods/eviction
verbs:
+ - create
- get
- list
- - create
- - update
- watch
+ - update
+ - patch
- delete
- apiGroups:
- - coordination.k8s.io
- resources:
- - leases
- verbs:
- - '*'
- - apiGroups:
- - monitoring.coreos.com
+ - apps
resources:
- - servicemonitors
- - prometheusrules
+ - daemonsets
verbs:
- get
- list
- - create
- watch
- - update
- - delete
- apiGroups:
- - image.openshift.io
+ - node.k8s.io
resources:
- - imagestreams
+ - runtimeclasses
verbs:
- get
- list
+ - create
+ - update
- watch
+ - delete
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
+ - create
- get
- list
- watch
+ - update
+ - patch
+ - delete
permissions:
- serviceAccountName: gpu-operator
rules:
@@ -725,42 +730,13 @@ spec:
- roles
- rolebindings
verbs:
- - '*'
- - apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - services
- - services/finalizers
- - endpoints
- - persistentvolumeclaims
- - events
- - configmaps
- - secrets
- verbs:
- create
- - delete
- get
- list
- - patch
- - update
- watch
- - apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- update
- - watch
+ - patch
+ - delete
- apiGroups:
- apps
resources:
@@ -770,64 +746,58 @@ spec:
- list
- watch
- apiGroups:
- - monitoring.coreos.com
+ - apps
resources:
- - servicemonitors
- - prometheusrules
+ - daemonsets
verbs:
- - get
- create
+ - get
- list
- - update
- watch
- - delete
- - apiGroups:
- - apps
- resourceNames:
- - gpu-operator
- resources:
- - deployments/finalizers
- verbs:
- update
+ - patch
+ - delete
- apiGroups:
- ""
resources:
- - pods
- verbs:
- - get
- - apiGroups:
- - apps
- resources:
- - replicasets
- - deployments
- verbs:
- - get
- - apiGroups:
- - nvidia.com
- resources:
- - '*'
+ - configmaps
+ - endpoints
+ - secrets
+ - services
+ - services/finalizers
+ - serviceaccounts
verbs:
- create
- - delete
- get
- list
- - patch
- - update
- watch
+ - update
+ - patch
+ - delete
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- - '*'
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
- apiGroups:
- - apiextensions.k8s.io
+ - monitoring.coreos.com
resources:
- - customresourcedefinitions
+ - servicemonitors
+ - prometheusrules
verbs:
- get
- list
+ - create
- watch
+ - update
+ - delete
deployments:
- name: gpu-operator
spec:
@@ -850,7 +820,7 @@ spec:
- --leader-elect
- --leader-lease-renew-deadline
- "60s"
- image: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator:master-latest-ubi8
+ image: registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator:main-latest
command:
- gpu-operator
livenessProbe:
@@ -888,37 +858,39 @@ spec:
fieldRef:
fieldPath: metadata.namespace
- name: "VALIDATOR_IMAGE"
- value: "registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator-validator:master-latest-ubi8"
+ value: "registry.gitlab.com/nvidia/kubernetes/gpu-operator/staging/gpu-operator-validator:main-latest"
- name: "GFD_IMAGE"
- value: "nvcr.io/nvidia/k8s-device-plugin@sha256:2a1baf95eb414e6a451c7da2ac6c2992ff81ac95e407a4b254367f18ca9aa320"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea"
- name: "CONTAINER_TOOLKIT_IMAGE"
- value: "nvcr.io/nvidia/k8s/container-toolkit@sha256:59a3875e7a37eb370385e654184efa3a1b193c9ea352165818496b19cbe14aa4"
+ value: "nvcr.io/nvidia/k8s/container-toolkit@sha256:f9d5652cba9ff27f9c4d17f5d09b4a5e4aa631b5cabb9cedf4d3e8c43d6847a4"
- name: "DCGM_IMAGE"
- value: "nvcr.io/nvidia/cloud-native/dcgm@sha256:6a05d6a1923fda756aed0dddf7ed23a83c30cf1e6c519fc39dd70c0309ec8257"
+ value: "nvcr.io/nvidia/cloud-native/dcgm@sha256:e8398b2451996e5c64fc3855e7a3c93b667a85f69c0e259049d3b0012c21545d"
- name: "DCGM_EXPORTER_IMAGE"
- value: "nvcr.io/nvidia/k8s/dcgm-exporter@sha256:011fb450af3fa2e8fe5d28d590e4c653631447bc23d149591ced3d89089c4f2c"
+ value: "nvcr.io/nvidia/k8s/dcgm-exporter@sha256:857f7669ae5ce2a43a3b5691fa970085098321ef8e90a896fa8dda3dee5b1d2b"
- name: "DEVICE_PLUGIN_IMAGE"
- value: "nvcr.io/nvidia/k8s-device-plugin@sha256:2a1baf95eb414e6a451c7da2ac6c2992ff81ac95e407a4b254367f18ca9aa320"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea"
- name: "DRIVER_IMAGE"
- value: "nvcr.io/nvidia/driver@sha256:7481a3697783dcdca9ae78e7b548a6900e86ea33ab49ec14f0ba55db2fdb1a2e"
+ value: "nvcr.io/nvidia/driver@sha256:78e00fc8aa72f5f4925d54b4f9fbb725ca2168e890d90eadd6a497b7f31ccc18"
- name: "DRIVER_IMAGE-535"
- value: "nvcr.io/nvidia/driver@sha256:abda1ac56371d55917b96ff330109980f468e133c9d5705da0ef87429f14ccd7"
- - name: "DRIVER_IMAGE-470"
- value: "nvcr.io/nvidia/driver@sha256:56c79482582cdfbc58d3134e8672637c5bf05f328880f76898f526143d04c6af"
+ value: "nvcr.io/nvidia/driver@sha256:a64d182b9d8f024ee747710475867f938386f130db8e61227e6955343e815258"
+ - name: "DRIVER_IMAGE-560"
+ value: "nvcr.io/nvidia/driver@sha256:38b66a8d44cab9e2c62da9e101f32cd9dbcb5e02d8e57b47671284d374ca3695"
+ - name: "DRIVER_IMAGE-565"
+ value: "nvcr.io/nvidia/driver@sha256:d55b57938866e538acc3a71ca32f8cf87e71c591abd4a34695ee428e7ec2fa73"
- name: "DRIVER_MANAGER_IMAGE"
- value: "nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:27c44f4720a4abf780217bd5e7903e4a008ebdbcf71238c4f106a0c22654776c"
+ value: "nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:b072c5793be65eee556eaff1b9cbbd115a1ef29982be95b2959adfcb4bc72382"
- name: "MIG_MANAGER_IMAGE"
- value: "nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:a67d8e92861a2dce5649105c07561e4422e9fe4ba81a6525dc0d70a7ef85f9c0"
+ value: "nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:40830d3561c14743f484d45b498141f9e86b1308e16fae3978110783927264ab"
- name: "CUDA_BASE_IMAGE"
- value: "nvcr.io/nvidia/cuda@sha256:714547d54e5fe4191019a1e5f1daffc7fab7481b619b79c378541dafc76c9e5d"
+ value: "nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190"
- name: "VFIO_MANAGER_IMAGE"
- value: "nvcr.io/nvidia/cuda@sha256:714547d54e5fe4191019a1e5f1daffc7fab7481b619b79c378541dafc76c9e5d"
+ value: "nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190"
- name: "SANDBOX_DEVICE_PLUGIN_IMAGE"
- value: "nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:387021553dfb16aab633228d42f63f04fa932b4f46add07527f296dfe97e5148"
+ value: "nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:4ffa1cd2a6497eb647a89ed259dcfb007554737b9d80f69bc173a2c3cd72a1da"
- name: "VGPU_DEVICE_MANAGER_IMAGE"
- value: "nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:925f4a47710e4318ed457930f5406174c1f6d28b1bf6b1bc310687fec0fde712"
+ value: "nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:7edd7a0413dcb39b6e3bcefaf06812f3293c8e480ca10783e821a561ed686200"
- name: "GDRCOPY_IMAGE"
- value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:1ae0b923bc57f47bab046b50c50110f6914bbaffbfef704df34b3fe332db2e31"
+ value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:cf39d78ced7fb5727a9668ee2cd44b14bb7a23a95b83d5464b7d755740e02121"
terminationGracePeriodSeconds: 10
volumes:
- hostPath:
@@ -949,5 +921,5 @@ spec:
maturity: stable
provider:
name: NVIDIA Corporation
- version: 23.9.2
- replaces: gpu-operator-certified.v23.9.1
+ version: 24.9.0
+ replaces: gpu-operator-certified.v24.6.2
diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml
index 16e35bf4b..54e4a652b 100644
--- a/bundle/manifests/nvidia.com_clusterpolicies.yaml
+++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
- controller-gen.kubebuilder.io/version: v0.14.0
+ controller-gen.kubebuilder.io/version: v0.16.5
name: clusterpolicies.nvidia.com
spec:
group: nvidia.com
@@ -248,8 +248,8 @@ spec:
type: object
type: array
hostPort:
- description: 'HostPort represents host port that needs to be bound
- for DCGM engine (Default: 5555)'
+ description: 'Deprecated: HostPort represents host port that needs
+ to be bound for DCGM engine (Default: 5555)'
format: int32
type: integer
image:
@@ -410,15 +410,20 @@ spec:
sets for NVIDIA DCGM Exporter
items:
description: |-
- RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion.
- It defines ``-section of Prometheus configuration.
- More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
properties:
action:
default: replace
description: |-
- Action to perform based on regex matching. Default is 'replace'.
- uppercase and lowercase actions require Prometheus >= 2.36.
+ Action to perform based on the regex matching.
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+ Default: "Replace"
enum:
- replace
- Replace
@@ -444,39 +449,47 @@ spec:
- DropEqual
type: string
modulus:
- description: Modulus to take of the hash of the source
- label values.
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+ Only applicable when the action is `HashMod`.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
- value is matched. Default is '(.*)'
+ value is matched.
type: string
replacement:
description: |-
- Replacement value against which a regex replace is performed if the
- regular expression matches. Regex capture groups are available. Default is '$1'
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+ Regex capture groups are available.
type: string
separator:
- description: Separator placed between concatenated source
- label values. default is ';'.
+ description: Separator is the string between concatenated
+ SourceLabels.
type: string
sourceLabels:
description: |-
- The source labels select values from existing labels. Their content is concatenated
- using the configured separator and matched against the configured regular expression
- for the replace, keep, and drop actions.
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
items:
- description: LabelName is a valid Prometheus label
- name which may only contain ASCII letters, numbers,
- as well as underscores.
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
type: string
type: array
targetLabel:
description: |-
- Label to which the resulting value is written in a replace action.
- It is mandatory for replace actions. Regex capture groups are available.
+ Label to which the resulting string is written in a replacement.
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -538,6 +551,15 @@ spec:
items:
type: string
type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
repository:
description: NVIDIA Device Plugin image repository
type: string
@@ -926,9 +948,9 @@ spec:
type: boolean
timeoutSeconds:
default: 300
- description: TimeoutSecond specifies the length of time
- in seconds to wait before giving up on pod termination,
- zero means infinite
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
minimum: 0
type: integer
type: object
@@ -944,9 +966,9 @@ spec:
type: string
timeoutSeconds:
default: 0
- description: TimeoutSecond specifies the length of time
- in seconds to wait before giving up on pod termination,
- zero means infinite
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
minimum: 0
type: integer
type: object
@@ -1146,6 +1168,24 @@ spec:
description: GFD image tag
type: string
type: object
+ hostPaths:
+ description: HostPaths defines various paths on the host needed by
+ GPU Operator components
+ properties:
+ driverInstallDir:
+ description: |-
+ DriverInstallDir represents the root at which driver files including libraries,
+ config files, and executables can be found.
+ type: string
+ rootFS:
+ description: |-
+ RootFS represents the path to the root filesystem of the host.
+ This is used by components that need to interact with the host filesystem
+ and as such this must be a chroot-able filesystem.
+ Examples include the MIG Manager and Toolkit Container which may need to
+ stop, start, or restart systemd services.
+ type: string
+ type: object
kataManager:
description: KataManager component spec
properties:
@@ -2269,16 +2309,8 @@ spec:
description: Conditions is a list of conditions representing the ClusterPolicy's
current state.
items:
- description: "Condition contains details for one aspect of the current
- state of this API Resource.\n---\nThis struct is intended for
- direct use as an array at the field path .status.conditions. For
- example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
- observations of a foo's current state.\n\t // Known .status.conditions.type
- are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
- +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
- \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
- patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
- \ // other fields\n\t}"
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
properties:
lastTransitionTime:
description: |-
@@ -2319,12 +2351,7 @@ spec:
- Unknown
type: string
type:
- description: |-
- type of condition in CamelCase or in foo.example.com/CamelCase.
- ---
- Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
- useful (see .node.status.conditions), the ability to deconflict is important.
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
diff --git a/bundle/manifests/nvidia.com_nvidiadrivers.yaml b/bundle/manifests/nvidia.com_nvidiadrivers.yaml
index 317972fd2..c49059a38 100644
--- a/bundle/manifests/nvidia.com_nvidiadrivers.yaml
+++ b/bundle/manifests/nvidia.com_nvidiadrivers.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
- controller-gen.kubebuilder.io/version: v0.14.0
+ controller-gen.kubebuilder.io/version: v0.16.5
name: nvidiadrivers.nvidia.com
spec:
group: nvidia.com
@@ -357,11 +357,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
@@ -389,11 +391,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
weight:
@@ -406,6 +410,7 @@ spec:
- weight
type: object
type: array
+ x-kubernetes-list-type: atomic
requiredDuringSchedulingIgnoredDuringExecution:
description: |-
If the affinity requirements specified by this field are not met at
@@ -450,11 +455,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
@@ -482,14 +489,17 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
type: array
+ x-kubernetes-list-type: atomic
required:
- nodeSelectorTerms
type: object
@@ -709,16 +719,8 @@ spec:
description: Conditions is a list of conditions representing the NVIDIADriver's
current state.
items:
- description: "Condition contains details for one aspect of the current
- state of this API Resource.\n---\nThis struct is intended for
- direct use as an array at the field path .status.conditions. For
- example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
- observations of a foo's current state.\n\t // Known .status.conditions.type
- are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
- +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
- \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
- patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
- \ // other fields\n\t}"
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
properties:
lastTransitionTime:
description: |-
@@ -759,12 +761,7 @@ spec:
- Unknown
type: string
type:
- description: |-
- type of condition in CamelCase or in foo.example.com/CamelCase.
- ---
- Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
- useful (see .node.status.conditions), the ability to deconflict is important.
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
diff --git a/bundle/metadata/annotations.yaml b/bundle/metadata/annotations.yaml
index f13bb0580..50d49a56b 100644
--- a/bundle/metadata/annotations.yaml
+++ b/bundle/metadata/annotations.yaml
@@ -1,6 +1,6 @@
annotations:
- operators.operatorframework.io.bundle.channels.v1: stable,v23.9
- operators.operatorframework.io.bundle.channel.default.v1: v23.9
+ operators.operatorframework.io.bundle.channels.v1: stable,v24.9
+ operators.operatorframework.io.bundle.channel.default.v1: v24.9
operators.operatorframework.io.bundle.manifests.v1: manifests/
operators.operatorframework.io.bundle.mediatype.v1: registry+v1
operators.operatorframework.io.bundle.metadata.v1: metadata/
@@ -14,4 +14,4 @@ annotations:
operatorframework.io/suggested-namespace: nvidia-gpu-operator
# Annotations to specify OCP versions compatibility.
- com.redhat.openshift.versions: v4.9-v4.15
+ com.redhat.openshift.versions: v4.12-v4.17
diff --git a/bundle/v24.3.0/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/v24.3.0/manifests/gpu-operator-certified.clusterserviceversion.yaml
new file mode 100644
index 000000000..11ceba75d
--- /dev/null
+++ b/bundle/v24.3.0/manifests/gpu-operator-certified.clusterserviceversion.yaml
@@ -0,0 +1,956 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: ClusterServiceVersion
+metadata:
+ labels:
+ operatorframework.io/arch.arm64: supported
+ operatorframework.io/arch.amd64: supported
+ pod-security.kubernetes.io/enforce: privileged
+ pod-security.kubernetes.io/audit: privileged
+ pod-security.kubernetes.io/warn: privileged
+ annotations:
+ features.operators.openshift.io/disconnected: "true"
+ features.operators.openshift.io/fips-compliant: "false"
+ features.operators.openshift.io/proxy-aware: "true"
+ features.operators.openshift.io/tls-profiles: "false"
+ features.operators.openshift.io/token-auth-aws: "false"
+ features.operators.openshift.io/token-auth-azure: "false"
+ features.operators.openshift.io/token-auth-gcp: "false"
+ features.operators.openshift.io/cnf: "false"
+ features.operators.openshift.io/cni: "false"
+ features.operators.openshift.io/csi: "false"
+ olm.skipRange: '>=1.9.0 <24.3.0'
+ alm-examples: |-
+ [
+ {
+ "apiVersion": "nvidia.com/v1",
+ "kind": "ClusterPolicy",
+ "metadata": {
+ "name": "gpu-cluster-policy"
+ },
+ "spec": {
+ "operator": {
+ "defaultRuntime": "crio",
+ "use_ocp_driver_toolkit": true,
+ "initContainer": {
+ }
+ },
+ "sandboxWorkloads": {
+ "enabled": false,
+ "defaultWorkload": "container"
+ },
+ "driver": {
+ "enabled": true,
+ "useNvidiaDriverCRD": false,
+ "useOpenKernelModules": false,
+ "upgradePolicy": {
+ "autoUpgrade": true,
+ "drain": {
+ "deleteEmptyDir": false,
+ "enable": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "maxParallelUpgrades": 1,
+ "maxUnavailable": "25%",
+ "podDeletion": {
+ "deleteEmptyDir": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "waitForCompletion": {
+ "timeoutSeconds": 0
+ }
+ },
+ "repoConfig": {
+ "configMapName": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "configMapName": ""
+ },
+ "virtualTopology": {
+ "config": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ },
+ "dcgmExporter": {
+ "enabled": true,
+ "config": {
+ "name": ""
+ },
+ "serviceMonitor": {
+ "enabled": true
+ }
+ },
+ "dcgm": {
+ "enabled": true
+ },
+ "daemonsets": {
+ "updateStrategy": "RollingUpdate",
+ "rollingUpdate": {
+ "maxUnavailable": "1"
+ }
+ },
+ "devicePlugin": {
+ "enabled": true,
+ "config": {
+ "name": "",
+ "default": ""
+ },
+ "mps": {
+ "root": "/run/nvidia/mps"
+ }
+ },
+ "gfd": {
+ "enabled": true
+ },
+ "migManager": {
+ "enabled": true
+ },
+ "nodeStatusExporter": {
+ "enabled": true
+ },
+ "mig": {
+ "strategy": "single"
+ },
+ "toolkit": {
+ "enabled": true
+ },
+ "validator": {
+ "plugin": {
+ "env": [
+ {
+ "name": "WITH_WORKLOAD",
+ "value": "false"
+ }
+ ]
+ }
+ },
+ "vgpuManager": {
+ "enabled": false
+ },
+ "vgpuDeviceManager": {
+ "enabled": true
+ },
+ "sandboxDevicePlugin": {
+ "enabled": true
+ },
+ "vfioManager": {
+ "enabled": true
+ },
+ "gds": {
+ "enabled": false
+ },
+ "gdrcopy": {
+ "enabled": false
+ }
+ }
+ },
+ {
+ "apiVersion": "nvidia.com/v1alpha1",
+ "kind": "NVIDIADriver",
+ "metadata": {
+ "name": "gpu-driver"
+ },
+ "spec": {
+ "driverType": "gpu",
+ "repository": "nvcr.io/nvidia",
+ "image": "driver",
+ "version": "sha256:96f25c67e5b1072d5981080e12d65ec37eb9ef2fc0494499416aa801b0a34da3",
+ "nodeSelector": {},
+ "manager": {},
+ "repoConfig": {
+ "name": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "name": ""
+ },
+ "virtualTopologyConfig": {
+ "name": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ }
+ }
+ ]
+ operators.operatorframework.io/builder: operator-sdk-v1.4.0
+ operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+ capabilities: Deep Insights
+ categories: AI/Machine Learning, OpenShift Optional
+ certified: "true"
+ containerImage: nvcr.io/nvidia/gpu-operator@sha256:041e75a3df84039c2dbbd4b9d67763bd212138822dbb6dbc0008858c1c6eff8d
+ createdAt: "Tue Apr 30 14:18:26 PDT 2024"
+ description: Automate the management and monitoring of NVIDIA GPUs.
+ provider: NVIDIA
+ repository: http://github.com/NVIDIA/gpu-operator
+ support: NVIDIA
+ name: gpu-operator-certified.v24.3.0
+ namespace: placeholder
+spec:
+ apiservicedefinitions: {}
+ relatedImages:
+ - name: gpu-operator-image
+ image: nvcr.io/nvidia/gpu-operator@sha256:041e75a3df84039c2dbbd4b9d67763bd212138822dbb6dbc0008858c1c6eff8d
+ - name: dcgm-exporter-image
+ image: nvcr.io/nvidia/k8s/dcgm-exporter@sha256:ecb867c5787e44f1889b257e6c03b07748689a7b293c8f1affb1d4cb449b235c
+ - name: dcgm-image
+ image: nvcr.io/nvidia/cloud-native/dcgm@sha256:2442fd2ec0ee29746abbced727c53741f017e8f3f615321c1ec42c1c3ffa0b12
+ - name: container-toolkit-image
+ image: nvcr.io/nvidia/k8s/container-toolkit@sha256:7798eb9b9424e09959fc808596f87bdb68ac5740174123c6646dee2166d3e7d2
+ - name: driver-image
+ image: nvcr.io/nvidia/driver@sha256:96f25c67e5b1072d5981080e12d65ec37eb9ef2fc0494499416aa801b0a34da3
+ - name: driver-image-535
+ image: nvcr.io/nvidia/driver@sha256:a836ccbe21da735aee1c39b81060ed5e2fdb4ffa339874baaf4634f1e9259f74
+ - name: driver-image-470
+ image: nvcr.io/nvidia/driver@sha256:56c79482582cdfbc58d3134e8672637c5bf05f328880f76898f526143d04c6af
+ - name: device-plugin-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:80674b19898ecf9ed6949e39674da769d6feeeb01bea54b914ef9ff502834f49
+ - name: gpu-feature-discovery-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:80674b19898ecf9ed6949e39674da769d6feeeb01bea54b914ef9ff502834f49
+ - name: mig-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:d8774d6afd37110199512636172823ae8749e5ea2e13760f57c255a74f47018c
+ - name: init-container-image
+ image: nvcr.io/nvidia/cuda@sha256:ae0623ec8634b6c88f815b88037763def160cbbac15013b77ddef257fc276c9a
+ - name: gpu-operator-validator-image
+ image: nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:2edc1d4ed555830e70010c82558936198f5faa86fc29ecf5698219145102cfcc
+ - name: k8s-driver-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:0c77725de8c42d248ed825453efd2e005f4900f4be384fd23084f6b721ddd0e0
+ - name: vfio-manager-image
+ image: nvcr.io/nvidia/cuda@sha256:ae0623ec8634b6c88f815b88037763def160cbbac15013b77ddef257fc276c9a
+ - name: sandbox-device-plugin-image
+ image: nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:2cc2119d20515f8ca7a61a0f3932578d69a45bcdea49e6c320a89c56f105e7d9
+ - name: vgpu-device-manager-image
+ image: nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:f0e4e14a3081417be8e6a5d855fb0cb69e1b63bc54a74f17e6d7084abe275588
+ - name: gdrcopy-image
+ image: nvcr.io/nvidia/cloud-native/gdrdrv@sha256:1ae0b923bc57f47bab046b50c50110f6914bbaffbfef704df34b3fe332db2e31
+ customresourcedefinitions:
+ owned:
+ - name: nvidiadrivers.nvidia.com
+ kind: NVIDIADriver
+ version: v1alpha1
+ displayName: NVIDIADriver
+ description: NVIDIADriver allows you to deploy the NVIDIA driver
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ statusDescriptors:
+ - description: The current state of the driver.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ - name: clusterpolicies.nvidia.com
+ kind: ClusterPolicy
+ version: v1
+ group: nvidia.com
+ displayName: ClusterPolicy
+ description: ClusterPolicy allows you to configure the GPU Operator
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: Deployment
+ name: ''
+ version: apps/v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: GPU Operator config
+ displayName: GPU Operator config
+ path: operator
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: operator.validator.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: operator.validator.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - description: NVIDIA GPU/vGPU Driver config
+ displayName: NVIDIA GPU/vGPU Driver config
+ path: driver
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: driver.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: driver.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: driver.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: driver.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: driver.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA DCGM Exporter config
+ displayName: NVIDIA DCGM Exporter config
+ path: dcgmExporter
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: dcgmExporter.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: dcgmExporter.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: dcgmExporter.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: dcgmExporter.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: dcgmExporter.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Device Plugin config
+ displayName: NVIDIA Device Plugin config
+ path: devicePlugin
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: devicePlugin.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: devicePlugin.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: devicePlugin.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: devicePlugin.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: devicePlugin.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: GPU Feature Discovery Plugin config
+ displayName: GPU Feature Discovery Plugin config
+ path: gfd
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: gfd.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: gfd.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: gfd.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: gfd.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: gfd.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Container Toolkit config
+ displayName: NVIDIA Container Toolkit config
+ path: toolkit
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: toolkit.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: toolkit.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: toolkit.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: toolkit.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: toolkit.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - displayName: NVIDIA DCGM config
+ description: NVIDIA DCGM config
+ path: dcgm
+ - displayName: Validator config
+ description: Validator config
+ path: validator
+ - displayName: Node Status Exporter config
+ description: Node Status Exporter config
+ path: nodeStatusExporter
+ - displayName: Daemonsets config
+ description: Daemonsets config
+ path: daemonsets
+ - displayName: MIG config
+ description: MIG config
+ path: mig
+ - displayName: NVIDIA MIG Manager config
+ description: NVIDIA MIG Manager config
+ path: migManager
+ - displayName: PodSecurityPolicy config
+ description: PodSecurityPolicy config
+ path: psp
+ - displayName: NVIDIA GPUDirect Storage config
+ description: NVIDIA GPUDirect Storage config
+ path: gds
+ - displayName: Sandbox Workloads config
+ description: Sandbox Workloads config
+ path: sandboxWorkloads
+ - displayName: NVIDIA vGPU Manager config
+ description: NVIDIA vGPU Manager config
+ path: vgpuManager
+ - displayName: NVIDIA vGPU Device Manager config
+ description: NVIDIA vGPU Device Manager config
+ path: vgpuDeviceManager
+ - displayName: VFIO Manager config
+ description: VFIO Manager config
+ path: vfioManager
+ - displayName: NVIDIA Sandbox Device Plugin config
+ description: NVIDIA Sandbox Device Plugin config
+ path: sandboxDevicePlugin
+ statusDescriptors:
+ - description: The current state of the operator.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ displayName: NVIDIA GPU Operator
+ description: >
+ Kubernetes provides access to special hardware resources such as NVIDIA
+ GPUs, NICs, Infiniband adapters and other devices through the [device plugin
+ framework](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/).
+ However, configuring and managing nodes with these hardware resources
+ requires setting up multiple software components such as drivers,
+ container runtimes or other libraries, a process which is difficult and
+ prone to errors.
+
+ The NVIDIA GPU Operator uses the [operator
+ framework](https://cloud.redhat.com/blog/introducing-the-operator-framework) within
+ Kubernetes to automate the management of all NVIDIA software components
+ needed to provision and monitor GPUs.
+ These components include the NVIDIA drivers (to enable CUDA), Kubernetes
+ device plugin for GPUs, the NVIDIA Container Runtime, automatic node
+ labelling and NVIDIA DCGM exporter.
+
+ Visit the official site of the [GPU Operator](https://github.com/NVIDIA/gpu-operator) for more information.
+ To get started with the GPU Operator on OpenShift, see the instructions
+ [here](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/openshift/contents.html).
+ icon:
+ - base64data: iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAB2lBMVEUAAAD///8EBAN3uQACAgIAAAQJDQUCAgB1tgAHCQf+/v5Ufg5Hagxxqwt+xgJ3uAB9wwB4vQBRUVEeLA3e3t5nZ2coKCgODg4FBwZ9wwR6wAJ4vADz8/MbGxt5tw1vpw1/wgoOFwkLDwh9xQH5+fny8vLw8PDFxcWysrKFhYVvb282NjYyMjIqKioXFxdikxRYgxNCYxJQdhFqoQ9xrg16ugxyqgyAxQkEBQj7+/v29vbIyMhjY2NbW1tHR0cvLy8kJCQdHR0ZGRlKbxJ8uhFNcxFVgBAxSBBgkg93tQ50sA4qPg4XIg18vwsbKQsSGgsLCwsMEwqCyQeByQFztADPz8+/v7+6urqWlpZra2tKSkogICASEhJmmRE8XBA5VRA2UBBonA9biA9GaQ4sQg4jMw4mOQ0aJw2GzgsUHgttpAqJ0Ql/wQWG0AJ8vwF0uQCtra2jo6OQkJB9fX1VVVVCQkI9PT0iIiIUFBRSfBNgjhA7WRBGZw+GywmFzgaAyASBxQN2twDb29u2traenp6Kiop+fn53d3dzc3NyqRV4sxM/YBNAXRElNhBjlQ+IzA00TQ16vgxJbgp6vAl4tgJ3vgDs7Ozn5+fa2trS0tJCXRY6VBV6thSL1gf4nFdFAAAD80lEQVRYw+zSOXPaQBgG4He0LJJmbGRGDUIzuvgBQiAEPfcdwC33DTbUtmOwSyc+4iRucvzXRImLFJmRShc8xXbfu+9+szg4OHjjAsH/iFD49q7rqM6xc/wPtWyBhS8sC94ObWRCZDksh1+RzmcEfI0DoPrjylEkSTgViMs9udjYTwMG4Gf51Z1BM81ioRwit+QvgYsdUQZeKFr3ladyKXvVr+pAM5uKcmRLXFzoCIxn+0i/8lSaBMHnfi7qowfQuZnm3PuFPwGs13zD3NlViozY/z4YD6/TCQORbPr2q78GLB0ou5IO40pd5AxQZnJ83m2y9Ju2JYKfgEhWC18aEIfrZLURHwQC0B87ySZwHxX8BNDWB1KfQfyxT2TA24uPQMt8yTWA3obz8wQGlhTN06Z900MkuJLrYu3u5LkK9LTtGRF8NEDLeSnXYLUdHUFVlpPqTa4IamlhJZ464biY1w4CKGrROOW7uwLlV+Q02lanCF6cbSoPVLzUfPwDll5I9T6WyXWhZre1yjiI6VCSzCWY3+FKaAwGHngzpEygx6+V6Uzk6TJR7yhWxJ1bFgTPJ7gMc58aUCq+n+qNT6Pn8y/xOcCiZZVjnJ+AAPhEuj0SKZ9bL9ZpNS9SgM6z9p5w3jt43cMvecfWBhm7dtfEpfhYMDBYpFd7mDZIAxPCFKgBhB0hkWbE2wVMyqycfhOMEiebSzFz5IMTEjw7E87UFj4GVR7GXqaSkoIcISEc/I38/PwhOTUMRBrADgwK09zgYGUBqbwcARiQyp3Eyk6kC4BloqtbJTcaSHIHShALWFmBSRuCWBGC+AtDMAAGIpAAc9mBiB0sCLSXHUSygxSxEIoE7IKEgbhopKgogC96x04QCMMw/H0cG6f0cEmBHaLc7FFQzApoTLwtQgWUWo26glx2mzGkyoHM1PPMO/NrnSH8e2QAiRsZ8S3ZuJoW5Udg5moGoMRLN2gAnkcUctueJ1gADsdtlZ2AgmSYoaDZBXwRctcwy6HN3XX/wfnTnA7Q5x0S0Gku4wHpe7Ql8Mbtu4TqC3qcADGtUl4O3eK0AkZdKH1mU/a6MFQGA7pQGoAVoAuuPYZlLJF2BawVLLjwac6Q8wUax61/CpKQAT6ZX3hFqoqqAFvuf4AzM+NgsoBS/wcSOD7SFzyf6CE9UQK9II1MRvIJm8QSgsLiBZuypsAWKyARElgx5FcLv1N4nFLbB45Sh6+TzsQRtn7bz/B3fS9GQ12bgUE2PKycQbwgXD0SWLwVhpZFq4eHhWloOjLoqGvoRYRG
AR2vp2EtpNUaTUpiRAizMAEhKNXpYZNnAUlBCSgFYTIxQTlMMJNGwSgYBdQHAFsKs+/bUkeyAAAAAElFTkSuQmCC
+ mediatype: image/png
+ install:
+ spec:
+ clusterPermissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - nvidia.com
+ resources:
+ - '*'
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - config.openshift.io
+ resources:
+ - clusterversions
+ - proxies
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - '*'
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ resourceNames:
+ - hostmount-anyuid
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterroles
+ - clusterrolebindings
+ - roles
+ - rolebindings
+ verbs:
+ - '*'
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ - services
+ - services/finalizers
+ - endpoints
+ - persistentvolumeclaims
+ - events
+ - configmaps
+ - secrets
+ - nodes
+ - namespaces
+ - serviceaccounts
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - deployments
+ - daemonsets
+ - replicasets
+ - statefulsets
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - controllerrevisions
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - node.k8s.io
+ resources:
+ - runtimeclasses
+ verbs:
+ - get
+ - list
+ - create
+ - update
+ - watch
+ - delete
+ - apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - '*'
+ - apiGroups:
+ - monitoring.coreos.com
+ resources:
+ - servicemonitors
+ - prometheusrules
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - delete
+ - apiGroups:
+ - image.openshift.io
+ resources:
+ - imagestreams
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - apiextensions.k8s.io
+ resources:
+ - customresourcedefinitions
+ verbs:
+ - get
+ - list
+ - watch
+ permissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - roles
+ - rolebindings
+ verbs:
+ - '*'
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ - services
+ - services/finalizers
+ - endpoints
+ - persistentvolumeclaims
+ - events
+ - configmaps
+ - secrets
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - deployments
+ - daemonsets
+ - replicasets
+ - statefulsets
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - controllerrevisions
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - monitoring.coreos.com
+ resources:
+ - servicemonitors
+ - prometheusrules
+ verbs:
+ - get
+ - create
+ - list
+ - update
+ - watch
+ - delete
+ - apiGroups:
+ - apps
+ resourceNames:
+ - gpu-operator
+ resources:
+ - deployments/finalizers
+ verbs:
+ - update
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ verbs:
+ - get
+ - apiGroups:
+ - apps
+ resources:
+ - replicasets
+ - deployments
+ verbs:
+ - get
+ - apiGroups:
+ - nvidia.com
+ resources:
+ - '*'
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - '*'
+ - apiGroups:
+ - apiextensions.k8s.io
+ resources:
+ - customresourcedefinitions
+ verbs:
+ - get
+ - list
+ - watch
+ deployments:
+ - name: gpu-operator
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ strategy: {}
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ nvidia.com/gpu-driver-upgrade-drain.skip: "true"
+ spec:
+ priorityClassName: system-node-critical
+ containers:
+ - args:
+ - --leader-elect
+ - --leader-lease-renew-deadline
+ - "60s"
+ image: nvcr.io/nvidia/gpu-operator@sha256:041e75a3df84039c2dbbd4b9d67763bd212138822dbb6dbc0008858c1c6eff8d
+ command:
+ - gpu-operator
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ name: gpu-operator
+ ports:
+ - name: metrics
+ containerPort: 8080
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ resources:
+ limits:
+ cpu: 500m
+ memory: 1Gi
+ requests:
+ cpu: 200m
+ memory: 200Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ volumeMounts:
+ - mountPath: /host-etc/os-release
+ name: host-os-release
+ readOnly: true
+ env:
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ - name: "VALIDATOR_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:2edc1d4ed555830e70010c82558936198f5faa86fc29ecf5698219145102cfcc"
+ - name: "GFD_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:80674b19898ecf9ed6949e39674da769d6feeeb01bea54b914ef9ff502834f49"
+ - name: "CONTAINER_TOOLKIT_IMAGE"
+ value: "nvcr.io/nvidia/k8s/container-toolkit@sha256:7798eb9b9424e09959fc808596f87bdb68ac5740174123c6646dee2166d3e7d2"
+ - name: "DCGM_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/dcgm@sha256:2442fd2ec0ee29746abbced727c53741f017e8f3f615321c1ec42c1c3ffa0b12"
+ - name: "DCGM_EXPORTER_IMAGE"
+ value: "nvcr.io/nvidia/k8s/dcgm-exporter@sha256:ecb867c5787e44f1889b257e6c03b07748689a7b293c8f1affb1d4cb449b235c"
+ - name: "DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:80674b19898ecf9ed6949e39674da769d6feeeb01bea54b914ef9ff502834f49"
+ - name: "DRIVER_IMAGE"
+ value: "nvcr.io/nvidia/driver@sha256:96f25c67e5b1072d5981080e12d65ec37eb9ef2fc0494499416aa801b0a34da3"
+ - name: "DRIVER_IMAGE-535"
+ value: "nvcr.io/nvidia/driver@sha256:a836ccbe21da735aee1c39b81060ed5e2fdb4ffa339874baaf4634f1e9259f74"
+ - name: "DRIVER_IMAGE-470"
+ value: "nvcr.io/nvidia/driver@sha256:56c79482582cdfbc58d3134e8672637c5bf05f328880f76898f526143d04c6af"
+ - name: "DRIVER_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:0c77725de8c42d248ed825453efd2e005f4900f4be384fd23084f6b721ddd0e0"
+ - name: "MIG_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:d8774d6afd37110199512636172823ae8749e5ea2e13760f57c255a74f47018c"
+ - name: "CUDA_BASE_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:ae0623ec8634b6c88f815b88037763def160cbbac15013b77ddef257fc276c9a"
+ - name: "VFIO_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:ae0623ec8634b6c88f815b88037763def160cbbac15013b77ddef257fc276c9a"
+ - name: "SANDBOX_DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:2cc2119d20515f8ca7a61a0f3932578d69a45bcdea49e6c320a89c56f105e7d9"
+ - name: "VGPU_DEVICE_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:f0e4e14a3081417be8e6a5d855fb0cb69e1b63bc54a74f17e6d7084abe275588"
+ - name: "GDRCOPY_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:1ae0b923bc57f47bab046b50c50110f6914bbaffbfef704df34b3fe332db2e31"
+ terminationGracePeriodSeconds: 10
+ volumes:
+ - hostPath:
+ path: /etc/os-release
+ name: host-os-release
+ serviceAccountName: gpu-operator
+ strategy: deployment
+ installModes:
+ - supported: true
+ type: OwnNamespace
+ - supported: true
+ type: SingleNamespace
+ - supported: false
+ type: MultiNamespace
+ - supported: false
+ type: AllNamespaces
+ keywords:
+ - gpu
+ - cuda
+ - compute
+ - operator
+ - deep learning
+ - monitoring
+ - tesla
+ maintainers:
+ - name: NVIDIA
+ email: operator_feedback@nvidia.com
+ maturity: stable
+ provider:
+ name: NVIDIA Corporation
+ version: 24.3.0
+ replaces: gpu-operator-certified.v23.9.2
diff --git a/bundle/v24.3.0/manifests/nvidia.com_clusterpolicies.yaml b/bundle/v24.3.0/manifests/nvidia.com_clusterpolicies.yaml
new file mode 100644
index 000000000..f52a487b3
--- /dev/null
+++ b/bundle/v24.3.0/manifests/nvidia.com_clusterpolicies.yaml
@@ -0,0 +1,2386 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.14.0
+ name: clusterpolicies.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: ClusterPolicy
+ listKind: ClusterPolicyList
+ plural: clusterpolicies
+ singular: clusterpolicy
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1
+ schema:
+ openAPIV3Schema:
+ description: ClusterPolicy is the Schema for the clusterpolicies API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ClusterPolicySpec defines the desired state of ClusterPolicy
+ properties:
+ ccManager:
+ description: CCManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ defaultMode:
+ description: Default CC mode setting for compatible GPUs on the
+ node
+ enum:
+ - "on"
+ - "off"
+ - devtools
+ type: string
+ enabled:
+ description: Enabled indicates if deployment of CC Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: CC Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: CC Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: CC Manager image tag
+ type: string
+ type: object
+ cdi:
+ description: CDI configures how the Container Device Interface is
+ used in the cluster
+ properties:
+ default:
+ default: false
+ description: Default indicates whether to use CDI as the default
+ mechanism for providing GPU access to containers.
+ type: boolean
+ enabled:
+ default: false
+ description: Enabled indicates whether CDI can be used to make
+ GPUs accessible to containers.
+ type: boolean
+ type: object
+ daemonsets:
+ description: Daemonset defines common configuration for all Daemonsets
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ priorityClassName:
+ type: string
+ rollingUpdate:
+ description: 'Optional: Configuration for rolling update of all
+ DaemonSet pods'
+ properties:
+ maxUnavailable:
+ type: string
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ updateStrategy:
+ default: RollingUpdate
+ enum:
+ - RollingUpdate
+ - OnDelete
+ type: string
+ type: object
+ dcgm:
+ description: DCGM component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
+ as a separate pod is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ hostPort:
+ description: 'HostPort represents host port that needs to be bound
+ for DCGM engine (Default: 5555)'
+ format: int32
+ type: integer
+ image:
+ description: NVIDIA DCGM image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA DCGM image tag
+ type: string
+ type: object
+ dcgmExporter:
+ description: DCGMExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom metrics configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ name:
+ description: ConfigMap name with file dcgm-metrics.csv for
+ metrics to be collected by NVIDIA DCGM Exporter
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Exporter
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA DCGM Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ serviceMonitor:
+ description: 'Optional: ServiceMonitor configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ additionalLabels:
+ additionalProperties:
+ type: string
+ description: AdditionalLabels to add to ServiceMonitor instance
+ for NVIDIA DCGM Exporter
+ type: object
+ enabled:
+ description: Enabled indicates if ServiceMonitor is deployed
+ for NVIDIA DCGM Exporter
+ type: boolean
+ honorLabels:
+ description: HonorLabels chooses the metric’s labels on collisions
+ with target labels.
+ type: boolean
+ interval:
+ description: |-
+ Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
+ Supported units: y, w, d, h, m, s, ms
+ pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
+ type: string
+ relabelings:
+ description: Relabelings allows to rewrite labels on metric
+ sets for NVIDIA DCGM Exporter
+ items:
+ description: |-
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
+ properties:
+ action:
+ default: replace
+ description: |-
+ Action to perform based on the regex matching.
+
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+
+ Default: "Replace"
+ enum:
+ - replace
+ - Replace
+ - keep
+ - Keep
+ - drop
+ - Drop
+ - hashmod
+ - HashMod
+ - labelmap
+ - LabelMap
+ - labeldrop
+ - LabelDrop
+ - labelkeep
+ - LabelKeep
+ - lowercase
+ - Lowercase
+ - uppercase
+ - Uppercase
+ - keepequal
+ - KeepEqual
+ - dropequal
+ - DropEqual
+ type: string
+ modulus:
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+
+ Only applicable when the action is `HashMod`.
+ format: int64
+ type: integer
+ regex:
+ description: Regular expression against which the extracted
+ value is matched.
+ type: string
+ replacement:
+ description: |-
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+
+ Regex capture groups are available.
+ type: string
+ separator:
+ description: Separator is the string between concatenated
+ SourceLabels.
+ type: string
+ sourceLabels:
+ description: |-
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
+ items:
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
+ pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
+ type: string
+ type: array
+ targetLabel:
+ description: |-
+ Label to which the resulting string is written in a replacement.
+
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+
+ Regex capture groups are available.
+ type: string
+ type: object
+ type: array
+ type: object
+ version:
+ description: NVIDIA DCGM Exporter image tag
+ type: string
+ type: object
+ devicePlugin:
+ description: DevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Configuration for the NVIDIA Device Plugin
+ via the ConfigMap'
+ properties:
+ default:
+ description: Default config name within the ConfigMap for
+ the NVIDIA Device Plugin config
+ type: string
+ name:
+ description: ConfigMap name for NVIDIA Device Plugin config
+ including shared config between plugin and GFD
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Device
+ Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Device Plugin image tag
+ type: string
+ type: object
+ driver:
+ description: Driver component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for
+ NVIDIA Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Driver
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters
+ for the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU
+ licensing'
+ properties:
+ configMapName:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System
+ is used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver
+ Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ rdma:
+ description: GPUDirectRDMASpec defines the properties for nvidia-peermem
+ deployment
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled
+ through GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ configMapName:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ upgradePolicy:
+ description: Driver auto-upgrade settings
+ properties:
+ autoUpgrade:
+ default: false
+ description: |-
+ AutoUpgrade is a global switch for automatic upgrade feature
+ if set to false all other options are ignored
+ type: boolean
+ drain:
+ description: DrainSpec describes configuration for node drain
+ during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the node is drained)
+ type: boolean
+ enable:
+ default: false
+ description: Enable indicates if node draining is allowed
+ during upgrade
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force draining is allowed
+ type: boolean
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector to filter pods on the node that need to be drained
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 300
+ description: TimeoutSecond specifies the length of time
+ in seconds to wait before giving up drain, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ maxParallelUpgrades:
+ default: 1
+ description: |-
+ MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
+ 0 means no limit, all nodes will be upgraded in parallel
+ minimum: 0
+ type: integer
+ maxUnavailable:
+ anyOf:
+ - type: integer
+ - type: string
+ default: 25%
+ description: |-
+ MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
+ Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
+ Absolute number is calculated from percentage by rounding up.
+ By default, a fixed value of 25% is used.
+ x-kubernetes-int-or-string: true
+ podDeletion:
+ description: PodDeletionSpec describes configuration for deletion
+ of pods using special resources during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the pod is deleted)
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force deletion is allowed
+ type: boolean
+ timeoutSeconds:
+ default: 300
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ waitForCompletion:
+ description: WaitForCompletionSpec describes the configuration
+ for waiting on job completions
+ properties:
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector for the pods to wait for completion
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 0
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ type: object
+ useNvidiaDriverCRD:
+ description: UseNvidiaDriverCRD indicates if the deployment of
+ NVIDIA Driver is managed by the NVIDIADriver CRD type
+ type: boolean
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA
+ Driver using pre-compiled modules is enabled
+ type: boolean
+ version:
+ description: NVIDIA Driver image tag
+ type: string
+ virtualTopology:
+ description: 'Optional: Virtual Topology Daemon configuration
+ for NVIDIA vGPU drivers'
+ properties:
+ config:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ type: object
+ gdrcopy:
+ description: GDRCopy component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ Operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GDRCopy driver image repository
+ type: string
+ version:
+ description: NVIDIA GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS components (Experimental)
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ gfd:
+ description: GPUFeatureDiscovery spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of GPU Feature Discovery
+ Plugin is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GFD image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GFD image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: GFD image tag
+ type: string
+ type: object
+ kataManager:
+ description: KataManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: Kata Manager config
+ properties:
+ artifactsDir:
+ default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
+ description: |-
+ ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
+ are placed on the local filesystem.
+ type: string
+ runtimeClasses:
+ description: RuntimeClasses is a list of kata runtime classes
+ to configure.
+ items:
+ description: RuntimeClass defines the configuration for
+ a kata RuntimeClass
+ properties:
+ artifacts:
+ description: Artifacts are the kata artifacts associated
+ with the runtime class.
+ properties:
+ pullSecret:
+ description: PullSecret is the secret used to pull
+ the OCI artifact.
+ type: string
+ url:
+ description: |-
+ URL is the path to the OCI artifact (payload) containing all artifacts
+ associated with a kata runtime class.
+ type: string
+ required:
+ - url
+ type: object
+ name:
+ description: Name is the name of the kata runtime class.
+ type: string
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: |-
+ NodeSelector specifies the nodeSelector for the RuntimeClass object.
+ This ensures pods running with the RuntimeClass only get scheduled
+ onto nodes which support it.
+ type: object
+ required:
+ - artifacts
+ - name
+ type: object
+ type: array
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of Kata Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Kata Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Kata Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Kata Manager image tag
+ type: string
+ type: object
+ mig:
+ description: MIG spec
+ properties:
+ strategy:
+ description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
+ Device Plugin'
+ enum:
+ - none
+ - single
+ - mixed
+ type: string
+ type: object
+ migManager:
+ description: MIGManager for configuration to deploy MIG Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom mig-parted configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ default:
+ default: all-disabled
+ description: Default MIG config to be applied on the node,
+ when there is no config specified with the node label nvidia.com/mig.config
+ enum:
+ - all-disabled
+ - ""
+ type: string
+ name:
+ default: default-mig-parted-config
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA MIG Manager
+ is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gpuClientsConfig:
+ description: 'Optional: Custom gpu-clients configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ image:
+ description: NVIDIA MIG Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA MIG Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA MIG Manager image tag
+ type: string
+ type: object
+ nodeStatusExporter:
+ description: NodeStatusExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of Node Status Exporter
+ is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Node Status Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Node Status Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Node Status Exporter image tag
+ type: string
+ type: object
+ operator:
+ description: Operator component spec
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ defaultRuntime:
+ default: docker
+ description: Runtime defines container runtime type
+ enum:
+ - docker
+ - crio
+ - containerd
+ type: string
+ initContainer:
+ description: InitContainerSpec describes configuration for initContainer
+ image used with all components
+ properties:
+ image:
+ description: Image represents image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents image repository path
+ type: string
+ version:
+ description: Version represents image tag(version)
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ runtimeClass:
+ default: nvidia
+ type: string
+ use_ocp_driver_toolkit:
+ description: UseOpenShiftDriverToolkit indicates if DriverToolkit
+ image should be used on OpenShift to build and install driver
+ modules
+ type: boolean
+ required:
+ - defaultRuntime
+ type: object
+ psa:
+ description: PSA defines spec for PodSecurityAdmission configuration
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityAdmission configuration
+ needs to be enabled for all Pods
+ type: boolean
+ type: object
+ psp:
+ description: |-
+ Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
+ PSP defines spec for handling PodSecurityPolicies
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityPolicies needs to
+ be enabled for all Pods
+ type: boolean
+ type: object
+ sandboxDevicePlugin:
+ description: SandboxDevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Sandbox
+ Device Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Sandbox Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA Sandbox Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Sandbox Device Plugin image tag
+ type: string
+ type: object
+ sandboxWorkloads:
+ description: SandboxWorkloads defines the spec for handling sandbox
+ workloads (i.e. Virtual Machines)
+ properties:
+ defaultWorkload:
+ default: container
+ description: |-
+ DefaultWorkload indicates the default GPU workload type to configure
+ worker nodes in the cluster for
+ enum:
+ - container
+ - vm-passthrough
+ - vm-vgpu
+ type: string
+ enabled:
+ description: |-
+ Enabled indicates if the GPU Operator should manage additional operands required
+ for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
+ type: boolean
+ type: object
+ toolkit:
+ description: Toolkit component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Container
+ Toolkit through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Container Toolkit image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ installDir:
+ default: /usr/local/nvidia
+ description: Toolkit install directory on the host
+ type: string
+ repository:
+ description: NVIDIA Container Toolkit image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Container Toolkit image tag
+ type: string
+ type: object
+ validator:
+ description: Validator defines the spec for operator-validator daemonset
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ cuda:
+ description: CUDA validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ driver:
+ description: Driver validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Validator image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ plugin:
+ description: Plugin validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ repository:
+ description: Validator image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ toolkit:
+ description: Toolkit validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ version:
+ description: Validator image tag
+ type: string
+ vfioPCI:
+ description: VfioPCI validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuDevices:
+ description: VGPUDevices validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuManager:
+ description: VGPUManager validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: object
+ vfioManager:
+ description: VFIOManager for configuration to deploy VFIO-PCI Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of VFIO Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: VFIO Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: VFIO Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: VFIO Manager image tag
+ type: string
+ type: object
+ vgpuDeviceManager:
+ description: VGPUDeviceManager spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: NVIDIA vGPU devices configuration for NVIDIA vGPU
+ Device Manager container
+ properties:
+ default:
+ default: default
+ description: Default config name within the ConfigMap
+ type: string
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Device
+ Manager is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Device Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Device Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Device Manager image tag
+ type: string
+ type: object
+ vgpuManager:
+ description: VGPUManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Manager
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Manager image tag
+ type: string
+ type: object
+ required:
+ - daemonsets
+ - dcgm
+ - dcgmExporter
+ - devicePlugin
+ - driver
+ - gfd
+ - nodeStatusExporter
+ - operator
+ - toolkit
+ type: object
+ status:
+ description: ClusterPolicyStatus defines the observed state of ClusterPolicy
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the ClusterPolicy's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ is installed
+ type: string
+ state:
+ description: State indicates status of ClusterPolicy
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.3.0/manifests/nvidia.com_nvidiadrivers.yaml b/bundle/v24.3.0/manifests/nvidia.com_nvidiadrivers.yaml
new file mode 100644
index 000000000..317972fd2
--- /dev/null
+++ b/bundle/v24.3.0/manifests/nvidia.com_nvidiadrivers.yaml
@@ -0,0 +1,800 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.14.0
+ name: nvidiadrivers.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: NVIDIADriver
+ listKind: NVIDIADriverList
+ plural: nvidiadrivers
+ shortNames:
+ - nvd
+ - nvdriver
+ - nvdrivers
+ singular: nvidiadriver
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: NVIDIADriver is the Schema for the nvidiadrivers API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: NVIDIADriverSpec defines the desired state of NVIDIADriver
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for NVIDIA
+ Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ driverType:
+ default: gpu
+ description: DriverType defines NVIDIA driver type
+ enum:
+ - gpu
+ - vgpu
+ - vgpu-host-manager
+ type: string
+ x-kubernetes-validations:
+ - message: driverType is an immutable field. Please create a new NVIDIADriver
+ resource instead when you want to change this setting.
+ rule: self == oldSelf
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present in
+ a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gdrcopy:
+ description: GDRCopy defines the spec for GDRCopy driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GDRCopy driver image repository
+ type: string
+ version:
+ description: GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ image:
+ default: nvcr.io/nvidia/driver
+ description: NVIDIA Driver container image name
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters for
+ the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
+ properties:
+ name:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System is
+ used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver Manager
+ initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image tag(version)
+ type: string
+ type: object
+ nodeAffinity:
+ description: Affinity specifies node affinity rules for driver pods
+ properties:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ The scheduler will prefer to schedule pods to nodes that satisfy
+ the affinity expressions specified by this field, but it may choose
+ a node that violates one or more of the expressions. The node that is
+ most preferred is the one with the greatest sum of weights, i.e.
+ for each node that meets all of the scheduling requirements (resource
+ request, requiredDuringScheduling affinity expressions, etc.),
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node matches the corresponding matchExpressions; the
+ node(s) with the highest sum are the most preferred.
+ items:
+ description: |-
+ An empty preferred scheduling term matches all objects with implicit weight 0
+ (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
+ properties:
+ preference:
+ description: A node selector term, associated with the corresponding
+ weight.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ type: object
+ x-kubernetes-map-type: atomic
+ weight:
+ description: Weight associated with matching the corresponding
+ nodeSelectorTerm, in the range 1-100.
+ format: int32
+ type: integer
+ required:
+ - preference
+ - weight
+ type: object
+ type: array
+ requiredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ If the affinity requirements specified by this field are not met at
+ scheduling time, the pod will not be scheduled onto the node.
+ If the affinity requirements specified by this field cease to be met
+ at some point during pod execution (e.g. due to an update), the system
+ may or may not try to eventually evict the pod from its node.
+ properties:
+ nodeSelectorTerms:
+ description: Required. A list of node selector terms. The
+ terms are ORed.
+ items:
+ description: |-
+ A null or empty node selector term matches no objects. The requirements of
+ them are ANDed.
+ The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
+ required:
+ - nodeSelectorTerms
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: NodeSelector specifies a selector for installation of
+ NVIDIA driver
+ type: object
+ priorityClassName:
+ description: 'Optional: Set priorityClassName'
+ type: string
+ rdma:
+ description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
+ driver
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled through
+ GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ name:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for each
+ pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA Driver
+ using pre-compiled modules is enabled
+ type: boolean
+ x-kubernetes-validations:
+ - message: usePrecompiled is an immutable field. Please create a new
+ NVIDIADriver resource instead when you want to change this setting.
+ rule: self == oldSelf
+ version:
+ description: NVIDIA Driver version (or just branch for precompiled
+ drivers)
+ type: string
+ virtualTopologyConfig:
+ description: 'Optional: Virtual Topology Daemon configuration for
+ NVIDIA vGPU drivers'
+ properties:
+ name:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ required:
+ - driverType
+ - image
+ type: object
+ status:
+ description: NVIDIADriverStatus defines the observed state of NVIDIADriver
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the NVIDIADriver's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ and driver are installed
+ type: string
+ state:
+ description: |-
+ INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
+ Important: Run "make" to regenerate code after modifying this file
+ State indicates status of NVIDIADriver instance
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.3.0/metadata/annotations.yaml b/bundle/v24.3.0/metadata/annotations.yaml
new file mode 100644
index 000000000..d718b4016
--- /dev/null
+++ b/bundle/v24.3.0/metadata/annotations.yaml
@@ -0,0 +1,17 @@
+annotations:
+ operators.operatorframework.io.bundle.channels.v1: stable,v24.3
+ operators.operatorframework.io.bundle.channel.default.v1: v24.3
+ operators.operatorframework.io.bundle.manifests.v1: manifests/
+ operators.operatorframework.io.bundle.mediatype.v1: registry+v1
+ operators.operatorframework.io.bundle.metadata.v1: metadata/
+ operators.operatorframework.io.bundle.package.v1: gpu-operator-certified
+ operators.operatorframework.io.metrics.builder: operator-sdk-v1.4.0
+ operators.operatorframework.io.metrics.mediatype.v1: metrics+v1
+ operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3
+ operators.operatorframework.io.test.config.v1: tests/scorecard/
+ operators.operatorframework.io.test.mediatype.v1: scorecard+v1
+ operatorframework.io/cluster-monitoring: "true"
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+
+ # Annotations to specify OCP versions compatibility.
+ com.redhat.openshift.versions: v4.9-v4.15
diff --git a/bundle/v24.6.0/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/v24.6.0/manifests/gpu-operator-certified.clusterserviceversion.yaml
new file mode 100644
index 000000000..7f6784a6f
--- /dev/null
+++ b/bundle/v24.6.0/manifests/gpu-operator-certified.clusterserviceversion.yaml
@@ -0,0 +1,921 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: ClusterServiceVersion
+metadata:
+ labels:
+ operatorframework.io/arch.arm64: supported
+ operatorframework.io/arch.amd64: supported
+ pod-security.kubernetes.io/enforce: privileged
+ pod-security.kubernetes.io/audit: privileged
+ pod-security.kubernetes.io/warn: privileged
+ annotations:
+ features.operators.openshift.io/disconnected: "true"
+ features.operators.openshift.io/fips-compliant: "false"
+ features.operators.openshift.io/proxy-aware: "true"
+ features.operators.openshift.io/tls-profiles: "false"
+ features.operators.openshift.io/token-auth-aws: "false"
+ features.operators.openshift.io/token-auth-azure: "false"
+ features.operators.openshift.io/token-auth-gcp: "false"
+ features.operators.openshift.io/cnf: "false"
+ features.operators.openshift.io/cni: "false"
+ features.operators.openshift.io/csi: "false"
+ olm.skipRange: '>=1.9.0 <24.6.0'
+ alm-examples: |-
+ [
+ {
+ "apiVersion": "nvidia.com/v1",
+ "kind": "ClusterPolicy",
+ "metadata": {
+ "name": "gpu-cluster-policy"
+ },
+ "spec": {
+ "operator": {
+ "defaultRuntime": "crio",
+ "use_ocp_driver_toolkit": true,
+ "initContainer": {
+ }
+ },
+ "sandboxWorkloads": {
+ "enabled": false,
+ "defaultWorkload": "container"
+ },
+ "driver": {
+ "enabled": true,
+ "useNvidiaDriverCRD": false,
+ "useOpenKernelModules": false,
+ "upgradePolicy": {
+ "autoUpgrade": true,
+ "drain": {
+ "deleteEmptyDir": false,
+ "enable": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "maxParallelUpgrades": 1,
+ "maxUnavailable": "25%",
+ "podDeletion": {
+ "deleteEmptyDir": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "waitForCompletion": {
+ "timeoutSeconds": 0
+ }
+ },
+ "repoConfig": {
+ "configMapName": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "configMapName": ""
+ },
+ "virtualTopology": {
+ "config": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ },
+ "dcgmExporter": {
+ "enabled": true,
+ "config": {
+ "name": ""
+ },
+ "serviceMonitor": {
+ "enabled": true
+ }
+ },
+ "dcgm": {
+ "enabled": true
+ },
+ "daemonsets": {
+ "updateStrategy": "RollingUpdate",
+ "rollingUpdate": {
+ "maxUnavailable": "1"
+ }
+ },
+ "devicePlugin": {
+ "enabled": true,
+ "config": {
+ "name": "",
+ "default": ""
+ },
+ "mps": {
+ "root": "/run/nvidia/mps"
+ }
+ },
+ "gfd": {
+ "enabled": true
+ },
+ "migManager": {
+ "enabled": true
+ },
+ "nodeStatusExporter": {
+ "enabled": true
+ },
+ "mig": {
+ "strategy": "single"
+ },
+ "toolkit": {
+ "enabled": true
+ },
+ "validator": {
+ "plugin": {
+ "env": [
+ {
+ "name": "WITH_WORKLOAD",
+ "value": "false"
+ }
+ ]
+ }
+ },
+ "vgpuManager": {
+ "enabled": false
+ },
+ "vgpuDeviceManager": {
+ "enabled": true
+ },
+ "sandboxDevicePlugin": {
+ "enabled": true
+ },
+ "vfioManager": {
+ "enabled": true
+ },
+ "gds": {
+ "enabled": false
+ },
+ "gdrcopy": {
+ "enabled": false
+ }
+ }
+ },
+ {
+ "apiVersion": "nvidia.com/v1alpha1",
+ "kind": "NVIDIADriver",
+ "metadata": {
+ "name": "gpu-driver"
+ },
+ "spec": {
+ "driverType": "gpu",
+ "repository": "nvcr.io/nvidia",
+ "image": "driver",
+ "version": "sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415",
+ "nodeSelector": {},
+ "manager": {},
+ "repoConfig": {
+ "name": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "name": ""
+ },
+ "virtualTopologyConfig": {
+ "name": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ }
+ }
+ ]
+ operators.operatorframework.io/builder: operator-sdk-v1.4.0
+ operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+ capabilities: Deep Insights
+ categories: AI/Machine Learning, OpenShift Optional
+ certified: "true"
+ containerImage: nvcr.io/nvidia/gpu-operator@sha256:980454fdcc3084ffab1c8b8681819c3ca770738b96633a4497b3fc27e669fef9
+ createdAt: "Thu Jul 27 13:57:56 PDT 2023"
+ description: Automate the management and monitoring of NVIDIA GPUs.
+ provider: NVIDIA
+ repository: http://github.com/NVIDIA/gpu-operator
+ support: NVIDIA
+ name: gpu-operator-certified.v24.6.0
+ namespace: placeholder
+spec:
+ apiservicedefinitions: {}
+ relatedImages:
+ - name: gpu-operator-image
+ image: nvcr.io/nvidia/gpu-operator@sha256:980454fdcc3084ffab1c8b8681819c3ca770738b96633a4497b3fc27e669fef9
+ - name: dcgm-exporter-image
+ image: nvcr.io/nvidia/k8s/dcgm-exporter@sha256:21f4c8b88716e8e6f732f9fb4c2efaef937c227491a8631c5e55036f80f39a4d
+ - name: dcgm-image
+ image: nvcr.io/nvidia/cloud-native/dcgm@sha256:15dab1273345df4a5844c4c761d064dbc4b592101251dc39174e597137123027
+ - name: container-toolkit-image
+ image: nvcr.io/nvidia/k8s/container-toolkit@sha256:f95ef6a0c377e011bc0561c7d2c2bf32e45106fb0ba91ae9a10f97236ded0581
+ - name: driver-image
+ image: nvcr.io/nvidia/driver@sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415
+ - name: driver-image-535
+ image: nvcr.io/nvidia/driver@sha256:a6d12fb5753f267dda25dfd38910f972bc632c006a24107fa50e20bba3642d7c
+ - name: driver-image-470
+ image: nvcr.io/nvidia/driver@sha256:07e11f85d54d49ec9648fb06e148b8d832ee1f9c3549a915eee853c9ef2949c2
+ - name: device-plugin-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:f85fd6e328e36d4737bf394bf8bd69bec793656af686ca0491f28730d9b96d1a
+ - name: gpu-feature-discovery-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:f85fd6e328e36d4737bf394bf8bd69bec793656af686ca0491f28730d9b96d1a
+ - name: mig-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:781fb47e264d9e0fbc8da5bd046e5e678316c866bc36ddd4b56d4eb0de682d5b
+ - name: init-container-image
+ image: nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe
+ - name: gpu-operator-validator-image
+ image: nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:a4c9c6244f2a70b6c868ad4eb6b8eaf0a1fe9f91c9baefd8f58b0ad085dd715b
+ - name: k8s-driver-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:740abc3ff657545c10effd5354f09af525200ed9a1b7623f0c2e8c7bd9e4a4e2
+ - name: vfio-manager-image
+ image: nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe
+ - name: sandbox-device-plugin-image
+ image: nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:969147c01d63be5d1fe458f32f1cc0c7408cf3062531db91408e2fc57b4d8a67
+ - name: vgpu-device-manager-image
+ image: nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:ae63fac9a4057a7646f0cf0ee0566e8928529adde05c4c0a017cda0599e381b2
+ - name: gdrcopy-image
+ image: nvcr.io/nvidia/cloud-native/gdrdrv@sha256:33de74eb590f071403c17b6c210c02963245851971168bc0c07c06c100a9f376
+ customresourcedefinitions:
+ owned:
+ - name: nvidiadrivers.nvidia.com
+ kind: NVIDIADriver
+ version: v1alpha1
+ displayName: NVIDIADriver
+ description: NVIDIADriver allows you to deploy the NVIDIA driver
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ statusDescriptors:
+ - description: The current state of the driver.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ - name: clusterpolicies.nvidia.com
+ kind: ClusterPolicy
+ version: v1
+ group: nvidia.com
+ displayName: ClusterPolicy
+ description: ClusterPolicy allows you to configure the GPU Operator
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: Deployment
+ name: ''
+ version: apps/v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: GPU Operator config
+ displayName: GPU Operator config
+ path: operator
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: operator.validator.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: operator.validator.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - description: NVIDIA GPU/vGPU Driver config
+ displayName: NVIDIA GPU/vGPU Driver config
+ path: driver
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: driver.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: driver.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: driver.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: driver.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: driver.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA DCGM Exporter config
+ displayName: NVIDIA DCGM Exporter config
+ path: dcgmExporter
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: dcgmExporter.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: dcgmExporter.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: dcgmExporter.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: dcgmExporter.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: dcgmExporter.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Device Plugin config
+ displayName: NVIDIA Device Plugin config
+ path: devicePlugin
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: devicePlugin.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: devicePlugin.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: devicePlugin.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: devicePlugin.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: devicePlugin.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: GPU Feature Discovery Plugin config
+ displayName: GPU Feature Discovery Plugin config
+ path: gfd
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: gfd.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: gfd.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: gfd.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: gfd.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: gfd.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Container Toolkit config
+ displayName: NVIDIA Container Toolkit config
+ path: toolkit
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: toolkit.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: toolkit.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: toolkit.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: toolkit.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: toolkit.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - displayName: NVIDIA DCGM config
+ description: NVIDIA DCGM config
+ path: dcgm
+ - displayName: Validator config
+ description: Validator config
+ path: validator
+ - displayName: Node Status Exporter config
+ description: Node Status Exporter config
+ path: nodeStatusExporter
+ - displayName: Daemonsets config
+ description: Daemonsets config
+ path: daemonsets
+ - displayName: MIG config
+ description: MIG config
+ path: mig
+ - displayName: NVIDIA MIG Manager config
+ description: NVIDIA MIG Manager config
+ path: migManager
+ - displayName: PodSecurityPolicy config
+ description: PodSecurityPolicy config
+ path: psp
+ - displayName: NVIDIA GPUDirect Storage config
+ description: NVIDIA GPUDirect Storage config
+ path: gds
+ - displayName: Sandbox Workloads config
+ description: Sandbox Workloads config
+ path: sandboxWorkloads
+ - displayName: NVIDIA vGPU Manager config
+ description: NVIDIA vGPU Manager config
+ path: vgpuManager
+ - displayName: NVIDIA vGPU Device Manager config
+ description: NVIDIA vGPU Device Manager config
+ path: vgpuDeviceManager
+ - displayName: VFIO Manager config
+ description: VFIO Manager config
+ path: vfioManager
+ - displayName: NVIDIA Sandbox Device Plugin config
+ description: NVIDIA Sandbox Device Plugin config
+ path: sandboxDevicePlugin
+ statusDescriptors:
+ - description: The current state of the operator.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ displayName: NVIDIA GPU Operator
+ description: >
+ Kubernetes provides access to special hardware resources such as NVIDIA
+ GPUs, NICs, Infiniband adapters and other devices through the [device plugin
+ framework](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/).
+ However, configuring and managing nodes with these hardware resources
+ requires configuration of multiple software components such as drivers,
+ container runtimes or other libraries, which is difficult and prone to
+ errors.
+
+ The NVIDIA GPU Operator uses the [operator
+ framework](https://cloud.redhat.com/blog/introducing-the-operator-framework) within
+ Kubernetes to automate the management of all NVIDIA software components
+ needed to provision and monitor GPUs.
+ These components include the NVIDIA drivers (to enable CUDA), Kubernetes
+ device plugin for GPUs, the NVIDIA Container Runtime, automatic node
+ labelling and NVIDIA DCGM exporter.
+
+ Visit the official site of the [GPU Operator](https://github.com/NVIDIA/gpu-operator) for more information.
+ To get started with the GPU Operator on OpenShift, see the instructions
+ [here](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/openshift/contents.html).
+ icon:
+ - base64data: iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAB2lBMVEUAAAD///8EBAN3uQACAgIAAAQJDQUCAgB1tgAHCQf+/v5Ufg5Hagxxqwt+xgJ3uAB9wwB4vQBRUVEeLA3e3t5nZ2coKCgODg4FBwZ9wwR6wAJ4vADz8/MbGxt5tw1vpw1/wgoOFwkLDwh9xQH5+fny8vLw8PDFxcWysrKFhYVvb282NjYyMjIqKioXFxdikxRYgxNCYxJQdhFqoQ9xrg16ugxyqgyAxQkEBQj7+/v29vbIyMhjY2NbW1tHR0cvLy8kJCQdHR0ZGRlKbxJ8uhFNcxFVgBAxSBBgkg93tQ50sA4qPg4XIg18vwsbKQsSGgsLCwsMEwqCyQeByQFztADPz8+/v7+6urqWlpZra2tKSkogICASEhJmmRE8XBA5VRA2UBBonA9biA9GaQ4sQg4jMw4mOQ0aJw2GzgsUHgttpAqJ0Ql/wQWG0AJ8vwF0uQCtra2jo6OQkJB9fX1VVVVCQkI9PT0iIiIUFBRSfBNgjhA7WRBGZw+GywmFzgaAyASBxQN2twDb29u2traenp6Kiop+fn53d3dzc3NyqRV4sxM/YBNAXRElNhBjlQ+IzA00TQ16vgxJbgp6vAl4tgJ3vgDs7Ozn5+fa2trS0tJCXRY6VBV6thSL1gf4nFdFAAAD80lEQVRYw+zSOXPaQBgG4He0LJJmbGRGDUIzuvgBQiAEPfcdwC33DTbUtmOwSyc+4iRucvzXRImLFJmRShc8xXbfu+9+szg4OHjjAsH/iFD49q7rqM6xc/wPtWyBhS8sC94ObWRCZDksh1+RzmcEfI0DoPrjylEkSTgViMs9udjYTwMG4Gf51Z1BM81ioRwit+QvgYsdUQZeKFr3ladyKXvVr+pAM5uKcmRLXFzoCIxn+0i/8lSaBMHnfi7qowfQuZnm3PuFPwGs13zD3NlViozY/z4YD6/TCQORbPr2q78GLB0ou5IO40pd5AxQZnJ83m2y9Ju2JYKfgEhWC18aEIfrZLURHwQC0B87ySZwHxX8BNDWB1KfQfyxT2TA24uPQMt8yTWA3obz8wQGlhTN06Z900MkuJLrYu3u5LkK9LTtGRF8NEDLeSnXYLUdHUFVlpPqTa4IamlhJZ464biY1w4CKGrROOW7uwLlV+Q02lanCF6cbSoPVLzUfPwDll5I9T6WyXWhZre1yjiI6VCSzCWY3+FKaAwGHngzpEygx6+V6Uzk6TJR7yhWxJ1bFgTPJ7gMc58aUCq+n+qNT6Pn8y/xOcCiZZVjnJ+AAPhEuj0SKZ9bL9ZpNS9SgM6z9p5w3jt43cMvecfWBhm7dtfEpfhYMDBYpFd7mDZIAxPCFKgBhB0hkWbE2wVMyqycfhOMEiebSzFz5IMTEjw7E87UFj4GVR7GXqaSkoIcISEc/I38/PwhOTUMRBrADgwK09zgYGUBqbwcARiQyp3Eyk6kC4BloqtbJTcaSHIHShALWFmBSRuCWBGC+AtDMAAGIpAAc9mBiB0sCLSXHUSygxSxEIoE7IKEgbhopKgogC96x04QCMMw/H0cG6f0cEmBHaLc7FFQzApoTLwtQgWUWo26glx2mzGkyoHM1PPMO/NrnSH8e2QAiRsZ8S3ZuJoW5Udg5moGoMRLN2gAnkcUctueJ1gADsdtlZ2AgmSYoaDZBXwRctcwy6HN3XX/wfnTnA7Q5x0S0Gku4wHpe7Ql8Mbtu4TqC3qcADGtUl4O3eK0AkZdKH1mU/a6MFQGA7pQGoAVoAuuPYZlLJF2BawVLLjwac6Q8wUax61/CpKQAT6ZX3hFqoqqAFvuf4AzM+NgsoBS/wcSOD7SFzyf6CE9UQK9II1MRvIJm8QSgsLiBZuypsAWKyARElgx5FcLv1N4nFLbB45Sh6+TzsQRtn7bz/B3fS9GQ12bgUE2PKycQbwgXD0SWLwVhpZFq4eHhWloOjLoqGvoRYRG
AR2vp2EtpNUaTUpiRAizMAEhKNXpYZNnAUlBCSgFYTIxQTlMMJNGwSgYBdQHAFsKs+/bUkeyAAAAAElFTkSuQmCC
+ mediatype: image/png
+ install:
+ spec:
+ clusterPermissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - nvidia.com
+ resources:
+ - clusterpolicies
+ - clusterpolicies/finalizers
+ - clusterpolicies/status
+ - nvidiadrivers
+ - nvidiadrivers/finalizers
+ - nvidiadrivers/status
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - deletecollection
+ - apiGroups:
+ - config.openshift.io
+ resources:
+ - clusterversions
+ - proxies
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ - create
+ - get
+ - list
+ - watch
+ - patch
+ - update
+ - delete
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ resourceNames:
+ - hostmount-anyuid
+ - apiGroups:
+ - image.openshift.io
+ resources:
+ - imagestreams
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - monitoring.coreos.com
+ resources:
+ - servicemonitors
+ - prometheusrules
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - delete
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterroles
+ - clusterrolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ - services
+ - services/finalizers
+ - events
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - ""
+ resources:
+ - namespaces
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - ""
+ resources:
+ - nodes
+ verbs:
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - controllerrevisions
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - node.k8s.io
+ resources:
+ - runtimeclasses
+ verbs:
+ - get
+ - list
+ - create
+ - update
+ - watch
+ - delete
+ - apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apiextensions.k8s.io
+ resources:
+ - customresourcedefinitions
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ permissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - roles
+ - rolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - configmaps
+ - endpoints
+ - secrets
+ - serviceaccounts
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ deployments:
+ - name: gpu-operator
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ strategy: {}
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ nvidia.com/gpu-driver-upgrade-drain.skip: "true"
+ spec:
+ priorityClassName: system-node-critical
+ containers:
+ - args:
+ - --leader-elect
+ - --leader-lease-renew-deadline
+ - "60s"
+ image: nvcr.io/nvidia/gpu-operator@sha256:980454fdcc3084ffab1c8b8681819c3ca770738b96633a4497b3fc27e669fef9
+ command:
+ - gpu-operator
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ name: gpu-operator
+ ports:
+ - name: metrics
+ containerPort: 8080
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ resources:
+ limits:
+ cpu: 500m
+ memory: 1Gi
+ requests:
+ cpu: 200m
+ memory: 200Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ volumeMounts:
+ - mountPath: /host-etc/os-release
+ name: host-os-release
+ readOnly: true
+ env:
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ - name: "VALIDATOR_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:a4c9c6244f2a70b6c868ad4eb6b8eaf0a1fe9f91c9baefd8f58b0ad085dd715b"
+ - name: "GFD_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:f85fd6e328e36d4737bf394bf8bd69bec793656af686ca0491f28730d9b96d1a"
+ - name: "CONTAINER_TOOLKIT_IMAGE"
+ value: "nvcr.io/nvidia/k8s/container-toolkit@sha256:f95ef6a0c377e011bc0561c7d2c2bf32e45106fb0ba91ae9a10f97236ded0581"
+ - name: "DCGM_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/dcgm@sha256:15dab1273345df4a5844c4c761d064dbc4b592101251dc39174e597137123027"
+ - name: "DCGM_EXPORTER_IMAGE"
+ value: "nvcr.io/nvidia/k8s/dcgm-exporter@sha256:21f4c8b88716e8e6f732f9fb4c2efaef937c227491a8631c5e55036f80f39a4d"
+ - name: "DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:f85fd6e328e36d4737bf394bf8bd69bec793656af686ca0491f28730d9b96d1a"
+ - name: "DRIVER_IMAGE"
+ value: "nvcr.io/nvidia/driver@sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415"
+ - name: "DRIVER_IMAGE-535"
+ value: "nvcr.io/nvidia/driver@sha256:a6d12fb5753f267dda25dfd38910f972bc632c006a24107fa50e20bba3642d7c"
+ - name: "DRIVER_IMAGE-470"
+ value: "nvcr.io/nvidia/driver@sha256:07e11f85d54d49ec9648fb06e148b8d832ee1f9c3549a915eee853c9ef2949c2"
+ - name: "DRIVER_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:740abc3ff657545c10effd5354f09af525200ed9a1b7623f0c2e8c7bd9e4a4e2"
+ - name: "MIG_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:781fb47e264d9e0fbc8da5bd046e5e678316c866bc36ddd4b56d4eb0de682d5b"
+ - name: "CUDA_BASE_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe"
+ - name: "VFIO_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe"
+ - name: "SANDBOX_DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:969147c01d63be5d1fe458f32f1cc0c7408cf3062531db91408e2fc57b4d8a67"
+ - name: "VGPU_DEVICE_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:ae63fac9a4057a7646f0cf0ee0566e8928529adde05c4c0a017cda0599e381b2"
+ - name: "GDRCOPY_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:33de74eb590f071403c17b6c210c02963245851971168bc0c07c06c100a9f376"
+ terminationGracePeriodSeconds: 10
+ volumes:
+ - hostPath:
+ path: /etc/os-release
+ name: host-os-release
+ serviceAccountName: gpu-operator
+ strategy: deployment
+ installModes:
+ - supported: true
+ type: OwnNamespace
+ - supported: true
+ type: SingleNamespace
+ - supported: false
+ type: MultiNamespace
+ - supported: false
+ type: AllNamespaces
+ keywords:
+ - gpu
+ - cuda
+ - compute
+ - operator
+ - deep learning
+ - monitoring
+ - tesla
+ maintainers:
+ - name: NVIDIA
+ email: operator_feedback@nvidia.com
+ maturity: stable
+ provider:
+ name: NVIDIA Corporation
+ version: 24.6.0
+ replaces: gpu-operator-certified.v24.3.0
diff --git a/bundle/v24.6.0/manifests/nvidia.com_clusterpolicies.yaml b/bundle/v24.6.0/manifests/nvidia.com_clusterpolicies.yaml
new file mode 100644
index 000000000..8e29cabf1
--- /dev/null
+++ b/bundle/v24.6.0/manifests/nvidia.com_clusterpolicies.yaml
@@ -0,0 +1,2404 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.15.0
+ name: clusterpolicies.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: ClusterPolicy
+ listKind: ClusterPolicyList
+ plural: clusterpolicies
+ singular: clusterpolicy
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1
+ schema:
+ openAPIV3Schema:
+ description: ClusterPolicy is the Schema for the clusterpolicies API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ClusterPolicySpec defines the desired state of ClusterPolicy
+ properties:
+ ccManager:
+ description: CCManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ defaultMode:
+ description: Default CC mode setting for compatible GPUs on the
+ node
+ enum:
+ - "on"
+ - "off"
+ - devtools
+ type: string
+ enabled:
+ description: Enabled indicates if deployment of CC Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: CC Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: CC Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: CC Manager image tag
+ type: string
+ type: object
+ cdi:
+ description: CDI configures how the Container Device Interface is
+ used in the cluster
+ properties:
+ default:
+ default: false
+ description: Default indicates whether to use CDI as the default
+ mechanism for providing GPU access to containers.
+ type: boolean
+ enabled:
+ default: false
+ description: Enabled indicates whether CDI can be used to make
+ GPUs accessible to containers.
+ type: boolean
+ type: object
+ daemonsets:
+ description: Daemonset defines common configuration for all Daemonsets
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ priorityClassName:
+ type: string
+ rollingUpdate:
+ description: 'Optional: Configuration for rolling update of all
+ DaemonSet pods'
+ properties:
+ maxUnavailable:
+ type: string
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ updateStrategy:
+ default: RollingUpdate
+ enum:
+ - RollingUpdate
+ - OnDelete
+ type: string
+ type: object
+ dcgm:
+ description: DCGM component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
+ as a separate pod is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ hostPort:
+ description: 'Deprecated: HostPort represents host port that needs
+ to be bound for DCGM engine (Default: 5555)'
+ format: int32
+ type: integer
+ image:
+ description: NVIDIA DCGM image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA DCGM image tag
+ type: string
+ type: object
+ dcgmExporter:
+ description: DCGMExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom metrics configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ name:
+ description: ConfigMap name with file dcgm-metrics.csv for
+ metrics to be collected by NVIDIA DCGM Exporter
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Exporter
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA DCGM Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ serviceMonitor:
+ description: 'Optional: ServiceMonitor configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ additionalLabels:
+ additionalProperties:
+ type: string
+ description: AdditionalLabels to add to ServiceMonitor instance
+ for NVIDIA DCGM Exporter
+ type: object
+ enabled:
+ description: Enabled indicates if ServiceMonitor is deployed
+ for NVIDIA DCGM Exporter
+ type: boolean
+ honorLabels:
+ description: HonorLabels chooses the metric’s labels on collisions
+ with target labels.
+ type: boolean
+ interval:
+ description: |-
+ Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
+ Supported units: y, w, d, h, m, s, ms
+ pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
+ type: string
+ relabelings:
+ description: Relabelings allows to rewrite labels on metric
+ sets for NVIDIA DCGM Exporter
+ items:
+ description: |-
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
+ properties:
+ action:
+ default: replace
+ description: |-
+ Action to perform based on the regex matching.
+
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+
+ Default: "Replace"
+ enum:
+ - replace
+ - Replace
+ - keep
+ - Keep
+ - drop
+ - Drop
+ - hashmod
+ - HashMod
+ - labelmap
+ - LabelMap
+ - labeldrop
+ - LabelDrop
+ - labelkeep
+ - LabelKeep
+ - lowercase
+ - Lowercase
+ - uppercase
+ - Uppercase
+ - keepequal
+ - KeepEqual
+ - dropequal
+ - DropEqual
+ type: string
+ modulus:
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+
+ Only applicable when the action is `HashMod`.
+ format: int64
+ type: integer
+ regex:
+ description: Regular expression against which the extracted
+ value is matched.
+ type: string
+ replacement:
+ description: |-
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+
+ Regex capture groups are available.
+ type: string
+ separator:
+ description: Separator is the string between concatenated
+ SourceLabels.
+ type: string
+ sourceLabels:
+ description: |-
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
+ items:
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
+ pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
+ type: string
+ type: array
+ targetLabel:
+ description: |-
+ Label to which the resulting string is written in a replacement.
+
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+
+ Regex capture groups are available.
+ type: string
+ type: object
+ type: array
+ type: object
+ version:
+ description: NVIDIA DCGM Exporter image tag
+ type: string
+ type: object
+ devicePlugin:
+ description: DevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Configuration for the NVIDIA Device Plugin
+ via the ConfigMap'
+ properties:
+ default:
+ description: Default config name within the ConfigMap for
+ the NVIDIA Device Plugin config
+ type: string
+ name:
+ description: ConfigMap name for NVIDIA Device Plugin config
+ including shared config between plugin and GFD
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Device
+ Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Device Plugin image tag
+ type: string
+ type: object
+ driver:
+ description: Driver component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for
+ NVIDIA Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Driver
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters
+ for the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU
+ licensing'
+ properties:
+ configMapName:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System
+ is used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver
+ Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ rdma:
+ description: GPUDirectRDMASpec defines the properties for nvidia-peermem
+ deployment
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled
+ through GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ configMapName:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ upgradePolicy:
+ description: Driver auto-upgrade settings
+ properties:
+ autoUpgrade:
+ default: false
+ description: |-
+ AutoUpgrade is a global switch for automatic upgrade feature
+ if set to false all other options are ignored
+ type: boolean
+ drain:
+ description: DrainSpec describes configuration for node drain
+ during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the node is drained)
+ type: boolean
+ enable:
+ default: false
+ description: Enable indicates if node draining is allowed
+ during upgrade
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force draining is allowed
+ type: boolean
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector to filter pods on the node that need to be drained
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 300
+ description: TimeoutSecond specifies the length of time
+ in seconds to wait before giving up drain, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ maxParallelUpgrades:
+ default: 1
+ description: |-
+ MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
+ 0 means no limit, all nodes will be upgraded in parallel
+ minimum: 0
+ type: integer
+ maxUnavailable:
+ anyOf:
+ - type: integer
+ - type: string
+ default: 25%
+ description: |-
+ MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
+ Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
+ Absolute number is calculated from percentage by rounding up.
+ By default, a fixed value of 25% is used.
+ x-kubernetes-int-or-string: true
+ podDeletion:
+ description: PodDeletionSpec describes configuration for deletion
+ of pods using special resources during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the pod is deleted)
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force deletion is allowed
+ type: boolean
+ timeoutSeconds:
+ default: 300
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ waitForCompletion:
+ description: WaitForCompletionSpec describes the configuration
+ for waiting on job completions
+ properties:
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector for the pods to wait for completion
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 0
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ type: object
+ useNvidiaDriverCRD:
+ description: UseNvidiaDriverCRD indicates if the deployment of
+ NVIDIA Driver is managed by the NVIDIADriver CRD type
+ type: boolean
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA
+ Driver using pre-compiled modules is enabled
+ type: boolean
+ version:
+ description: NVIDIA Driver image tag
+ type: string
+ virtualTopology:
+ description: 'Optional: Virtual Topology Daemon configuration
+ for NVIDIA vGPU drivers'
+ properties:
+ config:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ type: object
+ gdrcopy:
+ description: GDRCopy component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ Operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GDRCopy driver image repository
+ type: string
+ version:
+ description: NVIDIA GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS components (Experimental)
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ gfd:
+ description: GPUFeatureDiscovery spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of GPU Feature Discovery
+ Plugin is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GFD image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GFD image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: GFD image tag
+ type: string
+ type: object
+ hostPaths:
+ description: HostPaths defines various paths on the host needed by
+ GPU Operator components
+ properties:
+ driverInstallDir:
+ description: |-
+ DriverInstallDir represents the root at which driver files including libraries,
+ config files, and executables can be found.
+ type: string
+ rootFS:
+ description: |-
+ RootFS represents the path to the root filesystem of the host.
+ This is used by components that need to interact with the host filesystem
+ and as such this must be a chroot-able filesystem.
+ Examples include the MIG Manager and Toolkit Container which may need to
+ stop, start, or restart systemd services.
+ type: string
+ type: object
+ kataManager:
+ description: KataManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: Kata Manager config
+ properties:
+ artifactsDir:
+ default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
+ description: |-
+ ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
+ are placed on the local filesystem.
+ type: string
+ runtimeClasses:
+ description: RuntimeClasses is a list of kata runtime classes
+ to configure.
+ items:
+ description: RuntimeClass defines the configuration for
+ a kata RuntimeClass
+ properties:
+ artifacts:
+ description: Artifacts are the kata artifacts associated
+ with the runtime class.
+ properties:
+ pullSecret:
+ description: PullSecret is the secret used to pull
+ the OCI artifact.
+ type: string
+ url:
+ description: |-
+ URL is the path to the OCI artifact (payload) containing all artifacts
+ associated with a kata runtime class.
+ type: string
+ required:
+ - url
+ type: object
+ name:
+ description: Name is the name of the kata runtime class.
+ type: string
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: |-
+ NodeSelector specifies the nodeSelector for the RuntimeClass object.
+ This ensures pods running with the RuntimeClass only get scheduled
+ onto nodes which support it.
+ type: object
+ required:
+ - artifacts
+ - name
+ type: object
+ type: array
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of Kata Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Kata Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Kata Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Kata Manager image tag
+ type: string
+ type: object
+ mig:
+ description: MIG spec
+ properties:
+ strategy:
+ description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
+ Device Plugin'
+ enum:
+ - none
+ - single
+ - mixed
+ type: string
+ type: object
+ migManager:
+ description: MIGManager for configuration to deploy MIG Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom mig-parted configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ default:
+ default: all-disabled
+ description: Default MIG config to be applied on the node,
+ when there is no config specified with the node label nvidia.com/mig.config
+ enum:
+ - all-disabled
+ - ""
+ type: string
+ name:
+ default: default-mig-parted-config
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA MIG Manager
+ is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gpuClientsConfig:
+ description: 'Optional: Custom gpu-clients configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ image:
+ description: NVIDIA MIG Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA MIG Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA MIG Manager image tag
+ type: string
+ type: object
+ nodeStatusExporter:
+ description: NodeStatusExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of Node Status Exporter
+ is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Node Status Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Node Status Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Node Status Exporter image tag
+ type: string
+ type: object
+ operator:
+ description: Operator component spec
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ defaultRuntime:
+ default: docker
+ description: Runtime defines container runtime type
+ enum:
+ - docker
+ - crio
+ - containerd
+ type: string
+ initContainer:
+ description: InitContainerSpec describes configuration for initContainer
+ image used with all components
+ properties:
+ image:
+ description: Image represents image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents image repository path
+ type: string
+ version:
+ description: Version represents image tag(version)
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ runtimeClass:
+ default: nvidia
+ type: string
+ use_ocp_driver_toolkit:
+ description: UseOpenShiftDriverToolkit indicates if DriverToolkit
+ image should be used on OpenShift to build and install driver
+ modules
+ type: boolean
+ required:
+ - defaultRuntime
+ type: object
+ psa:
+ description: PSA defines spec for PodSecurityAdmission configuration
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityAdmission configuration
+ needs to be enabled for all Pods
+ type: boolean
+ type: object
+ psp:
+ description: |-
+ Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
+ PSP defines spec for handling PodSecurityPolicies
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityPolicies needs to
+ be enabled for all Pods
+ type: boolean
+ type: object
+ sandboxDevicePlugin:
+ description: SandboxDevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Sandbox
+ Device Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Sandbox Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA Sandbox Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Sandbox Device Plugin image tag
+ type: string
+ type: object
+ sandboxWorkloads:
+ description: SandboxWorkloads defines the spec for handling sandbox
+ workloads (i.e. Virtual Machines)
+ properties:
+ defaultWorkload:
+ default: container
+ description: |-
+ DefaultWorkload indicates the default GPU workload type to configure
+ worker nodes in the cluster for
+ enum:
+ - container
+ - vm-passthrough
+ - vm-vgpu
+ type: string
+ enabled:
+ description: |-
+ Enabled indicates if the GPU Operator should manage additional operands required
+ for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
+ type: boolean
+ type: object
+ toolkit:
+ description: Toolkit component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Container
+ Toolkit through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Container Toolkit image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ installDir:
+ default: /usr/local/nvidia
+ description: Toolkit install directory on the host
+ type: string
+ repository:
+ description: NVIDIA Container Toolkit image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Container Toolkit image tag
+ type: string
+ type: object
+ validator:
+ description: Validator defines the spec for operator-validator daemonset
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ cuda:
+ description: CUDA validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ driver:
+ description: Driver validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Validator image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ plugin:
+ description: Plugin validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ repository:
+ description: Validator image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ toolkit:
+ description: Toolkit validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ version:
+ description: Validator image tag
+ type: string
+ vfioPCI:
+ description: VfioPCI validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuDevices:
+ description: VGPUDevices validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuManager:
+ description: VGPUManager validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: object
+ vfioManager:
+ description: VFIOManager for configuration to deploy VFIO-PCI Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of VFIO Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: VFIO Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: VFIO Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: VFIO Manager image tag
+ type: string
+ type: object
+ vgpuDeviceManager:
+ description: VGPUDeviceManager spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: NVIDIA vGPU devices configuration for NVIDIA vGPU
+ Device Manager container
+ properties:
+ default:
+ default: default
+ description: Default config name within the ConfigMap
+ type: string
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Device
+ Manager is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Device Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Device Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Device Manager image tag
+ type: string
+ type: object
+ vgpuManager:
+ description: VGPUManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Manager
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Manager image tag
+ type: string
+ type: object
+ required:
+ - daemonsets
+ - dcgm
+ - dcgmExporter
+ - devicePlugin
+ - driver
+ - gfd
+ - nodeStatusExporter
+ - operator
+ - toolkit
+ type: object
+ status:
+ description: ClusterPolicyStatus defines the observed state of ClusterPolicy
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the ClusterPolicy's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ is installed
+ type: string
+ state:
+ description: State indicates status of ClusterPolicy
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.6.0/manifests/nvidia.com_nvidiadrivers.yaml b/bundle/v24.6.0/manifests/nvidia.com_nvidiadrivers.yaml
new file mode 100644
index 000000000..665088edd
--- /dev/null
+++ b/bundle/v24.6.0/manifests/nvidia.com_nvidiadrivers.yaml
@@ -0,0 +1,810 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.15.0
+ name: nvidiadrivers.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: NVIDIADriver
+ listKind: NVIDIADriverList
+ plural: nvidiadrivers
+ shortNames:
+ - nvd
+ - nvdriver
+ - nvdrivers
+ singular: nvidiadriver
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: NVIDIADriver is the Schema for the nvidiadrivers API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: NVIDIADriverSpec defines the desired state of NVIDIADriver
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for NVIDIA
+ Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ driverType:
+ default: gpu
+ description: DriverType defines NVIDIA driver type
+ enum:
+ - gpu
+ - vgpu
+ - vgpu-host-manager
+ type: string
+ x-kubernetes-validations:
+ - message: driverType is an immutable field. Please create a new NvidiaDriver
+ resource instead when you want to change this setting.
+ rule: self == oldSelf
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present in
+ a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gdrcopy:
+ description: GDRCopy defines the spec for GDRCopy driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GDRCopy driver image repository
+ type: string
+ version:
+ description: GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ image:
+ default: nvcr.io/nvidia/driver
+ description: NVIDIA Driver container image name
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters for
+ the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
+ properties:
+ name:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System is
+ used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver Manager
+ initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image tag(version)
+ type: string
+ type: object
+ nodeAffinity:
+ description: Affinity specifies node affinity rules for driver pods
+ properties:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ The scheduler will prefer to schedule pods to nodes that satisfy
+ the affinity expressions specified by this field, but it may choose
+ a node that violates one or more of the expressions. The node that is
+ most preferred is the one with the greatest sum of weights, i.e.
+ for each node that meets all of the scheduling requirements (resource
+ request, requiredDuringScheduling affinity expressions, etc.),
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node matches the corresponding matchExpressions; the
+ node(s) with the highest sum are the most preferred.
+ items:
+ description: |-
+ An empty preferred scheduling term matches all objects with implicit weight 0
+ (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
+ properties:
+ preference:
+ description: A node selector term, associated with the corresponding
+ weight.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ weight:
+ description: Weight associated with matching the corresponding
+ nodeSelectorTerm, in the range 1-100.
+ format: int32
+ type: integer
+ required:
+ - preference
+ - weight
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ requiredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ If the affinity requirements specified by this field are not met at
+ scheduling time, the pod will not be scheduled onto the node.
+ If the affinity requirements specified by this field cease to be met
+ at some point during pod execution (e.g. due to an update), the system
+ may or may not try to eventually evict the pod from its node.
+ properties:
+ nodeSelectorTerms:
+ description: Required. A list of node selector terms. The
+ terms are ORed.
+ items:
+ description: |-
+ A null or empty node selector term matches no objects. The requirements of
+ them are ANDed.
+ The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - nodeSelectorTerms
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: NodeSelector specifies a selector for installation of
+ NVIDIA driver
+ type: object
+ priorityClassName:
+ description: 'Optional: Set priorityClassName'
+ type: string
+ rdma:
+ description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
+ driver
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled through
+ GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ name:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for each
+ pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple using the matching operator .
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA Driver
+ using pre-compiled modules is enabled
+ type: boolean
+ x-kubernetes-validations:
+ - message: usePrecompiled is an immutable field. Please create a new
+ NvidiaDriver resource instead when you want to change this setting.
+ rule: self == oldSelf
+ version:
+ description: NVIDIA Driver version (or just branch for precompiled
+ drivers)
+ type: string
+ virtualTopologyConfig:
+ description: 'Optional: Virtual Topology Daemon configuration for
+ NVIDIA vGPU drivers'
+ properties:
+ name:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ required:
+ - driverType
+ - image
+ type: object
+ status:
+ description: NVIDIADriverStatus defines the observed state of NVIDIADriver
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the NVIDIADriver's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ and driver are installed
+ type: string
+ state:
+ description: |-
+ INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
+ Important: Run "make" to regenerate code after modifying this file
+ State indicates status of NVIDIADriver instance
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.6.0/metadata/annotations.yaml b/bundle/v24.6.0/metadata/annotations.yaml
new file mode 100644
index 000000000..f7383d5c4
--- /dev/null
+++ b/bundle/v24.6.0/metadata/annotations.yaml
@@ -0,0 +1,17 @@
+annotations:
+ operators.operatorframework.io.bundle.channels.v1: stable,v24.6
+ operators.operatorframework.io.bundle.channel.default.v1: v24.6
+ operators.operatorframework.io.bundle.manifests.v1: manifests/
+ operators.operatorframework.io.bundle.mediatype.v1: registry+v1
+ operators.operatorframework.io.bundle.metadata.v1: metadata/
+ operators.operatorframework.io.bundle.package.v1: gpu-operator-certified
+ operators.operatorframework.io.metrics.builder: operator-sdk-v1.4.0
+ operators.operatorframework.io.metrics.mediatype.v1: metrics+v1
+ operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3
+ operators.operatorframework.io.test.config.v1: tests/scorecard/
+ operators.operatorframework.io.test.mediatype.v1: scorecard+v1
+ operatorframework.io/cluster-monitoring: "true"
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+
+ # Annotations to specify OCP versions compatibility.
+ com.redhat.openshift.versions: v4.12-v4.16
diff --git a/bundle/v24.6.1/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/v24.6.1/manifests/gpu-operator-certified.clusterserviceversion.yaml
new file mode 100644
index 000000000..a4c4aec27
--- /dev/null
+++ b/bundle/v24.6.1/manifests/gpu-operator-certified.clusterserviceversion.yaml
@@ -0,0 +1,921 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: ClusterServiceVersion
+metadata:
+ labels:
+ operatorframework.io/arch.arm64: supported
+ operatorframework.io/arch.amd64: supported
+ pod-security.kubernetes.io/enforce: privileged
+ pod-security.kubernetes.io/audit: privileged
+ pod-security.kubernetes.io/warn: privileged
+ annotations:
+ features.operators.openshift.io/disconnected: "true"
+ features.operators.openshift.io/fips-compliant: "false"
+ features.operators.openshift.io/proxy-aware: "true"
+ features.operators.openshift.io/tls-profiles: "false"
+ features.operators.openshift.io/token-auth-aws: "false"
+ features.operators.openshift.io/token-auth-azure: "false"
+ features.operators.openshift.io/token-auth-gcp: "false"
+ features.operators.openshift.io/cnf: "false"
+ features.operators.openshift.io/cni: "false"
+ features.operators.openshift.io/csi: "false"
+ olm.skipRange: '>=1.9.0 <24.6.1'
+ alm-examples: |-
+ [
+ {
+ "apiVersion": "nvidia.com/v1",
+ "kind": "ClusterPolicy",
+ "metadata": {
+ "name": "gpu-cluster-policy"
+ },
+ "spec": {
+ "operator": {
+ "defaultRuntime": "crio",
+ "use_ocp_driver_toolkit": true,
+ "initContainer": {
+ }
+ },
+ "sandboxWorkloads": {
+ "enabled": false,
+ "defaultWorkload": "container"
+ },
+ "driver": {
+ "enabled": true,
+ "useNvidiaDriverCRD": false,
+ "useOpenKernelModules": false,
+ "upgradePolicy": {
+ "autoUpgrade": true,
+ "drain": {
+ "deleteEmptyDir": false,
+ "enable": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "maxParallelUpgrades": 1,
+ "maxUnavailable": "25%",
+ "podDeletion": {
+ "deleteEmptyDir": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "waitForCompletion": {
+ "timeoutSeconds": 0
+ }
+ },
+ "repoConfig": {
+ "configMapName": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "configMapName": ""
+ },
+ "virtualTopology": {
+ "config": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ },
+ "dcgmExporter": {
+ "enabled": true,
+ "config": {
+ "name": ""
+ },
+ "serviceMonitor": {
+ "enabled": true
+ }
+ },
+ "dcgm": {
+ "enabled": true
+ },
+ "daemonsets": {
+ "updateStrategy": "RollingUpdate",
+ "rollingUpdate": {
+ "maxUnavailable": "1"
+ }
+ },
+ "devicePlugin": {
+ "enabled": true,
+ "config": {
+ "name": "",
+ "default": ""
+ },
+ "mps": {
+ "root": "/run/nvidia/mps"
+ }
+ },
+ "gfd": {
+ "enabled": true
+ },
+ "migManager": {
+ "enabled": true
+ },
+ "nodeStatusExporter": {
+ "enabled": true
+ },
+ "mig": {
+ "strategy": "single"
+ },
+ "toolkit": {
+ "enabled": true
+ },
+ "validator": {
+ "plugin": {
+ "env": [
+ {
+ "name": "WITH_WORKLOAD",
+ "value": "false"
+ }
+ ]
+ }
+ },
+ "vgpuManager": {
+ "enabled": false
+ },
+ "vgpuDeviceManager": {
+ "enabled": true
+ },
+ "sandboxDevicePlugin": {
+ "enabled": true
+ },
+ "vfioManager": {
+ "enabled": true
+ },
+ "gds": {
+ "enabled": false
+ },
+ "gdrcopy": {
+ "enabled": false
+ }
+ }
+ },
+ {
+ "apiVersion": "nvidia.com/v1alpha1",
+ "kind": "NVIDIADriver",
+ "metadata": {
+ "name": "gpu-driver"
+ },
+ "spec": {
+ "driverType": "gpu",
+ "repository": "nvcr.io/nvidia",
+ "image": "driver",
+ "version": "sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415",
+ "nodeSelector": {},
+ "manager": {},
+ "repoConfig": {
+ "name": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "name": ""
+ },
+ "virtualTopologyConfig": {
+ "name": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ }
+ }
+ ]
+ operators.operatorframework.io/builder: operator-sdk-v1.4.0
+ operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+ capabilities: Deep Insights
+ categories: AI/Machine Learning, OpenShift Optional
+ certified: "true"
+ containerImage: nvcr.io/nvidia/gpu-operator@sha256:d51c3a34aaa9a5dfbdd3b710ee18d9eaa50aa0fb3518bacd541053d77c5c1098
+ createdAt: "Mon Aug 12 11:35:29 PDT 2024"
+ description: Automate the management and monitoring of NVIDIA GPUs.
+ provider: NVIDIA
+ repository: http://github.com/NVIDIA/gpu-operator
+ support: NVIDIA
+ name: gpu-operator-certified.v24.6.1
+ namespace: placeholder
+spec:
+ apiservicedefinitions: {}
+ relatedImages:
+ - name: gpu-operator-image
+ image: nvcr.io/nvidia/gpu-operator@sha256:d51c3a34aaa9a5dfbdd3b710ee18d9eaa50aa0fb3518bacd541053d77c5c1098
+ - name: dcgm-exporter-image
+ image: nvcr.io/nvidia/k8s/dcgm-exporter@sha256:21f4c8b88716e8e6f732f9fb4c2efaef937c227491a8631c5e55036f80f39a4d
+ - name: dcgm-image
+ image: nvcr.io/nvidia/cloud-native/dcgm@sha256:15dab1273345df4a5844c4c761d064dbc4b592101251dc39174e597137123027
+ - name: container-toolkit-image
+ image: nvcr.io/nvidia/k8s/container-toolkit@sha256:f95ef6a0c377e011bc0561c7d2c2bf32e45106fb0ba91ae9a10f97236ded0581
+ - name: driver-image
+ image: nvcr.io/nvidia/driver@sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415
+ - name: driver-image-535
+ image: nvcr.io/nvidia/driver@sha256:a6d12fb5753f267dda25dfd38910f972bc632c006a24107fa50e20bba3642d7c
+ - name: driver-image-470
+ image: nvcr.io/nvidia/driver@sha256:07e11f85d54d49ec9648fb06e148b8d832ee1f9c3549a915eee853c9ef2949c2
+ - name: device-plugin-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534
+ - name: gpu-feature-discovery-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534
+ - name: mig-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:781fb47e264d9e0fbc8da5bd046e5e678316c866bc36ddd4b56d4eb0de682d5b
+ - name: init-container-image
+ image: nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe
+ - name: gpu-operator-validator-image
+ image: nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:0a48b6c65148358ab792b3dc23bce5d3e660e9176670f62864502f68647704f0
+ - name: k8s-driver-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:740abc3ff657545c10effd5354f09af525200ed9a1b7623f0c2e8c7bd9e4a4e2
+ - name: vfio-manager-image
+ image: nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe
+ - name: sandbox-device-plugin-image
+ image: nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:969147c01d63be5d1fe458f32f1cc0c7408cf3062531db91408e2fc57b4d8a67
+ - name: vgpu-device-manager-image
+ image: nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:ae63fac9a4057a7646f0cf0ee0566e8928529adde05c4c0a017cda0599e381b2
+ - name: gdrcopy-image
+ image: nvcr.io/nvidia/cloud-native/gdrdrv@sha256:33de74eb590f071403c17b6c210c02963245851971168bc0c07c06c100a9f376
+ customresourcedefinitions:
+ owned:
+ - name: nvidiadrivers.nvidia.com
+ kind: NVIDIADriver
+ version: v1alpha1
+ displayName: NVIDIADriver
+ description: NVIDIADriver allows you to deploy the NVIDIA driver
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ statusDescriptors:
+ - description: The current state of the driver.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ - name: clusterpolicies.nvidia.com
+ kind: ClusterPolicy
+ version: v1
+ group: nvidia.com
+ displayName: ClusterPolicy
+ description: ClusterPolicy allows you to configure the GPU Operator
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: Deployment
+ name: ''
+ version: apps/v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: GPU Operator config
+ displayName: GPU Operator config
+ path: operator
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: operator.validator.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: operator.validator.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - description: NVIDIA GPU/vGPU Driver config
+ displayName: NVIDIA GPU/vGPU Driver config
+ path: driver
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: driver.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: driver.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: driver.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: driver.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: driver.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA DCGM Exporter config
+ displayName: NVIDIA DCGM Exporter config
+ path: dcgmExporter
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: dcgmExporter.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: dcgmExporter.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: dcgmExporter.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: dcgmExporter.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: dcgmExporter.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Device Plugin config
+ displayName: NVIDIA Device Plugin config
+ path: devicePlugin
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: devicePlugin.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: devicePlugin.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: devicePlugin.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: devicePlugin.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: devicePlugin.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: GPU Feature Discovery Plugin config
+ displayName: GPU Feature Discovery Plugin config
+ path: gfd
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: gfd.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: gfd.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: gfd.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: gfd.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: gfd.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Container Toolkit config
+ displayName: NVIDIA Container Toolkit config
+ path: toolkit
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: toolkit.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: toolkit.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: toolkit.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: toolkit.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: toolkit.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - displayName: NVIDIA DCGM config
+ description: NVIDIA DCGM config
+ path: dcgm
+ - displayName: Validator config
+ description: Validator config
+ path: validator
+ - displayName: Node Status Exporter config
+ description: Node Status Exporter config
+ path: nodeStatusExporter
+ - displayName: Daemonsets config
+ description: Daemonsets config
+ path: daemonsets
+ - displayName: MIG config
+ description: MIG config
+ path: mig
+ - displayName: NVIDIA MIG Manager config
+ description: NVIDIA MIG Manager config
+ path: migManager
+ - displayName: PodSecurityPolicy config
+ description: PodSecurityPolicy config
+ path: psp
+ - displayName: NVIDIA GPUDirect Storage config
+ description: NVIDIA GPUDirect Storage config
+ path: gds
+ - displayName: Sandbox Workloads config
+ description: Sandbox Workloads config
+ path: sandboxWorkloads
+ - displayName: NVIDIA vGPU Manager config
+ description: NVIDIA vGPU Manager config
+ path: vgpuManager
+ - displayName: NVIDIA vGPU Device Manager config
+ description: NVIDIA vGPU Device Manager config
+ path: vgpuDeviceManager
+ - displayName: VFIO Manager config
+ description: VFIO Manager config
+ path: vfioManager
+ - displayName: NVIDIA Sandbox Device Plugin config
+ description: NVIDIA Sandbox Device Plugin config
+ path: sandboxDevicePlugin
+ statusDescriptors:
+ - description: The current state of the operator.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ displayName: NVIDIA GPU Operator
+ description: >
+ Kubernetes provides access to special hardware resources such as NVIDIA
+ GPUs, NICs, Infiniband adapters and other devices through the [device plugin
+ framework](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/).
+ However, configuring and managing nodes with these hardware resources
+ requires configuration of multiple software components such as drivers,
+ container runtimes or other libraries, which is difficult and
+ error-prone.
+
+ The NVIDIA GPU Operator uses the [operator
+ framework](https://cloud.redhat.com/blog/introducing-the-operator-framework) within
+ Kubernetes to automate the management of all NVIDIA software components
+ needed to provision and monitor GPUs.
+ These components include the NVIDIA drivers (to enable CUDA), Kubernetes
+ device plugin for GPUs, the NVIDIA Container Runtime, automatic node
+ labelling and NVIDIA DCGM exporter.
+
+ Visit the official site of the [GPU Operator](https://github.com/NVIDIA/gpu-operator) for more information.
+ For getting started with using the GPU Operator with OpenShift, see the instructions
+ [here](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/openshift/contents.html).
+ icon:
+ - base64data: iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAB2lBMVEUAAAD///8EBAN3uQACAgIAAAQJDQUCAgB1tgAHCQf+/v5Ufg5Hagxxqwt+xgJ3uAB9wwB4vQBRUVEeLA3e3t5nZ2coKCgODg4FBwZ9wwR6wAJ4vADz8/MbGxt5tw1vpw1/wgoOFwkLDwh9xQH5+fny8vLw8PDFxcWysrKFhYVvb282NjYyMjIqKioXFxdikxRYgxNCYxJQdhFqoQ9xrg16ugxyqgyAxQkEBQj7+/v29vbIyMhjY2NbW1tHR0cvLy8kJCQdHR0ZGRlKbxJ8uhFNcxFVgBAxSBBgkg93tQ50sA4qPg4XIg18vwsbKQsSGgsLCwsMEwqCyQeByQFztADPz8+/v7+6urqWlpZra2tKSkogICASEhJmmRE8XBA5VRA2UBBonA9biA9GaQ4sQg4jMw4mOQ0aJw2GzgsUHgttpAqJ0Ql/wQWG0AJ8vwF0uQCtra2jo6OQkJB9fX1VVVVCQkI9PT0iIiIUFBRSfBNgjhA7WRBGZw+GywmFzgaAyASBxQN2twDb29u2traenp6Kiop+fn53d3dzc3NyqRV4sxM/YBNAXRElNhBjlQ+IzA00TQ16vgxJbgp6vAl4tgJ3vgDs7Ozn5+fa2trS0tJCXRY6VBV6thSL1gf4nFdFAAAD80lEQVRYw+zSOXPaQBgG4He0LJJmbGRGDUIzuvgBQiAEPfcdwC33DTbUtmOwSyc+4iRucvzXRImLFJmRShc8xXbfu+9+szg4OHjjAsH/iFD49q7rqM6xc/wPtWyBhS8sC94ObWRCZDksh1+RzmcEfI0DoPrjylEkSTgViMs9udjYTwMG4Gf51Z1BM81ioRwit+QvgYsdUQZeKFr3ladyKXvVr+pAM5uKcmRLXFzoCIxn+0i/8lSaBMHnfi7qowfQuZnm3PuFPwGs13zD3NlViozY/z4YD6/TCQORbPr2q78GLB0ou5IO40pd5AxQZnJ83m2y9Ju2JYKfgEhWC18aEIfrZLURHwQC0B87ySZwHxX8BNDWB1KfQfyxT2TA24uPQMt8yTWA3obz8wQGlhTN06Z900MkuJLrYu3u5LkK9LTtGRF8NEDLeSnXYLUdHUFVlpPqTa4IamlhJZ464biY1w4CKGrROOW7uwLlV+Q02lanCF6cbSoPVLzUfPwDll5I9T6WyXWhZre1yjiI6VCSzCWY3+FKaAwGHngzpEygx6+V6Uzk6TJR7yhWxJ1bFgTPJ7gMc58aUCq+n+qNT6Pn8y/xOcCiZZVjnJ+AAPhEuj0SKZ9bL9ZpNS9SgM6z9p5w3jt43cMvecfWBhm7dtfEpfhYMDBYpFd7mDZIAxPCFKgBhB0hkWbE2wVMyqycfhOMEiebSzFz5IMTEjw7E87UFj4GVR7GXqaSkoIcISEc/I38/PwhOTUMRBrADgwK09zgYGUBqbwcARiQyp3Eyk6kC4BloqtbJTcaSHIHShALWFmBSRuCWBGC+AtDMAAGIpAAc9mBiB0sCLSXHUSygxSxEIoE7IKEgbhopKgogC96x04QCMMw/H0cG6f0cEmBHaLc7FFQzApoTLwtQgWUWo26glx2mzGkyoHM1PPMO/NrnSH8e2QAiRsZ8S3ZuJoW5Udg5moGoMRLN2gAnkcUctueJ1gADsdtlZ2AgmSYoaDZBXwRctcwy6HN3XX/wfnTnA7Q5x0S0Gku4wHpe7Ql8Mbtu4TqC3qcADGtUl4O3eK0AkZdKH1mU/a6MFQGA7pQGoAVoAuuPYZlLJF2BawVLLjwac6Q8wUax61/CpKQAT6ZX3hFqoqqAFvuf4AzM+NgsoBS/wcSOD7SFzyf6CE9UQK9II1MRvIJm8QSgsLiBZuypsAWKyARElgx5FcLv1N4nFLbB45Sh6+TzsQRtn7bz/B3fS9GQ12bgUE2PKycQbwgXD0SWLwVhpZFq4eHhWloOjLoqGvoRYRG
AR2vp2EtpNUaTUpiRAizMAEhKNXpYZNnAUlBCSgFYTIxQTlMMJNGwSgYBdQHAFsKs+/bUkeyAAAAAElFTkSuQmCC
+ mediatype: image/png
+ install:
+ spec:
+ clusterPermissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - nvidia.com
+ resources:
+ - clusterpolicies
+ - clusterpolicies/finalizers
+ - clusterpolicies/status
+ - nvidiadrivers
+ - nvidiadrivers/finalizers
+ - nvidiadrivers/status
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - deletecollection
+ - apiGroups:
+ - config.openshift.io
+ resources:
+ - clusterversions
+ - proxies
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ - create
+ - get
+ - list
+ - watch
+ - patch
+ - update
+ - delete
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ resourceNames:
+ - hostmount-anyuid
+ - apiGroups:
+ - image.openshift.io
+ resources:
+ - imagestreams
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - monitoring.coreos.com
+ resources:
+ - servicemonitors
+ - prometheusrules
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - delete
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterroles
+ - clusterrolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ - services
+ - services/finalizers
+ - events
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - ""
+ resources:
+ - namespaces
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - ""
+ resources:
+ - nodes
+ verbs:
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - controllerrevisions
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - node.k8s.io
+ resources:
+ - runtimeclasses
+ verbs:
+ - get
+ - list
+ - create
+ - update
+ - watch
+ - delete
+ - apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apiextensions.k8s.io
+ resources:
+ - customresourcedefinitions
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ permissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - roles
+ - rolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - configmaps
+ - endpoints
+ - secrets
+ - serviceaccounts
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ deployments:
+ - name: gpu-operator
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ strategy: {}
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ nvidia.com/gpu-driver-upgrade-drain.skip: "true"
+ spec:
+ priorityClassName: system-node-critical
+ containers:
+ - args:
+ - --leader-elect
+ - --leader-lease-renew-deadline
+ - "60s"
+ image: nvcr.io/nvidia/gpu-operator@sha256:d51c3a34aaa9a5dfbdd3b710ee18d9eaa50aa0fb3518bacd541053d77c5c1098
+ command:
+ - gpu-operator
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ name: gpu-operator
+ ports:
+ - name: metrics
+ containerPort: 8080
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ resources:
+ limits:
+ cpu: 500m
+ memory: 1Gi
+ requests:
+ cpu: 200m
+ memory: 200Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ volumeMounts:
+ - mountPath: /host-etc/os-release
+ name: host-os-release
+ readOnly: true
+ env:
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ - name: "VALIDATOR_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:0a48b6c65148358ab792b3dc23bce5d3e660e9176670f62864502f68647704f0"
+ - name: "GFD_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534"
+ - name: "CONTAINER_TOOLKIT_IMAGE"
+ value: "nvcr.io/nvidia/k8s/container-toolkit@sha256:f95ef6a0c377e011bc0561c7d2c2bf32e45106fb0ba91ae9a10f97236ded0581"
+ - name: "DCGM_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/dcgm@sha256:15dab1273345df4a5844c4c761d064dbc4b592101251dc39174e597137123027"
+ - name: "DCGM_EXPORTER_IMAGE"
+ value: "nvcr.io/nvidia/k8s/dcgm-exporter@sha256:21f4c8b88716e8e6f732f9fb4c2efaef937c227491a8631c5e55036f80f39a4d"
+ - name: "DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534"
+ - name: "DRIVER_IMAGE"
+ value: "nvcr.io/nvidia/driver@sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415"
+ - name: "DRIVER_IMAGE-535"
+ value: "nvcr.io/nvidia/driver@sha256:a6d12fb5753f267dda25dfd38910f972bc632c006a24107fa50e20bba3642d7c"
+ - name: "DRIVER_IMAGE-470"
+ value: "nvcr.io/nvidia/driver@sha256:07e11f85d54d49ec9648fb06e148b8d832ee1f9c3549a915eee853c9ef2949c2"
+ - name: "DRIVER_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:740abc3ff657545c10effd5354f09af525200ed9a1b7623f0c2e8c7bd9e4a4e2"
+ - name: "MIG_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:781fb47e264d9e0fbc8da5bd046e5e678316c866bc36ddd4b56d4eb0de682d5b"
+ - name: "CUDA_BASE_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe"
+ - name: "VFIO_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:b0b6c9286f20432ba9becb711aff2d1c1bd56e47b33e6d1cab04aba926c067fe"
+ - name: "SANDBOX_DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:969147c01d63be5d1fe458f32f1cc0c7408cf3062531db91408e2fc57b4d8a67"
+ - name: "VGPU_DEVICE_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:ae63fac9a4057a7646f0cf0ee0566e8928529adde05c4c0a017cda0599e381b2"
+ - name: "GDRCOPY_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:33de74eb590f071403c17b6c210c02963245851971168bc0c07c06c100a9f376"
+ terminationGracePeriodSeconds: 10
+ volumes:
+ - hostPath:
+ path: /etc/os-release
+ name: host-os-release
+ serviceAccountName: gpu-operator
+ strategy: deployment
+ installModes:
+ - supported: true
+ type: OwnNamespace
+ - supported: true
+ type: SingleNamespace
+ - supported: false
+ type: MultiNamespace
+ - supported: false
+ type: AllNamespaces
+ keywords:
+ - gpu
+ - cuda
+ - compute
+ - operator
+ - deep learning
+ - monitoring
+ - tesla
+ maintainers:
+ - name: NVIDIA
+ email: operator_feedback@nvidia.com
+ maturity: stable
+ provider:
+ name: NVIDIA Corporation
+ version: 24.6.1
+ replaces: gpu-operator-certified.v24.6.0
diff --git a/bundle/v24.6.1/manifests/nvidia.com_clusterpolicies.yaml b/bundle/v24.6.1/manifests/nvidia.com_clusterpolicies.yaml
new file mode 100644
index 000000000..8e29cabf1
--- /dev/null
+++ b/bundle/v24.6.1/manifests/nvidia.com_clusterpolicies.yaml
@@ -0,0 +1,2404 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.15.0
+ name: clusterpolicies.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: ClusterPolicy
+ listKind: ClusterPolicyList
+ plural: clusterpolicies
+ singular: clusterpolicy
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1
+ schema:
+ openAPIV3Schema:
+ description: ClusterPolicy is the Schema for the clusterpolicies API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ClusterPolicySpec defines the desired state of ClusterPolicy
+ properties:
+ ccManager:
+ description: CCManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ defaultMode:
+ description: Default CC mode setting for compatible GPUs on the
+ node
+ enum:
+ - "on"
+ - "off"
+ - devtools
+ type: string
+ enabled:
+ description: Enabled indicates if deployment of CC Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: CC Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: CC Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: CC Manager image tag
+ type: string
+ type: object
+ cdi:
+ description: CDI configures how the Container Device Interface is
+ used in the cluster
+ properties:
+ default:
+ default: false
+ description: Default indicates whether to use CDI as the default
+ mechanism for providing GPU access to containers.
+ type: boolean
+ enabled:
+ default: false
+ description: Enabled indicates whether CDI can be used to make
+ GPUs accessible to containers.
+ type: boolean
+ type: object
+ daemonsets:
+ description: Daemonset defines common configuration for all Daemonsets
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ priorityClassName:
+ type: string
+ rollingUpdate:
+ description: 'Optional: Configuration for rolling update of all
+ DaemonSet pods'
+ properties:
+ maxUnavailable:
+ type: string
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ updateStrategy:
+ default: RollingUpdate
+ enum:
+ - RollingUpdate
+ - OnDelete
+ type: string
+ type: object
+ dcgm:
+ description: DCGM component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
+ as a separate pod is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ hostPort:
+ description: 'Deprecated: HostPort represents host port that needs
+ to be bound for DCGM engine (Default: 5555)'
+ format: int32
+ type: integer
+ image:
+ description: NVIDIA DCGM image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA DCGM image tag
+ type: string
+ type: object
+ dcgmExporter:
+ description: DCGMExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom metrics configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ name:
+ description: ConfigMap name with file dcgm-metrics.csv for
+ metrics to be collected by NVIDIA DCGM Exporter
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Exporter
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA DCGM Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ serviceMonitor:
+ description: 'Optional: ServiceMonitor configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ additionalLabels:
+ additionalProperties:
+ type: string
+ description: AdditionalLabels to add to ServiceMonitor instance
+ for NVIDIA DCGM Exporter
+ type: object
+ enabled:
+ description: Enabled indicates if ServiceMonitor is deployed
+ for NVIDIA DCGM Exporter
+ type: boolean
+ honorLabels:
+ description: HonorLabels chooses the metric’s labels on collisions
+ with target labels.
+ type: boolean
+ interval:
+ description: |-
+ Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
+ Supported units: y, w, d, h, m, s, ms
+ pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
+ type: string
+ relabelings:
+ description: Relabelings allows to rewrite labels on metric
+ sets for NVIDIA DCGM Exporter
+ items:
+ description: |-
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
+ properties:
+ action:
+ default: replace
+ description: |-
+ Action to perform based on the regex matching.
+
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+
+ Default: "Replace"
+ enum:
+ - replace
+ - Replace
+ - keep
+ - Keep
+ - drop
+ - Drop
+ - hashmod
+ - HashMod
+ - labelmap
+ - LabelMap
+ - labeldrop
+ - LabelDrop
+ - labelkeep
+ - LabelKeep
+ - lowercase
+ - Lowercase
+ - uppercase
+ - Uppercase
+ - keepequal
+ - KeepEqual
+ - dropequal
+ - DropEqual
+ type: string
+ modulus:
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+
+ Only applicable when the action is `HashMod`.
+ format: int64
+ type: integer
+ regex:
+ description: Regular expression against which the extracted
+ value is matched.
+ type: string
+ replacement:
+ description: |-
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+
+ Regex capture groups are available.
+ type: string
+ separator:
+ description: Separator is the string between concatenated
+ SourceLabels.
+ type: string
+ sourceLabels:
+ description: |-
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
+ items:
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
+ pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
+ type: string
+ type: array
+ targetLabel:
+ description: |-
+ Label to which the resulting string is written in a replacement.
+
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+
+ Regex capture groups are available.
+ type: string
+ type: object
+ type: array
+ type: object
+ version:
+ description: NVIDIA DCGM Exporter image tag
+ type: string
+ type: object
+ devicePlugin:
+ description: DevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Configuration for the NVIDIA Device Plugin
+ via the ConfigMap'
+ properties:
+ default:
+ description: Default config name within the ConfigMap for
+ the NVIDIA Device Plugin config
+ type: string
+ name:
+ description: ConfigMap name for NVIDIA Device Plugin config
+ including shared config between plugin and GFD
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Device
+ Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Device Plugin image tag
+ type: string
+ type: object
+ driver:
+ description: Driver component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for
+ NVIDIA Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Driver
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters
+ for the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU
+ licensing'
+ properties:
+ configMapName:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System
+ is used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver
+ Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ rdma:
+ description: GPUDirectRDMASpec defines the properties for nvidia-peermem
+ deployment
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled
+ through GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before readiness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ configMapName:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before startup probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ upgradePolicy:
+ description: Driver auto-upgrade settings
+ properties:
+ autoUpgrade:
+ default: false
+ description: |-
+ AutoUpgrade is a global switch for automatic upgrade feature
+ if set to false all other options are ignored
+ type: boolean
+ drain:
+ description: DrainSpec describes configuration for node drain
+ during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the node is drained)
+ type: boolean
+ enable:
+ default: false
+ description: Enable indicates if node draining is allowed
+ during upgrade
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force draining is allowed
+ type: boolean
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector to filter pods on the node that need to be drained
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 300
+ description: TimeoutSecond specifies the length of time
+ in seconds to wait before giving up drain, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ maxParallelUpgrades:
+ default: 1
+ description: |-
+ MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
+ 0 means no limit, all nodes will be upgraded in parallel
+ minimum: 0
+ type: integer
+ maxUnavailable:
+ anyOf:
+ - type: integer
+ - type: string
+ default: 25%
+ description: |-
+ MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
+ Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
+ Absolute number is calculated from percentage by rounding up.
+ By default, a fixed value of 25% is used.
+ x-kubernetes-int-or-string: true
+ podDeletion:
+ description: PodDeletionSpec describes configuration for deletion
+ of pods using special resources during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the pod is deleted)
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force deletion is allowed
+ type: boolean
+ timeoutSeconds:
+ default: 300
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ waitForCompletion:
+ description: WaitForCompletionSpec describes the configuration
+ for waiting on job completions
+ properties:
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector for the pods to wait for completion
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 0
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ type: object
+ useNvidiaDriverCRD:
+ description: UseNvidiaDriverCRD indicates if the deployment of
+ NVIDIA Driver is managed by the NVIDIADriver CRD type
+ type: boolean
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA
+ Driver using pre-compiled modules is enabled
+ type: boolean
+ version:
+ description: NVIDIA Driver image tag
+ type: string
+ virtualTopology:
+ description: 'Optional: Virtual Topology Daemon configuration
+ for NVIDIA vGPU drivers'
+ properties:
+ config:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ type: object
+ gdrcopy:
+ description: GDRCopy component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ Operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GDRCopy driver image repository
+ type: string
+ version:
+ description: NVIDIA GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS components (Experimental)
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ gfd:
+ description: GPUFeatureDiscovery spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of GPU Feature Discovery
+ Plugin is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GFD image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GFD image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: GFD image tag
+ type: string
+ type: object
+ hostPaths:
+ description: HostPaths defines various paths on the host needed by
+ GPU Operator components
+ properties:
+ driverInstallDir:
+ description: |-
+ DriverInstallDir represents the root at which driver files including libraries,
+ config files, and executables can be found.
+ type: string
+ rootFS:
+ description: |-
+ RootFS represents the path to the root filesystem of the host.
+ This is used by components that need to interact with the host filesystem
+ and as such this must be a chroot-able filesystem.
+ Examples include the MIG Manager and Toolkit Container which may need to
+ stop, start, or restart systemd services.
+ type: string
+ type: object
+ kataManager:
+ description: KataManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: Kata Manager config
+ properties:
+ artifactsDir:
+ default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
+ description: |-
+ ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
+ are placed on the local filesystem.
+ type: string
+ runtimeClasses:
+ description: RuntimeClasses is a list of kata runtime classes
+ to configure.
+ items:
+ description: RuntimeClass defines the configuration for
+ a kata RuntimeClass
+ properties:
+ artifacts:
+ description: Artifacts are the kata artifacts associated
+ with the runtime class.
+ properties:
+ pullSecret:
+ description: PullSecret is the secret used to pull
+ the OCI artifact.
+ type: string
+ url:
+ description: |-
+ URL is the path to the OCI artifact (payload) containing all artifacts
+ associated with a kata runtime class.
+ type: string
+ required:
+ - url
+ type: object
+ name:
+ description: Name is the name of the kata runtime class.
+ type: string
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: |-
+ NodeSelector specifies the nodeSelector for the RuntimeClass object.
+ This ensures pods running with the RuntimeClass only get scheduled
+ onto nodes which support it.
+ type: object
+ required:
+ - artifacts
+ - name
+ type: object
+ type: array
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of Kata Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Kata Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Kata Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Kata Manager image tag
+ type: string
+ type: object
+ mig:
+ description: MIG spec
+ properties:
+ strategy:
+ description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
+ Device Plugin'
+ enum:
+ - none
+ - single
+ - mixed
+ type: string
+ type: object
+ migManager:
+ description: MIGManager for configuration to deploy MIG Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom mig-parted configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ default:
+ default: all-disabled
+ description: Default MIG config to be applied on the node,
+ when there is no config specified with the node label nvidia.com/mig.config
+ enum:
+ - all-disabled
+ - ""
+ type: string
+ name:
+ default: default-mig-parted-config
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA MIG Manager
+ is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gpuClientsConfig:
+ description: 'Optional: Custom gpu-clients configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ image:
+ description: NVIDIA MIG Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA MIG Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA MIG Manager image tag
+ type: string
+ type: object
+ nodeStatusExporter:
+ description: NodeStatusExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of Node Status Exporter
+ is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Node Status Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Node Status Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Node Status Exporter image tag
+ type: string
+ type: object
+ operator:
+ description: Operator component spec
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ defaultRuntime:
+ default: docker
+ description: Runtime defines container runtime type
+ enum:
+ - docker
+ - crio
+ - containerd
+ type: string
+ initContainer:
+ description: InitContainerSpec describes configuration for initContainer
+ image used with all components
+ properties:
+ image:
+ description: Image represents image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents image repository path
+ type: string
+ version:
+ description: Version represents image tag(version)
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ runtimeClass:
+ default: nvidia
+ type: string
+ use_ocp_driver_toolkit:
+ description: UseOpenShiftDriverToolkit indicates if DriverToolkit
+ image should be used on OpenShift to build and install driver
+ modules
+ type: boolean
+ required:
+ - defaultRuntime
+ type: object
+ psa:
+ description: PSA defines spec for PodSecurityAdmission configuration
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityAdmission configuration
+ needs to be enabled for all Pods
+ type: boolean
+ type: object
+ psp:
+ description: |-
+ Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
+ PSP defines spec for handling PodSecurityPolicies
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityPolicies needs to
+ be enabled for all Pods
+ type: boolean
+ type: object
+ sandboxDevicePlugin:
+ description: SandboxDevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Sandbox
+ Device Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Sandbox Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA Sandbox Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Sandbox Device Plugin image tag
+ type: string
+ type: object
+ sandboxWorkloads:
+ description: SandboxWorkloads defines the spec for handling sandbox
+ workloads (i.e. Virtual Machines)
+ properties:
+ defaultWorkload:
+ default: container
+ description: |-
+ DefaultWorkload indicates the default GPU workload type to configure
+ worker nodes in the cluster for
+ enum:
+ - container
+ - vm-passthrough
+ - vm-vgpu
+ type: string
+ enabled:
+ description: |-
+ Enabled indicates if the GPU Operator should manage additional operands required
+ for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
+ type: boolean
+ type: object
+ toolkit:
+ description: Toolkit component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Container
+ Toolkit through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Container Toolkit image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ installDir:
+ default: /usr/local/nvidia
+ description: Toolkit install directory on the host
+ type: string
+ repository:
+ description: NVIDIA Container Toolkit image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Container Toolkit image tag
+ type: string
+ type: object
+ validator:
+ description: Validator defines the spec for operator-validator daemonset
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ cuda:
+ description: CUDA validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ driver:
+ description: Driver validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Validator image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ plugin:
+ description: Plugin validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ repository:
+ description: Validator image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ toolkit:
+ description: Toolkit validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ version:
+ description: Validator image tag
+ type: string
+ vfioPCI:
+ description: VfioPCI validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuDevices:
+ description: VGPUDevices validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuManager:
+ description: VGPUManager validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: object
+ vfioManager:
+ description: VFIOManager for configuration to deploy VFIO-PCI Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of VFIO Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: VFIO Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: VFIO Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: VFIO Manager image tag
+ type: string
+ type: object
+ vgpuDeviceManager:
+ description: VGPUDeviceManager spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: NVIDIA vGPU devices configuration for NVIDIA vGPU
+ Device Manager container
+ properties:
+ default:
+ default: default
+ description: Default config name within the ConfigMap
+ type: string
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Device
+ Manager is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Device Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Device Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Device Manager image tag
+ type: string
+ type: object
+ vgpuManager:
+ description: VGPUManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Manager
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Manager image tag
+ type: string
+ type: object
+ required:
+ - daemonsets
+ - dcgm
+ - dcgmExporter
+ - devicePlugin
+ - driver
+ - gfd
+ - nodeStatusExporter
+ - operator
+ - toolkit
+ type: object
+ status:
+ description: ClusterPolicyStatus defines the observed state of ClusterPolicy
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the ClusterPolicy's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ is installed
+ type: string
+ state:
+ description: State indicates status of ClusterPolicy
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.6.1/manifests/nvidia.com_nvidiadrivers.yaml b/bundle/v24.6.1/manifests/nvidia.com_nvidiadrivers.yaml
new file mode 100644
index 000000000..665088edd
--- /dev/null
+++ b/bundle/v24.6.1/manifests/nvidia.com_nvidiadrivers.yaml
@@ -0,0 +1,810 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.15.0
+ name: nvidiadrivers.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: NVIDIADriver
+ listKind: NVIDIADriverList
+ plural: nvidiadrivers
+ shortNames:
+ - nvd
+ - nvdriver
+ - nvdrivers
+ singular: nvidiadriver
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: NVIDIADriver is the Schema for the nvidiadrivers API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: NVIDIADriverSpec defines the desired state of NVIDIADriver
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for NVIDIA
+ Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ driverType:
+ default: gpu
+ description: DriverType defines NVIDIA driver type
+ enum:
+ - gpu
+ - vgpu
+ - vgpu-host-manager
+ type: string
+ x-kubernetes-validations:
+ - message: driverType is an immutable field. Please create a new NvidiaDriver
+ resource instead when you want to change this setting.
+ rule: self == oldSelf
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present in
+ a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gdrcopy:
+ description: GDRCopy defines the spec for GDRCopy driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GDRCopy driver image repository
+ type: string
+ version:
+ description: GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ image:
+ default: nvcr.io/nvidia/driver
+ description: NVIDIA Driver container image name
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters for
+ the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
+ properties:
+ name:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System is
+ used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver Manager
+ initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image tag(version)
+ type: string
+ type: object
+ nodeAffinity:
+ description: Affinity specifies node affinity rules for driver pods
+ properties:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ The scheduler will prefer to schedule pods to nodes that satisfy
+ the affinity expressions specified by this field, but it may choose
+ a node that violates one or more of the expressions. The node that is
+ most preferred is the one with the greatest sum of weights, i.e.
+ for each node that meets all of the scheduling requirements (resource
+ request, requiredDuringScheduling affinity expressions, etc.),
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node matches the corresponding matchExpressions; the
+ node(s) with the highest sum are the most preferred.
+ items:
+ description: |-
+ An empty preferred scheduling term matches all objects with implicit weight 0
+ (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
+ properties:
+ preference:
+ description: A node selector term, associated with the corresponding
+ weight.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ weight:
+ description: Weight associated with matching the corresponding
+ nodeSelectorTerm, in the range 1-100.
+ format: int32
+ type: integer
+ required:
+ - preference
+ - weight
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ requiredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ If the affinity requirements specified by this field are not met at
+ scheduling time, the pod will not be scheduled onto the node.
+ If the affinity requirements specified by this field cease to be met
+ at some point during pod execution (e.g. due to an update), the system
+ may or may not try to eventually evict the pod from its node.
+ properties:
+ nodeSelectorTerms:
+ description: Required. A list of node selector terms. The
+ terms are ORed.
+ items:
+ description: |-
+ A null or empty node selector term matches no objects. The requirements of
+ them are ANDed.
+ The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - nodeSelectorTerms
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: NodeSelector specifies a selector for installation of
+ NVIDIA driver
+ type: object
+ priorityClassName:
+ description: 'Optional: Set priorityClassName'
+ type: string
+ rdma:
+ description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
+ driver
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled through
+ GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ name:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for each
+ pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA Driver
+ using pre-compiled modules is enabled
+ type: boolean
+ x-kubernetes-validations:
+ - message: usePrecompiled is an immutable field. Please create a new
+ NvidiaDriver resource instead when you want to change this setting.
+ rule: self == oldSelf
+ version:
+ description: NVIDIA Driver version (or just branch for precompiled
+ drivers)
+ type: string
+ virtualTopologyConfig:
+ description: 'Optional: Virtual Topology Daemon configuration for
+ NVIDIA vGPU drivers'
+ properties:
+ name:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ required:
+ - driverType
+ - image
+ type: object
+ status:
+ description: NVIDIADriverStatus defines the observed state of NVIDIADriver
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the NVIDIADriver's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ and driver are installed
+ type: string
+ state:
+ description: |-
+ State indicates status of NVIDIADriver instance.
+ This field is required and must be one of the enumerated values:
+ ignored, ready, or notReady.
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.6.1/metadata/annotations.yaml b/bundle/v24.6.1/metadata/annotations.yaml
new file mode 100644
index 000000000..f7383d5c4
--- /dev/null
+++ b/bundle/v24.6.1/metadata/annotations.yaml
@@ -0,0 +1,17 @@
+annotations:
+ operators.operatorframework.io.bundle.channels.v1: stable,v24.6
+ operators.operatorframework.io.bundle.channel.default.v1: v24.6
+ operators.operatorframework.io.bundle.manifests.v1: manifests/
+ operators.operatorframework.io.bundle.mediatype.v1: registry+v1
+ operators.operatorframework.io.bundle.metadata.v1: metadata/
+ operators.operatorframework.io.bundle.package.v1: gpu-operator-certified
+ operators.operatorframework.io.metrics.builder: operator-sdk-v1.4.0
+ operators.operatorframework.io.metrics.mediatype.v1: metrics+v1
+ operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3
+ operators.operatorframework.io.test.config.v1: tests/scorecard/
+ operators.operatorframework.io.test.mediatype.v1: scorecard+v1
+ operatorframework.io/cluster-monitoring: "true"
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+
+ # Annotations to specify OCP versions compatibility.
+ com.redhat.openshift.versions: v4.12-v4.16
diff --git a/bundle/v24.6.2/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/v24.6.2/manifests/gpu-operator-certified.clusterserviceversion.yaml
new file mode 100644
index 000000000..7d1d1dda5
--- /dev/null
+++ b/bundle/v24.6.2/manifests/gpu-operator-certified.clusterserviceversion.yaml
@@ -0,0 +1,921 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: ClusterServiceVersion
+metadata:
+ labels:
+ operatorframework.io/arch.arm64: supported
+ operatorframework.io/arch.amd64: supported
+ pod-security.kubernetes.io/enforce: privileged
+ pod-security.kubernetes.io/audit: privileged
+ pod-security.kubernetes.io/warn: privileged
+ annotations:
+ features.operators.openshift.io/disconnected: "true"
+ features.operators.openshift.io/fips-compliant: "false"
+ features.operators.openshift.io/proxy-aware: "true"
+ features.operators.openshift.io/tls-profiles: "false"
+ features.operators.openshift.io/token-auth-aws: "false"
+ features.operators.openshift.io/token-auth-azure: "false"
+ features.operators.openshift.io/token-auth-gcp: "false"
+ features.operators.openshift.io/cnf: "false"
+ features.operators.openshift.io/cni: "false"
+ features.operators.openshift.io/csi: "false"
+ olm.skipRange: '>=1.9.0 <24.6.2'
+ alm-examples: |-
+ [
+ {
+ "apiVersion": "nvidia.com/v1",
+ "kind": "ClusterPolicy",
+ "metadata": {
+ "name": "gpu-cluster-policy"
+ },
+ "spec": {
+ "operator": {
+ "defaultRuntime": "crio",
+ "use_ocp_driver_toolkit": true,
+ "initContainer": {
+ }
+ },
+ "sandboxWorkloads": {
+ "enabled": false,
+ "defaultWorkload": "container"
+ },
+ "driver": {
+ "enabled": true,
+ "useNvidiaDriverCRD": false,
+ "useOpenKernelModules": false,
+ "upgradePolicy": {
+ "autoUpgrade": true,
+ "drain": {
+ "deleteEmptyDir": false,
+ "enable": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "maxParallelUpgrades": 1,
+ "maxUnavailable": "25%",
+ "podDeletion": {
+ "deleteEmptyDir": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "waitForCompletion": {
+ "timeoutSeconds": 0
+ }
+ },
+ "repoConfig": {
+ "configMapName": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "configMapName": ""
+ },
+ "virtualTopology": {
+ "config": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ },
+ "dcgmExporter": {
+ "enabled": true,
+ "config": {
+ "name": ""
+ },
+ "serviceMonitor": {
+ "enabled": true
+ }
+ },
+ "dcgm": {
+ "enabled": true
+ },
+ "daemonsets": {
+ "updateStrategy": "RollingUpdate",
+ "rollingUpdate": {
+ "maxUnavailable": "1"
+ }
+ },
+ "devicePlugin": {
+ "enabled": true,
+ "config": {
+ "name": "",
+ "default": ""
+ },
+ "mps": {
+ "root": "/run/nvidia/mps"
+ }
+ },
+ "gfd": {
+ "enabled": true
+ },
+ "migManager": {
+ "enabled": true
+ },
+ "nodeStatusExporter": {
+ "enabled": true
+ },
+ "mig": {
+ "strategy": "single"
+ },
+ "toolkit": {
+ "enabled": true
+ },
+ "validator": {
+ "plugin": {
+ "env": [
+ {
+ "name": "WITH_WORKLOAD",
+ "value": "false"
+ }
+ ]
+ }
+ },
+ "vgpuManager": {
+ "enabled": false
+ },
+ "vgpuDeviceManager": {
+ "enabled": true
+ },
+ "sandboxDevicePlugin": {
+ "enabled": true
+ },
+ "vfioManager": {
+ "enabled": true
+ },
+ "gds": {
+ "enabled": false
+ },
+ "gdrcopy": {
+ "enabled": false
+ }
+ }
+ },
+ {
+ "apiVersion": "nvidia.com/v1alpha1",
+ "kind": "NVIDIADriver",
+ "metadata": {
+ "name": "gpu-driver"
+ },
+ "spec": {
+ "driverType": "gpu",
+ "repository": "nvcr.io/nvidia",
+ "image": "driver",
+ "version": "sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415",
+ "nodeSelector": {},
+ "manager": {},
+ "repoConfig": {
+ "name": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "name": ""
+ },
+ "virtualTopologyConfig": {
+ "name": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ }
+ }
+ ]
+ operators.operatorframework.io/builder: operator-sdk-v1.4.0
+ operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+ capabilities: Deep Insights
+ categories: AI/Machine Learning, OpenShift Optional
+ certified: "true"
+ containerImage: nvcr.io/nvidia/gpu-operator@sha256:8e0969cffc030a89c4acd68e64d41dd54e3bce8a794106b178d4dbd636a07f1c
+ createdAt: "Wed Sep 25 08:25:16 PDT 2024"
+ description: Automate the management and monitoring of NVIDIA GPUs.
+ provider: NVIDIA
+ repository: http://github.com/NVIDIA/gpu-operator
+ support: NVIDIA
+ name: gpu-operator-certified.v24.6.2
+ namespace: placeholder
+spec:
+ apiservicedefinitions: {}
+ relatedImages:
+ - name: gpu-operator-image
+ image: nvcr.io/nvidia/gpu-operator@sha256:8e0969cffc030a89c4acd68e64d41dd54e3bce8a794106b178d4dbd636a07f1c
+ - name: dcgm-exporter-image
+ image: nvcr.io/nvidia/k8s/dcgm-exporter@sha256:21f4c8b88716e8e6f732f9fb4c2efaef937c227491a8631c5e55036f80f39a4d
+ - name: dcgm-image
+ image: nvcr.io/nvidia/cloud-native/dcgm@sha256:15dab1273345df4a5844c4c761d064dbc4b592101251dc39174e597137123027
+ - name: container-toolkit-image
+ image: nvcr.io/nvidia/k8s/container-toolkit@sha256:7bcc188703f2fac630f7a4ff8960e6733ac3a29adf6a946533b796d9a27b8acf
+ - name: driver-image
+ image: nvcr.io/nvidia/driver@sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415
+ - name: driver-image-535
+ image: nvcr.io/nvidia/driver@sha256:a6d12fb5753f267dda25dfd38910f972bc632c006a24107fa50e20bba3642d7c
+ - name: driver-image-470
+ image: nvcr.io/nvidia/driver@sha256:07e11f85d54d49ec9648fb06e148b8d832ee1f9c3549a915eee853c9ef2949c2
+ - name: device-plugin-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534
+ - name: gpu-feature-discovery-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534
+ - name: mig-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:781fb47e264d9e0fbc8da5bd046e5e678316c866bc36ddd4b56d4eb0de682d5b
+ - name: init-container-image
+ image: nvcr.io/nvidia/cuda@sha256:de5b598bca89850567c4c104411d66bb52f47c9179199e6a3be6829b7ac586a2
+ - name: gpu-operator-validator-image
+ image: nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:96380b95396b7f29d2ed2ec8ef1ad5a8bcf9f55051db723295dc0a20db845331
+ - name: k8s-driver-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:740abc3ff657545c10effd5354f09af525200ed9a1b7623f0c2e8c7bd9e4a4e2
+ - name: vfio-manager-image
+ image: nvcr.io/nvidia/cuda@sha256:de5b598bca89850567c4c104411d66bb52f47c9179199e6a3be6829b7ac586a2
+ - name: sandbox-device-plugin-image
+ image: nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:969147c01d63be5d1fe458f32f1cc0c7408cf3062531db91408e2fc57b4d8a67
+ - name: vgpu-device-manager-image
+ image: nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:ae63fac9a4057a7646f0cf0ee0566e8928529adde05c4c0a017cda0599e381b2
+ - name: gdrcopy-image
+ image: nvcr.io/nvidia/cloud-native/gdrdrv@sha256:33de74eb590f071403c17b6c210c02963245851971168bc0c07c06c100a9f376
+ customresourcedefinitions:
+ owned:
+ - name: nvidiadrivers.nvidia.com
+ kind: NVIDIADriver
+ version: v1alpha1
+ displayName: NVIDIADriver
+ description: NVIDIADriver allows you to deploy the NVIDIA driver
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ statusDescriptors:
+ - description: The current state of the driver.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ - name: clusterpolicies.nvidia.com
+ kind: ClusterPolicy
+ version: v1
+ group: nvidia.com
+ displayName: ClusterPolicy
+ description: ClusterPolicy allows you to configure the GPU Operator
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: Deployment
+ name: ''
+ version: apps/v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: GPU Operator config
+ displayName: GPU Operator config
+ path: operator
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: operator.validator.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: operator.validator.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - description: NVIDIA GPU/vGPU Driver config
+ displayName: NVIDIA GPU/vGPU Driver config
+ path: driver
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: driver.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: driver.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: driver.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: driver.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: driver.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA DCGM Exporter config
+ displayName: NVIDIA DCGM Exporter config
+ path: dcgmExporter
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: dcgmExporter.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: dcgmExporter.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: dcgmExporter.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: dcgmExporter.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: dcgmExporter.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Device Plugin config
+ displayName: NVIDIA Device Plugin config
+ path: devicePlugin
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: devicePlugin.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: devicePlugin.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: devicePlugin.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: devicePlugin.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: devicePlugin.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: GPU Feature Discovery Plugin config
+ displayName: GPU Feature Discovery Plugin config
+ path: gfd
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: gfd.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: gfd.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: gfd.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: gfd.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: gfd.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Container Toolkit config
+ displayName: NVIDIA Container Toolkit config
+ path: toolkit
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: toolkit.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: toolkit.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: toolkit.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: toolkit.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: toolkit.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - displayName: NVIDIA DCGM config
+ description: NVIDIA DCGM config
+ path: dcgm
+ - displayName: Validator config
+ description: Validator config
+ path: validator
+ - displayName: Node Status Exporter config
+ description: Node Status Exporter config
+ path: nodeStatusExporter
+ - displayName: Daemonsets config
+ description: Daemonsets config
+ path: daemonsets
+ - displayName: MIG config
+ description: MIG config
+ path: mig
+ - displayName: NVIDIA MIG Manager config
+ description: NVIDIA MIG Manager config
+ path: migManager
+ - displayName: PodSecurityPolicy config
+ description: PodSecurityPolicy config
+ path: psp
+ - displayName: NVIDIA GPUDirect Storage config
+ description: NVIDIA GPUDirect Storage config
+ path: gds
+ - displayName: Sandbox Workloads config
+ description: Sandbox Workloads config
+ path: sandboxWorkloads
+ - displayName: NVIDIA vGPU Manager config
+ description: NVIDIA vGPU Manager config
+ path: vgpuManager
+ - displayName: NVIDIA vGPU Device Manager config
+ description: NVIDIA vGPU Device Manager config
+ path: vgpuDeviceManager
+ - displayName: VFIO Manager config
+ description: VFIO Manager config
+ path: vfioManager
+ - displayName: NVIDIA Sandbox Device Plugin config
+ description: NVIDIA Sandbox Device Plugin config
+ path: sandboxDevicePlugin
+ statusDescriptors:
+ - description: The current state of the operator.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ displayName: NVIDIA GPU Operator
+ description: >
+ Kubernetes provides access to special hardware resources such as NVIDIA
+ GPUs, NICs, InfiniBand adapters and other devices through the [device plugin
+ framework](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/).
+ However, configuring and managing nodes with these hardware resources
+ requires configuring multiple software components such as drivers,
+ container runtimes and other libraries, which is difficult and
+ error-prone.
+
+ The NVIDIA GPU Operator uses the [operator
+ framework](https://cloud.redhat.com/blog/introducing-the-operator-framework) within
+ Kubernetes to automate the management of all NVIDIA software components
+ needed to provision and monitor GPUs.
+ These components include the NVIDIA drivers (to enable CUDA), the Kubernetes
+ device plugin for GPUs, the NVIDIA Container Runtime, automatic node
+ labelling and the NVIDIA DCGM exporter.
+
+ Visit the official site of the [GPU Operator](https://github.com/NVIDIA/gpu-operator) for more information.
+ To get started with the GPU Operator on OpenShift, see the instructions
+ [here](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/openshift/contents.html).
+ icon:
+ - base64data: iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAB2lBMVEUAAAD///8EBAN3uQACAgIAAAQJDQUCAgB1tgAHCQf+/v5Ufg5Hagxxqwt+xgJ3uAB9wwB4vQBRUVEeLA3e3t5nZ2coKCgODg4FBwZ9wwR6wAJ4vADz8/MbGxt5tw1vpw1/wgoOFwkLDwh9xQH5+fny8vLw8PDFxcWysrKFhYVvb282NjYyMjIqKioXFxdikxRYgxNCYxJQdhFqoQ9xrg16ugxyqgyAxQkEBQj7+/v29vbIyMhjY2NbW1tHR0cvLy8kJCQdHR0ZGRlKbxJ8uhFNcxFVgBAxSBBgkg93tQ50sA4qPg4XIg18vwsbKQsSGgsLCwsMEwqCyQeByQFztADPz8+/v7+6urqWlpZra2tKSkogICASEhJmmRE8XBA5VRA2UBBonA9biA9GaQ4sQg4jMw4mOQ0aJw2GzgsUHgttpAqJ0Ql/wQWG0AJ8vwF0uQCtra2jo6OQkJB9fX1VVVVCQkI9PT0iIiIUFBRSfBNgjhA7WRBGZw+GywmFzgaAyASBxQN2twDb29u2traenp6Kiop+fn53d3dzc3NyqRV4sxM/YBNAXRElNhBjlQ+IzA00TQ16vgxJbgp6vAl4tgJ3vgDs7Ozn5+fa2trS0tJCXRY6VBV6thSL1gf4nFdFAAAD80lEQVRYw+zSOXPaQBgG4He0LJJmbGRGDUIzuvgBQiAEPfcdwC33DTbUtmOwSyc+4iRucvzXRImLFJmRShc8xXbfu+9+szg4OHjjAsH/iFD49q7rqM6xc/wPtWyBhS8sC94ObWRCZDksh1+RzmcEfI0DoPrjylEkSTgViMs9udjYTwMG4Gf51Z1BM81ioRwit+QvgYsdUQZeKFr3ladyKXvVr+pAM5uKcmRLXFzoCIxn+0i/8lSaBMHnfi7qowfQuZnm3PuFPwGs13zD3NlViozY/z4YD6/TCQORbPr2q78GLB0ou5IO40pd5AxQZnJ83m2y9Ju2JYKfgEhWC18aEIfrZLURHwQC0B87ySZwHxX8BNDWB1KfQfyxT2TA24uPQMt8yTWA3obz8wQGlhTN06Z900MkuJLrYu3u5LkK9LTtGRF8NEDLeSnXYLUdHUFVlpPqTa4IamlhJZ464biY1w4CKGrROOW7uwLlV+Q02lanCF6cbSoPVLzUfPwDll5I9T6WyXWhZre1yjiI6VCSzCWY3+FKaAwGHngzpEygx6+V6Uzk6TJR7yhWxJ1bFgTPJ7gMc58aUCq+n+qNT6Pn8y/xOcCiZZVjnJ+AAPhEuj0SKZ9bL9ZpNS9SgM6z9p5w3jt43cMvecfWBhm7dtfEpfhYMDBYpFd7mDZIAxPCFKgBhB0hkWbE2wVMyqycfhOMEiebSzFz5IMTEjw7E87UFj4GVR7GXqaSkoIcISEc/I38/PwhOTUMRBrADgwK09zgYGUBqbwcARiQyp3Eyk6kC4BloqtbJTcaSHIHShALWFmBSRuCWBGC+AtDMAAGIpAAc9mBiB0sCLSXHUSygxSxEIoE7IKEgbhopKgogC96x04QCMMw/H0cG6f0cEmBHaLc7FFQzApoTLwtQgWUWo26glx2mzGkyoHM1PPMO/NrnSH8e2QAiRsZ8S3ZuJoW5Udg5moGoMRLN2gAnkcUctueJ1gADsdtlZ2AgmSYoaDZBXwRctcwy6HN3XX/wfnTnA7Q5x0S0Gku4wHpe7Ql8Mbtu4TqC3qcADGtUl4O3eK0AkZdKH1mU/a6MFQGA7pQGoAVoAuuPYZlLJF2BawVLLjwac6Q8wUax61/CpKQAT6ZX3hFqoqqAFvuf4AzM+NgsoBS/wcSOD7SFzyf6CE9UQK9II1MRvIJm8QSgsLiBZuypsAWKyARElgx5FcLv1N4nFLbB45Sh6+TzsQRtn7bz/B3fS9GQ12bgUE2PKycQbwgXD0SWLwVhpZFq4eHhWloOjLoqGvoRYRG
AR2vp2EtpNUaTUpiRAizMAEhKNXpYZNnAUlBCSgFYTIxQTlMMJNGwSgYBdQHAFsKs+/bUkeyAAAAAElFTkSuQmCC
+ mediatype: image/png
+ install:
+ spec:
+ clusterPermissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - nvidia.com
+ resources:
+ - clusterpolicies
+ - clusterpolicies/finalizers
+ - clusterpolicies/status
+ - nvidiadrivers
+ - nvidiadrivers/finalizers
+ - nvidiadrivers/status
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - deletecollection
+ - apiGroups:
+ - config.openshift.io
+ resources:
+ - clusterversions
+ - proxies
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ - create
+ - get
+ - list
+ - watch
+ - patch
+ - update
+ - delete
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ resourceNames:
+ - hostmount-anyuid
+ - apiGroups:
+ - image.openshift.io
+ resources:
+ - imagestreams
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - monitoring.coreos.com
+ resources:
+ - servicemonitors
+ - prometheusrules
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - delete
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterroles
+ - clusterrolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ - services
+ - services/finalizers
+ - events
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - apiGroups:
+ - ""
+ resources:
+ - namespaces
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - ""
+ resources:
+ - nodes
+ verbs:
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - controllerrevisions
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - node.k8s.io
+ resources:
+ - runtimeclasses
+ verbs:
+ - get
+ - list
+ - create
+ - update
+ - watch
+ - delete
+ - apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apiextensions.k8s.io
+ resources:
+ - customresourcedefinitions
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ permissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - roles
+ - rolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - configmaps
+ - endpoints
+ - secrets
+ - serviceaccounts
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ deployments:
+ - name: gpu-operator
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ strategy: {}
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ nvidia.com/gpu-driver-upgrade-drain.skip: "true"
+ spec:
+ priorityClassName: system-node-critical
+ containers:
+ - args:
+ - --leader-elect
+ - --leader-lease-renew-deadline
+ - "60s"
+ image: nvcr.io/nvidia/gpu-operator@sha256:8e0969cffc030a89c4acd68e64d41dd54e3bce8a794106b178d4dbd636a07f1c
+ command:
+ - gpu-operator
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ name: gpu-operator
+ ports:
+ - name: metrics
+ containerPort: 8080
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ resources:
+ limits:
+ cpu: 500m
+ memory: 1Gi
+ requests:
+ cpu: 200m
+ memory: 200Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ volumeMounts:
+ - mountPath: /host-etc/os-release
+ name: host-os-release
+ readOnly: true
+ env:
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ - name: "VALIDATOR_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:96380b95396b7f29d2ed2ec8ef1ad5a8bcf9f55051db723295dc0a20db845331"
+ - name: "GFD_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534"
+ - name: "CONTAINER_TOOLKIT_IMAGE"
+ value: "nvcr.io/nvidia/k8s/container-toolkit@sha256:7bcc188703f2fac630f7a4ff8960e6733ac3a29adf6a946533b796d9a27b8acf"
+ - name: "DCGM_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/dcgm@sha256:15dab1273345df4a5844c4c761d064dbc4b592101251dc39174e597137123027"
+ - name: "DCGM_EXPORTER_IMAGE"
+ value: "nvcr.io/nvidia/k8s/dcgm-exporter@sha256:21f4c8b88716e8e6f732f9fb4c2efaef937c227491a8631c5e55036f80f39a4d"
+ - name: "DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7ad2c9f71fe06f9f7745ac8635f46740fbdff4f11edd468addfab81afcdfa534"
+ - name: "DRIVER_IMAGE"
+ value: "nvcr.io/nvidia/driver@sha256:858de27c152669f5a3cf4287406405b16dd5bb70c0373324eb735511997bb415"
+ - name: "DRIVER_IMAGE-535"
+ value: "nvcr.io/nvidia/driver@sha256:a6d12fb5753f267dda25dfd38910f972bc632c006a24107fa50e20bba3642d7c"
+ - name: "DRIVER_IMAGE-470"
+ value: "nvcr.io/nvidia/driver@sha256:07e11f85d54d49ec9648fb06e148b8d832ee1f9c3549a915eee853c9ef2949c2"
+ - name: "DRIVER_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:740abc3ff657545c10effd5354f09af525200ed9a1b7623f0c2e8c7bd9e4a4e2"
+ - name: "MIG_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:781fb47e264d9e0fbc8da5bd046e5e678316c866bc36ddd4b56d4eb0de682d5b"
+ - name: "CUDA_BASE_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:de5b598bca89850567c4c104411d66bb52f47c9179199e6a3be6829b7ac586a2"
+ - name: "VFIO_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:de5b598bca89850567c4c104411d66bb52f47c9179199e6a3be6829b7ac586a2"
+ - name: "SANDBOX_DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:969147c01d63be5d1fe458f32f1cc0c7408cf3062531db91408e2fc57b4d8a67"
+ - name: "VGPU_DEVICE_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:ae63fac9a4057a7646f0cf0ee0566e8928529adde05c4c0a017cda0599e381b2"
+ - name: "GDRCOPY_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:33de74eb590f071403c17b6c210c02963245851971168bc0c07c06c100a9f376"
+ terminationGracePeriodSeconds: 10
+ volumes:
+ - hostPath:
+ path: /etc/os-release
+ name: host-os-release
+ serviceAccountName: gpu-operator
+ strategy: deployment
+ installModes:
+ - supported: true
+ type: OwnNamespace
+ - supported: true
+ type: SingleNamespace
+ - supported: false
+ type: MultiNamespace
+ - supported: false
+ type: AllNamespaces
+ keywords:
+ - gpu
+ - cuda
+ - compute
+ - operator
+ - deep learning
+ - monitoring
+ - tesla
+ maintainers:
+ - name: NVIDIA
+ email: operator_feedback@nvidia.com
+ maturity: stable
+ provider:
+ name: NVIDIA Corporation
+ version: 24.6.2
+ replaces: gpu-operator-certified.v24.6.1
diff --git a/bundle/v24.6.2/manifests/nvidia.com_clusterpolicies.yaml b/bundle/v24.6.2/manifests/nvidia.com_clusterpolicies.yaml
new file mode 100644
index 000000000..8e29cabf1
--- /dev/null
+++ b/bundle/v24.6.2/manifests/nvidia.com_clusterpolicies.yaml
@@ -0,0 +1,2404 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.15.0
+ name: clusterpolicies.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: ClusterPolicy
+ listKind: ClusterPolicyList
+ plural: clusterpolicies
+ singular: clusterpolicy
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1
+ schema:
+ openAPIV3Schema:
+ description: ClusterPolicy is the Schema for the clusterpolicies API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ClusterPolicySpec defines the desired state of ClusterPolicy
+ properties:
+ ccManager:
+ description: CCManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ defaultMode:
+ description: Default CC mode setting for compatible GPUs on the
+ node
+ enum:
+ - "on"
+ - "off"
+ - devtools
+ type: string
+ enabled:
+ description: Enabled indicates if deployment of CC Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: CC Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: CC Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: CC Manager image tag
+ type: string
+ type: object
+ cdi:
+ description: CDI configures how the Container Device Interface is
+ used in the cluster
+ properties:
+ default:
+ default: false
+ description: Default indicates whether to use CDI as the default
+ mechanism for providing GPU access to containers.
+ type: boolean
+ enabled:
+ default: false
+ description: Enabled indicates whether CDI can be used to make
+ GPUs accessible to containers.
+ type: boolean
+ type: object
+ daemonsets:
+ description: Daemonset defines common configuration for all Daemonsets
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ priorityClassName:
+ type: string
+ rollingUpdate:
+ description: 'Optional: Configuration for rolling update of all
+ DaemonSet pods'
+ properties:
+ maxUnavailable:
+ type: string
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ updateStrategy:
+ default: RollingUpdate
+ enum:
+ - RollingUpdate
+ - OnDelete
+ type: string
+ type: object
+ dcgm:
+ description: DCGM component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
+ as a separate pod is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ hostPort:
+ description: 'Deprecated: HostPort represents host port that needs
+ to be bound for DCGM engine (Default: 5555)'
+ format: int32
+ type: integer
+ image:
+ description: NVIDIA DCGM image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA DCGM image tag
+ type: string
+ type: object
+ dcgmExporter:
+ description: DCGMExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom metrics configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ name:
+ description: ConfigMap name with file dcgm-metrics.csv for
+ metrics to be collected by NVIDIA DCGM Exporter
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Exporter
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA DCGM Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ serviceMonitor:
+ description: 'Optional: ServiceMonitor configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ additionalLabels:
+ additionalProperties:
+ type: string
+ description: AdditionalLabels to add to ServiceMonitor instance
+ for NVIDIA DCGM Exporter
+ type: object
+ enabled:
+ description: Enabled indicates if ServiceMonitor is deployed
+ for NVIDIA DCGM Exporter
+ type: boolean
+ honorLabels:
+ description: HonorLabels chooses the metric’s labels on collisions
+ with target labels.
+ type: boolean
+ interval:
+ description: |-
+ Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
+ Supported units: y, w, d, h, m, s, ms
+ pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
+ type: string
+ relabelings:
+ description: Relabelings allows rewriting labels on metric
+ sets for NVIDIA DCGM Exporter
+ items:
+ description: |-
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
+ properties:
+ action:
+ default: replace
+ description: |-
+ Action to perform based on the regex matching.
+
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+
+ Default: "Replace"
+ enum:
+ - replace
+ - Replace
+ - keep
+ - Keep
+ - drop
+ - Drop
+ - hashmod
+ - HashMod
+ - labelmap
+ - LabelMap
+ - labeldrop
+ - LabelDrop
+ - labelkeep
+ - LabelKeep
+ - lowercase
+ - Lowercase
+ - uppercase
+ - Uppercase
+ - keepequal
+ - KeepEqual
+ - dropequal
+ - DropEqual
+ type: string
+ modulus:
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+
+ Only applicable when the action is `HashMod`.
+ format: int64
+ type: integer
+ regex:
+ description: Regular expression against which the extracted
+ value is matched.
+ type: string
+ replacement:
+ description: |-
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+
+ Regex capture groups are available.
+ type: string
+ separator:
+ description: Separator is the string between concatenated
+ SourceLabels.
+ type: string
+ sourceLabels:
+ description: |-
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
+ items:
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
+ pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
+ type: string
+ type: array
+ targetLabel:
+ description: |-
+ Label to which the resulting string is written in a replacement.
+
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+
+ Regex capture groups are available.
+ type: string
+ type: object
+ type: array
+ type: object
+ version:
+ description: NVIDIA DCGM Exporter image tag
+ type: string
+ type: object
+ devicePlugin:
+ description: DevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Configuration for the NVIDIA Device Plugin
+ via the ConfigMap'
+ properties:
+ default:
+ description: Default config name within the ConfigMap for
+ the NVIDIA Device Plugin config
+ type: string
+ name:
+ description: ConfigMap name for NVIDIA Device Plugin config
+ including shared config between plugin and GFD
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Device
+ Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Device Plugin image tag
+ type: string
+ type: object
+ driver:
+ description: Driver component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for
+ NVIDIA Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Driver
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters
+ for the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU
+ licensing'
+ properties:
+ configMapName:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System
+ is used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver
+ Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag (version)
+ type: string
+ type: object
+ rdma:
+ description: GPUDirectRDMASpec defines the properties for nvidia-peermem
+ deployment
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled
+ through GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ configMapName:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ upgradePolicy:
+ description: Driver auto-upgrade settings
+ properties:
+ autoUpgrade:
+ default: false
+ description: |-
+ AutoUpgrade is a global switch for automatic upgrade feature
+ if set to false all other options are ignored
+ type: boolean
+ drain:
+ description: DrainSpec describes configuration for node drain
+ during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the node is drained)
+ type: boolean
+ enable:
+ default: false
+ description: Enable indicates if node draining is allowed
+ during upgrade
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force draining is allowed
+ type: boolean
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector to filter pods on the node that need to be drained
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 300
+ description: TimeoutSecond specifies the length of time
+ in seconds to wait before giving up drain, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ maxParallelUpgrades:
+ default: 1
+ description: |-
+ MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
+ 0 means no limit, all nodes will be upgraded in parallel
+ minimum: 0
+ type: integer
+ maxUnavailable:
+ anyOf:
+ - type: integer
+ - type: string
+ default: 25%
+ description: |-
+ MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
+ Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
+ Absolute number is calculated from percentage by rounding up.
+ By default, a fixed value of 25% is used.
+ x-kubernetes-int-or-string: true
+ podDeletion:
+ description: PodDeletionSpec describes configuration for deletion
+ of pods using special resources during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the pod is deleted)
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force deletion is allowed
+ type: boolean
+ timeoutSeconds:
+ default: 300
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ waitForCompletion:
+ description: WaitForCompletionSpec describes the configuration
+ for waiting on job completions
+ properties:
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector for the pods to wait for completion
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 0
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ type: object
+ useNvidiaDriverCRD:
+ description: UseNvidiaDriverCRD indicates if the deployment of
+ NVIDIA Driver is managed by the NVIDIADriver CRD type
+ type: boolean
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA
+ Driver using pre-compiled modules is enabled
+ type: boolean
+ version:
+ description: NVIDIA Driver image tag
+ type: string
+ virtualTopology:
+ description: 'Optional: Virtual Topology Daemon configuration
+ for NVIDIA vGPU drivers'
+ properties:
+ config:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ type: object
+ gdrcopy:
+ description: GDRCopy component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ Operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GDRCopy driver image repository
+ type: string
+ version:
+ description: NVIDIA GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS components (Experimental)
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ gfd:
+ description: GPUFeatureDiscovery spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of GPU Feature Discovery
+ Plugin is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GFD image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GFD image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: GFD image tag
+ type: string
+ type: object
+ hostPaths:
+ description: HostPaths defines various paths on the host needed by
+ GPU Operator components
+ properties:
+ driverInstallDir:
+ description: |-
+ DriverInstallDir represents the root at which driver files including libraries,
+ config files, and executables can be found.
+ type: string
+ rootFS:
+ description: |-
+ RootFS represents the path to the root filesystem of the host.
+ This is used by components that need to interact with the host filesystem
+ and as such this must be a chroot-able filesystem.
+ Examples include the MIG Manager and Toolkit Container which may need to
+ stop, start, or restart systemd services.
+ type: string
+ type: object
+ kataManager:
+ description: KataManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: Kata Manager config
+ properties:
+ artifactsDir:
+ default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
+ description: |-
+ ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
+ are placed on the local filesystem.
+ type: string
+ runtimeClasses:
+ description: RuntimeClasses is a list of kata runtime classes
+ to configure.
+ items:
+ description: RuntimeClass defines the configuration for
+ a kata RuntimeClass
+ properties:
+ artifacts:
+ description: Artifacts are the kata artifacts associated
+ with the runtime class.
+ properties:
+ pullSecret:
+ description: PullSecret is the secret used to pull
+ the OCI artifact.
+ type: string
+ url:
+ description: |-
+ URL is the path to the OCI artifact (payload) containing all artifacts
+ associated with a kata runtime class.
+ type: string
+ required:
+ - url
+ type: object
+ name:
+ description: Name is the name of the kata runtime class.
+ type: string
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: |-
+ NodeSelector specifies the nodeSelector for the RuntimeClass object.
+ This ensures pods running with the RuntimeClass only get scheduled
+ onto nodes which support it.
+ type: object
+ required:
+ - artifacts
+ - name
+ type: object
+ type: array
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of Kata Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Kata Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Kata Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Kata Manager image tag
+ type: string
+ type: object
+ mig:
+ description: MIG spec
+ properties:
+ strategy:
+ description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
+ Device Plugin'
+ enum:
+ - none
+ - single
+ - mixed
+ type: string
+ type: object
+ migManager:
+ description: MIGManager for configuration to deploy MIG Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom mig-parted configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ default:
+ default: all-disabled
+ description: Default MIG config to be applied on the node,
+ when there is no config specified with the node label nvidia.com/mig.config
+ enum:
+ - all-disabled
+ - ""
+ type: string
+ name:
+ default: default-mig-parted-config
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA MIG Manager
+ is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gpuClientsConfig:
+ description: 'Optional: Custom gpu-clients configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ image:
+ description: NVIDIA MIG Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA MIG Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA MIG Manager image tag
+ type: string
+ type: object
+ nodeStatusExporter:
+ description: NodeStatusExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of Node Status Exporter
+ is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Node Status Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Node Status Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Node Status Exporter image tag
+ type: string
+ type: object
+ operator:
+ description: Operator component spec
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ defaultRuntime:
+ default: docker
+ description: Runtime defines container runtime type
+ enum:
+ - docker
+ - crio
+ - containerd
+ type: string
+ initContainer:
+ description: InitContainerSpec describes configuration for initContainer
+ image used with all components
+ properties:
+ image:
+ description: Image represents image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents image repository path
+ type: string
+ version:
+ description: Version represents image tag(version)
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ runtimeClass:
+ default: nvidia
+ type: string
+ use_ocp_driver_toolkit:
+ description: UseOpenShiftDriverToolkit indicates if DriverToolkit
+ image should be used on OpenShift to build and install driver
+ modules
+ type: boolean
+ required:
+ - defaultRuntime
+ type: object
+ psa:
+ description: PSA defines spec for PodSecurityAdmission configuration
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityAdmission configuration
+ needs to be enabled for all Pods
+ type: boolean
+ type: object
+ psp:
+ description: |-
+ Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
+ PSP defines spec for handling PodSecurityPolicies
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityPolicies needs to
+ be enabled for all Pods
+ type: boolean
+ type: object
+ sandboxDevicePlugin:
+ description: SandboxDevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Sandbox
+ Device Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Sandbox Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA Sandbox Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Sandbox Device Plugin image tag
+ type: string
+ type: object
+ sandboxWorkloads:
+ description: SandboxWorkloads defines the spec for handling sandbox
+ workloads (i.e. Virtual Machines)
+ properties:
+ defaultWorkload:
+ default: container
+ description: |-
+ DefaultWorkload indicates the default GPU workload type to configure
+ worker nodes in the cluster for
+ enum:
+ - container
+ - vm-passthrough
+ - vm-vgpu
+ type: string
+ enabled:
+ description: |-
+ Enabled indicates if the GPU Operator should manage additional operands required
+ for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
+ type: boolean
+ type: object
+ toolkit:
+ description: Toolkit component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Container
+ Toolkit through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Container Toolkit image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ installDir:
+ default: /usr/local/nvidia
+ description: Toolkit install directory on the host
+ type: string
+ repository:
+ description: NVIDIA Container Toolkit image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Container Toolkit image tag
+ type: string
+ type: object
+ validator:
+ description: Validator defines the spec for operator-validator daemonset
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ cuda:
+ description: CUDA validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ driver:
+ description: Driver validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Validator image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ plugin:
+ description: Plugin validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ repository:
+ description: Validator image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ toolkit:
+ description: Toolkit validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ version:
+ description: Validator image tag
+ type: string
+ vfioPCI:
+ description: VfioPCI validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuDevices:
+ description: VGPUDevices validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuManager:
+ description: VGPUManager validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: object
+ vfioManager:
+ description: VFIOManager for configuration to deploy VFIO-PCI Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of VFIO Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: VFIO Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: VFIO Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: VFIO Manager image tag
+ type: string
+ type: object
+ vgpuDeviceManager:
+ description: VGPUDeviceManager spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: NVIDIA vGPU devices configuration for NVIDIA vGPU
+ Device Manager container
+ properties:
+ default:
+ default: default
+ description: Default config name within the ConfigMap
+ type: string
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Device
+ Manager is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Device Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Device Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Device Manager image tag
+ type: string
+ type: object
+ vgpuManager:
+ description: VGPUManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Manager
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Manager image tag
+ type: string
+ type: object
+ required:
+ - daemonsets
+ - dcgm
+ - dcgmExporter
+ - devicePlugin
+ - driver
+ - gfd
+ - nodeStatusExporter
+ - operator
+ - toolkit
+ type: object
+ status:
+ description: ClusterPolicyStatus defines the observed state of ClusterPolicy
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the ClusterPolicy's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ is installed
+ type: string
+ state:
+ description: State indicates status of ClusterPolicy
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.6.2/manifests/nvidia.com_nvidiadrivers.yaml b/bundle/v24.6.2/manifests/nvidia.com_nvidiadrivers.yaml
new file mode 100644
index 000000000..665088edd
--- /dev/null
+++ b/bundle/v24.6.2/manifests/nvidia.com_nvidiadrivers.yaml
@@ -0,0 +1,810 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.15.0
+ name: nvidiadrivers.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: NVIDIADriver
+ listKind: NVIDIADriverList
+ plural: nvidiadrivers
+ shortNames:
+ - nvd
+ - nvdriver
+ - nvdrivers
+ singular: nvidiadriver
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: NVIDIADriver is the Schema for the nvidiadrivers API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: NVIDIADriverSpec defines the desired state of NVIDIADriver
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for NVIDIA
+ Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ driverType:
+ default: gpu
+ description: DriverType defines NVIDIA driver type
+ enum:
+ - gpu
+ - vgpu
+ - vgpu-host-manager
+ type: string
+ x-kubernetes-validations:
+ - message: driverType is an immutable field. Please create a new NvidiaDriver
+ resource instead when you want to change this setting.
+ rule: self == oldSelf
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present in
+ a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gdrcopy:
+ description: GDRCopy defines the spec for GDRCopy driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GDRCopy driver image repository
+ type: string
+ version:
+ description: GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ image:
+ default: nvcr.io/nvidia/driver
+ description: NVIDIA Driver container image name
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters for
+ the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
+ properties:
+ name:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System is
+ used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver Manager
+ initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image tag(version)
+ type: string
+ type: object
+ nodeAffinity:
+ description: Affinity specifies node affinity rules for driver pods
+ properties:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ The scheduler will prefer to schedule pods to nodes that satisfy
+ the affinity expressions specified by this field, but it may choose
+ a node that violates one or more of the expressions. The node that is
+ most preferred is the one with the greatest sum of weights, i.e.
+ for each node that meets all of the scheduling requirements (resource
+ request, requiredDuringScheduling affinity expressions, etc.),
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node matches the corresponding matchExpressions; the
+ node(s) with the highest sum are the most preferred.
+ items:
+ description: |-
+ An empty preferred scheduling term matches all objects with implicit weight 0
+ (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
+ properties:
+ preference:
+ description: A node selector term, associated with the corresponding
+ weight.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ weight:
+ description: Weight associated with matching the corresponding
+ nodeSelectorTerm, in the range 1-100.
+ format: int32
+ type: integer
+ required:
+ - preference
+ - weight
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ requiredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ If the affinity requirements specified by this field are not met at
+ scheduling time, the pod will not be scheduled onto the node.
+ If the affinity requirements specified by this field cease to be met
+ at some point during pod execution (e.g. due to an update), the system
+ may or may not try to eventually evict the pod from its node.
+ properties:
+ nodeSelectorTerms:
+ description: Required. A list of node selector terms. The
+ terms are ORed.
+ items:
+ description: |-
+ A null or empty node selector term matches no objects. The requirements of
+ them are ANDed.
+ The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist, Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - nodeSelectorTerms
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: NodeSelector specifies a selector for installation of
+ NVIDIA driver
+ type: object
+ priorityClassName:
+ description: 'Optional: Set priorityClassName'
+ type: string
+ rdma:
+ description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
+ driver
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled through
+ GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ name:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for each
+ pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA Driver
+ using pre-compiled modules is enabled
+ type: boolean
+ x-kubernetes-validations:
+ - message: usePrecompiled is an immutable field. Please create a new
+ NvidiaDriver resource instead when you want to change this setting.
+ rule: self == oldSelf
+ version:
+ description: NVIDIA Driver version (or just branch for precompiled
+ drivers)
+ type: string
+ virtualTopologyConfig:
+ description: 'Optional: Virtual Topology Daemon configuration for
+ NVIDIA vGPU drivers'
+ properties:
+ name:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ required:
+ - driverType
+ - image
+ type: object
+ status:
+ description: NVIDIADriverStatus defines the observed state of NVIDIADriver
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the NVIDIADriver's
+ current state.
+ items:
+ description: "Condition contains details for one aspect of the current
+ state of this API Resource.\n---\nThis struct is intended for
+ direct use as an array at the field path .status.conditions. For
+ example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
+ observations of a foo's current state.\n\t // Known .status.conditions.type
+ are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
+ +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
+ \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
+ patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+ \ // other fields\n\t}"
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: |-
+ type of condition in CamelCase or in foo.example.com/CamelCase.
+ ---
+ Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+ useful (see .node.status.conditions), the ability to deconflict is important.
+ The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ and driver are installed
+ type: string
+ state:
+ description: |-
+ INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
+ Important: Run "make" to regenerate code after modifying this file
+ State indicates status of NVIDIADriver instance
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.6.2/metadata/annotations.yaml b/bundle/v24.6.2/metadata/annotations.yaml
new file mode 100644
index 000000000..f7383d5c4
--- /dev/null
+++ b/bundle/v24.6.2/metadata/annotations.yaml
@@ -0,0 +1,17 @@
+annotations:
+ operators.operatorframework.io.bundle.channels.v1: stable,v24.6
+ operators.operatorframework.io.bundle.channel.default.v1: v24.6
+ operators.operatorframework.io.bundle.manifests.v1: manifests/
+ operators.operatorframework.io.bundle.mediatype.v1: registry+v1
+ operators.operatorframework.io.bundle.metadata.v1: metadata/
+ operators.operatorframework.io.bundle.package.v1: gpu-operator-certified
+ operators.operatorframework.io.metrics.builder: operator-sdk-v1.4.0
+ operators.operatorframework.io.metrics.mediatype.v1: metrics+v1
+ operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3
+ operators.operatorframework.io.test.config.v1: tests/scorecard/
+ operators.operatorframework.io.test.mediatype.v1: scorecard+v1
+ operatorframework.io/cluster-monitoring: "true"
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+
+ # Annotations to specify OCP versions compatibility.
+ com.redhat.openshift.versions: v4.12-v4.16
diff --git a/bundle/v24.9.0/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/v24.9.0/manifests/gpu-operator-certified.clusterserviceversion.yaml
new file mode 100644
index 000000000..ba850b0cc
--- /dev/null
+++ b/bundle/v24.9.0/manifests/gpu-operator-certified.clusterserviceversion.yaml
@@ -0,0 +1,925 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: ClusterServiceVersion
+metadata:
+ labels:
+ operatorframework.io/arch.arm64: supported
+ operatorframework.io/arch.amd64: supported
+ pod-security.kubernetes.io/enforce: privileged
+ pod-security.kubernetes.io/audit: privileged
+ pod-security.kubernetes.io/warn: privileged
+ annotations:
+ features.operators.openshift.io/disconnected: "true"
+ features.operators.openshift.io/fips-compliant: "false"
+ features.operators.openshift.io/proxy-aware: "true"
+ features.operators.openshift.io/tls-profiles: "false"
+ features.operators.openshift.io/token-auth-aws: "false"
+ features.operators.openshift.io/token-auth-azure: "false"
+ features.operators.openshift.io/token-auth-gcp: "false"
+ features.operators.openshift.io/cnf: "false"
+ features.operators.openshift.io/cni: "false"
+ features.operators.openshift.io/csi: "false"
+ olm.skipRange: '>=1.9.0 <24.9.0'
+ alm-examples: |-
+ [
+ {
+ "apiVersion": "nvidia.com/v1",
+ "kind": "ClusterPolicy",
+ "metadata": {
+ "name": "gpu-cluster-policy"
+ },
+ "spec": {
+ "operator": {
+ "defaultRuntime": "crio",
+ "use_ocp_driver_toolkit": true,
+ "initContainer": {
+ }
+ },
+ "sandboxWorkloads": {
+ "enabled": false,
+ "defaultWorkload": "container"
+ },
+ "driver": {
+ "enabled": true,
+ "useNvidiaDriverCRD": false,
+ "useOpenKernelModules": false,
+ "upgradePolicy": {
+ "autoUpgrade": true,
+ "drain": {
+ "deleteEmptyDir": false,
+ "enable": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "maxParallelUpgrades": 1,
+ "maxUnavailable": "25%",
+ "podDeletion": {
+ "deleteEmptyDir": false,
+ "force": false,
+ "timeoutSeconds": 300
+ },
+ "waitForCompletion": {
+ "timeoutSeconds": 0
+ }
+ },
+ "repoConfig": {
+ "configMapName": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "configMapName": ""
+ },
+ "virtualTopology": {
+ "config": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ },
+ "dcgmExporter": {
+ "enabled": true,
+ "config": {
+ "name": ""
+ },
+ "serviceMonitor": {
+ "enabled": true
+ }
+ },
+ "dcgm": {
+ "enabled": true
+ },
+ "daemonsets": {
+ "updateStrategy": "RollingUpdate",
+ "rollingUpdate": {
+ "maxUnavailable": "1"
+ }
+ },
+ "devicePlugin": {
+ "enabled": true,
+ "config": {
+ "name": "",
+ "default": ""
+ },
+ "mps": {
+ "root": "/run/nvidia/mps"
+ }
+ },
+ "gfd": {
+ "enabled": true
+ },
+ "migManager": {
+ "enabled": true
+ },
+ "nodeStatusExporter": {
+ "enabled": true
+ },
+ "mig": {
+ "strategy": "single"
+ },
+ "toolkit": {
+ "enabled": true
+ },
+ "validator": {
+ "plugin": {
+ "env": [
+ {
+ "name": "WITH_WORKLOAD",
+ "value": "false"
+ }
+ ]
+ }
+ },
+ "vgpuManager": {
+ "enabled": false
+ },
+ "vgpuDeviceManager": {
+ "enabled": true
+ },
+ "sandboxDevicePlugin": {
+ "enabled": true
+ },
+ "vfioManager": {
+ "enabled": true
+ },
+ "gds": {
+ "enabled": false
+ },
+ "gdrcopy": {
+ "enabled": false
+ }
+ }
+ },
+ {
+ "apiVersion": "nvidia.com/v1alpha1",
+ "kind": "NVIDIADriver",
+ "metadata": {
+ "name": "gpu-driver"
+ },
+ "spec": {
+ "driverType": "gpu",
+ "repository": "nvcr.io/nvidia",
+ "image": "driver",
+ "version": "sha256:8a9a9e9470f64d340a7f3106a03e9622fa98e25368fbfb7ce9c416ad98f6d951",
+ "nodeSelector": {},
+ "manager": {},
+ "repoConfig": {
+ "name": ""
+ },
+ "certConfig": {
+ "name": ""
+ },
+ "licensingConfig": {
+ "nlsEnabled": true,
+ "name": ""
+ },
+ "virtualTopologyConfig": {
+ "name": ""
+ },
+ "kernelModuleConfig": {
+ "name": ""
+ }
+ }
+ }
+ ]
+ operators.operatorframework.io/builder: operator-sdk-v1.4.0
+ operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+ capabilities: Deep Insights
+ categories: AI/Machine Learning, OpenShift Optional
+ certified: "true"
+ containerImage: nvcr.io/nvidia/gpu-operator@sha256:3d741e8399519227cba0391b471fab2161501b0983e66789fabead4062d801c6
+ createdAt: "Thu Oct 31 09:45:53 PDT 2024"
+ description: Automate the management and monitoring of NVIDIA GPUs.
+ provider: NVIDIA
+ repository: http://github.com/NVIDIA/gpu-operator
+ support: NVIDIA
+ name: gpu-operator-certified.v24.9.0
+ namespace: placeholder
+spec:
+ apiservicedefinitions: {}
+ relatedImages:
+ - name: gpu-operator-image
+ image: nvcr.io/nvidia/gpu-operator@sha256:3d741e8399519227cba0391b471fab2161501b0983e66789fabead4062d801c6
+ - name: dcgm-exporter-image
+ image: nvcr.io/nvidia/k8s/dcgm-exporter@sha256:10ff95e83bc137796f5be76278a6b38fd31c7360e62c7e72011b428f8848c791
+ - name: dcgm-image
+ image: nvcr.io/nvidia/cloud-native/dcgm@sha256:7b0ebd6c40a11b6484dc4385605372511e4e93132a44d2a3d6ec2e36c24e6783
+ - name: container-toolkit-image
+ image: nvcr.io/nvidia/k8s/container-toolkit@sha256:bb6b55a5bd6419df3ca2d8ec0738b87491fc45e15587e613663890dc3a8e6e13
+ - name: driver-image
+ image: nvcr.io/nvidia/driver@sha256:8a9a9e9470f64d340a7f3106a03e9622fa98e25368fbfb7ce9c416ad98f6d951
+ - name: driver-image-535
+ image: nvcr.io/nvidia/driver@sha256:6b75c7534efa6ec480e8eeea625949cd74330aad287239e1abf160622c4814f3
+ - name: driver-image-560
+ image: nvcr.io/nvidia/driver@sha256:38b66a8d44cab9e2c62da9e101f32cd9dbcb5e02d8e57b47671284d374ca3695
+ - name: driver-image-565
+ image: nvcr.io/nvidia/driver@sha256:d55b57938866e538acc3a71ca32f8cf87e71c591abd4a34695ee428e7ec2fa73
+ - name: device-plugin-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea
+ - name: gpu-feature-discovery-image
+ image: nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea
+ - name: mig-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:40830d3561c14743f484d45b498141f9e86b1308e16fae3978110783927264ab
+ - name: init-container-image
+ image: nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190
+ - name: gpu-operator-validator-image
+ image: nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:70a0bd29259820d6257b04b0cdb6a175f9783d4dd19ccc4ec6599d407c359ba5
+ - name: k8s-driver-manager-image
+ image: nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:b072c5793be65eee556eaff1b9cbbd115a1ef29982be95b2959adfcb4bc72382
+ - name: vfio-manager-image
+ image: nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190
+ - name: sandbox-device-plugin-image
+ image: nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:4ffa1cd2a6497eb647a89ed259dcfb007554737b9d80f69bc173a2c3cd72a1da
+ - name: vgpu-device-manager-image
+ image: nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:7edd7a0413dcb39b6e3bcefaf06812f3293c8e480ca10783e821a561ed686200
+ - name: gdrcopy-image
+ image: nvcr.io/nvidia/cloud-native/gdrdrv@sha256:cf39d78ced7fb5727a9668ee2cd44b14bb7a23a95b83d5464b7d755740e02121
+ customresourcedefinitions:
+ owned:
+ - name: nvidiadrivers.nvidia.com
+ kind: NVIDIADriver
+ version: v1alpha1
+ displayName: NVIDIADriver
+ description: NVIDIADriver allows you to deploy the NVIDIA driver
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ statusDescriptors:
+ - description: The current state of the driver.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ - name: clusterpolicies.nvidia.com
+ kind: ClusterPolicy
+ version: v1
+ group: nvidia.com
+ displayName: ClusterPolicy
+ description: ClusterPolicy allows you to configure the GPU Operator
+ resources:
+ - kind: ServiceAccount
+ name: ''
+ version: v1
+ - kind: Deployment
+ name: ''
+ version: apps/v1
+ - kind: DaemonSet
+ name: ''
+ version: apps/v1
+ - kind: ConfigMap
+ name: ''
+ version: v1
+ - kind: Pod
+ name: ''
+ version: v1
+ - kind: clusterpolicies
+ name: ''
+ version: v1
+ - kind: clusterversions
+ name: ''
+ version: v1
+ - kind: nodes
+ name: ''
+ version: v1
+ - kind: status
+ name: ''
+ version: v1
+ specDescriptors:
+ - description: GPU Operator config
+ displayName: GPU Operator config
+ path: operator
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: operator.validator.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: operator.validator.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - description: NVIDIA GPU/vGPU Driver config
+ displayName: NVIDIA GPU/vGPU Driver config
+ path: driver
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: driver.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: driver.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: driver.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: driver.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: driver.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA DCGM Exporter config
+ displayName: NVIDIA DCGM Exporter config
+ path: dcgmExporter
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: dcgmExporter.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: dcgmExporter.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: dcgmExporter.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: dcgmExporter.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: dcgmExporter.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Device Plugin config
+ displayName: NVIDIA Device Plugin config
+ path: devicePlugin
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: devicePlugin.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: devicePlugin.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: devicePlugin.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: devicePlugin.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: devicePlugin.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: GPU Feature Discovery Plugin config
+ displayName: GPU Feature Discovery Plugin config
+ path: gfd
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: gfd.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: gfd.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: gfd.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: gfd.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: gfd.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - description: NVIDIA Container Toolkit config
+ displayName: NVIDIA Container Toolkit config
+ path: toolkit
+ - description: 'Optional: Set Node affinity'
+ displayName: Node affinity
+ path: toolkit.affinity
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:com.tectonic.ui:nodeAffinity'
+ - description: Node selector to control the selection of nodes (optional)
+ displayName: Node Selector
+ path: toolkit.nodeSelector
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:selector:Node'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - description: 'Optional: Set tolerations'
+ displayName: Tolerations
+ path: toolkit.tolerations
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - 'urn:alm:descriptor:io.kubernetes:Tolerations'
+ - description: Image pull secrets
+ displayName: Image pull secrets
+ path: toolkit.imagePullSecrets
+ x-descriptors:
+ - 'urn:alm:descriptor:io.kubernetes:Secret'
+ - 'urn:alm:descriptor:com.tectonic.ui:advanced'
+ - displayName: ImagePullPolicy
+ description: 'Image pull policy (default: IfNotPresent)'
+ path: toolkit.imagePullPolicy
+ x-descriptors:
+ - 'urn:alm:descriptor:com.tectonic.ui:imagePullPolicy'
+ - displayName: NVIDIA DCGM config
+ description: NVIDIA DCGM config
+ path: dcgm
+ - displayName: Validator config
+ description: Validator config
+ path: validator
+ - displayName: Node Status Exporter config
+ description: Node Status Exporter config
+ path: nodeStatusExporter
+ - displayName: Daemonsets config
+ description: Daemonsets config
+ path: daemonsets
+ - displayName: MIG config
+ description: MIG config
+ path: mig
+ - displayName: NVIDIA MIG Manager config
+ description: NVIDIA MIG Manager config
+ path: migManager
+ - displayName: PodSecurityPolicy config
+ description: PodSecurityPolicy config
+ path: psp
+ - displayName: NVIDIA GPUDirect Storage config
+ description: NVIDIA GPUDirect Storage config
+ path: gds
+ - displayName: Sandbox Workloads config
+ description: Sandbox Workloads config
+ path: sandboxWorkloads
+ - displayName: NVIDIA vGPU Manager config
+ description: NVIDIA vGPU Manager config
+ path: vgpuManager
+ - displayName: NVIDIA vGPU Device Manager config
+ description: NVIDIA vGPU Device Manager config
+ path: vgpuDeviceManager
+ - displayName: VFIO Manager config
+ description: VFIO Manager config
+ path: vfioManager
+ - displayName: NVIDIA Sandbox Device Plugin config
+ description: NVIDIA Sandbox Device Plugin config
+ path: sandboxDevicePlugin
+ statusDescriptors:
+ - description: The current state of the operator.
+ displayName: State
+ path: state
+ x-descriptors:
+ - 'urn:alm:descriptor:text'
+ displayName: NVIDIA GPU Operator
+ description: >
+ Kubernetes provides access to special hardware resources such as NVIDIA
+ GPUs, NICs, Infiniband adapters and other devices through the [device plugin
+ framework](https://kubernetes.io/docs/concepts/extend-kubernetes/compute-storage-net/device-plugins/).
+ However, configuring and managing nodes with these hardware resources
+ requires configuration of multiple software components such as drivers,
+ container runtimes or other libraries which are difficult and prone to
+ errors.
+
+ The NVIDIA GPU Operator uses the [operator
+ framework](https://cloud.redhat.com/blog/introducing-the-operator-framework) within
+ Kubernetes to automate the management of all NVIDIA software components
+ needed to provision and monitor GPUs.
+ These components include the NVIDIA drivers (to enable CUDA), Kubernetes
+ device plugin for GPUs, the NVIDIA Container Runtime, automatic node
+ labelling and NVIDIA DCGM exporter.
+
+ Visit the official site of the [GPU Operator](https://github.com/NVIDIA/gpu-operator) for more information.
+ For getting started with using the GPU Operator with OpenShift, see the instructions
+ [here](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/openshift/contents.html).
+ icon:
+ - base64data: iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAB2lBMVEUAAAD///8EBAN3uQACAgIAAAQJDQUCAgB1tgAHCQf+/v5Ufg5Hagxxqwt+xgJ3uAB9wwB4vQBRUVEeLA3e3t5nZ2coKCgODg4FBwZ9wwR6wAJ4vADz8/MbGxt5tw1vpw1/wgoOFwkLDwh9xQH5+fny8vLw8PDFxcWysrKFhYVvb282NjYyMjIqKioXFxdikxRYgxNCYxJQdhFqoQ9xrg16ugxyqgyAxQkEBQj7+/v29vbIyMhjY2NbW1tHR0cvLy8kJCQdHR0ZGRlKbxJ8uhFNcxFVgBAxSBBgkg93tQ50sA4qPg4XIg18vwsbKQsSGgsLCwsMEwqCyQeByQFztADPz8+/v7+6urqWlpZra2tKSkogICASEhJmmRE8XBA5VRA2UBBonA9biA9GaQ4sQg4jMw4mOQ0aJw2GzgsUHgttpAqJ0Ql/wQWG0AJ8vwF0uQCtra2jo6OQkJB9fX1VVVVCQkI9PT0iIiIUFBRSfBNgjhA7WRBGZw+GywmFzgaAyASBxQN2twDb29u2traenp6Kiop+fn53d3dzc3NyqRV4sxM/YBNAXRElNhBjlQ+IzA00TQ16vgxJbgp6vAl4tgJ3vgDs7Ozn5+fa2trS0tJCXRY6VBV6thSL1gf4nFdFAAAD80lEQVRYw+zSOXPaQBgG4He0LJJmbGRGDUIzuvgBQiAEPfcdwC33DTbUtmOwSyc+4iRucvzXRImLFJmRShc8xXbfu+9+szg4OHjjAsH/iFD49q7rqM6xc/wPtWyBhS8sC94ObWRCZDksh1+RzmcEfI0DoPrjylEkSTgViMs9udjYTwMG4Gf51Z1BM81ioRwit+QvgYsdUQZeKFr3ladyKXvVr+pAM5uKcmRLXFzoCIxn+0i/8lSaBMHnfi7qowfQuZnm3PuFPwGs13zD3NlViozY/z4YD6/TCQORbPr2q78GLB0ou5IO40pd5AxQZnJ83m2y9Ju2JYKfgEhWC18aEIfrZLURHwQC0B87ySZwHxX8BNDWB1KfQfyxT2TA24uPQMt8yTWA3obz8wQGlhTN06Z900MkuJLrYu3u5LkK9LTtGRF8NEDLeSnXYLUdHUFVlpPqTa4IamlhJZ464biY1w4CKGrROOW7uwLlV+Q02lanCF6cbSoPVLzUfPwDll5I9T6WyXWhZre1yjiI6VCSzCWY3+FKaAwGHngzpEygx6+V6Uzk6TJR7yhWxJ1bFgTPJ7gMc58aUCq+n+qNT6Pn8y/xOcCiZZVjnJ+AAPhEuj0SKZ9bL9ZpNS9SgM6z9p5w3jt43cMvecfWBhm7dtfEpfhYMDBYpFd7mDZIAxPCFKgBhB0hkWbE2wVMyqycfhOMEiebSzFz5IMTEjw7E87UFj4GVR7GXqaSkoIcISEc/I38/PwhOTUMRBrADgwK09zgYGUBqbwcARiQyp3Eyk6kC4BloqtbJTcaSHIHShALWFmBSRuCWBGC+AtDMAAGIpAAc9mBiB0sCLSXHUSygxSxEIoE7IKEgbhopKgogC96x04QCMMw/H0cG6f0cEmBHaLc7FFQzApoTLwtQgWUWo26glx2mzGkyoHM1PPMO/NrnSH8e2QAiRsZ8S3ZuJoW5Udg5moGoMRLN2gAnkcUctueJ1gADsdtlZ2AgmSYoaDZBXwRctcwy6HN3XX/wfnTnA7Q5x0S0Gku4wHpe7Ql8Mbtu4TqC3qcADGtUl4O3eK0AkZdKH1mU/a6MFQGA7pQGoAVoAuuPYZlLJF2BawVLLjwac6Q8wUax61/CpKQAT6ZX3hFqoqqAFvuf4AzM+NgsoBS/wcSOD7SFzyf6CE9UQK9II1MRvIJm8QSgsLiBZuypsAWKyARElgx5FcLv1N4nFLbB45Sh6+TzsQRtn7bz/B3fS9GQ12bgUE2PKycQbwgXD0SWLwVhpZFq4eHhWloOjLoqGvoRYRG
AR2vp2EtpNUaTUpiRAizMAEhKNXpYZNnAUlBCSgFYTIxQTlMMJNGwSgYBdQHAFsKs+/bUkeyAAAAAElFTkSuQmCC
+ mediatype: image/png
+ install:
+ spec:
+ clusterPermissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - nvidia.com
+ resources:
+ - clusterpolicies
+ - clusterpolicies/finalizers
+ - clusterpolicies/status
+ - nvidiadrivers
+ - nvidiadrivers/finalizers
+ - nvidiadrivers/status
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+ - deletecollection
+ - apiGroups:
+ - config.openshift.io
+ resources:
+ - clusterversions
+ - proxies
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ - create
+ - get
+ - list
+ - watch
+ - patch
+ - update
+ - delete
+ - apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+ resourceNames:
+ - hostmount-anyuid
+ - apiGroups:
+ - image.openshift.io
+ resources:
+ - imagestreams
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterroles
+ - clusterrolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - namespaces
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - ""
+ resources:
+ - nodes
+ verbs:
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - node.k8s.io
+ resources:
+ - runtimeclasses
+ verbs:
+ - get
+ - list
+ - create
+ - update
+ - watch
+ - delete
+ - apiGroups:
+ - apiextensions.k8s.io
+ resources:
+ - customresourcedefinitions
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ permissions:
+ - serviceAccountName: gpu-operator
+ rules:
+ - apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - roles
+ - rolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - apps
+ resources:
+ - controllerrevisions
+ verbs:
+ - get
+ - list
+ - watch
+ - apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - ""
+ resources:
+ - configmaps
+ - endpoints
+ - events
+ - secrets
+ - services
+ - services/finalizers
+ - serviceaccounts
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - coordination.k8s.io
+ resources:
+ - leases
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - apiGroups:
+ - monitoring.coreos.com
+ resources:
+ - servicemonitors
+ - prometheusrules
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - delete
+ deployments:
+ - name: gpu-operator
+ spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ strategy: {}
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/component: gpu-operator
+ app: gpu-operator
+ nvidia.com/gpu-driver-upgrade-drain.skip: "true"
+ spec:
+ priorityClassName: system-node-critical
+ containers:
+ - args:
+ - --leader-elect
+ - --leader-lease-renew-deadline
+ - "60s"
+ image: nvcr.io/nvidia/gpu-operator@sha256:3d741e8399519227cba0391b471fab2161501b0983e66789fabead4062d801c6
+ command:
+ - gpu-operator
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 8081
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ name: gpu-operator
+ ports:
+ - name: metrics
+ containerPort: 8080
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: 8081
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ resources:
+ limits:
+ cpu: 500m
+ memory: 1Gi
+ requests:
+ cpu: 200m
+ memory: 200Mi
+ securityContext:
+ allowPrivilegeEscalation: false
+ volumeMounts:
+ - mountPath: /host-etc/os-release
+ name: host-os-release
+ readOnly: true
+ env:
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ - name: "VALIDATOR_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gpu-operator-validator@sha256:70a0bd29259820d6257b04b0cdb6a175f9783d4dd19ccc4ec6599d407c359ba5"
+ - name: "GFD_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea"
+ - name: "CONTAINER_TOOLKIT_IMAGE"
+ value: "nvcr.io/nvidia/k8s/container-toolkit@sha256:bb6b55a5bd6419df3ca2d8ec0738b87491fc45e15587e613663890dc3a8e6e13"
+ - name: "DCGM_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/dcgm@sha256:7b0ebd6c40a11b6484dc4385605372511e4e93132a44d2a3d6ec2e36c24e6783"
+ - name: "DCGM_EXPORTER_IMAGE"
+ value: "nvcr.io/nvidia/k8s/dcgm-exporter@sha256:10ff95e83bc137796f5be76278a6b38fd31c7360e62c7e72011b428f8848c791"
+ - name: "DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/k8s-device-plugin@sha256:7089559ce6153018806857f5049085bae15b3bf6f1c8bd19d8b12f707d087dea"
+ - name: "DRIVER_IMAGE"
+ value: "nvcr.io/nvidia/driver@sha256:8a9a9e9470f64d340a7f3106a03e9622fa98e25368fbfb7ce9c416ad98f6d951"
+ - name: "DRIVER_IMAGE-535"
+ value: "nvcr.io/nvidia/driver@sha256:6b75c7534efa6ec480e8eeea625949cd74330aad287239e1abf160622c4814f3"
+ - name: "DRIVER_IMAGE-560"
+ value: "nvcr.io/nvidia/driver@sha256:38b66a8d44cab9e2c62da9e101f32cd9dbcb5e02d8e57b47671284d374ca3695"
+ - name: "DRIVER_IMAGE-565"
+ value: "nvcr.io/nvidia/driver@sha256:d55b57938866e538acc3a71ca32f8cf87e71c591abd4a34695ee428e7ec2fa73"
+ - name: "DRIVER_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-driver-manager@sha256:b072c5793be65eee556eaff1b9cbbd115a1ef29982be95b2959adfcb4bc72382"
+ - name: "MIG_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/k8s-mig-manager@sha256:40830d3561c14743f484d45b498141f9e86b1308e16fae3978110783927264ab"
+ - name: "CUDA_BASE_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190"
+ - name: "VFIO_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cuda@sha256:748a2c5178e5c5811b66183bd0ce87d9fdccf992c0ad9b1a5076841e45533190"
+ - name: "SANDBOX_DEVICE_PLUGIN_IMAGE"
+ value: "nvcr.io/nvidia/kubevirt-gpu-device-plugin@sha256:4ffa1cd2a6497eb647a89ed259dcfb007554737b9d80f69bc173a2c3cd72a1da"
+ - name: "VGPU_DEVICE_MANAGER_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/vgpu-device-manager@sha256:7edd7a0413dcb39b6e3bcefaf06812f3293c8e480ca10783e821a561ed686200"
+ - name: "GDRCOPY_IMAGE"
+ value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:cf39d78ced7fb5727a9668ee2cd44b14bb7a23a95b83d5464b7d755740e02121"
+ terminationGracePeriodSeconds: 10
+ volumes:
+ - hostPath:
+ path: /etc/os-release
+ name: host-os-release
+ serviceAccountName: gpu-operator
+ strategy: deployment
+ installModes:
+ - supported: true
+ type: OwnNamespace
+ - supported: true
+ type: SingleNamespace
+ - supported: false
+ type: MultiNamespace
+ - supported: false
+ type: AllNamespaces
+ keywords:
+ - gpu
+ - cuda
+ - compute
+ - operator
+ - deep learning
+ - monitoring
+ - tesla
+ maintainers:
+ - name: NVIDIA
+ email: operator_feedback@nvidia.com
+ maturity: stable
+ provider:
+ name: NVIDIA Corporation
+ version: 24.9.0
+ replaces: gpu-operator-certified.v24.6.2
diff --git a/bundle/v24.9.0/manifests/nvidia.com_clusterpolicies.yaml b/bundle/v24.9.0/manifests/nvidia.com_clusterpolicies.yaml
new file mode 100644
index 000000000..8ee8e9a8a
--- /dev/null
+++ b/bundle/v24.9.0/manifests/nvidia.com_clusterpolicies.yaml
@@ -0,0 +1,2384 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.16.4
+ name: clusterpolicies.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: ClusterPolicy
+ listKind: ClusterPolicyList
+ plural: clusterpolicies
+ singular: clusterpolicy
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1
+ schema:
+ openAPIV3Schema:
+ description: ClusterPolicy is the Schema for the clusterpolicies API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ClusterPolicySpec defines the desired state of ClusterPolicy
+ properties:
+ ccManager:
+ description: CCManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ defaultMode:
+ description: Default CC mode setting for compatible GPUs on the
+ node
+ enum:
+ - "on"
+ - "off"
+ - devtools
+ type: string
+ enabled:
+ description: Enabled indicates if deployment of CC Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: CC Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: CC Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: CC Manager image tag
+ type: string
+ type: object
+ cdi:
+ description: CDI configures how the Container Device Interface is
+ used in the cluster
+ properties:
+ default:
+ default: false
+ description: Default indicates whether to use CDI as the default
+ mechanism for providing GPU access to containers.
+ type: boolean
+ enabled:
+ default: false
+ description: Enabled indicates whether CDI can be used to make
+ GPUs accessible to containers.
+ type: boolean
+ type: object
+ daemonsets:
+ description: Daemonset defines common configuration for all Daemonsets
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ priorityClassName:
+ type: string
+ rollingUpdate:
+ description: 'Optional: Configuration for rolling update of all
+ DaemonSet pods'
+ properties:
+ maxUnavailable:
+ type: string
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ updateStrategy:
+ default: RollingUpdate
+ enum:
+ - RollingUpdate
+ - OnDelete
+ type: string
+ type: object
+ dcgm:
+ description: DCGM component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
+ as a separate pod is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ hostPort:
+ description: 'Deprecated: HostPort represents host port that needs
+ to be bound for DCGM engine (Default: 5555)'
+ format: int32
+ type: integer
+ image:
+ description: NVIDIA DCGM image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA DCGM image tag
+ type: string
+ type: object
+ dcgmExporter:
+ description: DCGMExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom metrics configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ name:
+ description: ConfigMap name with file dcgm-metrics.csv for
+ metrics to be collected by NVIDIA DCGM Exporter
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Exporter
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA DCGM Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ serviceMonitor:
+ description: 'Optional: ServiceMonitor configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ additionalLabels:
+ additionalProperties:
+ type: string
+ description: AdditionalLabels to add to ServiceMonitor instance
+ for NVIDIA DCGM Exporter
+ type: object
+ enabled:
+ description: Enabled indicates if ServiceMonitor is deployed
+ for NVIDIA DCGM Exporter
+ type: boolean
+ honorLabels:
+ description: HonorLabels chooses the metric’s labels on collisions
+ with target labels.
+ type: boolean
+ interval:
+ description: |-
+ Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
+ Supported units: y, w, d, h, m, s, ms
+ pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
+ type: string
+ relabelings:
+ description: Relabelings allows to rewrite labels on metric
+ sets for NVIDIA DCGM Exporter
+ items:
+ description: |-
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
+ properties:
+ action:
+ default: replace
+ description: |-
+ Action to perform based on the regex matching.
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+ Default: "Replace"
+ enum:
+ - replace
+ - Replace
+ - keep
+ - Keep
+ - drop
+ - Drop
+ - hashmod
+ - HashMod
+ - labelmap
+ - LabelMap
+ - labeldrop
+ - LabelDrop
+ - labelkeep
+ - LabelKeep
+ - lowercase
+ - Lowercase
+ - uppercase
+ - Uppercase
+ - keepequal
+ - KeepEqual
+ - dropequal
+ - DropEqual
+ type: string
+ modulus:
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+ Only applicable when the action is `HashMod`.
+ format: int64
+ type: integer
+ regex:
+ description: Regular expression against which the extracted
+ value is matched.
+ type: string
+ replacement:
+ description: |-
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+ Regex capture groups are available.
+ type: string
+ separator:
+ description: Separator is the string between concatenated
+ SourceLabels.
+ type: string
+ sourceLabels:
+ description: |-
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
+ items:
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
+ pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
+ type: string
+ type: array
+ targetLabel:
+ description: |-
+ Label to which the resulting string is written in a replacement.
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+ Regex capture groups are available.
+ type: string
+ type: object
+ type: array
+ type: object
+ version:
+ description: NVIDIA DCGM Exporter image tag
+ type: string
+ type: object
+ devicePlugin:
+ description: DevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Configuration for the NVIDIA Device Plugin
+ via the ConfigMap'
+ properties:
+ default:
+ description: Default config name within the ConfigMap for
+ the NVIDIA Device Plugin config
+ type: string
+ name:
+ description: ConfigMap name for NVIDIA Device Plugin config
+ including shared config between plugin and GFD
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Device
+ Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Device Plugin image tag
+ type: string
+ type: object
+ driver:
+ description: Driver component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for
+ NVIDIA Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Driver
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters
+ for the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU
+ licensing'
+ properties:
+ configMapName:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System
+ is used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver
+ Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ rdma:
+ description: GPUDirectRDMASpec defines the properties for nvidia-peermem
+ deployment
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled
+ through GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ configMapName:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ upgradePolicy:
+ description: Driver auto-upgrade settings
+ properties:
+ autoUpgrade:
+ default: false
+ description: |-
+ AutoUpgrade is a global switch for automatic upgrade feature
+ if set to false all other options are ignored
+ type: boolean
+ drain:
+ description: DrainSpec describes configuration for node drain
+ during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the node is drained)
+ type: boolean
+ enable:
+ default: false
+ description: Enable indicates if node draining is allowed
+ during upgrade
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force draining is allowed
+ type: boolean
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector to filter pods on the node that need to be drained
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 300
+ description: TimeoutSecond specifies the length of time
+ in seconds to wait before giving up drain, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ maxParallelUpgrades:
+ default: 1
+ description: |-
+ MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
+ 0 means no limit, all nodes will be upgraded in parallel
+ minimum: 0
+ type: integer
+ maxUnavailable:
+ anyOf:
+ - type: integer
+ - type: string
+ default: 25%
+ description: |-
+ MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
+ Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
+ Absolute number is calculated from percentage by rounding up.
+ By default, a fixed value of 25% is used.
+ x-kubernetes-int-or-string: true
+ podDeletion:
+ description: PodDeletionSpec describes configuration for deletion
+ of pods using special resources during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
+ (local data that will be deleted when the pod is deleted)
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force deletion is allowed
+ type: boolean
+ timeoutSeconds:
+ default: 300
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ waitForCompletion:
+ description: WaitForCompletionSpec describes the configuration
+ for waiting on job completions
+ properties:
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector for the pods to wait for completion
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 0
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ type: object
+ useNvidiaDriverCRD:
+ description: UseNvidiaDriverCRD indicates if the deployment of
+ NVIDIA Driver is managed by the NVIDIADriver CRD type
+ type: boolean
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA
+ Driver using pre-compiled modules is enabled
+ type: boolean
+ version:
+ description: NVIDIA Driver image tag
+ type: string
+ virtualTopology:
+ description: 'Optional: Virtual Topology Daemon configuration
+ for NVIDIA vGPU drivers'
+ properties:
+ config:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ type: object
+ gdrcopy:
+ description: GDRCopy component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ Operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GDRCopy driver image repository
+ type: string
+ version:
+ description: NVIDIA GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS components (Experimental)
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ gfd:
+ description: GPUFeatureDiscovery spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of GPU Feature Discovery
+ Plugin is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GFD image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GFD image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: GFD image tag
+ type: string
+ type: object
+ hostPaths:
+ description: HostPaths defines various paths on the host needed by
+ GPU Operator components
+ properties:
+ driverInstallDir:
+ description: |-
+ DriverInstallDir represents the root at which driver files including libraries,
+ config files, and executables can be found.
+ type: string
+ rootFS:
+ description: |-
+ RootFS represents the path to the root filesystem of the host.
+ This is used by components that need to interact with the host filesystem
+ and as such this must be a chroot-able filesystem.
+ Examples include the MIG Manager and Toolkit Container which may need to
+ stop, start, or restart systemd services.
+ type: string
+ type: object
+ kataManager:
+ description: KataManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: Kata Manager config
+ properties:
+ artifactsDir:
+ default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
+ description: |-
+ ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
+ are placed on the local filesystem.
+ type: string
+ runtimeClasses:
+ description: RuntimeClasses is a list of kata runtime classes
+ to configure.
+ items:
+ description: RuntimeClass defines the configuration for
+ a kata RuntimeClass
+ properties:
+ artifacts:
+ description: Artifacts are the kata artifacts associated
+ with the runtime class.
+ properties:
+ pullSecret:
+ description: PullSecret is the secret used to pull
+ the OCI artifact.
+ type: string
+ url:
+ description: |-
+ URL is the path to the OCI artifact (payload) containing all artifacts
+ associated with a kata runtime class.
+ type: string
+ required:
+ - url
+ type: object
+ name:
+ description: Name is the name of the kata runtime class.
+ type: string
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: |-
+ NodeSelector specifies the nodeSelector for the RuntimeClass object.
+ This ensures pods running with the RuntimeClass only get scheduled
+ onto nodes which support it.
+ type: object
+ required:
+ - artifacts
+ - name
+ type: object
+ type: array
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of Kata Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Kata Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Kata Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Kata Manager image tag
+ type: string
+ type: object
+ mig:
+ description: MIG spec
+ properties:
+ strategy:
+ description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
+ Device Plugin'
+ enum:
+ - none
+ - single
+ - mixed
+ type: string
+ type: object
+ migManager:
+ description: MIGManager for configuration to deploy MIG Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom mig-parted configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ default:
+ default: all-disabled
+ description: Default MIG config to be applied on the node,
+ when there is no config specified with the node label nvidia.com/mig.config
+ enum:
+ - all-disabled
+ - ""
+ type: string
+ name:
+ default: default-mig-parted-config
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA MIG Manager
+ is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gpuClientsConfig:
+ description: 'Optional: Custom gpu-clients configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ image:
+ description: NVIDIA MIG Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA MIG Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA MIG Manager image tag
+ type: string
+ type: object
+ nodeStatusExporter:
+ description: NodeStatusExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of Node Status Exporter
+ is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Node Status Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Node Status Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Node Status Exporter image tag
+ type: string
+ type: object
+ operator:
+ description: Operator component spec
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ defaultRuntime:
+ default: docker
+ description: Runtime defines container runtime type
+ enum:
+ - docker
+ - crio
+ - containerd
+ type: string
+ initContainer:
+ description: InitContainerSpec describes configuration for initContainer
+ image used with all components
+ properties:
+ image:
+ description: Image represents image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents image repository path
+ type: string
+ version:
+ description: Version represents image tag(version)
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ runtimeClass:
+ default: nvidia
+ type: string
+ use_ocp_driver_toolkit:
+ description: UseOpenShiftDriverToolkit indicates if DriverToolkit
+ image should be used on OpenShift to build and install driver
+ modules
+ type: boolean
+ required:
+ - defaultRuntime
+ type: object
+ psa:
+ description: PSA defines spec for PodSecurityAdmission configuration
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityAdmission configuration
+ needs to be enabled for all Pods
+ type: boolean
+ type: object
+ psp:
+ description: |-
+ Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
+ PSP defines spec for handling PodSecurityPolicies
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityPolicies needs to
+ be enabled for all Pods
+ type: boolean
+ type: object
+ sandboxDevicePlugin:
+ description: SandboxDevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Sandbox
+ Device Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Sandbox Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA Sandbox Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Sandbox Device Plugin image tag
+ type: string
+ type: object
+ sandboxWorkloads:
+ description: SandboxWorkloads defines the spec for handling sandbox
+ workloads (i.e. Virtual Machines)
+ properties:
+ defaultWorkload:
+ default: container
+ description: |-
+ DefaultWorkload indicates the default GPU workload type to configure
+ worker nodes in the cluster for
+ enum:
+ - container
+ - vm-passthrough
+ - vm-vgpu
+ type: string
+ enabled:
+ description: |-
+ Enabled indicates if the GPU Operator should manage additional operands required
+ for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
+ type: boolean
+ type: object
+ toolkit:
+ description: Toolkit component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Container
+ Toolkit through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Container Toolkit image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ installDir:
+ default: /usr/local/nvidia
+ description: Toolkit install directory on the host
+ type: string
+ repository:
+ description: NVIDIA Container Toolkit image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Container Toolkit image tag
+ type: string
+ type: object
+ validator:
+ description: Validator defines the spec for operator-validator daemonset
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ cuda:
+ description: CUDA validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ driver:
+ description: Driver validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Validator image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ plugin:
+ description: Plugin validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ repository:
+ description: Validator image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ toolkit:
+ description: Toolkit validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ version:
+ description: Validator image tag
+ type: string
+ vfioPCI:
+ description: VfioPCI validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuDevices:
+ description: VGPUDevices validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuManager:
+ description: VGPUManager validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: object
+ vfioManager:
+ description: VFIOManager for configuration to deploy VFIO-PCI Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of VFIO Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: VFIO Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: VFIO Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: VFIO Manager image tag
+ type: string
+ type: object
+ vgpuDeviceManager:
+ description: VGPUDeviceManager spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: NVIDIA vGPU devices configuration for NVIDIA vGPU
+ Device Manager container
+ properties:
+ default:
+ default: default
+ description: Default config name within the ConfigMap
+ type: string
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Device
+ Manager is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Device Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Device Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Device Manager image tag
+ type: string
+ type: object
+ vgpuManager:
+ description: VGPUManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Manager
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Manager image tag
+ type: string
+ type: object
+ required:
+ - daemonsets
+ - dcgm
+ - dcgmExporter
+ - devicePlugin
+ - driver
+ - gfd
+ - nodeStatusExporter
+ - operator
+ - toolkit
+ type: object
+ status:
+ description: ClusterPolicyStatus defines the observed state of ClusterPolicy
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the ClusterPolicy's
+ current state.
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ is installed
+ type: string
+ state:
+ description: State indicates status of ClusterPolicy
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.9.0/manifests/nvidia.com_nvidiadrivers.yaml b/bundle/v24.9.0/manifests/nvidia.com_nvidiadrivers.yaml
new file mode 100644
index 000000000..072155768
--- /dev/null
+++ b/bundle/v24.9.0/manifests/nvidia.com_nvidiadrivers.yaml
@@ -0,0 +1,797 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.16.4
+ name: nvidiadrivers.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: NVIDIADriver
+ listKind: NVIDIADriverList
+ plural: nvidiadrivers
+ shortNames:
+ - nvd
+ - nvdriver
+ - nvdrivers
+ singular: nvidiadriver
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: NVIDIADriver is the Schema for the nvidiadrivers API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: NVIDIADriverSpec defines the desired state of NVIDIADriver
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for NVIDIA
+ Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ driverType:
+ default: gpu
+ description: DriverType defines NVIDIA driver type
+ enum:
+ - gpu
+ - vgpu
+ - vgpu-host-manager
+ type: string
+ x-kubernetes-validations:
+ - message: driverType is an immutable field. Please create a new NvidiaDriver
+ resource instead when you want to change this setting.
+ rule: self == oldSelf
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present in
+ a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gdrcopy:
+ description: GDRCopy defines the spec for GDRCopy driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GDRCopy driver image repository
+ type: string
+ version:
+ description: GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS driver
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ image:
+ default: nvcr.io/nvidia/driver
+ description: NVIDIA Driver container image name
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters for
+ the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU licensing'
+ properties:
+ name:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System is
+ used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver Manager
+ initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image tag (version)
+ type: string
+ type: object
+ nodeAffinity:
+ description: Affinity specifies node affinity rules for driver pods
+ properties:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ The scheduler will prefer to schedule pods to nodes that satisfy
+ the affinity expressions specified by this field, but it may choose
+ a node that violates one or more of the expressions. The node that is
+ most preferred is the one with the greatest sum of weights, i.e.
+ for each node that meets all of the scheduling requirements (resource
+ request, requiredDuringScheduling affinity expressions, etc.),
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node matches the corresponding matchExpressions; the
+ node(s) with the highest sum are the most preferred.
+ items:
+ description: |-
+ An empty preferred scheduling term matches all objects with implicit weight 0
+ (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op).
+ properties:
+ preference:
+ description: A node selector term, associated with the corresponding
+ weight.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ weight:
+ description: Weight associated with matching the corresponding
+ nodeSelectorTerm, in the range 1-100.
+ format: int32
+ type: integer
+ required:
+ - preference
+ - weight
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ requiredDuringSchedulingIgnoredDuringExecution:
+ description: |-
+ If the affinity requirements specified by this field are not met at
+ scheduling time, the pod will not be scheduled onto the node.
+ If the affinity requirements specified by this field cease to be met
+ at some point during pod execution (e.g. due to an update), the system
+ may or may not try to eventually evict the pod from its node.
+ properties:
+ nodeSelectorTerms:
+ description: Required. A list of node selector terms. The
+ terms are ORed.
+ items:
+ description: |-
+ A null or empty node selector term matches no objects. The requirements of
+ them are ANDed.
+ The TopologySelectorTerm type implements a subset of the NodeSelectorTerm.
+ properties:
+ matchExpressions:
+ description: A list of node selector requirements by
+ node's labels.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchFields:
+ description: A list of node selector requirements by
+ node's fields.
+ items:
+ description: |-
+ A node selector requirement is a selector that contains values, a key, and an operator
+ that relates the key and values.
+ properties:
+ key:
+ description: The label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ Represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt.
+ type: string
+ values:
+ description: |-
+ An array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. If the operator is Gt or Lt, the values
+ array must have a single element, which will be interpreted as an integer.
+ This array is replaced during a strategic merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - nodeSelectorTerms
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: NodeSelector specifies a selector for installation of
+ NVIDIA driver
+ type: object
+ priorityClassName:
+ description: 'Optional: Set priorityClassName'
+ type: string
+ rdma:
+ description: GPUDirectRDMA defines the spec for NVIDIA Peer Memory
+ driver
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled through
+ GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ name:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for each
+ pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple using the matching operator .
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA Driver
+ using pre-compiled modules is enabled
+ type: boolean
+ x-kubernetes-validations:
+ - message: usePrecompiled is an immutable field. Please create a new
+ NvidiaDriver resource instead when you want to change this setting.
+ rule: self == oldSelf
+ version:
+ description: NVIDIA Driver version (or just branch for precompiled
+ drivers)
+ type: string
+ virtualTopologyConfig:
+ description: 'Optional: Virtual Topology Daemon configuration for
+ NVIDIA vGPU drivers'
+ properties:
+ name:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ required:
+ - driverType
+ - image
+ type: object
+ status:
+ description: NVIDIADriverStatus defines the observed state of NVIDIADriver
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the NVIDIADriver's
+ current state.
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ and driver are installed
+ type: string
+ state:
+ description: |-
+ INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
+ Important: Run "make" to regenerate code after modifying this file
+ State indicates status of NVIDIADriver instance
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/bundle/v24.9.0/metadata/annotations.yaml b/bundle/v24.9.0/metadata/annotations.yaml
new file mode 100644
index 000000000..50d49a56b
--- /dev/null
+++ b/bundle/v24.9.0/metadata/annotations.yaml
@@ -0,0 +1,17 @@
+annotations:
+ operators.operatorframework.io.bundle.channels.v1: stable,v24.9
+ operators.operatorframework.io.bundle.channel.default.v1: v24.9
+ operators.operatorframework.io.bundle.manifests.v1: manifests/
+ operators.operatorframework.io.bundle.mediatype.v1: registry+v1
+ operators.operatorframework.io.bundle.metadata.v1: metadata/
+ operators.operatorframework.io.bundle.package.v1: gpu-operator-certified
+ operators.operatorframework.io.metrics.builder: operator-sdk-v1.4.0
+ operators.operatorframework.io.metrics.mediatype.v1: metrics+v1
+ operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3
+ operators.operatorframework.io.test.config.v1: tests/scorecard/
+ operators.operatorframework.io.test.mediatype.v1: scorecard+v1
+ operatorframework.io/cluster-monitoring: "true"
+ operatorframework.io/suggested-namespace: nvidia-gpu-operator
+
+ # Annotation specifying the range of compatible OpenShift (OCP) versions.
+ com.redhat.openshift.versions: v4.12-v4.17
diff --git a/cmd/gpu-operator/main.go b/cmd/gpu-operator/main.go
index 4f55e46ec..076362c12 100644
--- a/cmd/gpu-operator/main.go
+++ b/cmd/gpu-operator/main.go
@@ -27,7 +27,12 @@ import (
// to ensure that exec-entrypoint and run can make use of them.
"go.uber.org/zap/zapcore"
_ "k8s.io/client-go/plugin/pkg/client/auth"
+ "sigs.k8s.io/controller-runtime/pkg/cache"
+ apiconfigv1 "github.com/openshift/api/config/v1"
+ apiimagev1 "github.com/openshift/api/image/v1"
+ secv1 "github.com/openshift/api/security/v1"
+ promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -41,10 +46,11 @@ import (
"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
- clusterpolicyv1 "github.com/NVIDIA/gpu-operator/api/v1"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ clusterpolicyv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/controllers"
"github.com/NVIDIA/gpu-operator/controllers/clusterinfo"
+ "github.com/NVIDIA/gpu-operator/internal/consts"
"github.com/NVIDIA/gpu-operator/internal/info"
// +kubebuilder:scaffold:imports
)
@@ -56,11 +62,13 @@ var (
func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
-
utilruntime.Must(clusterpolicyv1.AddToScheme(scheme))
utilruntime.Must(apiextensionsv1.AddToScheme(scheme))
utilruntime.Must(nvidiav1alpha1.AddToScheme(scheme))
- // +kubebuilder:scaffold:scheme
+ utilruntime.Must(promv1.AddToScheme(scheme))
+ utilruntime.Must(secv1.Install(scheme))
+ utilruntime.Must(apiconfigv1.Install(scheme))
+ utilruntime.Must(apiimagev1.Install(scheme))
}
func main() {
@@ -98,6 +106,16 @@ func main() {
Port: 9443,
})
+ operatorNamespace := os.Getenv("OPERATOR_NAMESPACE")
+ openshiftNamespace := consts.OpenshiftNamespace
+ cacheOptions := cache.Options{
+ DefaultNamespaces: map[string]cache.Config{
+ operatorNamespace: {},
+ // Also cache resources in the openshift namespace to retrieve ImageStreams when on an openshift cluster
+ openshiftNamespace: {},
+ },
+ }
+
options := ctrl.Options{
Scheme: scheme,
Metrics: metricsOptions,
@@ -105,6 +123,7 @@ func main() {
LeaderElection: enableLeaderElection,
LeaderElectionID: "53822513.nvidia.com",
WebhookServer: webhookServer,
+ Cache: cacheOptions,
}
if enableLeaderElection && int(renewDeadline) != 0 {
diff --git a/cmd/gpuop-cfg/validate/clusterpolicy/clusterpolicy.go b/cmd/gpuop-cfg/validate/clusterpolicy/clusterpolicy.go
index c82e9794c..75c44f7bd 100644
--- a/cmd/gpuop-cfg/validate/clusterpolicy/clusterpolicy.go
+++ b/cmd/gpuop-cfg/validate/clusterpolicy/clusterpolicy.go
@@ -25,7 +25,7 @@ import (
"github.com/urfave/cli/v2"
"sigs.k8s.io/yaml"
- v1 "github.com/NVIDIA/gpu-operator/api/v1"
+ v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
)
type command struct {
diff --git a/cmd/gpuop-cfg/validate/clusterpolicy/images.go b/cmd/gpuop-cfg/validate/clusterpolicy/images.go
index 4f315b95c..1383a6c50 100644
--- a/cmd/gpuop-cfg/validate/clusterpolicy/images.go
+++ b/cmd/gpuop-cfg/validate/clusterpolicy/images.go
@@ -23,7 +23,7 @@ import (
"github.com/regclient/regclient"
"github.com/regclient/regclient/types/ref"
- v1 "github.com/NVIDIA/gpu-operator/api/v1"
+ v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
)
var client = regclient.New()
diff --git a/cmd/gpuop-cfg/validate/csv/alm-examples.go b/cmd/gpuop-cfg/validate/csv/alm-examples.go
index feec0bfda..d853a470f 100644
--- a/cmd/gpuop-cfg/validate/csv/alm-examples.go
+++ b/cmd/gpuop-cfg/validate/csv/alm-examples.go
@@ -20,15 +20,15 @@ import (
"fmt"
"github.com/operator-framework/api/pkg/operators/v1alpha1"
- "sigs.k8s.io/json"
+ "k8s.io/apimachinery/pkg/util/json"
- v1 "github.com/NVIDIA/gpu-operator/api/v1"
+ v1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
)
func validateALMExample(csv *v1alpha1.ClusterServiceVersion) error {
cpList := []v1.ClusterPolicy{}
example := csv.Annotations["alm-examples"]
- err := json.UnmarshalCaseSensitivePreserveInts([]byte(example), &cpList)
+ err := json.Unmarshal([]byte(example), &cpList)
if err != nil {
return err
}
diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml
index 16e35bf4b..54e4a652b 100644
--- a/config/crd/bases/nvidia.com_clusterpolicies.yaml
+++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
- controller-gen.kubebuilder.io/version: v0.14.0
+ controller-gen.kubebuilder.io/version: v0.16.5
name: clusterpolicies.nvidia.com
spec:
group: nvidia.com
@@ -248,8 +248,8 @@ spec:
type: object
type: array
hostPort:
- description: 'HostPort represents host port that needs to be bound
- for DCGM engine (Default: 5555)'
+ description: 'Deprecated: HostPort represents host port that needs
+ to be bound for DCGM engine (Default: 5555)'
format: int32
type: integer
image:
@@ -410,15 +410,20 @@ spec:
sets for NVIDIA DCGM Exporter
items:
description: |-
- RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion.
- It defines ``-section of Prometheus configuration.
- More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
properties:
action:
default: replace
description: |-
- Action to perform based on regex matching. Default is 'replace'.
- uppercase and lowercase actions require Prometheus >= 2.36.
+ Action to perform based on the regex matching.
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+ Default: "Replace"
enum:
- replace
- Replace
@@ -444,39 +449,47 @@ spec:
- DropEqual
type: string
modulus:
- description: Modulus to take of the hash of the source
- label values.
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+ Only applicable when the action is `HashMod`.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
- value is matched. Default is '(.*)'
+ value is matched.
type: string
replacement:
description: |-
- Replacement value against which a regex replace is performed if the
- regular expression matches. Regex capture groups are available. Default is '$1'
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+ Regex capture groups are available.
type: string
separator:
- description: Separator placed between concatenated source
- label values. default is ';'.
+ description: Separator is the string between concatenated
+ SourceLabels.
type: string
sourceLabels:
description: |-
- The source labels select values from existing labels. Their content is concatenated
- using the configured separator and matched against the configured regular expression
- for the replace, keep, and drop actions.
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
items:
- description: LabelName is a valid Prometheus label
- name which may only contain ASCII letters, numbers,
- as well as underscores.
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
type: string
type: array
targetLabel:
description: |-
- Label to which the resulting value is written in a replace action.
- It is mandatory for replace actions. Regex capture groups are available.
+ Label to which the resulting string is written in a replacement.
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+ Regex capture groups are available.
type: string
type: object
type: array
@@ -538,6 +551,15 @@ spec:
items:
type: string
type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
repository:
description: NVIDIA Device Plugin image repository
type: string
@@ -926,9 +948,9 @@ spec:
type: boolean
timeoutSeconds:
default: 300
- description: TimeoutSecond specifies the length of time
- in seconds to wait before giving up on pod termination,
- zero means infinite
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
minimum: 0
type: integer
type: object
@@ -944,9 +966,9 @@ spec:
type: string
timeoutSeconds:
default: 0
- description: TimeoutSecond specifies the length of time
- in seconds to wait before giving up on pod termination,
- zero means infinite
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
minimum: 0
type: integer
type: object
@@ -1146,6 +1168,24 @@ spec:
description: GFD image tag
type: string
type: object
+ hostPaths:
+ description: HostPaths defines various paths on the host needed by
+ GPU Operator components
+ properties:
+ driverInstallDir:
+ description: |-
+ DriverInstallDir represents the root at which driver files including libraries,
+ config files, and executables can be found.
+ type: string
+ rootFS:
+ description: |-
+ RootFS represents the path to the root filesystem of the host.
+ This is used by components that need to interact with the host filesystem
+ and as such this must be a chroot-able filesystem.
+ Examples include the MIG Manager and Toolkit Container which may need to
+ stop, start, or restart systemd services.
+ type: string
+ type: object
kataManager:
description: KataManager component spec
properties:
@@ -2269,16 +2309,8 @@ spec:
description: Conditions is a list of conditions representing the ClusterPolicy's
current state.
items:
- description: "Condition contains details for one aspect of the current
- state of this API Resource.\n---\nThis struct is intended for
- direct use as an array at the field path .status.conditions. For
- example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
- observations of a foo's current state.\n\t // Known .status.conditions.type
- are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
- +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
- \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
- patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
- \ // other fields\n\t}"
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
properties:
lastTransitionTime:
description: |-
@@ -2319,12 +2351,7 @@ spec:
- Unknown
type: string
type:
- description: |-
- type of condition in CamelCase or in foo.example.com/CamelCase.
- ---
- Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
- useful (see .node.status.conditions), the ability to deconflict is important.
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
diff --git a/config/crd/bases/nvidia.com_nvidiadrivers.yaml b/config/crd/bases/nvidia.com_nvidiadrivers.yaml
index 317972fd2..c49059a38 100644
--- a/config/crd/bases/nvidia.com_nvidiadrivers.yaml
+++ b/config/crd/bases/nvidia.com_nvidiadrivers.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
- controller-gen.kubebuilder.io/version: v0.14.0
+ controller-gen.kubebuilder.io/version: v0.16.5
name: nvidiadrivers.nvidia.com
spec:
group: nvidia.com
@@ -357,11 +357,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
@@ -389,11 +391,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
weight:
@@ -406,6 +410,7 @@ spec:
- weight
type: object
type: array
+ x-kubernetes-list-type: atomic
requiredDuringSchedulingIgnoredDuringExecution:
description: |-
If the affinity requirements specified by this field are not met at
@@ -450,11 +455,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
@@ -482,14 +489,17 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
type: array
+ x-kubernetes-list-type: atomic
required:
- nodeSelectorTerms
type: object
@@ -709,16 +719,8 @@ spec:
description: Conditions is a list of conditions representing the NVIDIADriver's
current state.
items:
- description: "Condition contains details for one aspect of the current
- state of this API Resource.\n---\nThis struct is intended for
- direct use as an array at the field path .status.conditions. For
- example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
- observations of a foo's current state.\n\t // Known .status.conditions.type
- are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
- +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
- \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
- patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
- \ // other fields\n\t}"
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
properties:
lastTransitionTime:
description: |-
@@ -759,12 +761,7 @@ spec:
- Unknown
type: string
type:
- description: |-
- type of condition in CamelCase or in foo.example.com/CamelCase.
- ---
- Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
- useful (see .node.status.conditions), the ability to deconflict is important.
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index a21a8607d..7a631b9e3 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -8,25 +8,14 @@ rules:
- ""
resources:
- configmaps
+ - endpoints
- events
+ - namespaces
- nodes
- persistentvolumeclaims
- - secrets
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ""
- resources:
- - endpoints
- - namespaces
- pods
- pods/eviction
+ - secrets
- serviceaccounts
- services
- services/finalizers
@@ -38,22 +27,6 @@ rules:
- patch
- update
- watch
-- apiGroups:
- - ""
- resources:
- - nodes
- verbs:
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - ""
- resources:
- - pods
- verbs:
- - list
- apiGroups:
- apiextensions.k8s.io
resources:
@@ -159,17 +132,6 @@ rules:
- nvidia.com
resources:
- '*'
- verbs:
- - create
- - delete
- - get
- - list
- - patch
- - update
- - watch
-- apiGroups:
- - nvidia.com
- resources:
- nvidiadrivers
verbs:
- create
diff --git a/config/samples/nvidia_v1alpha1_nvidiadriver.yaml b/config/samples/nvidia_v1alpha1_nvidiadriver.yaml
index c49b2156c..a200a0f61 100644
--- a/config/samples/nvidia_v1alpha1_nvidiadriver.yaml
+++ b/config/samples/nvidia_v1alpha1_nvidiadriver.yaml
@@ -8,7 +8,7 @@ spec:
driverType: gpu
repository: nvcr.io/nvidia
image: driver
- version: "535.154.05"
+ version: "550.127.08"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
nodeSelector: {}
diff --git a/controllers/clusterinfo/clusterinfo.go b/controllers/clusterinfo/clusterinfo.go
index 8f0cec78c..b1de7fe43 100644
--- a/controllers/clusterinfo/clusterinfo.go
+++ b/controllers/clusterinfo/clusterinfo.go
@@ -341,7 +341,7 @@ func getOpenshiftDTKImages(ctx context.Context, c *rest.Config) map[string]strin
logger := log.FromContext(ctx)
name := "driver-toolkit"
- namespace := "openshift"
+ namespace := consts.OpenshiftNamespace
ocpImageClient, err := imagesv1.NewForConfig(c)
if err != nil {
diff --git a/controllers/clusterpolicy_controller.go b/controllers/clusterpolicy_controller.go
index 30b7dec0c..11064efa2 100644
--- a/controllers/clusterpolicy_controller.go
+++ b/controllers/clusterpolicy_controller.go
@@ -43,7 +43,7 @@ import (
"github.com/NVIDIA/k8s-operator-libs/pkg/consts"
- gpuv1 "github.com/NVIDIA/gpu-operator/api/v1"
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
"github.com/NVIDIA/gpu-operator/internal/conditions"
)
@@ -253,10 +253,10 @@ func updateCRState(ctx context.Context, r *ClusterPolicyReconciler, namespacedNa
}
}
-func addWatchNewGPUNode(ctx context.Context, r *ClusterPolicyReconciler, c controller.Controller, mgr ctrl.Manager) error {
+func addWatchNewGPUNode(r *ClusterPolicyReconciler, c controller.Controller, mgr ctrl.Manager) error {
// Define a mapping from the Node object in the event to one or more
// ClusterPolicy objects to Reconcile
- mapFn := func(ctx context.Context, a client.Object) []reconcile.Request {
+ mapFn := func(ctx context.Context, n *corev1.Node) []reconcile.Request {
// find all the ClusterPolicy to trigger their reconciliation
opts := []client.ListOption{} // Namespace = "" to list across all namespaces.
list := &gpuv1.ClusterPolicyList{}
@@ -280,13 +280,13 @@ func addWatchNewGPUNode(ctx context.Context, r *ClusterPolicyReconciler, c contr
return cpToRec
}
- p := predicate.Funcs{
- CreateFunc: func(e event.CreateEvent) bool {
+ p := predicate.TypedFuncs[*corev1.Node]{
+ CreateFunc: func(e event.TypedCreateEvent[*corev1.Node]) bool {
labels := e.Object.GetLabels()
return hasGPULabels(labels)
},
- UpdateFunc: func(e event.UpdateEvent) bool {
+ UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Node]) bool {
newLabels := e.ObjectNew.GetLabels()
oldLabels := e.ObjectOld.GetLabels()
nodeName := e.ObjectNew.GetName()
@@ -324,7 +324,7 @@ func addWatchNewGPUNode(ctx context.Context, r *ClusterPolicyReconciler, c contr
}
return needsUpdate
},
- DeleteFunc: func(e event.DeleteEvent) bool {
+ DeleteFunc: func(e event.TypedDeleteEvent[*corev1.Node]) bool {
// if an RHCOS GPU node is deleted, trigger a
// reconciliation to ensure that there is no dangling
// OpenShift Driver-Toolkit (RHCOS version-specific)
@@ -341,9 +341,12 @@ func addWatchNewGPUNode(ctx context.Context, r *ClusterPolicyReconciler, c contr
}
err := c.Watch(
- source.Kind(mgr.GetCache(), &corev1.Node{}),
- handler.EnqueueRequestsFromMapFunc(mapFn),
- p)
+ source.Kind(mgr.GetCache(),
+ &corev1.Node{},
+ handler.TypedEnqueueRequestsFromMapFunc[*corev1.Node](mapFn),
+ p,
+ ),
+ )
return err
}
@@ -351,7 +354,8 @@ func addWatchNewGPUNode(ctx context.Context, r *ClusterPolicyReconciler, c contr
// SetupWithManager sets up the controller with the Manager.
func (r *ClusterPolicyReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
// Create a new controller
- c, err := controller.New("clusterpolicy-controller", mgr, controller.Options{Reconciler: r, MaxConcurrentReconciles: 1, RateLimiter: workqueue.NewItemExponentialFailureRateLimiter(minDelayCR, maxDelayCR)})
+ c, err := controller.New("clusterpolicy-controller", mgr, controller.Options{Reconciler: r, MaxConcurrentReconciles: 1,
+ RateLimiter: workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](minDelayCR, maxDelayCR)})
if err != nil {
return err
}
@@ -360,20 +364,32 @@ func (r *ClusterPolicyReconciler) SetupWithManager(ctx context.Context, mgr ctrl
r.conditionUpdater = conditions.NewClusterPolicyUpdater(mgr.GetClient())
// Watch for changes to primary resource ClusterPolicy
- err = c.Watch(source.Kind(mgr.GetCache(), &gpuv1.ClusterPolicy{}), &handler.EnqueueRequestForObject{}, predicate.GenerationChangedPredicate{})
+ err = c.Watch(source.Kind(
+ mgr.GetCache(),
+ &gpuv1.ClusterPolicy{},
+ &handler.TypedEnqueueRequestForObject[*gpuv1.ClusterPolicy]{},
+ predicate.TypedGenerationChangedPredicate[*gpuv1.ClusterPolicy]{},
+ ),
+ )
if err != nil {
return err
}
// Watch for changes to Node labels and requeue the owner ClusterPolicy
- err = addWatchNewGPUNode(ctx, r, c, mgr)
+ err = addWatchNewGPUNode(r, c, mgr)
if err != nil {
return err
}
// TODO(user): Modify this to be the types you create that are owned by the primary resource
// Watch for changes to secondary resource Daemonsets and requeue the owner ClusterPolicy
- err = c.Watch(source.Kind(mgr.GetCache(), &appsv1.DaemonSet{}), handler.EnqueueRequestForOwner(mgr.GetScheme(), mgr.GetRESTMapper(), &gpuv1.ClusterPolicy{}, handler.OnlyControllerOwner()))
+ err = c.Watch(
+ source.Kind(mgr.GetCache(),
+ &appsv1.DaemonSet{},
+ handler.TypedEnqueueRequestForOwner[*appsv1.DaemonSet](mgr.GetScheme(), mgr.GetRESTMapper(), &gpuv1.ClusterPolicy{},
+ handler.OnlyControllerOwner()),
+ ),
+ )
if err != nil {
return err
}
@@ -395,7 +411,7 @@ func (r *ClusterPolicyReconciler) SetupWithManager(ctx context.Context, mgr ctrl
if owner == nil {
return nil
}
- if owner.APIVersion != gpuv1.GroupVersion.String() || owner.Kind != "ClusterPolicy" {
+ if owner.APIVersion != gpuv1.SchemeGroupVersion.String() || owner.Kind != "ClusterPolicy" {
return nil
}
return []string{owner.Name}
diff --git a/controllers/nvidiadriver_controller.go b/controllers/nvidiadriver_controller.go
index 41d5f5930..90734b143 100644
--- a/controllers/nvidiadriver_controller.go
+++ b/controllers/nvidiadriver_controller.go
@@ -39,8 +39,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
- gpuv1 "github.com/NVIDIA/gpu-operator/api/v1"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/controllers/clusterinfo"
"github.com/NVIDIA/gpu-operator/internal/conditions"
"github.com/NVIDIA/gpu-operator/internal/consts"
@@ -256,21 +256,54 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
c, err := controller.New("nvidia-driver-controller", mgr, controller.Options{
Reconciler: r,
MaxConcurrentReconciles: 1,
- RateLimiter: workqueue.NewItemExponentialFailureRateLimiter(minDelayCR, maxDelayCR),
+ RateLimiter: workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](minDelayCR, maxDelayCR),
})
if err != nil {
return err
}
// Watch for changes to the primary resource NVIDIaDriver
- err = c.Watch(source.Kind(mgr.GetCache(), &nvidiav1alpha1.NVIDIADriver{}), &handler.EnqueueRequestForObject{}, predicate.GenerationChangedPredicate{})
+ err = c.Watch(source.Kind(
+ mgr.GetCache(),
+ &nvidiav1alpha1.NVIDIADriver{},
+ &handler.TypedEnqueueRequestForObject[*nvidiav1alpha1.NVIDIADriver]{},
+ predicate.TypedGenerationChangedPredicate[*nvidiav1alpha1.NVIDIADriver]{},
+ ),
+ )
if err != nil {
return err
}
// Watch for changes to ClusterPolicy. Whenever an event is generated for ClusterPolicy, enqueue
// a reconcile request for all NVIDIADriver instances.
- mapFn := func(ctx context.Context, a client.Object) []reconcile.Request {
+ mapFn := func(ctx context.Context, cp *gpuv1.ClusterPolicy) []reconcile.Request {
+ logger := log.FromContext(ctx)
+ opts := []client.ListOption{}
+ list := &nvidiav1alpha1.NVIDIADriverList{}
+
+ err := mgr.GetClient().List(ctx, list, opts...)
+ if err != nil {
+ logger.Error(err, "Unable to list NVIDIADriver resources")
+ return []reconcile.Request{}
+ }
+
+ reconcileRequests := []reconcile.Request{}
+ for _, nvidiaDriver := range list.Items {
+ reconcileRequests = append(reconcileRequests,
+ reconcile.Request{
+ NamespacedName: types.NamespacedName{
+ Name: nvidiaDriver.ObjectMeta.GetName(),
+ Namespace: nvidiaDriver.ObjectMeta.GetNamespace(),
+ },
+ })
+ }
+
+ return reconcileRequests
+ }
+
+	// Watch for changes to Nodes. Whenever an event is generated for a Node, enqueue
+	// a reconcile request for all NVIDIADriver instances.
+ nodeMapFn := func(ctx context.Context, cp *corev1.Node) []reconcile.Request {
logger := log.FromContext(ctx)
opts := []client.ListOption{}
list := &nvidiav1alpha1.NVIDIADriverList{}
@@ -296,20 +329,23 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
}
err = c.Watch(
- source.Kind(mgr.GetCache(), &gpuv1.ClusterPolicy{}),
- handler.EnqueueRequestsFromMapFunc(mapFn),
- predicate.GenerationChangedPredicate{},
+ source.Kind(
+ mgr.GetCache(),
+ &gpuv1.ClusterPolicy{},
+ handler.TypedEnqueueRequestsFromMapFunc[*gpuv1.ClusterPolicy](mapFn),
+ predicate.TypedGenerationChangedPredicate[*gpuv1.ClusterPolicy]{},
+ ),
)
if err != nil {
return err
}
- nodePredicate := predicate.Funcs{
- CreateFunc: func(e event.CreateEvent) bool {
+ nodePredicate := predicate.TypedFuncs[*corev1.Node]{
+ CreateFunc: func(e event.TypedCreateEvent[*corev1.Node]) bool {
labels := e.Object.GetLabels()
return hasGPULabels(labels)
},
- UpdateFunc: func(e event.UpdateEvent) bool {
+ UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Node]) bool {
logger := log.FromContext(ctx)
newLabels := e.ObjectNew.GetLabels()
oldLabels := e.ObjectOld.GetLabels()
@@ -324,7 +360,7 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
}
return needsUpdate
},
- DeleteFunc: func(e event.DeleteEvent) bool {
+ DeleteFunc: func(e event.TypedDeleteEvent[*corev1.Node]) bool {
labels := e.Object.GetLabels()
return hasGPULabels(labels)
},
@@ -332,9 +368,11 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
// Watch for changes to node labels
err = c.Watch(
- source.Kind(mgr.GetCache(), &corev1.Node{}),
- handler.EnqueueRequestsFromMapFunc(mapFn),
- nodePredicate,
+ source.Kind(mgr.GetCache(),
+ &corev1.Node{},
+ handler.TypedEnqueueRequestsFromMapFunc[*corev1.Node](nodeMapFn),
+ nodePredicate,
+ ),
)
if err != nil {
return err
@@ -342,20 +380,9 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
// Watch for changes to secondary resources which each state manager manages
watchSources := stateManager.GetWatchSources(mgr)
- nvDriverPredicate, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{MatchLabels: map[string]string{AppComponentLabelKey: AppComponentLabelValue}})
- if err != nil {
- return fmt.Errorf("failed to create labelSelector predicate: %w", err)
- }
for _, watchSource := range watchSources {
err = c.Watch(
watchSource,
- handler.EnqueueRequestForOwner(
- mgr.GetScheme(),
- mgr.GetRESTMapper(),
- &nvidiav1alpha1.NVIDIADriver{},
- handler.OnlyControllerOwner(),
- ),
- nvDriverPredicate,
)
if err != nil {
return fmt.Errorf("error setting up Watch for source type %v: %w", watchSource, err)
@@ -369,7 +396,7 @@ func (r *NVIDIADriverReconciler) SetupWithManager(ctx context.Context, mgr ctrl.
if owner == nil {
return nil
}
- if owner.APIVersion != nvidiav1alpha1.GroupVersion.String() || owner.Kind != nvidiav1alpha1.NVIDIADriverCRDName {
+ if owner.APIVersion != nvidiav1alpha1.SchemeGroupVersion.String() || owner.Kind != nvidiav1alpha1.NVIDIADriverCRDName {
return nil
}
return []string{owner.Name}
diff --git a/controllers/object_controls.go b/controllers/object_controls.go
index b66b0b630..1b1801391 100644
--- a/controllers/object_controls.go
+++ b/controllers/object_controls.go
@@ -32,7 +32,6 @@ import (
"path/filepath"
"github.com/davecgh/go-spew/spew"
- "github.com/mitchellh/hashstructure"
apiconfigv1 "github.com/openshift/api/config/v1"
apiimagev1 "github.com/openshift/api/image/v1"
secv1 "github.com/openshift/api/security/v1"
@@ -52,7 +51,9 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/yaml"
- gpuv1 "github.com/NVIDIA/gpu-operator/api/v1"
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ "github.com/NVIDIA/gpu-operator/internal/consts"
+ "github.com/NVIDIA/gpu-operator/internal/utils"
)
const (
@@ -112,8 +113,8 @@ const (
MigDefaultGPUClientsConfigMapName = "default-gpu-clients"
// DCGMRemoteEngineEnvName indicates env name to specify remote DCGM host engine ip:port
DCGMRemoteEngineEnvName = "DCGM_REMOTE_HOSTENGINE_INFO"
- // DCGMDefaultHostPort indicates default host port bound to DCGM host engine
- DCGMDefaultHostPort = 5555
+ // DCGMDefaultPort indicates default port bound to DCGM host engine
+ DCGMDefaultPort = 5555
// GPUDirectRDMAEnabledEnvName indicates if GPU direct RDMA is enabled through GPU operator
GPUDirectRDMAEnabledEnvName = "GPU_DIRECT_RDMA_ENABLED"
// UseHostMOFEDEnvName indicates if MOFED driver is pre-installed on the host
@@ -146,8 +147,8 @@ const (
NvidiaCtrRuntimeCDIPrefixesEnvName = "NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"
// CDIEnabledEnvName is the name of the envvar used to enable CDI in the operands
CDIEnabledEnvName = "CDI_ENABLED"
- // NvidiaCTKPathEnvName is the name of the envvar specifying the path to the 'nvidia-ctk' binary
- NvidiaCTKPathEnvName = "NVIDIA_CTK_PATH"
+ // NvidiaCDIHookPathEnvName is the name of the envvar specifying the path to the 'nvidia-cdi-hook' binary
+ NvidiaCDIHookPathEnvName = "NVIDIA_CDI_HOOK_PATH"
// CrioConfigModeEnvName is the name of the envvar controlling how the toolkit container updates the cri-o configuration
CrioConfigModeEnvName = "CRIO_CONFIG_MODE"
// DeviceListStrategyEnvName is the name of the envvar for configuring the device-list-strategy in the device-plugin
@@ -164,6 +165,19 @@ const (
DefaultCCModeEnvName = "DEFAULT_CC_MODE"
// OpenKernelModulesEnabledEnvName is the name of the driver-container envvar for enabling open GPU kernel module support
OpenKernelModulesEnabledEnvName = "OPEN_KERNEL_MODULES_ENABLED"
+ // MPSRootEnvName is the name of the envvar for configuring the MPS root
+ MPSRootEnvName = "MPS_ROOT"
+ // DefaultMPSRoot is the default MPS root path on the host
+ DefaultMPSRoot = "/run/nvidia/mps"
+ // HostRootEnvName is the name of the envvar representing the root path of the underlying host
+ HostRootEnvName = "HOST_ROOT"
+ // DefaultDriverInstallDir represents the default path of a driver container installation
+ DefaultDriverInstallDir = "/run/nvidia/driver"
+ // DriverInstallDirEnvName is the name of the envvar used by the driver-validator to represent the driver install dir
+ DriverInstallDirEnvName = "DRIVER_INSTALL_DIR"
+ // DriverInstallDirCtrPathEnvName is the name of the envvar used by the driver-validator to represent the path
+ // of the driver install dir mounted in the container
+ DriverInstallDirCtrPathEnvName = "DRIVER_INSTALL_DIR_CTR_PATH"
)
// ContainerProbe defines container probe types
@@ -276,11 +290,11 @@ func ServiceAccount(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].ServiceAccount.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("ServiceAccount", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("ServiceAccount", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -288,11 +302,11 @@ func ServiceAccount(n ClusterPolicyController) (gpuv1.State, error) {
return gpuv1.Disabled, nil
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
- if err := n.rec.Client.Create(ctx, obj); err != nil {
+ if err := n.client.Create(ctx, obj); err != nil {
if apierrors.IsAlreadyExists(err) {
logger.Info("Found Resource, skipping update")
return gpuv1.Ready, nil
@@ -311,11 +325,11 @@ func Role(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].Role.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("Role", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("Role", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -323,14 +337,14 @@ func Role(n ClusterPolicyController) (gpuv1.State, error) {
return gpuv1.Disabled, nil
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
- if err := n.rec.Client.Create(ctx, obj); err != nil {
+ if err := n.client.Create(ctx, obj); err != nil {
if apierrors.IsAlreadyExists(err) {
logger.Info("Found Resource, updating...")
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -352,11 +366,11 @@ func RoleBinding(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].RoleBinding.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("RoleBinding", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("RoleBinding", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -374,14 +388,14 @@ func RoleBinding(n ClusterPolicyController) (gpuv1.State, error) {
obj.Subjects[idx].Namespace = n.operatorNamespace
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
- if err := n.rec.Client.Create(ctx, obj); err != nil {
+ if err := n.client.Create(ctx, obj); err != nil {
if apierrors.IsAlreadyExists(err) {
logger.Info("Found Resource, updating...")
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -403,11 +417,11 @@ func ClusterRole(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].ClusterRole.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("ClusterRole", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("ClusterRole", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -415,14 +429,14 @@ func ClusterRole(n ClusterPolicyController) (gpuv1.State, error) {
return gpuv1.Disabled, nil
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
- if err := n.rec.Client.Create(ctx, obj); err != nil {
+ if err := n.client.Create(ctx, obj); err != nil {
if apierrors.IsAlreadyExists(err) {
logger.Info("Found Resource, updating...")
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -444,11 +458,11 @@ func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].ClusterRoleBinding.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("ClusterRoleBinding", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("ClusterRoleBinding", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -460,14 +474,14 @@ func ClusterRoleBinding(n ClusterPolicyController) (gpuv1.State, error) {
obj.Subjects[idx].Namespace = n.operatorNamespace
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
- if err := n.rec.Client.Create(ctx, obj); err != nil {
+ if err := n.client.Create(ctx, obj); err != nil {
if apierrors.IsAlreadyExists(err) {
logger.Info("Found Resource, updating...")
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -490,11 +504,11 @@ func createConfigMap(n ClusterPolicyController, configMapIdx int) (gpuv1.State,
obj := n.resources[state].ConfigMaps[configMapIdx].DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("ConfigMap", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("ConfigMap", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -536,18 +550,18 @@ func createConfigMap(n ClusterPolicyController, configMapIdx int) (gpuv1.State,
}
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
- if err := n.rec.Client.Create(ctx, obj); err != nil {
+ if err := n.client.Create(ctx, obj); err != nil {
if !apierrors.IsAlreadyExists(err) {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
}
logger.Info("Found Resource, updating...")
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -577,7 +591,7 @@ func ConfigMaps(n ClusterPolicyController) (gpuv1.State, error) {
func (n ClusterPolicyController) getKernelVersionsMap() (map[string]string, error) {
kernelVersionMap := make(map[string]string)
ctx := n.ctx
- logger := n.rec.Log.WithValues("Request.Namespace", "default", "Request.Name", "Node")
+ logger := n.logger.WithValues("Request.Namespace", "default", "Request.Name", "Node")
// Filter only GPU nodes
opts := []client.ListOption{
@@ -585,7 +599,7 @@ func (n ClusterPolicyController) getKernelVersionsMap() (map[string]string, erro
}
list := &corev1.NodeList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
logger.Info("Could not get NodeList", "ERROR", err)
return nil, err
@@ -625,14 +639,14 @@ func (n ClusterPolicyController) getKernelVersionsMap() (map[string]string, erro
func kernelFullVersion(n ClusterPolicyController) (string, string, string) {
ctx := n.ctx
- logger := n.rec.Log.WithValues("Request.Namespace", "default", "Request.Name", "Node")
+ logger := n.logger.WithValues("Request.Namespace", "default", "Request.Name", "Node")
// We need the node labels to fetch the correct container
opts := []client.ListOption{
client.MatchingLabels{"nvidia.com/gpu.present": "true"},
}
list := &corev1.NodeList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
logger.Info("Could not get NodeList", "ERROR", err)
return "", "", ""
@@ -674,7 +688,7 @@ func kernelFullVersion(n ClusterPolicyController) (string, string, string) {
}
func preProcessDaemonSet(obj *appsv1.DaemonSet, n ClusterPolicyController) error {
- logger := n.rec.Log.WithValues("Daemonset", obj.Name)
+ logger := n.logger.WithValues("Daemonset", obj.Name)
transformations := map[string]func(*appsv1.DaemonSet, *gpuv1.ClusterPolicySpec, ClusterPolicyController) error{
"nvidia-driver-daemonset": TransformDriver,
"nvidia-vgpu-manager-daemonset": TransformVGPUManager,
@@ -708,6 +722,12 @@ func preProcessDaemonSet(obj *appsv1.DaemonSet, n ClusterPolicyController) error
return err
}
+ // transform the host-root and host-dev-char volumes if a custom host root is configured with the operator
+ transformForHostRoot(obj, n.singleton.Spec.HostPaths.RootFS)
+
+ // transform the driver-root volume if a custom driver install dir is configured with the operator
+ transformForDriverInstallDir(obj, n.singleton.Spec.HostPaths.DriverInstallDir)
+
// apply per operand Daemonset config
err = t(obj, &n.singleton.Spec, n)
if err != nil {
@@ -769,6 +789,81 @@ func applyCommonDaemonsetConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPoli
return nil
}
+// apply necessary transforms if a custom host root path is configured
+func transformForHostRoot(obj *appsv1.DaemonSet, hostRoot string) {
+ if hostRoot == "" || hostRoot == "/" {
+ return
+ }
+
+ transformHostRootVolume(obj, hostRoot)
+ transformHostDevCharVolume(obj, hostRoot)
+}
+
+func transformHostRootVolume(obj *appsv1.DaemonSet, hostRoot string) {
+ containsHostRootVolume := false
+ for _, volume := range obj.Spec.Template.Spec.Volumes {
+ if volume.Name == "host-root" {
+ volume.HostPath.Path = hostRoot
+ containsHostRootVolume = true
+ break
+ }
+ }
+
+ if !containsHostRootVolume {
+ return
+ }
+
+ for index := range obj.Spec.Template.Spec.InitContainers {
+ setContainerEnv(&(obj.Spec.Template.Spec.InitContainers[index]), HostRootEnvName, hostRoot)
+ }
+
+ for index := range obj.Spec.Template.Spec.Containers {
+ setContainerEnv(&(obj.Spec.Template.Spec.Containers[index]), HostRootEnvName, hostRoot)
+ }
+}
+
+func transformHostDevCharVolume(obj *appsv1.DaemonSet, hostRoot string) {
+ for _, volume := range obj.Spec.Template.Spec.Volumes {
+ if volume.Name == "host-dev-char" {
+ volume.HostPath.Path = filepath.Join(hostRoot, "/dev/char")
+ break
+ }
+ }
+}
+
+// apply necessary transforms if a custom driver install directory is configured
+func transformForDriverInstallDir(obj *appsv1.DaemonSet, driverInstallDir string) {
+ if driverInstallDir == "" || driverInstallDir == DefaultDriverInstallDir {
+ return
+ }
+
+ containsDriverInstallDirVolume := false
+ podSpec := obj.Spec.Template.Spec
+ for _, volume := range podSpec.Volumes {
+ if volume.Name == "driver-install-dir" {
+ volume.HostPath.Path = driverInstallDir
+ containsDriverInstallDirVolume = true
+ break
+ }
+ }
+
+ if !containsDriverInstallDirVolume {
+ return
+ }
+
+ for i, ctr := range podSpec.InitContainers {
+ if ctr.Name == "driver-validation" {
+ setContainerEnv(&(podSpec.InitContainers[i]), DriverInstallDirEnvName, driverInstallDir)
+ setContainerEnv(&(podSpec.InitContainers[i]), DriverInstallDirCtrPathEnvName, driverInstallDir)
+ for j, volumeMount := range ctr.VolumeMounts {
+ if volumeMount.Name == "driver-install-dir" {
+ podSpec.InitContainers[i].VolumeMounts[j].MountPath = driverInstallDir
+ }
+ }
+ }
+ }
+}
+
// TransformGPUDiscoveryPlugin transforms GPU discovery daemonset with required config as per ClusterPolicy
func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
// update validation container
@@ -784,6 +879,14 @@ func TransformGPUDiscoveryPlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPol
}
obj.Spec.Template.Spec.Containers[0].Image = img
+ // update image for IMEX init container
+ for i, initCtr := range obj.Spec.Template.Spec.InitContainers {
+ if initCtr.Name == "gpu-feature-discovery-imex-init" {
+ obj.Spec.Template.Spec.InitContainers[i].Image = img
+ break
+ }
+ }
+
// update image pull policy
obj.Spec.Template.Spec.Containers[0].ImagePullPolicy = gpuv1.ImagePullPolicy(config.GPUFeatureDiscovery.ImagePullPolicy)
@@ -1016,17 +1119,17 @@ func getOrCreateTrustedCAConfigMap(n ClusterPolicyController, name string) (*cor
configMap.ObjectMeta.Labels = make(map[string]string)
configMap.ObjectMeta.Labels["config.openshift.io/inject-trusted-cabundle"] = "true"
- logger := n.rec.Log.WithValues("ConfigMap", configMap.ObjectMeta.Name, "Namespace", configMap.ObjectMeta.Namespace)
+ logger := n.logger.WithValues("ConfigMap", configMap.ObjectMeta.Name, "Namespace", configMap.ObjectMeta.Namespace)
- if err := controllerutil.SetControllerReference(n.singleton, configMap, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, configMap, n.scheme); err != nil {
return nil, err
}
found := &corev1.ConfigMap{}
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: configMap.ObjectMeta.Namespace, Name: configMap.ObjectMeta.Name}, found)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: configMap.ObjectMeta.Namespace, Name: configMap.ObjectMeta.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating")
- err = n.rec.Client.Create(ctx, configMap)
+ err = n.client.Create(ctx, configMap)
if err != nil {
logger.Info("Couldn't create")
return nil, fmt.Errorf("failed to create trusted CA bundle config map %q: %s", name, err)
@@ -1144,15 +1247,43 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n
// configure runtime
runtime := n.runtime.String()
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME", runtime)
+ err = transformForRuntime(obj, config, runtime, "nvidia-container-toolkit-ctr")
+ if err != nil {
+ return fmt.Errorf("error transforming toolkit daemonset : %w", err)
+ }
+
+ // Update CRI-O hooks path to use default path for non OCP cases
+ if n.openshift == "" && n.runtime == gpuv1.CRIO {
+ for index, volume := range obj.Spec.Template.Spec.Volumes {
+ if volume.Name == "crio-hooks" {
+ obj.Spec.Template.Spec.Volumes[index].HostPath.Path = "/usr/share/containers/oci/hooks.d"
+ }
+ }
+ }
+ return nil
+}
+
+func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, runtime string, containerName string) error {
+ var mainContainer *corev1.Container
+ for i, ctr := range obj.Spec.Template.Spec.Containers {
+ if ctr.Name == containerName {
+ mainContainer = &obj.Spec.Template.Spec.Containers[i]
+ break
+ }
+ }
+ if mainContainer == nil {
+ return fmt.Errorf("failed to find main container %q", containerName)
+ }
+
+ setContainerEnv(mainContainer, "RUNTIME", runtime)
if runtime == gpuv1.Containerd.String() {
// Set the runtime class name that is to be configured for containerd
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_RUNTIME_CLASS", getRuntimeClass(config))
+ setContainerEnv(mainContainer, "CONTAINERD_RUNTIME_CLASS", getRuntimeClass(config))
}
// setup mounts for runtime config file
- runtimeConfigFile, err := getRuntimeConfigFile(&(obj.Spec.Template.Spec.Containers[0]), runtime)
+ runtimeConfigFile, err := getRuntimeConfigFile(mainContainer, runtime)
if err != nil {
return fmt.Errorf("error getting path to runtime config file: %v", err)
}
@@ -1168,19 +1299,20 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n
configEnvvarName = "CRIO_CONFIG"
}
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), configEnvvarName, DefaultRuntimeConfigTargetDir+sourceConfigFileName)
+ setContainerEnv(mainContainer, "RUNTIME_CONFIG", DefaultRuntimeConfigTargetDir+sourceConfigFileName)
+ setContainerEnv(mainContainer, configEnvvarName, DefaultRuntimeConfigTargetDir+sourceConfigFileName)
volMountConfigName := fmt.Sprintf("%s-config", runtime)
volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir}
- obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountConfig)
+ mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig)
configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeConfigFile), Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}}
obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol)
// setup mounts for runtime socket file
- runtimeSocketFile, err := getRuntimeSocketFile(&(obj.Spec.Template.Spec.Containers[0]), runtime)
+ runtimeSocketFile, err := getRuntimeSocketFile(mainContainer, runtime)
if err != nil {
- return fmt.Errorf("error getting path to runtime socket: %v", err)
+ return fmt.Errorf("error getting path to runtime socket: %w", err)
}
if runtimeSocketFile != "" {
sourceSocketFileName := path.Base(runtimeSocketFile)
@@ -1191,24 +1323,16 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n
} else if runtime == gpuv1.Docker.String() {
socketEnvvarName = "DOCKER_SOCKET"
}
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), socketEnvvarName, DefaultRuntimeSocketTargetDir+sourceSocketFileName)
+ setContainerEnv(mainContainer, "RUNTIME_SOCKET", DefaultRuntimeSocketTargetDir+sourceSocketFileName)
+ setContainerEnv(mainContainer, socketEnvvarName, DefaultRuntimeSocketTargetDir+sourceSocketFileName)
volMountSocketName := fmt.Sprintf("%s-socket", runtime)
volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir}
- obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountSocket)
+ mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountSocket)
socketVol := corev1.Volume{Name: volMountSocketName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeSocketFile)}}}
obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, socketVol)
}
-
- // Update CRI-O hooks path to use default path for non OCP cases
- if n.openshift == "" && n.runtime == gpuv1.CRIO {
- for index, volume := range obj.Spec.Template.Spec.Volumes {
- if volume.Name == "crio-hooks" {
- obj.Spec.Template.Spec.Volumes[index].HostPath.Path = "/usr/share/containers/oci/hooks.d"
- }
- }
- }
return nil
}
@@ -1277,8 +1401,21 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations")
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, "nvidia.cdi.k8s.io/")
if config.Toolkit.IsEnabled() {
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCTKPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-ctk"))
+ setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCDIHookPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-cdi-hook"))
+ }
+ }
+
+ // update MPS volumes and set MPS_ROOT env var if a custom MPS root is configured
+ if config.DevicePlugin.MPS != nil && config.DevicePlugin.MPS.Root != "" &&
+ config.DevicePlugin.MPS.Root != DefaultMPSRoot {
+ for i, volume := range obj.Spec.Template.Spec.Volumes {
+ if volume.Name == "mps-root" {
+ obj.Spec.Template.Spec.Volumes[i].HostPath.Path = config.DevicePlugin.MPS.Root
+ } else if volume.Name == "mps-shm" {
+ obj.Spec.Template.Spec.Volumes[i].HostPath.Path = filepath.Join(config.DevicePlugin.MPS.Root, "shm")
+ }
}
+ setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), MPSRootEnvName, config.DevicePlugin.MPS.Root)
}
return nil
@@ -1346,6 +1483,18 @@ func TransformMPSControlDaemon(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic
// update env required for MIG support
applyMIGConfiguration(mainContainer, config.MIG.Strategy)
+ // update MPS volumes if a custom MPS root is configured
+ if config.DevicePlugin.MPS != nil && config.DevicePlugin.MPS.Root != "" &&
+ config.DevicePlugin.MPS.Root != DefaultMPSRoot {
+ for i, volume := range obj.Spec.Template.Spec.Volumes {
+ if volume.Name == "mps-root" {
+ obj.Spec.Template.Spec.Volumes[i].HostPath.Path = config.DevicePlugin.MPS.Root
+ } else if volume.Name == "mps-shm" {
+ obj.Spec.Template.Spec.Volumes[i].HostPath.Path = filepath.Join(config.DevicePlugin.MPS.Root, "shm")
+ }
+ }
+ }
+
return nil
}
@@ -1432,14 +1581,7 @@ func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
// check if DCGM hostengine is enabled as a separate Pod and setup env accordingly
if config.DCGM.IsEnabled() {
- // enable hostNetwork for communication with external DCGM using NODE_IP(localhost)
- obj.Spec.Template.Spec.HostNetwork = true
- // set DCGM host engine env. localhost will be substituted during pod runtime
- dcgmHostPort := int32(DCGMDefaultHostPort)
- if config.DCGM.HostPort != 0 {
- dcgmHostPort = config.DCGM.HostPort
- }
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DCGMRemoteEngineEnvName, fmt.Sprintf("localhost:%d", dcgmHostPort))
+ setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DCGMRemoteEngineEnvName, fmt.Sprintf("nvidia-dcgm:%d", DCGMDefaultPort))
} else {
// case for DCGM running on the host itself(DGX BaseOS)
remoteEngine := getContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DCGMRemoteEngineEnvName)
@@ -1448,6 +1590,7 @@ func TransformDCGMExporter(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
obj.Spec.Template.Spec.HostNetwork = true
}
}
+
// set RuntimeClass for supported runtimes
setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
@@ -1565,16 +1708,6 @@ func TransformDCGM(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n Clu
}
}
- // set host port to bind for DCGM engine
- for i, port := range obj.Spec.Template.Spec.Containers[0].Ports {
- if port.Name == "dcgm" {
- obj.Spec.Template.Spec.Containers[0].Ports[i].HostPort = DCGMDefaultHostPort
- if config.DCGM.HostPort != 0 {
- obj.Spec.Template.Spec.Containers[0].Ports[i].HostPort = config.DCGM.HostPort
- }
- }
- }
-
// set RuntimeClass for supported runtimes
setRuntimeClass(&obj.Spec.Template.Spec, n.runtime, config.Operator.RuntimeClass)
@@ -1659,6 +1792,9 @@ func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
// update env required for CDI support
if config.CDI.IsEnabled() {
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
+ if config.Toolkit.IsEnabled() {
+ setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCDIHookPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-cdi-hook"))
+ }
}
return nil
@@ -1721,47 +1857,20 @@ func TransformKataManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec
// setup mounts for runtime config file
runtime := n.runtime.String()
- runtimeConfigFile, err := getRuntimeConfigFile(&(obj.Spec.Template.Spec.Containers[0]), runtime)
+ err = transformForRuntime(obj, config, runtime, "nvidia-kata-manager")
if err != nil {
- return fmt.Errorf("error getting path to runtime config file: %v", err)
+ return fmt.Errorf("error transforming kata-manager daemonset : %w", err)
}
- sourceConfigFileName := path.Base(runtimeConfigFile)
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_CONFIG", filepath.Join(DefaultRuntimeConfigTargetDir, sourceConfigFileName))
-
- volMountConfigName := fmt.Sprintf("%s-config", runtime)
- volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir}
- obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountConfig)
-
- configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeConfigFile), Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}}
- obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol)
-
- // setup mounts for runtime socket file
- runtimeSocketFile, err := getRuntimeSocketFile(&(obj.Spec.Template.Spec.Containers[0]), runtime)
- if err != nil {
- return fmt.Errorf("error getting path to runtime socket: %v", err)
- }
- sourceSocketFileName := path.Base(runtimeSocketFile)
- setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_SOCKET", filepath.Join(DefaultRuntimeSocketTargetDir, sourceSocketFileName))
-
- volMountSocketName := fmt.Sprintf("%s-socket", runtime)
- volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir}
- obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountSocket)
-
- socketVol := corev1.Volume{Name: volMountSocketName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeSocketFile)}}}
- obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, socketVol)
// Compute hash of kata manager config and add an annotation with the value.
// If the kata config changes, a new revision of the daemonset will be
// created and thus the kata-manager pods will restart with the updated config.
- hash, err := hashstructure.Hash(config.KataManager.Config, nil)
- if err != nil {
- return fmt.Errorf("failed to get hash of kata-manager config: %v", err)
- }
+ hash := utils.GetObjectHash(config.KataManager.Config)
if obj.Spec.Template.Annotations == nil {
obj.Spec.Template.Annotations = make(map[string]string)
}
- obj.Spec.Template.Annotations[KataManagerAnnotationHashKey] = strconv.FormatUint(hash, 16)
+ obj.Spec.Template.Annotations[KataManagerAnnotationHashKey] = hash
return nil
}
@@ -1929,7 +2038,7 @@ func TransformVGPUDeviceManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPoli
// TransformValidator transforms nvidia-operator-validator daemonset with required config as per ClusterPolicy
func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
- err := TransformValidatorShared(obj, config, n)
+ err := TransformValidatorShared(obj, config)
if err != nil {
return fmt.Errorf("%v", err)
}
@@ -1954,7 +2063,7 @@ func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
}
if validatorErr != nil {
- n.rec.Log.Info("WARN: errors transforming the validator containers: %v", validatorErr)
+ n.logger.Info("WARN: errors transforming the validator containers: %v", validatorErr)
}
return nil
@@ -1962,7 +2071,7 @@ func TransformValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
// TransformSandboxValidator transforms nvidia-sandbox-validator daemonset with required config as per ClusterPolicy
func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
- err := TransformValidatorShared(obj, config, n)
+ err := TransformValidatorShared(obj, config)
if err != nil {
return fmt.Errorf("%v", err)
}
@@ -1983,14 +2092,14 @@ func TransformSandboxValidator(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic
}
if validatorErr != nil {
- n.rec.Log.Info("WARN: errors transforming the validator containers: %v", validatorErr)
+ n.logger.Info("WARN: errors transforming the validator containers: %v", validatorErr)
}
return nil
}
// TransformValidatorShared applies general transformations to the validator daemonset with required config as per ClusterPolicy
-func TransformValidatorShared(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
+func TransformValidatorShared(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec) error {
// update image
image, err := gpuv1.ImagePath(&config.Validator)
if err != nil {
@@ -2331,10 +2440,7 @@ func isCustomPluginConfigSet(pluginConfig *gpuv1.DevicePluginConfig) bool {
// adds shared volume mounts required for custom plugin config provided via a ConfigMap
func addSharedMountsForPluginConfig(container *corev1.Container, config *gpuv1.DevicePluginConfig) {
- emptyDirMount := corev1.VolumeMount{Name: "config", MountPath: "/config"}
configVolMount := corev1.VolumeMount{Name: config.Name, MountPath: "/available-configs"}
-
- container.VolumeMounts = append(container.VolumeMounts, emptyDirMount)
container.VolumeMounts = append(container.VolumeMounts, configVolMount)
}
@@ -2370,15 +2476,19 @@ func handleDevicePluginConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
continue
}
setContainerEnv(&obj.Spec.Template.Spec.Containers[i], "CONFIG_FILE", "/config/config.yaml")
- // setup sharedvolume(emptydir) for main container
+ // add configmap volume mount
addSharedMountsForPluginConfig(&obj.Spec.Template.Spec.Containers[i], config.DevicePlugin.Config)
}
- // Enable process ns sharing for PID access
- shareProcessNamespace := true
- obj.Spec.Template.Spec.ShareProcessNamespace = &shareProcessNamespace
- // setup volumes from configmap and shared emptyDir
+
+ // if hostPID is already set, we skip setting the shareProcessNamespace field
+ // for context, go to https://github.com/kubernetes-client/go/blob/master/kubernetes/docs/V1PodSpec.md
+ if !obj.Spec.Template.Spec.HostPID {
+ // Enable process ns sharing for PID access
+ shareProcessNamespace := true
+ obj.Spec.Template.Spec.ShareProcessNamespace = &shareProcessNamespace
+ }
+ // add configmap volume
obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, createConfigMapVolume(config.DevicePlugin.Config.Name, nil))
- obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, createEmptyDirVolume("config"))
// apply env/volume changes to initContainer
err := transformConfigManagerInitContainer(obj, config)
@@ -2546,7 +2656,7 @@ func transformGDSContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
continue
}
if config.GPUDirectStorage == nil || !config.GPUDirectStorage.IsEnabled() {
- n.rec.Log.Info("GPUDirect Storage is disabled")
+ n.logger.Info("GPUDirect Storage is disabled")
// remove nvidia-fs sidecar container from driver Daemonset if GDS is not enabled
obj.Spec.Template.Spec.Containers = append(obj.Spec.Template.Spec.Containers[:i], obj.Spec.Template.Spec.Containers[i+1:]...)
return nil
@@ -2628,7 +2738,7 @@ func transformGDRCopyContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic
continue
}
if config.GDRCopy == nil || !config.GDRCopy.IsEnabled() {
- n.rec.Log.Info("GDRCopy is disabled")
+ n.logger.Info("GDRCopy is disabled")
// remove nvidia-gdrcopy sidecar container from driver Daemonset if gdrcopy is not enabled
obj.Spec.Template.Spec.Containers = append(obj.Spec.Template.Spec.Containers[:i], obj.Spec.Template.Spec.Containers[i+1:]...)
return nil
@@ -2747,12 +2857,12 @@ func transformOpenShiftDriverToolkitContainer(obj *appsv1.DaemonSet, config *gpu
return nil, nil
}
- return nil, fmt.Errorf(fmt.Sprintf("could not find the '%s' container", name))
+ return nil, fmt.Errorf("could not find the '%s' container", name)
}
if !n.ocpDriverToolkit.enabled {
if n.ocpDriverToolkit.requested {
- n.rec.Log.Info("OpenShift DriverToolkit was requested but could not be enabled (dependencies missing)")
+ n.logger.Info("OpenShift DriverToolkit was requested but could not be enabled (dependencies missing)")
}
/* remove OpenShift Driver Toolkit side-car container from the Driver DaemonSet */
@@ -2792,18 +2902,18 @@ func transformOpenShiftDriverToolkitContainer(obj *appsv1.DaemonSet, config *gpu
if config.GPUDirectStorage != nil && config.GPUDirectStorage.IsEnabled() {
setContainerEnv(driverToolkitContainer, "GDS_ENABLED", "true")
- n.rec.Log.V(2).Info("transformOpenShiftDriverToolkitContainer", "GDS_ENABLED", config.GPUDirectStorage.IsEnabled())
+ n.logger.V(2).Info("transformOpenShiftDriverToolkitContainer", "GDS_ENABLED", config.GPUDirectStorage.IsEnabled())
}
if config.GDRCopy != nil && config.GDRCopy.IsEnabled() {
setContainerEnv(driverToolkitContainer, "GDRCOPY_ENABLED", "true")
- n.rec.Log.V(2).Info("transformOpenShiftDriverToolkitContainer", "GDRCOPY_ENABLED", "true")
+ n.logger.V(2).Info("transformOpenShiftDriverToolkitContainer", "GDRCOPY_ENABLED", "true")
}
image := n.ocpDriverToolkit.rhcosDriverToolkitImages[n.ocpDriverToolkit.currentRhcosVersion]
if image != "" {
driverToolkitContainer.Image = image
- n.rec.Log.Info("DriverToolkit", "image", driverToolkitContainer.Image)
+ n.logger.Info("DriverToolkit", "image", driverToolkitContainer.Image)
} else {
/* RHCOS tag missing in the Driver-Toolkit imagestream, setup fallback */
obj.ObjectMeta.Labels["openshift.driver-toolkit.rhcos-image-missing"] = "true"
@@ -2814,7 +2924,7 @@ func transformOpenShiftDriverToolkitContainer(obj *appsv1.DaemonSet, config *gpu
setContainerEnv(mainContainer, "RHCOS_VERSION", rhcosVersion)
setContainerEnv(driverToolkitContainer, "RHCOS_IMAGE_MISSING", "true")
- n.rec.Log.Info("WARNING: DriverToolkit image tag missing. Version-specific fallback mode enabled.", "rhcosVersion", rhcosVersion)
+ n.logger.Info("WARNING: DriverToolkit image tag missing. Version-specific fallback mode enabled.", "rhcosVersion", rhcosVersion)
}
/* prepare the main container to start from the DriverToolkit entrypoint */
@@ -2969,7 +3079,7 @@ func createConfigMapVolumeMounts(n ClusterPolicyController, configMapName string
// get the ConfigMap
cm := &corev1.ConfigMap{}
opts := client.ObjectKey{Namespace: n.operatorNamespace, Name: configMapName}
- err := n.rec.Client.Get(ctx, opts, cm)
+ err := n.client.Get(ctx, opts, cm)
if err != nil {
return nil, nil, fmt.Errorf("ERROR: could not get ConfigMap %s from client: %v", configMapName, err)
}
@@ -3006,15 +3116,6 @@ func createConfigMapVolume(configMapName string, itemsToInclude []corev1.KeyToPa
return corev1.Volume{Name: configMapName, VolumeSource: volumeSource}
}
-func createEmptyDirVolume(volumeName string) corev1.Volume {
- return corev1.Volume{
- Name: volumeName,
- VolumeSource: corev1.VolumeSource{
- EmptyDir: &corev1.EmptyDirVolumeSource{},
- },
- }
-}
-
func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n ClusterPolicyController) error {
driverIndex := 0
driverCtrFound := false
@@ -3204,7 +3305,7 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
// set up subscription entitlements for RHEL(using K8s with a non-CRIO runtime) and SLES
if (release["ID"] == "rhel" && n.openshift == "" && n.runtime != gpuv1.CRIO) || release["ID"] == "sles" {
- n.rec.Log.Info("Mounting subscriptions into the driver container", "OS", release["ID"])
+ n.logger.Info("Mounting subscriptions into the driver container", "OS", release["ID"])
pathToVolumeSource, err := getSubscriptionPathsToVolumeSources()
if err != nil {
return fmt.Errorf("ERROR: failed to get path items for subscription entitlements: %v", err)
@@ -3402,19 +3503,19 @@ func isDeploymentReady(name string, n ClusterPolicyController) gpuv1.State {
opts := []client.ListOption{
client.MatchingLabels{"app": name},
}
- n.rec.Log.V(1).Info("Deployment", "LabelSelector", fmt.Sprintf("app=%s", name))
+ n.logger.V(1).Info("Deployment", "LabelSelector", fmt.Sprintf("app=%s", name))
list := &appsv1.DeploymentList{}
- err := n.rec.Client.List(n.ctx, list, opts...)
+ err := n.client.List(n.ctx, list, opts...)
if err != nil {
- n.rec.Log.Info("Could not get DeploymentList", err)
+ n.logger.Info("Could not get DeploymentList", err)
}
- n.rec.Log.V(1).Info("Deployment", "NumberOfDeployment", len(list.Items))
+ n.logger.V(1).Info("Deployment", "NumberOfDeployment", len(list.Items))
if len(list.Items) == 0 {
return gpuv1.NotReady
}
ds := list.Items[0]
- n.rec.Log.V(1).Info("Deployment", "NumberUnavailable", ds.Status.UnavailableReplicas)
+ n.logger.V(1).Info("Deployment", "NumberUnavailable", ds.Status.UnavailableReplicas)
if ds.Status.UnavailableReplicas != 0 {
return gpuv1.NotReady
@@ -3426,19 +3527,19 @@ func isDeploymentReady(name string, n ClusterPolicyController) gpuv1.State {
func isDaemonSetReady(name string, n ClusterPolicyController) gpuv1.State {
ctx := n.ctx
ds := &appsv1.DaemonSet{}
- n.rec.Log.V(2).Info("checking daemonset for readiness", "name", name)
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: n.operatorNamespace, Name: name}, ds)
+ n.logger.V(2).Info("checking daemonset for readiness", "name", name)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: n.operatorNamespace, Name: name}, ds)
if err != nil {
- n.rec.Log.Error(err, "could not get daemonset", "name", name)
+ n.logger.Error(err, "could not get daemonset", "name", name)
}
if ds.Status.DesiredNumberScheduled == 0 {
- n.rec.Log.V(2).Info("Daemonset has desired pods of 0", "name", name)
+ n.logger.V(2).Info("Daemonset has desired pods of 0", "name", name)
return gpuv1.Ready
}
if ds.Status.NumberUnavailable != 0 {
- n.rec.Log.Info("daemonset not ready", "name", name)
+ n.logger.Info("daemonset not ready", "name", name)
return gpuv1.NotReady
}
@@ -3449,14 +3550,14 @@ func isDaemonSetReady(name string, n ClusterPolicyController) gpuv1.State {
opts := []client.ListOption{client.MatchingLabels(ds.Spec.Template.ObjectMeta.Labels)}
- n.rec.Log.V(2).Info("Pod", "LabelSelector", fmt.Sprintf("app=%s", name))
+ n.logger.V(2).Info("Pod", "LabelSelector", fmt.Sprintf("app=%s", name))
list := &corev1.PodList{}
- err = n.rec.Client.List(ctx, list, opts...)
+ err = n.client.List(ctx, list, opts...)
if err != nil {
- n.rec.Log.Info("Could not get PodList", err)
+ n.logger.Info("Could not get PodList", err)
return gpuv1.NotReady
}
- n.rec.Log.V(2).Info("Pod", "NumberOfPods", len(list.Items))
+ n.logger.V(2).Info("Pod", "NumberOfPods", len(list.Items))
if len(list.Items) == 0 {
return gpuv1.NotReady
}
@@ -3464,21 +3565,21 @@ func isDaemonSetReady(name string, n ClusterPolicyController) gpuv1.State {
dsPods := getPodsOwnedbyDaemonset(ds, list.Items, n)
daemonsetRevisionHash, err := getDaemonsetControllerRevisionHash(ctx, ds, n)
if err != nil {
- n.rec.Log.Error(
+ n.logger.Error(
err, "Failed to get daemonset template revision hash", "daemonset", ds)
return gpuv1.NotReady
}
- n.rec.Log.V(2).Info("daemonset template revision hash", "hash", daemonsetRevisionHash)
+ n.logger.V(2).Info("daemonset template revision hash", "hash", daemonsetRevisionHash)
for _, pod := range dsPods {
pod := pod
podRevisionHash, err := getPodControllerRevisionHash(ctx, &pod)
if err != nil {
- n.rec.Log.Error(
+ n.logger.Error(
err, "Failed to get pod template revision hash", "pod", pod)
return gpuv1.NotReady
}
- n.rec.Log.V(2).Info("pod template revision hash", "hash", podRevisionHash)
+ n.logger.V(2).Info("pod template revision hash", "hash", podRevisionHash)
// check if the revision hashes are matching and pod is in running state
if podRevisionHash != daemonsetRevisionHash || pod.Status.Phase != "Running" {
@@ -3505,13 +3606,13 @@ func getPodsOwnedbyDaemonset(ds *appsv1.DaemonSet, pods []corev1.Pod, n ClusterP
dsPodList := []corev1.Pod{}
for _, pod := range pods {
if pod.OwnerReferences == nil || len(pod.OwnerReferences) < 1 {
- n.rec.Log.Info("Driver Pod has no owner DaemonSet", "pod", pod.Name)
+ n.logger.Info("Driver Pod has no owner DaemonSet", "pod", pod.Name)
continue
}
- n.rec.Log.V(2).Info("Pod", "pod", pod.Name, "owner", pod.OwnerReferences[0].Name)
+ n.logger.V(2).Info("Pod", "pod", pod.Name, "owner", pod.OwnerReferences[0].Name)
if ds.UID != pod.OwnerReferences[0].UID {
- n.rec.Log.Info("Driver Pod is not owned by a Driver DaemonSet",
+ n.logger.Info("Driver Pod is not owned by a Driver DaemonSet",
"pod", pod, "actual owner", pod.OwnerReferences[0])
continue
}
@@ -3535,12 +3636,12 @@ func getDaemonsetControllerRevisionHash(ctx context.Context, daemonset *appsv1.D
client.InNamespace(n.operatorNamespace),
}
list := &appsv1.ControllerRevisionList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
return "", fmt.Errorf("error getting controller revision list for daemonset %s: %v", daemonset.Name, err)
}
- n.rec.Log.V(2).Info("obtained controller revisions", "Daemonset", daemonset.Name, "len", len(list.Items))
+ n.logger.V(2).Info("obtained controller revisions", "Daemonset", daemonset.Name, "len", len(list.Items))
var revisions []appsv1.ControllerRevision
for _, controllerRevision := range list.Items {
@@ -3569,11 +3670,11 @@ func Deployment(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].Deployment.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("Deployment", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("Deployment", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -3581,14 +3682,14 @@ func Deployment(n ClusterPolicyController) (gpuv1.State, error) {
return gpuv1.Disabled, nil
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
- if err := n.rec.Client.Create(ctx, obj); err != nil {
+ if err := n.client.Create(ctx, obj); err != nil {
if apierrors.IsAlreadyExists(err) {
logger.Info("Found Resource, updating...")
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -3607,22 +3708,22 @@ func ocpHasDriverToolkitImageStream(n *ClusterPolicyController) (bool, error) {
ctx := n.ctx
found := &apiimagev1.ImageStream{}
name := "driver-toolkit"
- namespace := "openshift"
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, found)
+ namespace := consts.OpenshiftNamespace
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, found)
if err != nil {
if apierrors.IsNotFound(err) {
- n.rec.Log.Info("ocpHasDriverToolkitImageStream: driver-toolkit imagestream not found",
+ n.logger.Info("ocpHasDriverToolkitImageStream: driver-toolkit imagestream not found",
"Name", name,
"Namespace", namespace)
return false, nil
}
- n.rec.Log.Info("Couldn't get the driver-toolkit imagestream", "Error", err)
+ n.logger.Info("Couldn't get the driver-toolkit imagestream", "Error", err)
return false, err
}
- n.rec.Log.V(1).Info("ocpHasDriverToolkitImageStream: driver-toolkit imagestream found")
+ n.logger.V(1).Info("ocpHasDriverToolkitImageStream: driver-toolkit imagestream found")
isBroken := false
for _, tag := range found.Spec.Tags {
if tag.Name == "" {
@@ -3632,11 +3733,11 @@ func ocpHasDriverToolkitImageStream(n *ClusterPolicyController) (bool, error) {
if tag.Name == "latest" || tag.From == nil {
continue
}
- n.rec.Log.V(1).Info("ocpHasDriverToolkitImageStream: tag", tag.Name, tag.From.Name)
+ n.logger.V(1).Info("ocpHasDriverToolkitImageStream: tag", tag.Name, tag.From.Name)
n.ocpDriverToolkit.rhcosDriverToolkitImages[tag.Name] = tag.From.Name
}
if isBroken {
- n.rec.Log.Info("WARNING: ocpHasDriverToolkitImageStream: driver-toolkit imagestream is broken, see RHBZ#2015024")
+ n.logger.Info("WARNING: ocpHasDriverToolkitImageStream: driver-toolkit imagestream is broken, see RHBZ#2015024")
n.operatorMetrics.openshiftDriverToolkitIsBroken.Set(1)
} else {
@@ -3653,7 +3754,7 @@ func (n ClusterPolicyController) cleanupAllDriverDaemonSets(ctx context.Context)
// is allowed when specifying ListOptions or DeleteOptions.
// See GH issue: https://github.com/kubernetes-sigs/controller-runtime/issues/612
list := &appsv1.DaemonSetList{}
- err := n.rec.Client.List(ctx, list, client.MatchingFields{clusterPolicyControllerIndexKey: n.singleton.Name})
+ err := n.client.List(ctx, list, client.MatchingFields{clusterPolicyControllerIndexKey: n.singleton.Name})
if err != nil {
return fmt.Errorf("failed to list all NVIDIA driver daemonsets owned by ClusterPolicy: %w", err)
}
@@ -3662,8 +3763,8 @@ func (n ClusterPolicyController) cleanupAllDriverDaemonSets(ctx context.Context)
ds := ds
// filter out DaemonSets which are not the NVIDIA driver/vgpu-manager
if strings.HasPrefix(ds.Name, commonDriverDaemonsetName) || strings.HasPrefix(ds.Name, commonVGPUManagerDaemonsetName) {
- n.rec.Log.Info("Deleting NVIDIA driver daemonset owned by ClusterPolicy", "Name", ds.Name)
- err = n.rec.Client.Delete(ctx, &ds)
+ n.logger.Info("Deleting NVIDIA driver daemonset owned by ClusterPolicy", "Name", ds.Name)
+ err = n.client.Delete(ctx, &ds)
if err != nil {
return fmt.Errorf("error deleting NVIDIA driver daemonset: %w", err)
}
@@ -3683,32 +3784,37 @@ func (n ClusterPolicyController) cleanupStalePrecompiledDaemonsets(ctx context.C
},
}
list := &appsv1.DaemonSetList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
- n.rec.Log.Error(err, "could not get daemonset list")
+ n.logger.Error(err, "could not get daemonset list")
return err
}
for idx := range list.Items {
- name := list.Items[idx].ObjectMeta.Name
- desiredNumberScheduled := list.Items[idx].Status.DesiredNumberScheduled
+ ds := list.Items[idx]
+ name := ds.ObjectMeta.Name
+ desiredNumberScheduled := ds.Status.DesiredNumberScheduled
+ numberMisscheduled := ds.Status.NumberMisscheduled
- n.rec.Log.V(1).Info("Driver DaemonSet found",
+ n.logger.V(1).Info("Driver DaemonSet found",
"Name", name,
- "desiredNumberScheduled", desiredNumberScheduled)
+ "Status.DesiredNumberScheduled", desiredNumberScheduled)
- if desiredNumberScheduled != 0 {
- n.rec.Log.Info("Driver DaemonSet active, keep it.",
- "Name", name, "Status.DesiredNumberScheduled", desiredNumberScheduled)
- continue
- }
+ // We consider a daemonset to be stale only if its desired number of scheduled pods is zero and no pods are currently mis-scheduled.
+ // As per the Kubernetes docs, a daemonset pod is mis-scheduled when an already scheduled pod no longer satisfies
+ // node affinity constraints or has un-tolerated taints, e.g. "node.kubernetes.io/unreachable:NoSchedule".
+ if desiredNumberScheduled == 0 && numberMisscheduled == 0 {
+ n.logger.Info("Delete Driver DaemonSet", "Name", name)
- n.rec.Log.Info("Delete Driver DaemonSet", "Name", name)
-
- err = n.rec.Client.Delete(ctx, &list.Items[idx])
- if err != nil {
- n.rec.Log.Info("ERROR: Could not get delete DaemonSet",
- "Name", name, "Error", err)
+ err = n.client.Delete(ctx, &ds)
+ if err != nil {
+ n.logger.Error(err, "Could not get delete DaemonSet",
+ "Name", name)
+ }
+ } else {
+ n.logger.Info("Driver DaemonSet active, keep it.",
+ "Name", name,
+ "Status.DesiredNumberScheduled", desiredNumberScheduled)
}
}
return nil
@@ -3721,23 +3827,23 @@ func (n ClusterPolicyController) cleanupStalePrecompiledDaemonsets(ctx context.C
func precompiledDriverDaemonsets(ctx context.Context, n ClusterPolicyController) (gpuv1.State, []error) {
overallState := gpuv1.Ready
var errs []error
- n.rec.Log.Info("cleaning any stale precompiled driver daemonsets")
+ n.logger.Info("cleaning any stale precompiled driver daemonsets")
err := n.cleanupStalePrecompiledDaemonsets(ctx)
if err != nil {
return gpuv1.NotReady, append(errs, err)
}
- n.rec.Log.V(1).Info("preparing pre-compiled driver daemonsets")
+ n.logger.V(1).Info("preparing pre-compiled driver daemonsets")
for kernelVersion, os := range n.kernelVersionMap {
// set current kernel version
n.currentKernelVersion = kernelVersion
- n.rec.Log.Info("preparing pre-compiled driver daemonset",
+ n.logger.Info("preparing pre-compiled driver daemonset",
"version", n.currentKernelVersion, "os", os)
state, err := DaemonSet(n)
if state != gpuv1.Ready {
- n.rec.Log.Info("pre-compiled driver daemonset not ready",
+ n.logger.Info("pre-compiled driver daemonset not ready",
"version", n.currentKernelVersion, "state", state)
overallState = state
}
@@ -3761,7 +3867,7 @@ func (n ClusterPolicyController) ocpDriverToolkitDaemonSets(ctx context.Context)
return gpuv1.NotReady, err
}
- n.rec.Log.V(1).Info("preparing DriverToolkit DaemonSet",
+ n.logger.V(1).Info("preparing DriverToolkit DaemonSet",
"rhcos", n.ocpDriverToolkit.rhcosVersions)
overallState := gpuv1.Ready
@@ -3770,12 +3876,12 @@ func (n ClusterPolicyController) ocpDriverToolkitDaemonSets(ctx context.Context)
for rhcosVersion := range n.ocpDriverToolkit.rhcosVersions {
n.ocpDriverToolkit.currentRhcosVersion = rhcosVersion
- n.rec.Log.V(1).Info("preparing DriverToolkit DaemonSet",
+ n.logger.V(1).Info("preparing DriverToolkit DaemonSet",
"rhcosVersion", n.ocpDriverToolkit.currentRhcosVersion)
state, err := DaemonSet(n)
- n.rec.Log.V(1).Info("preparing DriverToolkit DaemonSet",
+ n.logger.V(1).Info("preparing DriverToolkit DaemonSet",
"rhcosVersion", n.ocpDriverToolkit.currentRhcosVersion, "state", state)
if state != gpuv1.Ready {
overallState = state
@@ -3796,7 +3902,7 @@ func (n ClusterPolicyController) ocpDriverToolkitDaemonSets(ctx context.Context)
if image != "" {
continue
}
- n.rec.Log.Info("WARNINGs: RHCOS driver-toolkit image missing. Version-specific fallback mode enabled.", "rhcosVersion", rhcosVersion)
+ n.logger.Info("WARNINGs: RHCOS driver-toolkit image missing. Version-specific fallback mode enabled.", "rhcosVersion", rhcosVersion)
tagsMissing = true
}
if tagsMissing {
@@ -3823,9 +3929,9 @@ func (n ClusterPolicyController) ocpCleanupStaleDriverToolkitDaemonSets(ctx cont
}
list := &appsv1.DaemonSetList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
- n.rec.Log.Info("ERROR: Could not get DaemonSetList", "Error", err)
+ n.logger.Info("ERROR: Could not get DaemonSetList", "Error", err)
return err
}
@@ -3835,30 +3941,30 @@ func (n ClusterPolicyController) ocpCleanupStaleDriverToolkitDaemonSets(ctx cont
clusterHasRhcosVersion, clusterOk := n.ocpDriverToolkit.rhcosVersions[dsRhcosVersion]
desiredNumberScheduled := list.Items[idx].Status.DesiredNumberScheduled
- n.rec.Log.V(1).Info("Driver DaemonSet found",
+ n.logger.V(1).Info("Driver DaemonSet found",
"Name", name,
"dsRhcosVersion", dsRhcosVersion,
"clusterHasRhcosVersion", clusterHasRhcosVersion,
"desiredNumberScheduled", desiredNumberScheduled)
if desiredNumberScheduled != 0 {
- n.rec.Log.Info("Driver DaemonSet active, keep it.",
+ n.logger.Info("Driver DaemonSet active, keep it.",
"Name", name, "Status.DesiredNumberScheduled", desiredNumberScheduled)
continue
}
if !versionOk {
- n.rec.Log.Info("WARNING: Driver DaemonSet doesn't have DriverToolkit version label",
+ n.logger.Info("WARNING: Driver DaemonSet doesn't have DriverToolkit version label",
"Name", name, "Label", ocpDriverToolkitVersionLabel,
)
} else {
switch {
case !clusterOk:
- n.rec.Log.V(1).Info("Driver DaemonSet RHCOS version NOT part of the cluster",
+ n.logger.V(1).Info("Driver DaemonSet RHCOS version NOT part of the cluster",
"Name", name, "RHCOS version", dsRhcosVersion,
)
case clusterHasRhcosVersion:
- n.rec.Log.V(1).Info("Driver DaemonSet RHCOS version is part of the cluster, keep it.",
+ n.logger.V(1).Info("Driver DaemonSet RHCOS version is part of the cluster, keep it.",
"Name", name, "RHCOS version", dsRhcosVersion,
)
@@ -3868,16 +3974,16 @@ func (n ClusterPolicyController) ocpCleanupStaleDriverToolkitDaemonSets(ctx cont
continue
default: /* clusterHasRhcosVersion == false */
// currently unexpected
- n.rec.Log.V(1).Info("Driver DaemonSet RHCOS version marked for deletion",
+ n.logger.V(1).Info("Driver DaemonSet RHCOS version marked for deletion",
"Name", name, "RHCOS version", dsRhcosVersion,
)
}
}
- n.rec.Log.Info("Delete Driver DaemonSet", "Name", name)
- err = n.rec.Client.Delete(ctx, &list.Items[idx])
+ n.logger.Info("Delete Driver DaemonSet", "Name", name)
+ err = n.client.Delete(ctx, &list.Items[idx])
if err != nil {
- n.rec.Log.Info("ERROR: Could not get delete DaemonSet",
+ n.logger.Info("ERROR: Could not get delete DaemonSet",
"Name", name, "Error", err)
return err
}
@@ -4006,22 +4112,22 @@ func (n ClusterPolicyController) cleanupDriverDaemonsets(ctx context.Context, se
var opts = []client.ListOption{client.MatchingLabels{searchKey: searchValue}}
dsList := &appsv1.DaemonSetList{}
- if err := n.rec.Client.List(ctx, dsList, opts...); err != nil {
- n.rec.Log.Error(err, "Could not get DaemonSetList")
+ if err := n.client.List(ctx, dsList, opts...); err != nil {
+ n.logger.Error(err, "Could not get DaemonSetList")
return 0, err
}
var lastErr error
for idx := range dsList.Items {
- n.rec.Log.Info("Delete DaemonSet",
+ n.logger.Info("Delete DaemonSet",
"Name", dsList.Items[idx].ObjectMeta.Name,
)
// ignore daemonsets that doesn't match the required name
if !strings.HasPrefix(dsList.Items[idx].ObjectMeta.Name, namePrefix) {
continue
}
- if err := n.rec.Client.Delete(ctx, &dsList.Items[idx]); err != nil {
- n.rec.Log.Error(err, "Could not get delete DaemonSet",
+ if err := n.client.Delete(ctx, &dsList.Items[idx]); err != nil {
+ n.logger.Error(err, "Could not get delete DaemonSet",
"Name", dsList.Items[idx].ObjectMeta.Name)
lastErr = err
}
@@ -4033,8 +4139,8 @@ func (n ClusterPolicyController) cleanupDriverDaemonsets(ctx context.Context, se
}
podList := &corev1.PodList{}
- if err := n.rec.Client.List(ctx, podList, opts...); err != nil {
- n.rec.Log.Info("ERROR: Could not get PodList", "Error", err)
+ if err := n.client.List(ctx, podList, opts...); err != nil {
+ n.logger.Info("ERROR: Could not get PodList", "Error", err)
return 0, err
}
@@ -4056,11 +4162,11 @@ func DaemonSet(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].DaemonSet.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("DaemonSet", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("DaemonSet", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -4134,7 +4240,7 @@ func DaemonSet(n ClusterPolicyController) (gpuv1.State, error) {
return gpuv1.NotReady, err
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
logger.Info("SetControllerReference failed", "Error", err)
return gpuv1.NotReady, err
}
@@ -4157,7 +4263,7 @@ func DaemonSet(n ClusterPolicyController) (gpuv1.State, error) {
}
found := &appsv1.DaemonSet{}
- err = n.rec.Client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
+ err = n.client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("DaemonSet not found, creating",
"Name", obj.Name,
@@ -4166,7 +4272,7 @@ func DaemonSet(n ClusterPolicyController) (gpuv1.State, error) {
hashStr := getDaemonsetHash(obj)
// add annotation to the Daemonset with hash value during creation
obj.Annotations[NvidiaAnnotationHashKey] = hashStr
- err = n.rec.Client.Create(ctx, obj)
+ err = n.client.Create(ctx, obj)
if err != nil {
logger.Info("Couldn't create DaemonSet",
"Name", obj.Name,
@@ -4185,7 +4291,7 @@ func DaemonSet(n ClusterPolicyController) (gpuv1.State, error) {
changed := isDaemonsetSpecChanged(found, obj)
if changed {
logger.Info("DaemonSet is different, updating", "name", obj.ObjectMeta.Name)
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
return gpuv1.NotReady, err
}
@@ -4248,13 +4354,13 @@ func isPodReady(name string, n ClusterPolicyController, phase corev1.PodPhase) g
ctx := n.ctx
opts := []client.ListOption{&client.MatchingLabels{"app": name}}
- n.rec.Log.V(1).Info("Pod", "LabelSelector", fmt.Sprintf("app=%s", name))
+ n.logger.V(1).Info("Pod", "LabelSelector", fmt.Sprintf("app=%s", name))
list := &corev1.PodList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
- n.rec.Log.Info("Could not get PodList", err)
+ n.logger.Info("Could not get PodList", err)
}
- n.rec.Log.V(1).Info("Pod", "NumberOfPods", len(list.Items))
+ n.logger.V(1).Info("Pod", "NumberOfPods", len(list.Items))
if len(list.Items) == 0 {
return gpuv1.NotReady
}
@@ -4262,10 +4368,10 @@ func isPodReady(name string, n ClusterPolicyController, phase corev1.PodPhase) g
pd := list.Items[0]
if pd.Status.Phase != phase {
- n.rec.Log.V(1).Info("Pod", "Phase", pd.Status.Phase, "!=", phase)
+ n.logger.V(1).Info("Pod", "Phase", pd.Status.Phase, "!=", phase)
return gpuv1.NotReady
}
- n.rec.Log.V(1).Info("Pod", "Phase", pd.Status.Phase, "==", phase)
+ n.logger.V(1).Info("Pod", "Phase", pd.Status.Phase, "==", phase)
return gpuv1.Ready
}
@@ -4276,11 +4382,11 @@ func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)
obj := n.resources[state].SecurityContextConstraints.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("SecurityContextConstraints", obj.Name, "Namespace", "default")
+ logger := n.logger.WithValues("SecurityContextConstraints", obj.Name, "Namespace", "default")
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -4295,20 +4401,15 @@ func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)
obj.Users[idx] = fmt.Sprintf("system:serviceaccount:%s:%s", obj.Namespace, obj.Name)
}
- // Allow hostNetwork only when a separate standalone DCGM engine is deployed for communication
- if obj.Name == "nvidia-dcgm-exporter" && n.singleton.Spec.DCGM.IsEnabled() {
- obj.AllowHostNetwork = true
- }
-
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
found := &secv1.SecurityContextConstraints{}
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating...")
- err = n.rec.Client.Create(ctx, obj)
+ err = n.client.Create(ctx, obj)
if err != nil {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
@@ -4321,7 +4422,7 @@ func SecurityContextConstraints(n ClusterPolicyController) (gpuv1.State, error)
logger.Info("Found Resource, updating...")
obj.ResourceVersion = found.ResourceVersion
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -4337,11 +4438,11 @@ func Service(n ClusterPolicyController) (gpuv1.State, error) {
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("Service", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("Service", obj.Name, "Namespace", obj.Namespace)
// Check if state is disabled and cleanup resource if exists
if !n.isStateEnabled(n.stateNames[n.idx]) {
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -4349,15 +4450,15 @@ func Service(n ClusterPolicyController) (gpuv1.State, error) {
return gpuv1.Disabled, nil
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
found := &corev1.Service{}
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating...")
- err = n.rec.Client.Create(ctx, obj)
+ err = n.client.Create(ctx, obj)
if err != nil {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
@@ -4371,7 +4472,7 @@ func Service(n ClusterPolicyController) (gpuv1.State, error) {
obj.ResourceVersion = found.ResourceVersion
obj.Spec.ClusterIP = found.Spec.ClusterIP
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -4381,7 +4482,7 @@ func Service(n ClusterPolicyController) (gpuv1.State, error) {
func crdExists(n ClusterPolicyController, name string) (bool, error) {
crd := &apiextensionsv1.CustomResourceDefinition{}
- err := n.rec.Client.Get(n.ctx, client.ObjectKey{Name: name}, crd)
+ err := n.client.Get(n.ctx, client.ObjectKey{Name: name}, crd)
if err != nil && apierrors.IsNotFound(err) {
return false, nil
} else if err != nil {
@@ -4398,7 +4499,7 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].ServiceMonitor.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("ServiceMonitor", obj.Name, "Namespace", obj.Namespace)
+ logger := n.logger.WithValues("ServiceMonitor", obj.Name, "Namespace", obj.Namespace)
// Check if ServiceMonitor is a valid kind
serviceMonitorCRDExists, err := crdExists(n, ServiceMonitorCRDName)
@@ -4411,7 +4512,7 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
if !serviceMonitorCRDExists {
return gpuv1.Ready, nil
}
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -4426,7 +4527,7 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
if !serviceMonitorCRDExists {
return gpuv1.Ready, nil
}
- err := n.rec.Client.Delete(ctx, obj)
+ err := n.client.Delete(ctx, obj)
if err != nil && !apierrors.IsNotFound(err) {
logger.Info("Couldn't delete", "Error", err)
return gpuv1.NotReady, err
@@ -4453,9 +4554,14 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
obj.ObjectMeta.Labels[key] = value
}
}
-
if serviceMonitor.Relabelings != nil {
- obj.Spec.Endpoints[0].RelabelConfigs = serviceMonitor.Relabelings
+ relabelConfigs := make([]promv1.RelabelConfig, len(serviceMonitor.Relabelings))
+ for i, relabel := range serviceMonitor.Relabelings {
+ if relabel != nil {
+ relabelConfigs[i] = *relabel
+ }
+ }
+ obj.Spec.Endpoints[0].RelabelConfigs = relabelConfigs
}
}
if n.stateNames[state] == "state-operator-metrics" || n.stateNames[state] == "state-node-status-exporter" {
@@ -4474,15 +4580,15 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
obj.Spec.NamespaceSelector.MatchNames[idx] = obj.Namespace
}
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
found := &promv1.ServiceMonitor{}
- err = n.rec.Client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
+ err = n.client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating...")
- err = n.rec.Client.Create(ctx, obj)
+ err = n.client.Create(ctx, obj)
if err != nil {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
@@ -4495,7 +4601,7 @@ func ServiceMonitor(n ClusterPolicyController) (gpuv1.State, error) {
logger.Info("Found Resource, updating...")
obj.ResourceVersion = found.ResourceVersion
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -4519,17 +4625,17 @@ func transformRuntimeClassLegacy(n ClusterPolicyController, spec nodev1.RuntimeC
obj.Labels = spec.Labels
- logger := n.rec.Log.WithValues("RuntimeClass", obj.Name)
+ logger := n.logger.WithValues("RuntimeClass", obj.Name)
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
found := &nodev1beta1.RuntimeClass{}
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating...")
- err = n.rec.Client.Create(ctx, obj)
+ err = n.client.Create(ctx, obj)
if err != nil {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
@@ -4542,7 +4648,7 @@ func transformRuntimeClassLegacy(n ClusterPolicyController, spec nodev1.RuntimeC
logger.Info("Found Resource, updating...")
obj.ResourceVersion = found.ResourceVersion
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -4566,17 +4672,17 @@ func transformRuntimeClass(n ClusterPolicyController, spec nodev1.RuntimeClass)
obj.Labels = spec.Labels
- logger := n.rec.Log.WithValues("RuntimeClass", obj.Name)
+ logger := n.logger.WithValues("RuntimeClass", obj.Name)
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
found := &nodev1.RuntimeClass{}
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating...")
- err = n.rec.Client.Create(ctx, obj)
+ err = n.client.Create(ctx, obj)
if err != nil {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
@@ -4589,7 +4695,7 @@ func transformRuntimeClass(n ClusterPolicyController, spec nodev1.RuntimeClass)
logger.Info("Found Resource, updating...")
obj.ResourceVersion = found.ResourceVersion
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -4605,20 +4711,20 @@ func transformKataRuntimeClasses(n ClusterPolicyController) (gpuv1.State, error)
// Get all existing Kata RuntimeClasses
opts := []client.ListOption{&client.MatchingLabels{"nvidia.com/kata-runtime-class": "true"}}
list := &nodev1.RuntimeClassList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
- n.rec.Log.Info("Could not get Kata RuntimeClassList", err)
+ n.logger.Info("Could not get Kata RuntimeClassList", err)
return gpuv1.NotReady, fmt.Errorf("error getting kata RuntimeClassList: %v", err)
}
- n.rec.Log.V(1).Info("Kata RuntimeClasses", "Number", len(list.Items))
+ n.logger.V(1).Info("Kata RuntimeClasses", "Number", len(list.Items))
if !config.KataManager.IsEnabled() {
// Delete all Kata RuntimeClasses
- n.rec.Log.Info("Kata Manager disabled, deleting all Kata RuntimeClasses")
+ n.logger.Info("Kata Manager disabled, deleting all Kata RuntimeClasses")
for _, rc := range list.Items {
rc := rc
- n.rec.Log.V(1).Info("Deleting Kata RuntimeClass", "Name", rc.Name)
- err := n.rec.Client.Delete(ctx, &rc)
+ n.logger.V(1).Info("Deleting Kata RuntimeClass", "Name", rc.Name)
+ err := n.client.Delete(ctx, &rc)
if err != nil {
return gpuv1.NotReady, fmt.Errorf("error deleting kata RuntimeClass '%s': %v", rc.Name, err)
}
@@ -4636,8 +4742,8 @@ func transformKataRuntimeClasses(n ClusterPolicyController) (gpuv1.State, error)
for _, rc := range list.Items {
if _, ok := rcNames[rc.Name]; !ok {
rc := rc
- n.rec.Log.Info("Deleting Kata RuntimeClass", "Name", rc.Name)
- err := n.rec.Client.Delete(ctx, &rc)
+ n.logger.Info("Deleting Kata RuntimeClass", "Name", rc.Name)
+ err := n.client.Delete(ctx, &rc)
if err != nil {
return gpuv1.NotReady, fmt.Errorf("error deleting kata RuntimeClass '%s': %v", rc.Name, err)
}
@@ -4647,7 +4753,7 @@ func transformKataRuntimeClasses(n ClusterPolicyController) (gpuv1.State, error)
// Using kata RuntimClass template, create / update RuntimeClass objects specified in KataManager configuration
template := n.resources[state].RuntimeClasses[0]
for _, rc := range config.KataManager.Config.RuntimeClasses {
- logger := n.rec.Log.WithValues("RuntimeClass", rc.Name)
+ logger := n.logger.WithValues("RuntimeClass", rc.Name)
if rc.Name == config.Operator.RuntimeClass {
return gpuv1.NotReady, fmt.Errorf("error creating kata runtimeclass '%s' as it conflicts with the runtimeclass used for the gpu-operator operand pods itself", rc.Name)
@@ -4670,15 +4776,15 @@ func transformKataRuntimeClasses(n ClusterPolicyController) (gpuv1.State, error)
}
obj.Scheduling.NodeSelector = nodeSelector
- if err := controllerutil.SetControllerReference(n.singleton, &obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, &obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
found := &nodev1.RuntimeClass{}
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: "", Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating...")
- err = n.rec.Client.Create(ctx, &obj)
+ err = n.client.Create(ctx, &obj)
if err != nil {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
@@ -4691,7 +4797,7 @@ func transformKataRuntimeClasses(n ClusterPolicyController) (gpuv1.State, error)
logger.Info("Found Resource, updating...")
obj.ResourceVersion = found.ResourceVersion
- err = n.rec.Client.Update(ctx, &obj)
+ err = n.client.Update(ctx, &obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
@@ -4719,9 +4825,9 @@ func RuntimeClasses(n ClusterPolicyController) (gpuv1.State, error) {
// 'nvidia-legacy' runtime classes. Delete these objects if they were
// previously created.
if !n.singleton.Spec.CDI.IsEnabled() && (obj.Name == "nvidia-cdi" || obj.Name == "nvidia-legacy") {
- err := n.rec.Client.Delete(n.ctx, &obj)
+ err := n.client.Delete(n.ctx, &obj)
if err != nil && !apierrors.IsNotFound(err) {
- n.rec.Log.Info("Couldn't delete", "RuntimeClass", obj.Name, "Error", err)
+ n.logger.Info("Couldn't delete", "RuntimeClass", obj.Name, "Error", err)
return gpuv1.NotReady, err
}
continue
@@ -4744,17 +4850,17 @@ func PrometheusRule(n ClusterPolicyController) (gpuv1.State, error) {
obj := n.resources[state].PrometheusRule.DeepCopy()
obj.Namespace = n.operatorNamespace
- logger := n.rec.Log.WithValues("PrometheusRule", obj.Name)
+ logger := n.logger.WithValues("PrometheusRule", obj.Name)
- if err := controllerutil.SetControllerReference(n.singleton, obj, n.rec.Scheme); err != nil {
+ if err := controllerutil.SetControllerReference(n.singleton, obj, n.scheme); err != nil {
return gpuv1.NotReady, err
}
found := &promv1.PrometheusRule{}
- err := n.rec.Client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
+ err := n.client.Get(ctx, types.NamespacedName{Namespace: obj.Namespace, Name: obj.Name}, found)
if err != nil && apierrors.IsNotFound(err) {
logger.Info("Not found, creating...")
- err = n.rec.Client.Create(ctx, obj)
+ err = n.client.Create(ctx, obj)
if err != nil {
logger.Info("Couldn't create", "Error", err)
return gpuv1.NotReady, err
@@ -4767,7 +4873,7 @@ func PrometheusRule(n ClusterPolicyController) (gpuv1.State, error) {
logger.Info("Found Resource, updating...")
obj.ResourceVersion = found.ResourceVersion
- err = n.rec.Client.Update(ctx, obj)
+ err = n.client.Update(ctx, obj)
if err != nil {
logger.Info("Couldn't update", "Error", err)
return gpuv1.NotReady, err
diff --git a/controllers/object_controls_test.go b/controllers/object_controls_test.go
index 44c8f4195..ae17a9f3d 100644
--- a/controllers/object_controls_test.go
+++ b/controllers/object_controls_test.go
@@ -34,6 +34,7 @@ import (
nodev1 "k8s.io/api/node/v1"
rbacv1 "k8s.io/api/rbac/v1"
schedv1 "k8s.io/api/scheduling/v1beta1"
+ apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -45,7 +46,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
- gpuv1 "github.com/NVIDIA/gpu-operator/api/v1"
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
)
const (
@@ -55,6 +56,7 @@ const (
vGPUManagerAssetsPath = "assets/state-vgpu-manager/"
sandboxDevicePluginAssetsPath = "assets/state-sandbox-device-plugin"
devicePluginAssetsPath = "assets/state-device-plugin/"
+ dcgmExporterAssetsPath = "assets/state-dcgm-exporter/"
nfdNvidiaPCILabelKey = "feature.node.kubernetes.io/pci-10de.present"
upgradedKernel = "5.4.135-generic"
)
@@ -67,7 +69,6 @@ type testConfig struct {
var (
cfg *testConfig
clusterPolicyController ClusterPolicyController
- clusterPolicyReconciler ClusterPolicyReconciler
clusterPolicy gpuv1.ClusterPolicy
boolTrue *bool
boolFalse *bool
@@ -165,6 +166,9 @@ func setup() error {
if err := promv1.AddToScheme(s); err != nil {
return fmt.Errorf("unable to add promv1 schema: %v", err)
}
+ if err := apiextensionsv1.AddToScheme(s); err != nil {
+ return fmt.Errorf("unable to add apiextensionsv1 schema: %v", err)
+ }
if err := secv1.Install(s); err != nil {
return fmt.Errorf("unable to add secv1 schema: %v", err)
}
@@ -201,16 +205,12 @@ func setup() error {
}
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
- clusterPolicyReconciler = ClusterPolicyReconciler{
- Client: client,
- Log: ctrl.Log.WithName("controller").WithName("ClusterPolicy"),
- Scheme: s,
- }
-
clusterPolicyController = ClusterPolicyController{
ctx: ctx,
singleton: cp,
- rec: &clusterPolicyReconciler,
+ client: client,
+ logger: ctrl.Log.WithName("controller").WithName("ClusterPolicy"),
+ scheme: s,
}
clusterPolicyController.operatorMetrics = initOperatorMetrics(&clusterPolicyController)
@@ -271,7 +271,7 @@ func newCluster(nodes int, s *runtime.Scheme) (client.Client, error) {
// updateClusterPolicy updates an existing ClusterPolicy instance
func updateClusterPolicy(n *ClusterPolicyController, cp *gpuv1.ClusterPolicy) error {
n.singleton = cp
- err := n.rec.Client.Update(n.ctx, cp)
+ err := n.client.Update(n.ctx, cp)
if err != nil && !apierrors.IsConflict(err) {
return fmt.Errorf("failed to update ClusterPolicy: %v", err)
}
@@ -285,7 +285,7 @@ func removeState(n *ClusterPolicyController, idx int) error {
var err error
for _, res := range kubernetesResources {
// TODO: use n.operatorNamespace once MR is merged
- err = n.rec.Client.DeleteAllOf(n.ctx, res)
+ err = n.client.DeleteAllOf(n.ctx, res)
if err != nil {
return fmt.Errorf("error deleting objects from k8s client: %v", err)
}
@@ -391,6 +391,24 @@ func testDaemonsetCommon(t *testing.T, cp *gpuv1.ClusterPolicy, component string
if err != nil {
return nil, fmt.Errorf("unable to get mainCtrImage for sandbox-device-plugin: %v", err)
}
+ case "DCGMExporter":
+ spec = commonDaemonsetSpec{
+ repository: cp.Spec.DCGMExporter.Repository,
+ image: cp.Spec.DCGMExporter.Image,
+ version: cp.Spec.DCGMExporter.Version,
+ imagePullPolicy: cp.Spec.DCGMExporter.ImagePullPolicy,
+ imagePullSecrets: getImagePullSecrets(cp.Spec.DCGMExporter.ImagePullSecrets),
+ args: cp.Spec.DCGMExporter.Args,
+ env: cp.Spec.DCGMExporter.Env,
+ resources: cp.Spec.DCGMExporter.Resources,
+ }
+ dsLabel = "nvidia-dcgm-exporter"
+ mainCtrName = "nvidia-dcgm-exporter"
+ manifestFile = filepath.Join(cfg.root, dcgmExporterAssetsPath)
+ mainCtrImage, err = gpuv1.ImagePath(&cp.Spec.DCGMExporter)
+ if err != nil {
+ return nil, fmt.Errorf("unable to get mainCtrImage for dcgm-exporter: %v", err)
+ }
default:
return nil, fmt.Errorf("invalid component for testDaemonsetCommon(): %s", component)
}
@@ -414,7 +432,7 @@ func testDaemonsetCommon(t *testing.T, cp *gpuv1.ClusterPolicy, component string
client.MatchingLabels{"app": dsLabel},
}
list := &appsv1.DaemonSetList{}
- err = clusterPolicyController.rec.Client.List(ctx, list, opts...)
+ err = clusterPolicyController.client.List(ctx, list, opts...)
if err != nil {
t.Fatalf("could not get DaemonSetList from client: %v", err)
}
@@ -1001,3 +1019,117 @@ func TestIsOpenKernelModulesRequired(t *testing.T) {
})
}
}
+
+// getDCGMExporterTestInput returns a ClusterPolicy instance for a particular
+// dcgm-exporter test case, or nil when the test case name is not recognized.
+func getDCGMExporterTestInput(testCase string) *gpuv1.ClusterPolicy {
+ cp := clusterPolicy.DeepCopy() // work on a copy so the package-level clusterPolicy is left untouched
+
+ // Set some default values
+ cp.Spec.DCGMExporter.Repository = "nvcr.io/nvidia/k8s"
+ cp.Spec.DCGMExporter.Image = "dcgm-exporter"
+ cp.Spec.DCGMExporter.Version = "3.3.0-3.2.0-ubuntu22.04"
+ cp.Spec.DCGMExporter.ImagePullSecrets = []string{"ngc-secret"}
+
+ cp.Spec.Validator.Repository = "nvcr.io/nvidia/cloud-native"
+ cp.Spec.Validator.Image = "gpu-operator-validator"
+ cp.Spec.Validator.Version = "v23.9.2"
+ cp.Spec.Validator.ImagePullSecrets = []string{"ngc-secret"}
+
+ switch testCase {
+ case "default":
+ // Do nothing
+ case "standalone-dcgm":
+ dcgmEnabled := true
+ cp.Spec.DCGM.Enabled = &dcgmEnabled
+ default:
+ return nil
+ }
+
+ return cp
+}
+
+// getDCGMExporterTestOutput returns a map containing the expected output for a
+// dcgm-exporter test case, or nil when the test case name is not recognized.
+func getDCGMExporterTestOutput(testCase string) map[string]interface{} {
+ // expectations shared by every test case
+ output := map[string]interface{}{
+ "numDaemonsets": 1,
+ "dcgmExporterImage": "nvcr.io/nvidia/k8s/dcgm-exporter:3.3.0-3.2.0-ubuntu22.04",
+ "imagePullSecret": "ngc-secret",
+ }
+
+ switch testCase {
+ case "default":
+ output["env"] = map[string]string{} // no specific env vars are required in the default case
+ case "standalone-dcgm":
+ output["env"] = map[string]string{
+ "DCGM_REMOTE_HOSTENGINE_INFO": "nvidia-dcgm:5555",
+ }
+ default:
+ return nil
+ }
+
+ return output
+}
+
+// TestDCGMExporter tests that the GPU Operator correctly deploys the dcgm-exporter daemonset
+// under various scenarios/config options (the default policy and one with standalone DCGM enabled).
+func TestDCGMExporter(t *testing.T) {
+	testCases := []struct {
+		description   string
+		clusterPolicy *gpuv1.ClusterPolicy
+		output        map[string]interface{}
+	}{
+		{
+			"Default",
+			getDCGMExporterTestInput("default"),
+			getDCGMExporterTestOutput("default"),
+		},
+		{
+			"StandaloneDCGM",
+			getDCGMExporterTestInput("standalone-dcgm"),
+			getDCGMExporterTestOutput("standalone-dcgm"),
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.description, func(t *testing.T) {
+			ds, err := testDaemonsetCommon(t, tc.clusterPolicy, "DCGMExporter", tc.output["numDaemonsets"].(int))
+			if err != nil {
+				t.Fatalf("error in testDaemonsetCommon(): %v", err)
+			}
+			if ds == nil {
+				return
+			}
+
+			dcgmExporterImage, mainCtrIdx := "", 0 // image and index of the nvidia-dcgm-exporter container
+			for i, container := range ds.Spec.Template.Spec.Containers {
+				if container.Name == "nvidia-dcgm-exporter" {
+					dcgmExporterImage, mainCtrIdx = container.Image, i
+					break
+				}
+			}
+			require.Equal(t, tc.output["dcgmExporterImage"], dcgmExporterImage, "Unexpected configuration for dcgm-exporter image") // fails first if the container is missing, so the index below is valid
+
+			for key, value := range tc.output["env"].(map[string]string) {
+				envFound := false
+				for _, envVar := range ds.Spec.Template.Spec.Containers[mainCtrIdx].Env { // same container the image was read from
+					if envVar.Name == key && envVar.Value == value {
+						envFound = true
+					}
+				}
+				if !envFound {
+					t.Fatalf("Expected env is not set for daemonset nvidia-dcgm-exporter %s->%s", key, value)
+				}
+			}
+
+			// cleanup by deleting all kubernetes objects
+			err = removeState(&clusterPolicyController, clusterPolicyController.idx-1)
+			if err != nil {
+				t.Fatalf("error removing state: %v", err)
+			}
+			clusterPolicyController.idx--
+		})
+	}
+}
diff --git a/controllers/resource_manager.go b/controllers/resource_manager.go
index af4c1dbad..2789bfe3d 100644
--- a/controllers/resource_manager.go
+++ b/controllers/resource_manager.go
@@ -67,7 +67,7 @@ func filePathWalkDir(n *ClusterPolicyController, root string) ([]string, error)
var files []string
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
- n.rec.Log.V(1).Info("error in filepath.Walk on %s: %v", root, err)
+ n.logger.V(1).Info("error in filepath.Walk", "root", root, "error", err) // logr takes key/value pairs, not printf verbs
return nil
}
if !info.IsDir() {
@@ -103,7 +103,7 @@ func addResourcesControls(n *ClusterPolicyController, path string) (Resources, c
res := Resources{}
ctrl := controlFunc{}
- n.rec.Log.Info("Getting assets from: ", "path:", path)
+ n.logger.Info("Getting assets from: ", "path:", path)
manifests := getAssetsFrom(n, path, n.openshift)
s := json.NewSerializerWithOptions(json.DefaultMetaFactory, scheme.Scheme,
@@ -115,7 +115,7 @@ func addResourcesControls(n *ClusterPolicyController, path string) (Resources, c
slce := strings.Split(kind, ":")
kind = strings.TrimSpace(slce[1])
- n.rec.Log.V(1).Info("Looking for ", "Kind", kind, "in path:", path)
+ n.logger.V(1).Info("Looking for ", "Kind", kind, "in path:", path)
switch kind {
case "ServiceAccount":
@@ -181,7 +181,7 @@ func addResourcesControls(n *ClusterPolicyController, path string) (Resources, c
panicIfError(err)
ctrl = append(ctrl, PrometheusRule)
default:
- n.rec.Log.Info("Unknown Resource", "Manifest", m, "Kind", kind)
+ n.logger.Info("Unknown Resource", "Manifest", m, "Kind", kind)
}
}
diff --git a/controllers/state_manager.go b/controllers/state_manager.go
index 796694780..9c1028ebc 100644
--- a/controllers/state_manager.go
+++ b/controllers/state_manager.go
@@ -23,23 +23,19 @@ import (
"path/filepath"
"strings"
- secv1 "github.com/openshift/api/security/v1"
- promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
- utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-
- gpuv1 "github.com/NVIDIA/gpu-operator/api/v1"
-
"github.com/go-logr/logr"
apiconfigv1 "github.com/openshift/api/config/v1"
- apiimagev1 "github.com/openshift/api/image/v1"
configv1 "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
"golang.org/x/mod/semver"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/discovery"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
+
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
)
const (
@@ -145,15 +141,18 @@ type OpenShiftDriverToolkit struct {
// ClusterPolicyController represents clusterpolicy controller spec for GPU operator
type ClusterPolicyController struct {
+ client client.Client
+
ctx context.Context
singleton *gpuv1.ClusterPolicy
+ logger logr.Logger
+ scheme *runtime.Scheme
operatorNamespace string
resources []Resources
controls []controlFunc
stateNames []string
operatorMetrics *OperatorMetrics
- rec *ClusterPolicyReconciler
idx int
kernelVersionMap map[string]string
currentKernelVersion string
@@ -425,7 +424,7 @@ func (n *ClusterPolicyController) applyDriverAutoUpgradeAnnotation() error {
// fetch all nodes
opts := []client.ListOption{}
list := &corev1.NodeList{}
- err := n.rec.Client.List(n.ctx, list, opts...)
+ err := n.client.List(n.ctx, list, opts...)
if err != nil {
return fmt.Errorf("Unable to list nodes to check annotations, err %s", err.Error())
}
@@ -465,9 +464,9 @@ func (n *ClusterPolicyController) applyDriverAutoUpgradeAnnotation() error {
// remove annotation if value is null
delete(node.ObjectMeta.Annotations, driverAutoUpgradeAnnotationKey)
}
- err := n.rec.Client.Update(n.ctx, &node)
+ err := n.client.Update(n.ctx, &node)
if err != nil {
- n.rec.Log.Info("Failed to update node state annotation on a node",
+ n.logger.Info("Failed to update node state annotation on a node",
"node", node.Name,
"annotationKey", driverAutoUpgradeAnnotationKey,
"annotationValue", value, "error", err)
@@ -484,7 +483,7 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
// fetch all nodes
opts := []client.ListOption{}
list := &corev1.NodeList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
return false, 0, fmt.Errorf("Unable to list nodes to check labels, err %s", err.Error())
}
@@ -501,16 +500,16 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
}
config, err := getWorkloadConfig(labels, n.sandboxEnabled)
if err != nil {
- n.rec.Log.Info("WARNING: failed to get GPU workload config for node; using default",
+ n.logger.Info("WARNING: failed to get GPU workload config for node; using default",
"NodeName", node.ObjectMeta.Name, "SandboxEnabled", n.sandboxEnabled,
"Error", err, "defaultGPUWorkloadConfig", defaultGPUWorkloadConfig)
}
- n.rec.Log.Info("GPU workload configuration", "NodeName", node.ObjectMeta.Name, "GpuWorkloadConfig", config)
- gpuWorkloadConfig := &gpuWorkloadConfiguration{config, node.ObjectMeta.Name, n.rec.Log}
+ n.logger.Info("GPU workload configuration", "NodeName", node.ObjectMeta.Name, "GpuWorkloadConfig", config)
+ gpuWorkloadConfig := &gpuWorkloadConfiguration{config, node.ObjectMeta.Name, n.logger}
if !hasCommonGPULabel(labels) && hasGPULabels(labels) {
- n.rec.Log.Info("Node has GPU(s)", "NodeName", node.ObjectMeta.Name)
+ n.logger.Info("Node has GPU(s)", "NodeName", node.ObjectMeta.Name)
// label the node with common Nvidia GPU label
- n.rec.Log.Info("Setting node label", "NodeName", node.ObjectMeta.Name, "Label", commonGPULabelKey, "Value", commonGPULabelValue)
+ n.logger.Info("Setting node label", "NodeName", node.ObjectMeta.Name, "Label", commonGPULabelKey, "Value", commonGPULabelValue)
labels[commonGPULabelKey] = commonGPULabelValue
// update node labels
node.SetLabels(labels)
@@ -518,10 +517,10 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
} else if hasCommonGPULabel(labels) && !hasGPULabels(labels) {
// previously labelled node and no longer has GPU's
// label node to reset common Nvidia GPU label
- n.rec.Log.Info("Node no longer has GPUs", "NodeName", node.ObjectMeta.Name)
- n.rec.Log.Info("Setting node label", "Label", commonGPULabelKey, "Value", "false")
+ n.logger.Info("Node no longer has GPUs", "NodeName", node.ObjectMeta.Name)
+ n.logger.Info("Setting node label", "Label", commonGPULabelKey, "Value", "false")
labels[commonGPULabelKey] = "false"
- n.rec.Log.Info("Disabling all operands for node", "NodeName", node.ObjectMeta.Name)
+ n.logger.Info("Disabling all operands for node", "NodeName", node.ObjectMeta.Name)
removeAllGPUStateLabels(labels)
// update node labels
node.SetLabels(labels)
@@ -530,16 +529,16 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
if hasCommonGPULabel(labels) {
// If node has GPU, then add state labels as per the workload type
- n.rec.Log.Info("Checking GPU state labels on the node", "NodeName", node.ObjectMeta.Name)
+ n.logger.Info("Checking GPU state labels on the node", "NodeName", node.ObjectMeta.Name)
if gpuWorkloadConfig.updateGPUStateLabels(labels) {
- n.rec.Log.Info("Applying correct GPU state labels to the node", "NodeName", node.ObjectMeta.Name)
+ n.logger.Info("Applying correct GPU state labels to the node", "NodeName", node.ObjectMeta.Name)
node.SetLabels(labels)
updateLabels = true
}
// Disable MIG on the node explicitly where no MIG config is specified
if n.singleton.Spec.MIGManager.IsEnabled() && hasMIGCapableGPU(labels) && !hasMIGConfigLabel(labels) {
if n.singleton.Spec.MIGManager.Config != nil && n.singleton.Spec.MIGManager.Config.Default == migConfigDisabledValue {
- n.rec.Log.Info("Setting MIG config label", "NodeName", node.ObjectMeta.Name, "Label", migConfigLabelKey, "Value", migConfigDisabledValue)
+ n.logger.Info("Setting MIG config label", "NodeName", node.ObjectMeta.Name, "Label", migConfigLabelKey, "Value", migConfigDisabledValue)
labels[migConfigLabelKey] = migConfigDisabledValue
node.SetLabels(labels)
updateLabels = true
@@ -553,12 +552,12 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
rhcosVersion, ok := labels[nfdOSTreeVersionLabelKey]
if ok {
n.ocpDriverToolkit.rhcosVersions[rhcosVersion] = true
- n.rec.Log.V(1).Info("GPU node running RHCOS",
+ n.logger.V(1).Info("GPU node running RHCOS",
"nodeName", node.ObjectMeta.Name,
"RHCOS version", rhcosVersion,
)
} else {
- n.rec.Log.Info("node doesn't have the proper NFD RHCOS version label.",
+ n.logger.Info("node doesn't have the proper NFD RHCOS version label.",
"nodeName", node.ObjectMeta.Name,
"nfdLabel", nfdOSTreeVersionLabelKey,
)
@@ -568,7 +567,7 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
// update node with the latest labels
if updateLabels {
- err = n.rec.Client.Update(ctx, &node)
+ err = n.client.Update(ctx, &node)
if err != nil {
return false, 0, fmt.Errorf("Unable to label node %s for the GPU Operator deployment, err %s",
node.ObjectMeta.Name, err.Error())
@@ -576,7 +575,7 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
}
} // end node loop
- n.rec.Log.Info("Number of nodes with GPU label", "NodeCount", gpuNodesTotal)
+ n.logger.Info("Number of nodes with GPU label", "NodeCount", gpuNodesTotal)
n.operatorMetrics.gpuNodesTotal.Set(float64(gpuNodesTotal))
return clusterHasNFDLabels, gpuNodesTotal, nil
}
@@ -606,7 +605,7 @@ func (n *ClusterPolicyController) setPodSecurityLabelsForNamespace() error {
// The GPU Operator is not installed in the suggested
// namespace, so the namespace may be shared with other
// untrusted operators. Do not set Pod Security Admission labels.
- n.rec.Log.Info("GPU Operator is not installed in the suggested namespace. Not setting Pod Security Admission labels for namespace",
+ n.logger.Info("GPU Operator is not installed in the suggested namespace. Not setting Pod Security Admission labels for namespace",
"namespace", namespaceName,
"suggested namespace", ocpSuggestedNamespace)
return nil
@@ -614,7 +613,7 @@ func (n *ClusterPolicyController) setPodSecurityLabelsForNamespace() error {
ns := &corev1.Namespace{}
opts := client.ObjectKey{Name: namespaceName}
- err := n.rec.Client.Get(ctx, opts, ns)
+ err := n.client.Get(ctx, opts, ns)
if err != nil {
return fmt.Errorf("ERROR: could not get Namespace %s from client: %v", namespaceName, err)
}
@@ -640,7 +639,7 @@ func (n *ClusterPolicyController) setPodSecurityLabelsForNamespace() error {
return nil
}
- err = n.rec.Client.Patch(ctx, ns, patch)
+ err = n.client.Patch(ctx, ns, patch)
if err != nil {
return fmt.Errorf("unable to label namespace %s with pod security levels: %v", namespaceName, err)
}
@@ -657,7 +656,7 @@ func (n *ClusterPolicyController) ocpEnsureNamespaceMonitoring() error {
// namespace, so the namespace may be shared with other
// untrusted operators. Do not enable namespace monitoring in
// this case, as per OpenShift/Prometheus best practices.
- n.rec.Log.Info("GPU Operator not installed in the suggested namespace, skipping namespace monitoring verification",
+ n.logger.Info("GPU Operator not installed in the suggested namespace, skipping namespace monitoring verification",
"namespace", namespaceName,
"suggested namespace", ocpSuggestedNamespace)
return nil
@@ -665,7 +664,7 @@ func (n *ClusterPolicyController) ocpEnsureNamespaceMonitoring() error {
ns := &corev1.Namespace{}
opts := client.ObjectKey{Name: namespaceName}
- err := n.rec.Client.Get(ctx, opts, ns)
+ err := n.client.Get(ctx, opts, ns)
if err != nil {
return fmt.Errorf("ERROR: could not get Namespace %s from client: %v", namespaceName, err)
}
@@ -679,7 +678,7 @@ func (n *ClusterPolicyController) ocpEnsureNamespaceMonitoring() error {
} else {
msg = "WARNING: OpenShift monitoring currently disabled on user request"
}
- n.rec.Log.Info(msg,
+ n.logger.Info(msg,
"namespace", namespaceName,
"label", ocpNamespaceMonitoringLabelKey,
"value", val,
@@ -689,16 +688,16 @@ func (n *ClusterPolicyController) ocpEnsureNamespaceMonitoring() error {
}
// label not defined, enable monitoring
- n.rec.Log.Info("Enabling OpenShift monitoring")
- n.rec.Log.V(1).Info("Adding monitoring label to the operator namespace",
+ n.logger.Info("Enabling OpenShift monitoring")
+ n.logger.V(1).Info("Adding monitoring label to the operator namespace",
"namespace", namespaceName,
"label", ocpNamespaceMonitoringLabelKey,
"value", ocpNamespaceMonitoringLabelValue)
- n.rec.Log.Info("Monitoring can be disabled by setting the namespace label " +
+ n.logger.Info("Monitoring can be disabled by setting the namespace label " +
ocpNamespaceMonitoringLabelKey + "=false")
patch := client.MergeFrom(ns.DeepCopy())
ns.ObjectMeta.Labels[ocpNamespaceMonitoringLabelKey] = ocpNamespaceMonitoringLabelValue
- err = n.rec.Client.Patch(ctx, ns, patch)
+ err = n.client.Patch(ctx, ns, patch)
if err != nil {
return fmt.Errorf("Unable to label namespace %s for the GPU Operator monitoring, err %s",
namespaceName, err.Error())
@@ -724,7 +723,7 @@ func (n *ClusterPolicyController) getRuntime() error {
client.MatchingLabels{commonGPULabelKey: "true"},
}
list := &corev1.NodeList{}
- err := n.rec.Client.List(ctx, list, opts...)
+ err := n.client.List(ctx, list, opts...)
if err != nil {
return fmt.Errorf("Unable to list nodes prior to checking container runtime: %v", err)
}
@@ -733,7 +732,7 @@ func (n *ClusterPolicyController) getRuntime() error {
for _, node := range list.Items {
rt, err := getRuntimeString(node)
if err != nil {
- n.rec.Log.Info(fmt.Sprintf("Unable to get runtime info for node %s: %v", node.Name, err))
+ n.logger.Info(fmt.Sprintf("Unable to get runtime info for node %s: %v", node.Name, err))
continue
}
runtime = rt
@@ -744,7 +743,7 @@ func (n *ClusterPolicyController) getRuntime() error {
}
if runtime.String() == "" {
- n.rec.Log.Info("Unable to get runtime info from the cluster, defaulting to containerd")
+ n.logger.Info("Unable to get runtime info from the cluster, defaulting to containerd")
runtime = gpuv1.Containerd
}
n.runtime = runtime
@@ -754,14 +753,16 @@ func (n *ClusterPolicyController) getRuntime() error {
func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterPolicyReconciler, clusterPolicy *gpuv1.ClusterPolicy) error {
n.singleton = clusterPolicy
n.ctx = ctx
- n.rec = reconciler
n.idx = 0
+ n.logger = reconciler.Log
+ n.client = reconciler.Client
+ n.scheme = reconciler.Scheme
if len(n.controls) == 0 {
clusterPolicyCtrl.operatorNamespace = os.Getenv("OPERATOR_NAMESPACE")
if clusterPolicyCtrl.operatorNamespace == "" {
- n.rec.Log.Error(nil, "OPERATOR_NAMESPACE environment variable not set, cannot proceed")
+ n.logger.Error(nil, "OPERATOR_NAMESPACE environment variable not set, cannot proceed")
// we cannot do anything without the operator namespace,
// let the operator Pod run into `CrashloopBackOff`
@@ -782,15 +783,10 @@ func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterP
return fmt.Errorf("k8s version detected '%s' is not a valid semantic version", k8sVersion)
}
n.k8sVersion = k8sVersion
- n.rec.Log.Info("Kubernetes version detected", "version", k8sVersion)
-
- utilruntime.Must(promv1.AddToScheme(reconciler.Scheme))
- utilruntime.Must(secv1.Install(reconciler.Scheme))
- utilruntime.Must(apiconfigv1.Install(reconciler.Scheme))
- utilruntime.Must(apiimagev1.Install(reconciler.Scheme))
+ n.logger.Info("Kubernetes version detected", "version", k8sVersion)
n.operatorMetrics = initOperatorMetrics(n)
- n.rec.Log.Info("Operator metrics initialized.")
+ n.logger.Info("Operator metrics initialized.")
addState(n, "/opt/gpu-operator/pre-requisites")
addState(n, "/opt/gpu-operator/state-operator-metrics")
@@ -821,13 +817,13 @@ func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterP
// workload configuration
defaultWorkload := clusterPolicy.Spec.SandboxWorkloads.DefaultWorkload
if isValidWorkloadConfig(defaultWorkload) {
- n.rec.Log.Info("Default GPU workload is overridden in ClusterPolicy", "DefaultWorkload", defaultWorkload)
+ n.logger.Info("Default GPU workload is overridden in ClusterPolicy", "DefaultWorkload", defaultWorkload)
defaultGPUWorkloadConfig = defaultWorkload
}
} else {
n.sandboxEnabled = false
}
- n.rec.Log.Info("Sandbox workloads", "Enabled", n.sandboxEnabled, "DefaultWorkload", defaultGPUWorkloadConfig)
+ n.logger.Info("Sandbox workloads", "Enabled", n.sandboxEnabled, "DefaultWorkload", defaultGPUWorkloadConfig)
if n.openshift != "" && (n.singleton.Spec.Operator.UseOpenShiftDriverToolkit == nil ||
*n.singleton.Spec.Operator.UseOpenShiftDriverToolkit) {
@@ -849,12 +845,12 @@ func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterP
if clusterPolicy.Spec.PSA.IsEnabled() {
// label namespace with Pod Security Admission levels
- n.rec.Log.Info("Pod Security is enabled. Adding labels to GPU Operator namespace", "namespace", n.operatorNamespace)
+ n.logger.Info("Pod Security is enabled. Adding labels to GPU Operator namespace", "namespace", n.operatorNamespace)
err := n.setPodSecurityLabelsForNamespace()
if err != nil {
return err
}
- n.rec.Log.Info("Pod Security Admission labels added to GPU Operator namespace", "namespace", n.operatorNamespace)
+ n.logger.Info("Pod Security Admission labels added to GPU Operator namespace", "namespace", n.operatorNamespace)
}
// fetch all nodes and label gpu nodes
@@ -876,13 +872,13 @@ func (n *ClusterPolicyController) init(ctx context.Context, reconciler *ClusterP
if err != nil {
return err
}
- n.rec.Log.Info(fmt.Sprintf("Using container runtime: %s", n.runtime.String()))
+ n.logger.Info(fmt.Sprintf("Using container runtime: %s", n.runtime.String()))
// fetch all kernel versions from the GPU nodes in the cluster
if n.singleton.Spec.Driver.IsEnabled() && n.singleton.Spec.Driver.UsePrecompiledDrivers() {
kernelVersionMap, err := n.getKernelVersionsMap()
if err != nil {
- n.rec.Log.Info("Unable to obtain all kernel versions of the GPU nodes in the cluster", "err", err)
+ n.logger.Info("Unable to obtain all kernel versions of the GPU nodes in the cluster", "err", err)
return err
}
n.kernelVersionMap = kernelVersionMap
@@ -906,7 +902,7 @@ func (n *ClusterPolicyController) initOCPParams() error {
} else if n.ocpDriverToolkit.requested {
hasImageStream, err := ocpHasDriverToolkitImageStream(n)
if err != nil {
- n.rec.Log.Info("ocpHasDriverToolkitImageStream", "err", err)
+ n.logger.Info("ocpHasDriverToolkitImageStream", "err", err)
return err
}
hasCompatibleNFD := len(n.ocpDriverToolkit.rhcosVersions) != 0
@@ -917,11 +913,11 @@ func (n *ClusterPolicyController) initOCPParams() error {
} else {
n.operatorMetrics.openshiftDriverToolkitEnabled.Set(openshiftDriverToolkitNotPossible)
}
- n.rec.Log.Info("OpenShift Driver Toolkit requested",
+ n.logger.Info("OpenShift Driver Toolkit requested",
"hasCompatibleNFD", hasCompatibleNFD,
"hasDriverToolkitImageStream", hasImageStream)
- n.rec.Log.Info("OpenShift Driver Toolkit",
+ n.logger.Info("OpenShift Driver Toolkit",
"enabled", n.ocpDriverToolkit.enabled)
if hasImageStream {
@@ -954,7 +950,7 @@ func (n *ClusterPolicyController) step() (gpuv1.State, error) {
// updating / deleting objects owned by another controller.
if (n.stateNames[n.idx] == "state-driver" || n.stateNames[n.idx] == "state-vgpu-manager") &&
n.singleton.Spec.Driver.UseNvdiaDriverCRDType() {
- n.rec.Log.Info("NVIDIADriver CRD is enabled, cleaning up all NVIDIA driver daemonsets owned by ClusterPolicy")
+ n.logger.Info("NVIDIADriver CRD is enabled, cleaning up all NVIDIA driver daemonsets owned by ClusterPolicy")
n.idx++
// Cleanup all driver daemonsets owned by ClusterPolicy.
err := n.cleanupAllDriverDaemonSets(n.ctx)
@@ -1032,7 +1028,7 @@ func (n ClusterPolicyController) isStateEnabled(stateName string) bool {
case "state-operator-metrics":
return true
default:
- n.rec.Log.Error(nil, "invalid state passed", "stateName", stateName)
+ n.logger.Error(nil, "invalid state passed", "stateName", stateName)
return false
}
}
diff --git a/controllers/state_manager_test.go b/controllers/state_manager_test.go
index def5fea65..bdec856e0 100644
--- a/controllers/state_manager_test.go
+++ b/controllers/state_manager_test.go
@@ -21,7 +21,7 @@ import (
corev1 "k8s.io/api/core/v1"
- gpuv1 "github.com/NVIDIA/gpu-operator/api/v1"
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
)
func TestGetRuntimeString(t *testing.T) {
diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go
new file mode 100644
index 000000000..83b504b7e
--- /dev/null
+++ b/controllers/transforms_test.go
@@ -0,0 +1,1165 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controllers
+
+import (
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/util/intstr"
+ ctrl "sigs.k8s.io/controller-runtime"
+
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+)
+
+// Daemonset wraps *appsv1.DaemonSet for tests; its With* builders modify the wrapped object in place and return the wrapper for chaining.
+type Daemonset struct {
+ *appsv1.DaemonSet
+}
+
+func NewDaemonset() Daemonset {
+ ds := &appsv1.DaemonSet{ // minimal fixture: only name and namespace are set, the pod spec is empty
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-ds",
+ Namespace: "test-ns",
+ },
+ Spec: appsv1.DaemonSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{},
+ },
+ },
+ }
+ return Daemonset{ds}
+}
+
+func (d Daemonset) WithHostPathVolume(name string, path string, hostPathType *corev1.HostPathType) Daemonset {
+ volume := corev1.Volume{
+ Name: name,
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: path,
+ Type: hostPathType,
+ },
+ },
+ }
+ d.Spec.Template.Spec.Volumes = append(d.Spec.Template.Spec.Volumes, volume) // appended after any existing volumes
+ return d
+}
+
+func (d Daemonset) WithInitContainer(container corev1.Container) Daemonset {
+ d.Spec.Template.Spec.InitContainers = append(d.Spec.Template.Spec.InitContainers, container) // appended, order of calls is preserved
+ return d
+}
+
+func (d Daemonset) WithContainer(container corev1.Container) Daemonset {
+ d.Spec.Template.Spec.Containers = append(d.Spec.Template.Spec.Containers, container) // appended, order of calls is preserved
+ return d
+}
+
+func (d Daemonset) WithName(name string) Daemonset {
+ d.Name = name // overrides the "test-ds" default set by NewDaemonset
+ return d
+}
+
+func (d Daemonset) WithUpdateStrategy(strategy appsv1.DaemonSetUpdateStrategy) Daemonset {
+ d.Spec.UpdateStrategy = strategy
+ return d
+}
+
+func (d Daemonset) WithPriorityClass(name string) Daemonset {
+ d.Spec.Template.Spec.PriorityClassName = name
+ return d
+}
+
+func (d Daemonset) WithTolerations(tolerations []corev1.Toleration) Daemonset {
+ d.Spec.Template.Spec.Tolerations = tolerations // replaces, rather than appends to, any existing tolerations
+ return d
+}
+
+func (d Daemonset) WithPodLabels(labels map[string]string) Daemonset {
+ d.Spec.Template.Labels = labels // replaces the pod template's label map
+ return d
+}
+
+func (d Daemonset) WithPodAnnotations(annotations map[string]string) Daemonset {
+ d.Spec.Template.Annotations = annotations // replaces the pod template's annotation map
+ return d
+}
+
+func (d Daemonset) WithPullSecret(secret string) Daemonset {
+ d.Spec.Template.Spec.ImagePullSecrets = []corev1.LocalObjectReference{{Name: secret}} // overwrites any previously set pull secrets with this single entry
+ return d
+}
+
+func (d Daemonset) WithRuntimeClassName(name string) Daemonset {
+ d.Spec.Template.Spec.RuntimeClassName = &name // points at this call's own copy of name
+ return d
+}
+
+// Pod wraps *corev1.Pod for tests; its With* builders modify the wrapped object in place and return the wrapper for chaining.
+type Pod struct {
+ *corev1.Pod
+}
+
+func NewPod() Pod {
+ pod := &corev1.Pod{ // fixture with an empty pod spec and no metadata
+ Spec: corev1.PodSpec{},
+ }
+ return Pod{pod}
+}
+
+func (p Pod) WithInitContainer(container corev1.Container) Pod {
+ p.Spec.InitContainers = append(p.Spec.InitContainers, container) // appended, order of calls is preserved
+ return p
+}
+
+func (p Pod) WithRuntimeClassName(name string) Pod {
+ p.Spec.RuntimeClassName = &name // points at this call's own copy of name
+ return p
+}
+
+func TestTransformForHostRoot(t *testing.T) {
+ hostRootVolumeName := "host-root" // volume names used by the fixtures below
+ hostDevCharVolumeName := "host-dev-char"
+ testCases := []struct { // each case: a hostRoot value, an input DaemonSet and the DaemonSet expected after the transform
+ description string
+ hostRoot string
+ input Daemonset
+ expectedOutput Daemonset
+ }{
+ {
+ description: "no host root or host-dev-char volume in daemonset",
+ hostRoot: "/custom-root",
+ input: NewDaemonset(),
+ expectedOutput: NewDaemonset(),
+ },
+ {
+ description: "empty host root is a no-op",
+ hostRoot: "",
+ input: NewDaemonset().
+ WithHostPathVolume(hostRootVolumeName, "/", nil).
+ WithHostPathVolume(hostDevCharVolumeName, "/", nil),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume(hostRootVolumeName, "/", nil).
+ WithHostPathVolume(hostDevCharVolumeName, "/", nil),
+ },
+ {
+ description: "custom host root with host-root and host-dev-char volumes",
+ hostRoot: "/custom-root",
+ input: NewDaemonset().
+ WithHostPathVolume(hostRootVolumeName, "/", nil).
+ WithHostPathVolume(hostDevCharVolumeName, "/", nil).
+ WithContainer(corev1.Container{Name: "test-ctr"}),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume(hostRootVolumeName, "/custom-root", nil).
+ WithHostPathVolume(hostDevCharVolumeName, "/custom-root/dev/char", nil).
+ WithContainer(corev1.Container{Name: "test-ctr", Env: []corev1.EnvVar{{Name: HostRootEnvName, Value: "/custom-root"}}}),
+ },
+ {
+ description: "custom host root with host-root volume",
+ hostRoot: "/custom-root",
+ input: NewDaemonset().
+ WithHostPathVolume(hostRootVolumeName, "/", nil).
+ WithContainer(corev1.Container{Name: "test-ctr"}),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume(hostRootVolumeName, "/custom-root", nil).
+ WithContainer(corev1.Container{Name: "test-ctr", Env: []corev1.EnvVar{{Name: HostRootEnvName, Value: "/custom-root"}}}),
+ },
+ {
+ description: "custom host root with host-dev-char volume",
+ hostRoot: "/custom-root",
+ input: NewDaemonset().
+ WithHostPathVolume(hostDevCharVolumeName, "/", nil),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume(hostDevCharVolumeName, "/custom-root/dev/char", nil),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ transformForHostRoot(tc.input.DaemonSet, tc.hostRoot) // result is observed through tc.input, which is compared below
+ require.EqualValues(t, tc.expectedOutput, tc.input)
+ })
+ }
+}
+
+func TestTransformForDriverInstallDir(t *testing.T) {
+ driverInstallDirVolumeName := "driver-install-dir" // volume name used by the fixtures below
+ testCases := []struct { // each case: a driverInstallDir value, an input DaemonSet and the DaemonSet expected after the transform
+ description string
+ driverInstallDir string
+ input Daemonset
+ expectedOutput Daemonset
+ }{
+ {
+ description: "no driver-install-dir volume in daemonset",
+ driverInstallDir: "/custom-root",
+ input: NewDaemonset(),
+ expectedOutput: NewDaemonset(),
+ },
+ {
+ description: "empty driverInstallDir is a no-op",
+ driverInstallDir: "",
+ input: NewDaemonset().
+ WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil).
+ WithInitContainer(
+ corev1.Container{
+ Name: "driver-validation",
+ VolumeMounts: []corev1.VolumeMount{
+ {Name: driverInstallDirVolumeName, MountPath: "/run/nvidia/driver"},
+ },
+ }),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil).
+ WithInitContainer(
+ corev1.Container{
+ Name: "driver-validation",
+ VolumeMounts: []corev1.VolumeMount{
+ {Name: driverInstallDirVolumeName, MountPath: "/run/nvidia/driver"},
+ },
+ }),
+ },
+ {
+ description: "custom driverInstallDir with driver-install-dir volume",
+ driverInstallDir: "/custom-root",
+ input: NewDaemonset().
+ WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume(driverInstallDirVolumeName, "/custom-root", nil),
+ },
+ {
+ description: "custom driverInstallDir with driver-install-dir volume and driver-validation initContainer",
+ driverInstallDir: "/custom-root",
+ input: NewDaemonset().
+ WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil).
+ WithInitContainer(
+ corev1.Container{
+ Name: "driver-validation",
+ VolumeMounts: []corev1.VolumeMount{
+ {Name: driverInstallDirVolumeName, MountPath: "/run/nvidia/driver"},
+ },
+ }),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume(driverInstallDirVolumeName, "/custom-root", nil).
+ WithInitContainer(
+ corev1.Container{
+ Name: "driver-validation",
+ VolumeMounts: []corev1.VolumeMount{
+ {Name: driverInstallDirVolumeName, MountPath: "/custom-root"}, // mount path is expected to follow the custom install dir
+ },
+ Env: []corev1.EnvVar{
+ {Name: DriverInstallDirEnvName, Value: "/custom-root"},
+ {Name: DriverInstallDirCtrPathEnvName, Value: "/custom-root"},
+ },
+ }),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ transformForDriverInstallDir(tc.input.DaemonSet, tc.driverInstallDir) // result is observed through tc.input, which is compared below
+ require.EqualValues(t, tc.expectedOutput, tc.input)
+ })
+ }
+}
+
+func TestTransformForRuntime(t *testing.T) { // table-driven check of transformForRuntime, one case per container runtime, targeting the container named "test-ctr"
+ testCases := []struct {
+ description string
+ runtime gpuv1.Runtime
+ input Daemonset
+ expectedOutput Daemonset
+ }{
+ {
+ description: "containerd", // expects config and socket hostPath volumes, matching mounts, and RUNTIME*/CONTAINERD_* env vars
+ runtime: gpuv1.Containerd,
+ input: NewDaemonset().
+ WithContainer(corev1.Container{Name: "test-ctr"}),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume("containerd-config", filepath.Dir(DefaultContainerdConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)).
+ WithHostPathVolume("containerd-socket", filepath.Dir(DefaultContainerdSocketFile), nil).
+ WithContainer(corev1.Container{
+ Name: "test-ctr",
+ Env: []corev1.EnvVar{
+ {Name: "RUNTIME", Value: gpuv1.Containerd.String()},
+ {Name: "CONTAINERD_RUNTIME_CLASS", Value: DefaultRuntimeClass},
+ {Name: "RUNTIME_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultContainerdConfigFile))},
+ {Name: "CONTAINERD_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultContainerdConfigFile))},
+ {Name: "RUNTIME_SOCKET", Value: filepath.Join(DefaultRuntimeSocketTargetDir, filepath.Base(DefaultContainerdSocketFile))},
+ {Name: "CONTAINERD_SOCKET", Value: filepath.Join(DefaultRuntimeSocketTargetDir, filepath.Base(DefaultContainerdSocketFile))},
+ },
+ VolumeMounts: []corev1.VolumeMount{
+ {Name: "containerd-config", MountPath: DefaultRuntimeConfigTargetDir},
+ {Name: "containerd-socket", MountPath: DefaultRuntimeSocketTargetDir},
+ },
+ }),
+ },
+ {
+ description: "crio", // expects only a config hostPath volume and mount; no socket volume or socket env var is listed
+ runtime: gpuv1.CRIO,
+ input: NewDaemonset().WithContainer(corev1.Container{Name: "test-ctr"}),
+ expectedOutput: NewDaemonset().
+ WithHostPathVolume("crio-config", filepath.Dir(DefaultCRIOConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)).
+ WithContainer(corev1.Container{
+ Name: "test-ctr",
+ Env: []corev1.EnvVar{
+ {Name: "RUNTIME", Value: gpuv1.CRIO.String()},
+ {Name: "RUNTIME_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultCRIOConfigFile))},
+ {Name: "CRIO_CONFIG", Value: filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultCRIOConfigFile))},
+ },
+ VolumeMounts: []corev1.VolumeMount{
+ {Name: "crio-config", MountPath: DefaultRuntimeConfigTargetDir},
+ },
+ }),
+ },
+ }
+
+ cp := &gpuv1.ClusterPolicySpec{Operator: gpuv1.OperatorSpec{RuntimeClass: DefaultRuntimeClass}} // one spec shared by all cases; only Operator.RuntimeClass is set
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ err := transformForRuntime(tc.input.DaemonSet, cp, tc.runtime.String(), "test-ctr") // the runtime is passed as its string form
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedOutput, tc.input)
+ })
+ }
+}
+
+func TestApplyUpdateStrategyConfig(t *testing.T) { // table-driven check of applyUpdateStrategyConfig; each dsSpec is wrapped in a ClusterPolicySpec before the call
+ testCases := []struct {
+ description string
+ ds Daemonset
+ dsSpec gpuv1.DaemonsetsSpec
+ errorExpected bool
+ expectedDs Daemonset
+ }{
+ {
+ description: "empty daemonset spec configuration",
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{},
+ errorExpected: false,
+ expectedDs: NewDaemonset(),
+ },
+ {
+ description: "invalid update strategy string, no rolling update fields configured", // unrecognised strategy: no error, daemonset expected unchanged
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{UpdateStrategy: "invalid"},
+ errorExpected: false,
+ expectedDs: NewDaemonset(),
+ },
+ {
+ description: "RollingUpdate update strategy string, no rolling update fields configured", // RollingUpdate spec is nil: daemonset expected unchanged
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{UpdateStrategy: "RollingUpdate"},
+ errorExpected: false,
+ expectedDs: NewDaemonset(),
+ },
+ {
+ description: "RollingUpdate update strategy string, daemonset is driver pod", // expected result carries no update strategy even though maxUnavailable is set
+ ds: NewDaemonset().WithName(commonDriverDaemonsetName),
+ dsSpec: gpuv1.DaemonsetsSpec{
+ UpdateStrategy: "RollingUpdate",
+ RollingUpdate: &gpuv1.RollingUpdateSpec{
+ MaxUnavailable: "1",
+ }},
+ errorExpected: false,
+ expectedDs: NewDaemonset().WithName(commonDriverDaemonsetName),
+ },
+ {
+ description: "RollingUpdate update strategy string, integer maxUnavailable", // "1" is expected to become an integer-typed IntOrString
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{
+ UpdateStrategy: "RollingUpdate",
+ RollingUpdate: &gpuv1.RollingUpdateSpec{
+ MaxUnavailable: "1",
+ }},
+ errorExpected: false,
+ expectedDs: NewDaemonset().WithUpdateStrategy(appsv1.DaemonSetUpdateStrategy{
+ Type: appsv1.RollingUpdateDaemonSetStrategyType,
+ RollingUpdate: &appsv1.RollingUpdateDaemonSet{MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 1}},
+ }),
+ },
+ {
+ description: "RollingUpdate update strategy string, percentage maxUnavailable", // "10%" is expected to be kept as a string-typed IntOrString
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{
+ UpdateStrategy: "RollingUpdate",
+ RollingUpdate: &gpuv1.RollingUpdateSpec{
+ MaxUnavailable: "10%",
+ }},
+ errorExpected: false,
+ expectedDs: NewDaemonset().WithUpdateStrategy(appsv1.DaemonSetUpdateStrategy{
+ Type: appsv1.RollingUpdateDaemonSetStrategyType,
+ RollingUpdate: &appsv1.RollingUpdateDaemonSet{MaxUnavailable: &intstr.IntOrString{Type: intstr.String, StrVal: "10%"}},
+ }),
+ },
+ {
+ description: "RollingUpdate update strategy string, invalid maxUnavailable", // malformed value: only the returned error is asserted, expectedDs is left unset
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{
+ UpdateStrategy: "RollingUpdate",
+ RollingUpdate: &gpuv1.RollingUpdateSpec{
+ MaxUnavailable: "10%abc",
+ }},
+ errorExpected: true,
+ },
+ {
+ description: "OnDelete update strategy",
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{UpdateStrategy: "OnDelete"},
+ errorExpected: false,
+ expectedDs: NewDaemonset().WithUpdateStrategy(appsv1.DaemonSetUpdateStrategy{Type: appsv1.OnDeleteDaemonSetStrategyType}),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ cpSpec := &gpuv1.ClusterPolicySpec{
+ Daemonsets: tc.dsSpec,
+ }
+ err := applyUpdateStrategyConfig(tc.ds.DaemonSet, cpSpec)
+ if tc.errorExpected {
+ require.Error(t, err)
+ return // the daemonset is not compared when an error is expected
+ }
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
+
+func TestApplyCommonDaemonSetConfig(t *testing.T) { // table-driven check of applyCommonDaemonsetConfig: priority class, tolerations and an invalid rolling-update value
+ testCases := []struct {
+ description string
+ ds Daemonset
+ dsSpec gpuv1.DaemonsetsSpec
+ errorExpected bool
+ expectedDs Daemonset
+ }{
+ {
+ description: "empty daemonset spec configuration",
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{},
+ expectedDs: NewDaemonset(),
+ },
+ {
+ description: "priorityclass configured",
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{PriorityClassName: "test-priority-class"},
+ expectedDs: NewDaemonset().WithPriorityClass("test-priority-class"),
+ },
+ {
+ description: "toleration configured", // the single toleration is expected to be copied over field for field
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{
+ Tolerations: []corev1.Toleration{
+ {
+ Key: "test-key",
+ Operator: corev1.TolerationOpExists,
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ },
+ expectedDs: NewDaemonset().WithTolerations([]corev1.Toleration{
+ {
+ Key: "test-key",
+ Operator: corev1.TolerationOpExists,
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ }),
+ },
+ {
+ description: "invalid updatestrategy configured", // malformed maxUnavailable: only the returned error is asserted
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{
+ UpdateStrategy: "RollingUpdate",
+ RollingUpdate: &gpuv1.RollingUpdateSpec{
+ MaxUnavailable: "10%abc",
+ }},
+ errorExpected: true,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ cpSpec := &gpuv1.ClusterPolicySpec{
+ Daemonsets: tc.dsSpec,
+ }
+ err := applyCommonDaemonsetConfig(tc.ds.DaemonSet, cpSpec)
+ if tc.errorExpected {
+ require.Error(t, err)
+ return // the daemonset is not compared when an error is expected
+ }
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
+
+func TestApplyCommonDaemonsetMetadata(t *testing.T) { // table-driven check of applyCommonDaemonsetMetadata for labels and annotations taken from DaemonsetsSpec
+ testCases := []struct {
+ description string
+ ds Daemonset
+ dsSpec gpuv1.DaemonsetsSpec
+ expectedDs Daemonset
+ }{
+ {
+ description: "empty daemonset spec configuration",
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{},
+ expectedDs: NewDaemonset(),
+ },
+ {
+ description: "common daemonset labels configured", // of the three labels supplied, only "key" appears in the expectation
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{Labels: map[string]string{
+ "key": "value",
+ "app": "value",
+ "app.kubernetes.io/part-of": "value",
+ }},
+ expectedDs: NewDaemonset().WithPodLabels(map[string]string{
+ "key": "value",
+ }),
+ },
+ {
+ description: "common daemonset annotations configured", // unlike labels, all three annotations appear in the expectation
+ ds: NewDaemonset(),
+ dsSpec: gpuv1.DaemonsetsSpec{Annotations: map[string]string{
+ "key": "value",
+ "app": "value",
+ "app.kubernetes.io/part-of": "value",
+ }},
+ expectedDs: NewDaemonset().WithPodAnnotations(map[string]string{
+ "key": "value",
+ "app": "value",
+ "app.kubernetes.io/part-of": "value",
+ }),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ applyCommonDaemonsetMetadata(tc.ds.DaemonSet, &tc.dsSpec) // no return value is captured; the result is read back from tc.ds
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
+
+func TestTransformValidationInitContainer(t *testing.T) { // table-driven check of transformValidationInitContainer against a daemonset with three init containers
+ testCases := []struct {
+ description string
+ ds Daemonset
+ cpSpec *gpuv1.ClusterPolicySpec
+ expectedDs Daemonset
+ }{
+ {
+ description: "transform both driver and toolkit validation initContainers", // the "dummy" init container is expected to pass through untouched
+ ds: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "driver-validation"}).
+ WithInitContainer(corev1.Container{Name: "toolkit-validation"}).
+ WithInitContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ ImagePullSecrets: []string{"pull-secret"},
+ Driver: gpuv1.DriverValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ Toolkit: gpuv1.ToolkitValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ },
+ expectedDs: NewDaemonset().WithInitContainer(corev1.Container{
+ Name: "driver-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0", // Repository/Image:Version from the spec above
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{{Name: "foo", Value: "bar"}},
+ }).WithInitContainer(corev1.Container{
+ Name: "toolkit-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{{Name: "foo", Value: "bar"}},
+ }).WithInitContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret"),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ err := transformValidationInitContainer(tc.ds.DaemonSet, tc.cpSpec)
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
+
+func newBoolPtr(b bool) *bool {
+ // b is this call's own by-value copy, so taking its address yields a
+ // fresh *bool holding the requested value, distinct for every call.
+ return &b
+}
+
+func TestTransformDriverManagerInitContainer(t *testing.T) { // table-driven check of transformDriverManagerInitContainer for the "k8s-driver-manager" init container
+ testCases := []struct {
+ description string
+ ds Daemonset
+ cpSpec *gpuv1.ClusterPolicySpec
+ expectedDs Daemonset
+ }{
+ {
+ description: "transform k8s-driver-manager initContainer", // the "dummy" init container is expected to pass through untouched
+ ds: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "k8s-driver-manager"}).
+ WithInitContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Driver: gpuv1.DriverSpec{
+ Manager: gpuv1.DriverManagerSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "k8s-driver-manager",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ ImagePullSecrets: []string{"pull-secret"},
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ GPUDirectRDMA: &gpuv1.GPUDirectRDMASpec{
+ Enabled: newBoolPtr(true),
+ UseHostMOFED: newBoolPtr(true),
+ },
+ },
+ },
+ expectedDs: NewDaemonset().WithInitContainer(corev1.Container{
+ Name: "k8s-driver-manager",
+ Image: "nvcr.io/nvidia/cloud-native/k8s-driver-manager:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: GPUDirectRDMAEnabledEnvName, Value: "true"}, // the two RDMA-derived env vars are expected ahead of the spec's own "foo"
+ {Name: UseHostMOFEDEnvName, Value: "true"},
+ {Name: "foo", Value: "bar"},
+ },
+ }).WithInitContainer(corev1.Container{Name: "dummy"}).WithPullSecret("pull-secret"),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ err := transformDriverManagerInitContainer(tc.ds.DaemonSet, &tc.cpSpec.Driver.Manager, tc.cpSpec.Driver.GPUDirectRDMA) // takes the Manager and GPUDirectRDMA sub-specs, not the whole ClusterPolicySpec
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
+
+func TestTransformValidatorShared(t *testing.T) { // table-driven check of TransformValidatorShared on a daemonset with a single main container
+ testCases := []struct {
+ description string
+ ds Daemonset
+ cpSpec *gpuv1.ClusterPolicySpec
+ expectedDs Daemonset
+ }{
+ {
+ description: "transform validator daemonset's main container", // image, pull policy, resources, args, env and pull secret are all expected from the spec
+ ds: NewDaemonset().WithContainer(corev1.Container{Name: "test-ctr"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ ImagePullSecrets: []string{"pull-secret"},
+ Resources: &gpuv1.ResourceRequirements{
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("500m"),
+ "memory": resource.MustParse("200Mi"),
+ },
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("500m"),
+ "memory": resource.MustParse("200Mi"),
+ },
+ },
+ Args: []string{"--test-flag"},
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ expectedDs: NewDaemonset().WithContainer(corev1.Container{
+ Name: "test-ctr",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Resources: corev1.ResourceRequirements{ // same limits and requests as the spec, as a corev1 value rather than a gpuv1 pointer
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("500m"),
+ "memory": resource.MustParse("200Mi"),
+ },
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("500m"),
+ "memory": resource.MustParse("200Mi"),
+ },
+ },
+ Args: []string{"--test-flag"},
+ Env: []corev1.EnvVar{{Name: "foo", Value: "bar"}},
+ }).WithPullSecret("pull-secret"),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ err := TransformValidatorShared(tc.ds.DaemonSet, tc.cpSpec)
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
+
+func TestTransformValidatorComponent(t *testing.T) { // table-driven check of TransformValidatorComponent; each case targets one "<component>-validation" init container of a pod
+ testCases := []struct {
+ description string
+ pod Pod
+ cpSpec *gpuv1.ClusterPolicySpec
+ component string
+ expectedPod Pod
+ errorExpected bool
+ }{
+ {
+ description: "no validation init container is a no-op", // cpSpec is nil here; the pod has no init containers and is expected unchanged
+ pod: NewPod(),
+ cpSpec: nil,
+ component: "driver",
+ expectedPod: NewPod(),
+ },
+ {
+ description: "invalid component", // only the returned error is asserted; expectedPod is never compared for this case
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "invalid-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{},
+ },
+ component: "invalid",
+ expectedPod: NewPod(),
+ errorExpected: true,
+ },
+ {
+ description: "cuda validation", // expects the spec env first, then validator image, pull-policy, pull-secrets and runtime-class env vars
+ pod: NewPod().
+ WithInitContainer(corev1.Container{Name: "cuda-validation"}).
+ WithRuntimeClassName("nvidia"),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ ImagePullSecrets: []string{"pull-secret1", "pull-secret2"},
+ CUDA: gpuv1.CUDAValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ },
+ component: "cuda",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "cuda-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: "foo", Value: "bar"},
+ {Name: ValidatorImageEnvName, Value: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0"},
+ {Name: ValidatorImagePullPolicyEnvName, Value: "IfNotPresent"},
+ {Name: ValidatorImagePullSecretsEnvName, Value: "pull-secret1,pull-secret2"}, // the two pull secrets are expected joined with a comma
+ {Name: ValidatorRuntimeClassEnvName, Value: "nvidia"},
+ },
+ }).WithRuntimeClassName("nvidia"),
+ },
+ {
+ description: "plugin validation", // same env expectations as the cuda case plus MigStrategyEnvName from cpSpec.MIG.Strategy
+ pod: NewPod().
+ WithInitContainer(corev1.Container{Name: "plugin-validation"}).
+ WithRuntimeClassName("nvidia"),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ ImagePullSecrets: []string{"pull-secret1", "pull-secret2"},
+ Plugin: gpuv1.PluginValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ MIG: gpuv1.MIGSpec{
+ Strategy: gpuv1.MIGStrategySingle,
+ },
+ },
+ component: "plugin",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "plugin-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: "foo", Value: "bar"},
+ {Name: ValidatorImageEnvName, Value: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0"},
+ {Name: ValidatorImagePullPolicyEnvName, Value: "IfNotPresent"},
+ {Name: ValidatorImagePullSecretsEnvName, Value: "pull-secret1,pull-secret2"},
+ {Name: ValidatorRuntimeClassEnvName, Value: "nvidia"},
+ {Name: MigStrategyEnvName, Value: string(gpuv1.MIGStrategySingle)},
+ },
+ }).WithRuntimeClassName("nvidia"),
+ },
+ {
+ description: "plugin validation removed when plugin is disabled", // DevicePlugin.Enabled=false: only "dummy" is expected to remain
+ pod: NewPod().
+ WithInitContainer(corev1.Container{Name: "plugin-validation"}).
+ WithInitContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ },
+ DevicePlugin: gpuv1.DevicePluginSpec{Enabled: newBoolPtr(false)},
+ },
+ component: "plugin",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{Name: "dummy"}),
+ },
+ {
+ description: "driver validation", // expects image, pull policy and the spec env only; no validator-image env vars are listed
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "driver-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ Driver: gpuv1.DriverValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ },
+ component: "driver",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "driver-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: "foo", Value: "bar"},
+ },
+ }),
+ },
+ {
+ description: "nvidia-fs validation", // GPUDirectStorage.Enabled=true: container kept with image and pull policy filled in
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "nvidia-fs-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ },
+ GPUDirectStorage: &gpuv1.GPUDirectStorageSpec{Enabled: newBoolPtr(true)},
+ },
+ component: "nvidia-fs",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "nvidia-fs-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ }),
+ },
+ {
+ description: "nvidia-fs validation is removed when gds is disabled", // GPUDirectStorage.Enabled=false: only "dummy" is expected to remain
+ pod: NewPod().
+ WithInitContainer(corev1.Container{Name: "nvidia-fs-validation"}).
+ WithInitContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ },
+ GPUDirectStorage: &gpuv1.GPUDirectStorageSpec{Enabled: newBoolPtr(false)},
+ },
+ component: "nvidia-fs",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{Name: "dummy"}),
+ },
+ {
+ description: "cc-manager validation", // CCManager.Enabled=true: container kept with image and pull policy filled in
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "cc-manager-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ },
+ CCManager: gpuv1.CCManagerSpec{Enabled: newBoolPtr(true)},
+ },
+ component: "cc-manager",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "cc-manager-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ }),
+ },
+ {
+ description: "cc-manager validation is removed when cc-manager is disabled", // CCManager.Enabled=false: only "dummy" is expected to remain
+ pod: NewPod().
+ WithInitContainer(corev1.Container{Name: "cc-manager-validation"}).
+ WithInitContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ },
+ CCManager: gpuv1.CCManagerSpec{Enabled: newBoolPtr(false)},
+ },
+ component: "cc-manager",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{Name: "dummy"}),
+ },
+ {
+ description: "toolkit validation", // expects image, pull policy and the spec env only
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "toolkit-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ Toolkit: gpuv1.ToolkitValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ },
+ component: "toolkit",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "toolkit-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: "foo", Value: "bar"},
+ },
+ }),
+ },
+ {
+ description: "vfio-pci validation", // DEFAULT_GPU_WORKLOAD_CONFIG is expected ahead of the spec env
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "vfio-pci-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ VFIOPCI: gpuv1.VFIOPCIValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ },
+ component: "vfio-pci",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "vfio-pci-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: "DEFAULT_GPU_WORKLOAD_CONFIG", Value: defaultGPUWorkloadConfig},
+ {Name: "foo", Value: "bar"},
+ },
+ }),
+ },
+ {
+ description: "vgpu-manager validation", // same expected env layout as the vfio-pci case
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "vgpu-manager-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ VGPUManager: gpuv1.VGPUManagerValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ },
+ component: "vgpu-manager",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "vgpu-manager-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: "DEFAULT_GPU_WORKLOAD_CONFIG", Value: defaultGPUWorkloadConfig},
+ {Name: "foo", Value: "bar"},
+ },
+ }),
+ },
+ {
+ description: "vgpu-devices validation", // same expected env layout as the vfio-pci case
+ pod: NewPod().WithInitContainer(corev1.Container{Name: "vgpu-devices-validation"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ VGPUDevices: gpuv1.VGPUDevicesValidatorSpec{
+ Env: []gpuv1.EnvVar{{Name: "foo", Value: "bar"}},
+ },
+ },
+ },
+ component: "vgpu-devices",
+ expectedPod: NewPod().WithInitContainer(corev1.Container{
+ Name: "vgpu-devices-validation",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ Env: []corev1.EnvVar{
+ {Name: "DEFAULT_GPU_WORKLOAD_CONFIG", Value: defaultGPUWorkloadConfig},
+ {Name: "foo", Value: "bar"},
+ },
+ }),
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ err := TransformValidatorComponent(tc.cpSpec, &tc.pod.Pod.Spec, tc.component) // operates on the pod spec; the result is read back from tc.pod
+ if tc.errorExpected {
+ require.Error(t, err)
+ return // the pod is not compared when an error is expected
+ }
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedPod, tc.pod)
+ })
+ }
+}
+
+func TestTransformValidator(t *testing.T) { // table-driven check of TransformValidator using a controller stub configured for the containerd runtime
+ testCases := []struct {
+ description string
+ ds Daemonset
+ cpSpec *gpuv1.ClusterPolicySpec
+ expectedDs Daemonset
+ errorExpected bool
+ }{
+ {
+ description: "empty validator spec", // only the returned error is asserted; expectedDs is never compared for this case
+ ds: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "dummy"}).
+ WithContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{},
+ },
+ expectedDs: NewDaemonset(),
+ errorExpected: true,
+ },
+ {
+ description: "valid validator spec", // main container gains image and pull policy; the "dummy" init container is expected unchanged
+ ds: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "dummy"}).
+ WithContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ ImagePullSecrets: []string{"pull-secret"},
+ },
+ },
+ expectedDs: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "dummy"}).
+ WithContainer(corev1.Container{
+ Name: "dummy",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ }).
+ WithPullSecret("pull-secret").
+ WithRuntimeClassName("nvidia"), // a runtime class of "nvidia" is expected although the spec above sets none
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ err := TransformValidator(tc.ds.DaemonSet, tc.cpSpec, ClusterPolicyController{runtime: gpuv1.Containerd, logger: ctrl.Log.WithName("test")}) // controller stub sets only the runtime and logger fields
+ if tc.errorExpected {
+ require.Error(t, err)
+ return // the daemonset is not compared when an error is expected
+ }
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
+
+func TestTransformSandboxValidator(t *testing.T) { // table-driven check of TransformSandboxValidator using a controller stub configured for the containerd runtime
+ testCases := []struct {
+ description string
+ ds Daemonset
+ cpSpec *gpuv1.ClusterPolicySpec
+ expectedDs Daemonset
+ errorExpected bool
+ }{
+ {
+ description: "empty validator spec", // only the returned error is asserted; expectedDs is never compared for this case
+ ds: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "dummy"}).
+ WithContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{},
+ },
+ expectedDs: NewDaemonset(),
+ errorExpected: true,
+ },
+ {
+ description: "valid validator spec", // main container gains image and pull policy; the "dummy" init container is expected unchanged
+ ds: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "dummy"}).
+ WithContainer(corev1.Container{Name: "dummy"}),
+ cpSpec: &gpuv1.ClusterPolicySpec{
+ Validator: gpuv1.ValidatorSpec{
+ Repository: "nvcr.io/nvidia/cloud-native",
+ Image: "gpu-operator-validator",
+ Version: "v1.0.0",
+ ImagePullPolicy: "IfNotPresent",
+ ImagePullSecrets: []string{"pull-secret"},
+ },
+ },
+ expectedDs: NewDaemonset().
+ WithInitContainer(corev1.Container{Name: "dummy"}).
+ WithContainer(corev1.Container{
+ Name: "dummy",
+ Image: "nvcr.io/nvidia/cloud-native/gpu-operator-validator:v1.0.0",
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ }).
+ WithPullSecret("pull-secret"), // the expectation sets no runtime class name
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.description, func(t *testing.T) {
+ err := TransformSandboxValidator(tc.ds.DaemonSet, tc.cpSpec, ClusterPolicyController{runtime: gpuv1.Containerd, logger: ctrl.Log.WithName("test")}) // controller stub sets only the runtime and logger fields
+ if tc.errorExpected {
+ require.Error(t, err)
+ return // the daemonset is not compared when an error is expected
+ }
+ require.NoError(t, err)
+ require.EqualValues(t, tc.expectedDs, tc.ds)
+ })
+ }
+}
diff --git a/controllers/upgrade_controller.go b/controllers/upgrade_controller.go
index ab179ac78..0c6de580d 100644
--- a/controllers/upgrade_controller.go
+++ b/controllers/upgrade_controller.go
@@ -24,6 +24,7 @@ import (
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/util/workqueue"
@@ -42,8 +43,8 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
- gpuv1 "github.com/NVIDIA/gpu-operator/api/v1"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
)
// UpgradeReconciler reconciles Driver Daemon Sets for upgrade
@@ -231,29 +232,38 @@ func (r *UpgradeReconciler) removeNodeUpgradeStateLabels(ctx context.Context) er
//nolint:dupl
func (r *UpgradeReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
// Create a new controller
- c, err := controller.New("upgrade-controller", mgr, controller.Options{Reconciler: r, MaxConcurrentReconciles: 1, RateLimiter: workqueue.NewItemExponentialFailureRateLimiter(minDelayCR, maxDelayCR)})
+ c, err := controller.New("upgrade-controller", mgr, controller.Options{Reconciler: r, MaxConcurrentReconciles: 1,
+ RateLimiter: workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](minDelayCR, maxDelayCR)})
if err != nil {
return err
}
// Watch for changes to primary resource ClusterPolicy
- err = c.Watch(source.Kind(mgr.GetCache(), &gpuv1.ClusterPolicy{}), &handler.EnqueueRequestForObject{}, predicate.GenerationChangedPredicate{})
+ err = c.Watch(source.Kind(
+ mgr.GetCache(),
+ &gpuv1.ClusterPolicy{},
+ &handler.TypedEnqueueRequestForObject[*gpuv1.ClusterPolicy]{},
+ predicate.TypedGenerationChangedPredicate[*gpuv1.ClusterPolicy]{}),
+ )
if err != nil {
return err
}
// Define a mapping from the Node object in the event to one or more
// ClusterPolicy objects to Reconcile
- nodeMapFn := func(ctx context.Context, a client.Object) []reconcile.Request {
+ nodeMapFn := func(ctx context.Context, o *corev1.Node) []reconcile.Request {
return getClusterPoliciesToReconcile(ctx, mgr.GetClient())
}
// Watch for changes to node labels
// TODO: only watch for changes to upgrade state label
err = c.Watch(
- source.Kind(mgr.GetCache(), &corev1.Node{}),
- handler.EnqueueRequestsFromMapFunc(nodeMapFn),
- predicate.LabelChangedPredicate{},
+ source.Kind(
+ mgr.GetCache(),
+ &corev1.Node{},
+ handler.TypedEnqueueRequestsFromMapFunc[*corev1.Node](nodeMapFn),
+ predicate.TypedLabelChangedPredicate[*corev1.Node]{},
+ ),
)
if err != nil {
return err
@@ -264,13 +274,13 @@ func (r *UpgradeReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
//
// For events generated by DaemonSets, ensure the object is
// owned by either ClusterPolicy or NVIDIADriver.
- dsMapFn := func(ctx context.Context, a client.Object) []reconcile.Request {
+ dsMapFn := func(ctx context.Context, a *appsv1.DaemonSet) []reconcile.Request {
ownerRefs := a.GetOwnerReferences()
ownedByNVIDIA := false
for _, owner := range ownerRefs {
- if (owner.APIVersion == gpuv1.GroupVersion.String() && owner.Kind == "ClusterPolicy") ||
- (owner.APIVersion == nvidiav1alpha1.GroupVersion.String() && owner.Kind == "NVIDIADriver") {
+ if (owner.APIVersion == gpuv1.SchemeGroupVersion.String() && owner.Kind == "ClusterPolicy") ||
+ (owner.APIVersion == nvidiav1alpha1.SchemeGroupVersion.String() && owner.Kind == "NVIDIADriver") {
ownedByNVIDIA = true
break
}
@@ -285,27 +295,34 @@ func (r *UpgradeReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
// Watch for changes to NVIDIA driver daemonsets and enqueue ClusterPolicy
// TODO: use one common label to identify all NVIDIA driver DaemonSets
- appLabelSelector, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{MatchLabels: map[string]string{DriverLabelKey: DriverLabelValue}})
- if err != nil {
- return fmt.Errorf("failed to create label selector predicate: %w", err)
- }
- dtkLabelSelector, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{MatchLabels: map[string]string{ocpDriverToolkitIdentificationLabel: ocpDriverToolkitIdentificationValue}})
- if err != nil {
- return fmt.Errorf("failed to create label selector predicate: %w", err)
- }
- componentLabelSelector, err := predicate.LabelSelectorPredicate(metav1.LabelSelector{MatchLabels: map[string]string{AppComponentLabelKey: AppComponentLabelValue}})
- if err != nil {
- return fmt.Errorf("failed to create label selector predicate: %w", err)
- }
+ appLabelSelector := predicate.NewTypedPredicateFuncs(func(ds *appsv1.DaemonSet) bool {
+ ls := metav1.LabelSelector{MatchLabels: map[string]string{DriverLabelKey: DriverLabelValue}}
+ selector, _ := metav1.LabelSelectorAsSelector(&ls)
+ return selector.Matches(labels.Set(ds.GetLabels()))
+ })
+
+ dtkLabelSelector := predicate.NewTypedPredicateFuncs(func(ds *appsv1.DaemonSet) bool {
+ ls := metav1.LabelSelector{MatchLabels: map[string]string{ocpDriverToolkitIdentificationLabel: ocpDriverToolkitIdentificationValue}}
+ selector, _ := metav1.LabelSelectorAsSelector(&ls)
+ return selector.Matches(labels.Set(ds.GetLabels()))
+ })
+
+ componentLabelSelector := predicate.NewTypedPredicateFuncs(func(ds *appsv1.DaemonSet) bool {
+ ls := metav1.LabelSelector{MatchLabels: map[string]string{AppComponentLabelKey: AppComponentLabelValue}}
+ selector, _ := metav1.LabelSelectorAsSelector(&ls)
+ return selector.Matches(labels.Set(ds.GetLabels()))
+ })
err = c.Watch(
- source.Kind(mgr.GetCache(), &appsv1.DaemonSet{}),
- handler.EnqueueRequestsFromMapFunc(dsMapFn),
- predicate.And(
- predicate.GenerationChangedPredicate{},
- predicate.Or(appLabelSelector, dtkLabelSelector, componentLabelSelector),
- ),
- )
+ source.Kind(
+ mgr.GetCache(),
+ &appsv1.DaemonSet{},
+ handler.TypedEnqueueRequestsFromMapFunc[*appsv1.DaemonSet](dsMapFn),
+ predicate.And[*appsv1.DaemonSet](
+ predicate.TypedGenerationChangedPredicate[*appsv1.DaemonSet]{},
+ predicate.Or[*appsv1.DaemonSet](appLabelSelector, dtkLabelSelector, componentLabelSelector),
+ ),
+ ))
if err != nil {
return err
}
diff --git a/deployments/gpu-operator/Chart.lock b/deployments/gpu-operator/Chart.lock
index f5c7fcca3..5d1a7d3dc 100644
--- a/deployments/gpu-operator/Chart.lock
+++ b/deployments/gpu-operator/Chart.lock
@@ -1,6 +1,6 @@
dependencies:
- name: node-feature-discovery
repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts
- version: 0.14.2
-digest: sha256:84ec59c0c12da825ca7dc25bdac63d0f2106822a129f7fe1f9d60a4023a543ce
-generated: "2023-10-10T11:26:00.823757+02:00"
+ version: 0.16.6
+digest: sha256:e7b02cbdf9daff49892c0b74c50da2ed11e18eff2105a1b1abc9a8f2ebd8be47
+generated: "2024-10-31T07:12:50.141904-07:00"
diff --git a/deployments/gpu-operator/Chart.yaml b/deployments/gpu-operator/Chart.yaml
index 08cdee92a..59f9e6904 100644
--- a/deployments/gpu-operator/Chart.yaml
+++ b/deployments/gpu-operator/Chart.yaml
@@ -19,6 +19,6 @@ keywords:
dependencies:
- name: node-feature-discovery
- version: v0.14.2
+ version: v0.16.6
repository: https://kubernetes-sigs.github.io/node-feature-discovery/charts
condition: nfd.enabled
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/Chart.yaml b/deployments/gpu-operator/charts/node-feature-discovery/Chart.yaml
index 8bd1f818d..7656c732f 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/Chart.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/Chart.yaml
@@ -1,5 +1,5 @@
apiVersion: v2
-appVersion: v0.14.2
+appVersion: v0.16.6
description: 'Detects hardware features available on each node in a Kubernetes cluster,
and advertises those features using node labels. '
home: https://github.com/kubernetes-sigs/node-feature-discovery
@@ -11,4 +11,4 @@ name: node-feature-discovery
sources:
- https://github.com/kubernetes-sigs/node-feature-discovery
type: application
-version: 0.14.2
+version: 0.16.6
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/README.md b/deployments/gpu-operator/charts/node-feature-discovery/README.md
index 16b5254d5..93734f8b7 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/README.md
+++ b/deployments/gpu-operator/charts/node-feature-discovery/README.md
@@ -6,5 +6,5 @@ labels. NFD provides flexible configuration and extension points for a wide
range of vendor and application specific node labeling needs.
See
-[NFD documentation](https://kubernetes-sigs.github.io/node-feature-discovery/v0.14/deployment/helm.html)
+[NFD documentation](https://kubernetes-sigs.github.io/node-feature-discovery/v0.16/deployment/helm.html)
for deployment instructions.
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml b/deployments/gpu-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml
index 6866c7ffe..0a73c5dca 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
- controller-gen.kubebuilder.io/version: v0.12.1
+ controller-gen.kubebuilder.io/version: v0.14.0
name: nodefeatures.nfd.k8s-sigs.io
spec:
group: nfd.k8s-sigs.io
@@ -17,23 +17,30 @@ spec:
- name: v1alpha1
schema:
openAPIV3Schema:
- description: NodeFeature resource holds the features discovered for one node
- in the cluster.
+ description: |-
+ NodeFeature resource holds the features discovered for one node in the
+ cluster.
properties:
apiVersion:
- description: 'APIVersion defines the versioned schema of this representation
- of an object. Servers should convert recognized schemas to the latest
- internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
- description: 'Kind is a string value representing the REST resource this
- object represents. Servers may infer this from the endpoint the client
- submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
spec:
- description: NodeFeatureSpec describes a NodeFeature object.
+ description: Specification of the NodeFeature, containing features discovered
+ for a node.
properties:
features:
description: Features is the full "raw" features data that has been
@@ -47,6 +54,7 @@ spec:
elements:
additionalProperties:
type: string
+ description: Individual features of the feature set.
type: object
required:
- elements
@@ -64,6 +72,7 @@ spec:
description: Nil is a dummy empty struct for protobuf
compatibility
type: object
+ description: Individual features of the feature set.
type: object
required:
- elements
@@ -77,6 +86,7 @@ spec:
which is an instance having multiple attributes.
properties:
elements:
+ description: Individual features of the feature set.
items:
description: InstanceFeature represents one instance of
a complex features, e.g. a device.
@@ -84,6 +94,7 @@ spec:
attributes:
additionalProperties:
type: string
+ description: Attributes of the instance feature.
type: object
required:
- attributes
@@ -113,7 +124,278 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
- controller-gen.kubebuilder.io/version: v0.12.1
+ controller-gen.kubebuilder.io/version: v0.14.0
+ name: nodefeaturegroups.nfd.k8s-sigs.io
+spec:
+ group: nfd.k8s-sigs.io
+ names:
+ kind: NodeFeatureGroup
+ listKind: NodeFeatureGroupList
+ plural: nodefeaturegroups
+ shortNames:
+ - nfg
+ singular: nodefeaturegroup
+ scope: Namespaced
+ versions:
+ - name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: NodeFeatureGroup resource holds Node pools by featureGroup
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: Spec defines the rules to be evaluated.
+ properties:
+ featureGroupRules:
+ description: List of rules to evaluate to determine nodes that belong
+ in this group.
+ items:
+ description: GroupRule defines a rule for nodegroup filtering.
+ properties:
+ matchAny:
+ description: MatchAny specifies a list of matchers one of which
+ must match.
+ items:
+ description: MatchAnyElem specifies one sub-matcher of MatchAny.
+ properties:
+ matchFeatures:
+ description: MatchFeatures specifies a set of matcher
+ terms all of which must match.
+ items:
+ description: |-
+ FeatureMatcherTerm defines requirements against one feature set. All
+ requirements (specified as MatchExpressions) are evaluated against each
+ element in the feature set.
+ properties:
+ feature:
+ description: Feature is the name of the feature
+ set to match against.
+ type: string
+ matchExpressions:
+ additionalProperties:
+ description: |-
+ MatchExpression specifies an expression to evaluate against a set of input
+ values. It contains an operator that is applied when matching the input and
+ an array of values that the operator evaluates the input against.
+ properties:
+ op:
+ description: Op is the operator to be applied.
+ enum:
+ - In
+ - NotIn
+ - InRegexp
+ - Exists
+ - DoesNotExist
+ - Gt
+ - Lt
+ - GtLt
+ - IsTrue
+ - IsFalse
+ type: string
+ value:
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
+ items:
+ type: string
+ type: array
+ required:
+ - op
+ type: object
+ description: |-
+ MatchExpressions is the set of per-element expressions evaluated. These
+ match against the value of the specified elements.
+ type: object
+ matchName:
+ description: |-
+ MatchName is an expression that is matched against the name of each
+ element in the feature set.
+ properties:
+ op:
+ description: Op is the operator to be applied.
+ enum:
+ - In
+ - NotIn
+ - InRegexp
+ - Exists
+ - DoesNotExist
+ - Gt
+ - Lt
+ - GtLt
+ - IsTrue
+ - IsFalse
+ type: string
+ value:
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
+ items:
+ type: string
+ type: array
+ required:
+ - op
+ type: object
+ required:
+ - feature
+ type: object
+ type: array
+ required:
+ - matchFeatures
+ type: object
+ type: array
+ matchFeatures:
+ description: MatchFeatures specifies a set of matcher terms
+ all of which must match.
+ items:
+ description: |-
+ FeatureMatcherTerm defines requirements against one feature set. All
+ requirements (specified as MatchExpressions) are evaluated against each
+ element in the feature set.
+ properties:
+ feature:
+ description: Feature is the name of the feature set to
+ match against.
+ type: string
+ matchExpressions:
+ additionalProperties:
+ description: |-
+ MatchExpression specifies an expression to evaluate against a set of input
+ values. It contains an operator that is applied when matching the input and
+ an array of values that the operator evaluates the input against.
+ properties:
+ op:
+ description: Op is the operator to be applied.
+ enum:
+ - In
+ - NotIn
+ - InRegexp
+ - Exists
+ - DoesNotExist
+ - Gt
+ - Lt
+ - GtLt
+ - IsTrue
+ - IsFalse
+ type: string
+ value:
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
+ items:
+ type: string
+ type: array
+ required:
+ - op
+ type: object
+ description: |-
+ MatchExpressions is the set of per-element expressions evaluated. These
+ match against the value of the specified elements.
+ type: object
+ matchName:
+ description: |-
+ MatchName is an expression that is matched against the name of each
+ element in the feature set.
+ properties:
+ op:
+ description: Op is the operator to be applied.
+ enum:
+ - In
+ - NotIn
+ - InRegexp
+ - Exists
+ - DoesNotExist
+ - Gt
+ - Lt
+ - GtLt
+ - IsTrue
+ - IsFalse
+ type: string
+ value:
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
+ items:
+ type: string
+ type: array
+ required:
+ - op
+ type: object
+ required:
+ - feature
+ type: object
+ type: array
+ name:
+ description: Name of the rule.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ required:
+ - featureGroupRules
+ type: object
+ status:
+ description: |-
+ Status of the NodeFeatureGroup after the most recent evaluation of the
+ specification.
+ properties:
+ nodes:
+ description: Nodes is a list of FeatureGroupNode in the cluster that
+ match the featureGroupRules
+ items:
+ properties:
+ name:
+ description: Name of the node.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ x-kubernetes-list-map-keys:
+ - name
+ x-kubernetes-list-type: map
+ type: object
+ required:
+ - spec
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.14.0
name: nodefeaturerules.nfd.k8s-sigs.io
spec:
group: nfd.k8s-sigs.io
@@ -129,23 +411,29 @@ spec:
- name: v1alpha1
schema:
openAPIV3Schema:
- description: NodeFeatureRule resource specifies a configuration for feature-based
+ description: |-
+ NodeFeatureRule resource specifies a configuration for feature-based
customization of node objects, such as node labeling.
properties:
apiVersion:
- description: 'APIVersion defines the versioned schema of this representation
- of an object. Servers should convert recognized schemas to the latest
- internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
- description: 'Kind is a string value representing the REST resource this
- object represents. Servers may infer this from the endpoint the client
- submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
spec:
- description: NodeFeatureRuleSpec describes a NodeFeatureRule.
+ description: Spec defines the rules to be evaluated.
properties:
rules:
description: Rules is a list of node customization rules.
@@ -153,6 +441,11 @@ spec:
description: Rule defines a rule for node customization such as
labeling.
properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to create if the rule matches.
+ type: object
extendedResources:
additionalProperties:
type: string
@@ -164,10 +457,10 @@ spec:
description: Labels to create if the rule matches.
type: object
labelsTemplate:
- description: LabelsTemplate specifies a template to expand for
- dynamically generating multiple labels. Data (after template
- expansion) must be keys with an optional value ([=])
- separated by newlines.
+ description: |-
+ LabelsTemplate specifies a template to expand for dynamically generating
+ multiple labels. Data (after template expansion) must be keys with an
+ optional value ([=]) separated by newlines.
type: string
matchAny:
description: MatchAny specifies a list of matchers one of which
@@ -179,25 +472,21 @@ spec:
description: MatchFeatures specifies a set of matcher
terms all of which must match.
items:
- description: FeatureMatcherTerm defines requirements
- against one feature set. All requirements (specified
- as MatchExpressions) are evaluated against each element
- in the feature set.
+ description: |-
+ FeatureMatcherTerm defines requirements against one feature set. All
+ requirements (specified as MatchExpressions) are evaluated against each
+ element in the feature set.
properties:
feature:
+ description: Feature is the name of the feature
+ set to match against.
type: string
matchExpressions:
additionalProperties:
- description: "MatchExpression specifies an expression
- to evaluate against a set of input values. It
- contains an operator that is applied when matching
- the input and an array of values that the operator
- evaluates the input against. \n NB: CreateMatchExpression
- or MustCreateMatchExpression() should be used
- for creating new instances. \n NB: Validate()
- must be called if Op or Value fields are modified
- or if a new instance is created from scratch
- without using the helper functions."
+ description: |-
+ MatchExpression specifies an expression to evaluate against a set of input
+ values. It contains an operator that is applied when matching the input and
+ an array of values that the operator evaluates the input against.
properties:
op:
description: Op is the operator to be applied.
@@ -214,28 +503,56 @@ spec:
- IsFalse
type: string
value:
- description: Value is the list of values that
- the operand evaluates the input against.
- Value should be empty if the operator is
- Exists, DoesNotExist, IsTrue or IsFalse.
- Value should contain exactly one element
- if the operator is Gt or Lt and exactly
- two elements if the operator is GtLt. In
- other cases Value should contain at least
- one element.
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
items:
type: string
type: array
required:
- op
type: object
- description: MatchExpressionSet contains a set of
- MatchExpressions, each of which is evaluated against
- a set of input values.
+ description: |-
+ MatchExpressions is the set of per-element expressions evaluated. These
+ match against the value of the specified elements.
+ type: object
+ matchName:
+ description: |-
+ MatchName is an expression that is matched against the name of each
+ element in the feature set.
+ properties:
+ op:
+ description: Op is the operator to be applied.
+ enum:
+ - In
+ - NotIn
+ - InRegexp
+ - Exists
+ - DoesNotExist
+ - Gt
+ - Lt
+ - GtLt
+ - IsTrue
+ - IsFalse
+ type: string
+ value:
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
+ items:
+ type: string
+ type: array
+ required:
+ - op
type: object
required:
- feature
- - matchExpressions
type: object
type: array
required:
@@ -246,23 +563,21 @@ spec:
description: MatchFeatures specifies a set of matcher terms
all of which must match.
items:
- description: FeatureMatcherTerm defines requirements against
- one feature set. All requirements (specified as MatchExpressions)
- are evaluated against each element in the feature set.
+ description: |-
+ FeatureMatcherTerm defines requirements against one feature set. All
+ requirements (specified as MatchExpressions) are evaluated against each
+ element in the feature set.
properties:
feature:
+ description: Feature is the name of the feature set to
+ match against.
type: string
matchExpressions:
additionalProperties:
- description: "MatchExpression specifies an expression
- to evaluate against a set of input values. It contains
- an operator that is applied when matching the input
- and an array of values that the operator evaluates
- the input against. \n NB: CreateMatchExpression or
- MustCreateMatchExpression() should be used for creating
- new instances. \n NB: Validate() must be called if
- Op or Value fields are modified or if a new instance
- is created from scratch without using the helper functions."
+ description: |-
+ MatchExpression specifies an expression to evaluate against a set of input
+ values. It contains an operator that is applied when matching the input and
+ an array of values that the operator evaluates the input against.
properties:
op:
description: Op is the operator to be applied.
@@ -279,25 +594,56 @@ spec:
- IsFalse
type: string
value:
- description: Value is the list of values that the
- operand evaluates the input against. Value should
- be empty if the operator is Exists, DoesNotExist,
- IsTrue or IsFalse. Value should contain exactly
- one element if the operator is Gt or Lt and exactly
- two elements if the operator is GtLt. In other
- cases Value should contain at least one element.
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
items:
type: string
type: array
required:
- op
type: object
- description: MatchExpressionSet contains a set of MatchExpressions,
- each of which is evaluated against a set of input values.
+ description: |-
+ MatchExpressions is the set of per-element expressions evaluated. These
+ match against the value of the specified elements.
+ type: object
+ matchName:
+ description: |-
+ MatchName is an expression that is matched against the name of each
+ element in the feature set.
+ properties:
+ op:
+ description: Op is the operator to be applied.
+ enum:
+ - In
+ - NotIn
+ - InRegexp
+ - Exists
+ - DoesNotExist
+ - Gt
+ - Lt
+ - GtLt
+ - IsTrue
+ - IsFalse
+ type: string
+ value:
+ description: |-
+ Value is the list of values that the operand evaluates the input
+ against. Value should be empty if the operator is Exists, DoesNotExist,
+ IsTrue or IsFalse. Value should contain exactly one element if the
+ operator is Gt or Lt and exactly two elements if the operator is GtLt.
+ In other cases Value should contain at least one element.
+ items:
+ type: string
+ type: array
+ required:
+ - op
type: object
required:
- feature
- - matchExpressions
type: object
type: array
name:
@@ -306,21 +652,24 @@ spec:
taints:
description: Taints to create if the rule matches.
items:
- description: The node this Taint is attached to has the "effect"
- on any pod that does not tolerate the Taint.
+ description: |-
+ The node this Taint is attached to has the "effect" on
+ any pod that does not tolerate the Taint.
properties:
effect:
- description: Required. The effect of the taint on pods
- that do not tolerate the taint. Valid effects are NoSchedule,
- PreferNoSchedule and NoExecute.
+ description: |-
+ Required. The effect of the taint on pods
+ that do not tolerate the taint.
+ Valid effects are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: Required. The taint key to be applied to
a node.
type: string
timeAdded:
- description: TimeAdded represents the time at which the
- taint was added. It is only written for NoExecute taints.
+ description: |-
+ TimeAdded represents the time at which the taint was added.
+ It is only written for NoExecute taints.
format: date-time
type: string
value:
@@ -335,17 +684,17 @@ spec:
vars:
additionalProperties:
type: string
- description: Vars is the variables to store if the rule matches.
- Variables do not directly inflict any changes in the node
- object. However, they can be referenced from other rules enabling
- more complex rule hierarchies, without exposing intermediary
- output values as labels.
+ description: |-
+ Vars is the variables to store if the rule matches. Variables do not
+ directly inflict any changes in the node object. However, they can be
+ referenced from other rules enabling more complex rule hierarchies,
+ without exposing intermediary output values as labels.
type: object
varsTemplate:
- description: VarsTemplate specifies a template to expand for
- dynamically generating multiple variables. Data (after template
- expansion) must be keys with an optional value ([=])
- separated by newlines.
+ description: |-
+ VarsTemplate specifies a template to expand for dynamically generating
+ multiple variables. Data (after template expansion) must be keys with an
+ optional value ([=]) separated by newlines.
type: string
required:
- name
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml
index ac2e51fc1..2d1576022 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-certs.yaml
@@ -1,4 +1,5 @@
{{- if .Values.tls.certManager }}
+{{- if .Values.master.enable }}
---
apiVersion: cert-manager.io/v1
kind: Certificate
@@ -17,14 +18,17 @@ spec:
# first one is configured for use by the worker; below are for completeness
- {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc
- {{ include "node-feature-discovery.fullname" . }}-master.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
- # localhost needed for grpc_health_probe
- - localhost
issuerRef:
- name: nfd-ca-issuer
+ name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
+ {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
+ kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
+ {{- else }}
kind: Issuer
+ {{- end }}
group: cert-manager.io
-
+{{- end }}
---
+{{- if .Values.worker.enable }}
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
@@ -39,9 +43,14 @@ spec:
dnsNames:
- {{ include "node-feature-discovery.fullname" . }}-worker.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
issuerRef:
- name: nfd-ca-issuer
+ name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
+ {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
+ kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
+ {{- else }}
kind: Issuer
+ {{- end }}
group: cert-manager.io
+{{- end }}
{{- if .Values.topologyUpdater.enable }}
---
@@ -59,8 +68,12 @@ spec:
dnsNames:
- {{ include "node-feature-discovery.fullname" . }}-topology-updater.{{ include "node-feature-discovery.namespace" . }}.svc.cluster.local
issuerRef:
- name: nfd-ca-issuer
+ name: {{ default "nfd-ca-issuer" .Values.tls.certManagerCertificate.issuerName }}
+ {{- if and .Values.tls.certManagerCertificate.issuerName .Values.tls.certManagerCertificate.issuerKind }}
+ kind: {{ .Values.tls.certManagerCertificate.issuerKind }}
+ {{- else }}
kind: Issuer
+ {{- end }}
group: cert-manager.io
{{- end }}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml
index f3c57acea..874468908 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/cert-manager-issuer.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.tls.certManager }}
+{{- if and .Values.tls.certManager (not .Values.tls.certManagerCertificate.issuerName ) }}
# See https://cert-manager.io/docs/configuration/selfsigned/#bootstrapping-ca-issuers
# - Create a self signed issuer
# - Use this to create a CA cert
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrole.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrole.yaml
index d4329338b..f935cfe41 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrole.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrole.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.master.rbac.create }}
+{{- if and .Values.master.enable .Values.master.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
@@ -21,10 +21,18 @@ rules:
resources:
- nodefeatures
- nodefeaturerules
+ - nodefeaturegroups
verbs:
- get
- list
- watch
+- apiGroups:
+ - nfd.k8s-sigs.io
+ resources:
+ - nodefeaturegroups/status
+ verbs:
+ - patch
+ - update
- apiGroups:
- coordination.k8s.io
resources:
@@ -58,6 +66,12 @@ rules:
verbs:
- get
- list
+- apiGroups:
+ - ""
+ resources:
+ - namespaces
+ verbs:
+ - get
- apiGroups:
- ""
resources:
@@ -80,7 +94,7 @@ rules:
- update
{{- end }}
-{{- if and .Values.gc.enable .Values.gc.rbac.create (or .Values.enableNodeFeatureApi .Values.topologyUpdater.enable) }}
+{{- if and .Values.gc.enable .Values.gc.rbac.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml
index 87b3003e2..3f717988b 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.master.rbac.create }}
+{{- if and .Values.master.enable .Values.master.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
@@ -33,7 +33,7 @@ subjects:
namespace: {{ include "node-feature-discovery.namespace" . }}
{{- end }}
-{{- if and .Values.gc.enable .Values.gc.rbac.create (or .Values.enableNodeFeatureApi .Values.topologyUpdater.enable) }}
+{{- if and .Values.gc.enable .Values.gc.rbac.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/master.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/master.yaml
index e77ca136c..733131a03 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/master.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/master.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.master.enable }}
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -12,6 +13,7 @@ metadata:
{{- end }}
spec:
replicas: {{ .Values.master.replicaCount }}
+ revisionHistoryLimit: {{ .Values.master.revisionHistoryLimit }}
selector:
matchLabels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
@@ -26,6 +28,9 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
+ {{- with .Values.priorityClassName }}
+ priorityClassName: {{ . }}
+ {{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
@@ -34,6 +39,7 @@ spec:
enableServiceLinks: false
securityContext:
{{- toYaml .Values.master.podSecurityContext | nindent 8 }}
+ hostNetwork: {{ .Values.master.hostNetwork }}
containers:
- name: master
securityContext:
@@ -41,42 +47,24 @@ spec:
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
livenessProbe:
- exec:
- command:
- - "/usr/bin/grpc_health_probe"
- - "-addr=:{{ .Values.master.port | default "8080" }}"
- {{- if .Values.tls.enable }}
- - "-tls"
- - "-tls-ca-cert=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- - "-tls-client-key=/etc/kubernetes/node-feature-discovery/certs/tls.key"
- - "-tls-client-cert=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
- {{- end }}
- initialDelaySeconds: 10
- periodSeconds: 10
+ {{- toYaml .Values.master.livenessProbe | nindent 12 }}
readinessProbe:
- exec:
- command:
- - "/usr/bin/grpc_health_probe"
- - "-addr=:{{ .Values.master.port | default "8080" }}"
- {{- if .Values.tls.enable }}
- - "-tls"
- - "-tls-ca-cert=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- - "-tls-client-key=/etc/kubernetes/node-feature-discovery/certs/tls.key"
- - "-tls-client-cert=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
- {{- end }}
- initialDelaySeconds: 5
- periodSeconds: 10
- failureThreshold: 10
+ {{- toYaml .Values.master.readinessProbe | nindent 12 }}
ports:
- containerPort: {{ .Values.master.port | default "8080" }}
name: grpc
- containerPort: {{ .Values.master.metricsPort | default "8081" }}
name: metrics
+ - containerPort: {{ .Values.master.healthPort | default "8082" }}
+ name: health
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
+ {{- with .Values.master.extraEnvs }}
+ {{- toYaml . | nindent 8 }}
+ {{- end}}
command:
- "nfd-master"
resources:
@@ -85,9 +73,8 @@ spec:
{{- if .Values.master.instance | empty | not }}
- "-instance={{ .Values.master.instance }}"
{{- end }}
+ {{- if not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) }}
- "-port={{ .Values.master.port | default "8080" }}"
- {{- if not .Values.enableNodeFeatureApi }}
- - "-enable-nodefeature-api=false"
{{- else if gt (int .Values.master.replicaCount) 1 }}
- "-enable-leader-election"
{{- end }}
@@ -123,7 +110,12 @@ spec:
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
- "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
{{- end }}
+ # Go over featureGates and add one -feature-gates flag per entry
+ {{- range $key, $value := .Values.featureGates }}
+ - "-feature-gates={{ $key }}={{ $value }}"
+ {{- end }}
- "-metrics={{ .Values.master.metricsPort | default "8081" }}"
+ - "-grpc-health={{ .Values.master.healthPort | default "8082" }}"
volumeMounts:
{{- if .Values.tls.enable }}
- name: nfd-master-cert
@@ -157,3 +149,4 @@ spec:
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
+{{- end }}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-gc.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-gc.yaml
index d803eef40..375f93827 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-gc.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-gc.yaml
@@ -1,4 +1,4 @@
-{{- if and .Values.gc.enable (or .Values.enableNodeFeatureApi .Values.topologyUpdater.enable) -}}
+{{- if and .Values.gc.enable (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) -}}
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -13,6 +13,7 @@ metadata:
{{- end }}
spec:
replicas: {{ .Values.gc.replicaCount | default 1 }}
+ revisionHistoryLimit: {{ .Values.gc.revisionHistoryLimit }}
selector:
matchLabels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
@@ -29,12 +30,16 @@ spec:
spec:
serviceAccountName: {{ include "node-feature-discovery.gc.serviceAccountName" . }}
dnsPolicy: ClusterFirstWithHostNet
+ {{- with .Values.priorityClassName }}
+ priorityClassName: {{ . }}
+ {{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
securityContext:
{{- toYaml .Values.gc.podSecurityContext | nindent 8 }}
+ hostNetwork: {{ .Values.gc.hostNetwork }}
containers:
- name: gc
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
@@ -44,6 +49,9 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
+ {{- with .Values.gc.extraEnvs }}
+ {{- toYaml . | nindent 8 }}
+ {{- end}}
command:
- "nfd-gc"
args:
@@ -58,6 +66,9 @@ spec:
drop: [ "ALL" ]
readOnlyRootFilesystem: true
runAsNonRoot: true
+ ports:
+ - name: metrics
+ containerPort: {{ .Values.gc.metricsPort | default "8081"}}
{{- with .Values.gc.nodeSelector }}
nodeSelector:
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml
index c806a8e5d..9c6e01cde 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-master-conf.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.master.enable }}
apiVersion: v1
kind: ConfigMap
metadata:
@@ -8,3 +9,4 @@ metadata:
data:
nfd-master.conf: |-
{{- .Values.master.config | toYaml | nindent 4 }}
+{{- end }}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml
index 9867f5089..8d03aa2d8 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-topologyupdater-conf.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.topologyUpdater.enable -}}
apiVersion: v1
kind: ConfigMap
metadata:
@@ -8,3 +9,4 @@ metadata:
data:
nfd-topology-updater.conf: |-
{{- .Values.topologyUpdater.config | toYaml | nindent 4 }}
+{{- end }}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml
index 61d2a481a..a2299dea1 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.worker.enable }}
apiVersion: v1
kind: ConfigMap
metadata:
@@ -8,3 +9,4 @@ metadata:
data:
nfd-worker.conf: |-
{{- .Values.worker.config | toYaml | nindent 4 }}
+{{- end }}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/post-delete-job.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/post-delete-job.yaml
new file mode 100644
index 000000000..4364f1aa2
--- /dev/null
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/post-delete-job.yaml
@@ -0,0 +1,94 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: {{ include "node-feature-discovery.fullname" . }}-prune
+ namespace: {{ include "node-feature-discovery.namespace" . }}
+ labels:
+ {{- include "node-feature-discovery.labels" . | nindent 4 }}
+ annotations:
+ "helm.sh/hook": post-delete
+ "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: {{ include "node-feature-discovery.fullname" . }}-prune
+ labels:
+ {{- include "node-feature-discovery.labels" . | nindent 4 }}
+ annotations:
+ "helm.sh/hook": post-delete
+ "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+rules:
+- apiGroups:
+ - ""
+ resources:
+ - nodes
+ - nodes/status
+ verbs:
+ - get
+ - patch
+ - update
+ - list
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: {{ include "node-feature-discovery.fullname" . }}-prune
+ labels:
+ {{- include "node-feature-discovery.labels" . | nindent 4 }}
+ annotations:
+ "helm.sh/hook": post-delete
+ "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: {{ include "node-feature-discovery.fullname" . }}-prune
+subjects:
+- kind: ServiceAccount
+ name: {{ include "node-feature-discovery.fullname" . }}-prune
+ namespace: {{ include "node-feature-discovery.namespace" . }}
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: {{ include "node-feature-discovery.fullname" . }}-prune
+ namespace: {{ include "node-feature-discovery.namespace" . }}
+ labels:
+ {{- include "node-feature-discovery.labels" . | nindent 4 }}
+ annotations:
+ "helm.sh/hook": post-delete
+ "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+ template:
+ metadata:
+ labels:
+ {{- include "node-feature-discovery.labels" . | nindent 8 }}
+ role: prune
+ spec:
+ serviceAccountName: {{ include "node-feature-discovery.fullname" . }}-prune
+ containers:
+ - name: nfd-master
+ securityContext:
+ {{- toYaml .Values.master.securityContext | nindent 12 }}
+ image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+ command:
+ - "nfd-master"
+ args:
+ - "-prune"
+ {{- if .Values.master.instance | empty | not }}
+ - "-instance={{ .Values.master.instance }}"
+ {{- end }}
+ restartPolicy: Never
+ {{- with .Values.master.nodeSelector }}
+ nodeSelector:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.master.affinity }}
+ affinity:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.master.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/prometheus.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/prometheus.yaml
index b9f4b4640..3d680e24e 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/prometheus.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/prometheus.yaml
@@ -12,7 +12,7 @@ metadata:
spec:
podMetricsEndpoints:
- honorLabels: true
- interval: 10s
+ interval: {{ .Values.prometheus.scrapeInterval }}
path: /metrics
port: metrics
scheme: http
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/role.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/role.yaml
index c71ede442..52c69eb19 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/role.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/role.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.worker.rbac.create }}
+{{- if and .Values.worker.enable .Values.worker.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -15,5 +15,10 @@ rules:
- create
- get
- update
+- apiGroups:
+ - ""
+ resources:
+ - pods
+ verbs:
+ - get
{{- end }}
-
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/rolebinding.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/rolebinding.yaml
index d8025be9b..a640d5f8b 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/rolebinding.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/rolebinding.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.worker.rbac.create }}
+{{- if and .Values.worker.enable .Values.worker.rbac.create }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/service.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/service.yaml
index 0d4789818..7191dca70 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/service.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/service.yaml
@@ -1,3 +1,4 @@
+{{- if and (not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi)) .Values.master.enable }}
apiVersion: v1
kind: Service
metadata:
@@ -16,3 +17,4 @@ spec:
selector:
{{- include "node-feature-discovery.selectorLabels" . | nindent 4 }}
role: master
+{{- end}}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/serviceaccount.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/serviceaccount.yaml
index 34dc8b753..59edc5e6c 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/serviceaccount.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/serviceaccount.yaml
@@ -1,4 +1,4 @@
-{{- if .Values.master.serviceAccount.create -}}
+{{- if and .Values.master.enable .Values.master.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
@@ -27,7 +27,7 @@ metadata:
{{- end }}
{{- end }}
-{{- if and .Values.gc.enable .Values.gc.serviceAccount.create (or .Values.enableNodeFeatureApi .Values.topologyUpdater.enable) }}
+{{- if and .Values.gc.enable .Values.gc.serviceAccount.create (or (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) .Values.topologyUpdater.enable) }}
---
apiVersion: v1
kind: ServiceAccount
@@ -42,7 +42,7 @@ metadata:
{{- end }}
{{- end }}
-{{- if .Values.worker.serviceAccount.create }}
+{{- if and .Values.worker.enable .Values.worker.serviceAccount.create }}
---
apiVersion: v1
kind: ServiceAccount
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/topologyupdater.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/topologyupdater.yaml
index f51c10e6d..ba0214c88 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/topologyupdater.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/topologyupdater.yaml
@@ -12,6 +12,7 @@ metadata:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
+ revisionHistoryLimit: {{ .Values.topologyUpdater.revisionHistoryLimit }}
selector:
matchLabels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
@@ -28,16 +29,24 @@ spec:
spec:
serviceAccountName: {{ include "node-feature-discovery.topologyUpdater.serviceAccountName" . }}
dnsPolicy: ClusterFirstWithHostNet
+ {{- with .Values.priorityClassName }}
+ priorityClassName: {{ . }}
+ {{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
securityContext:
{{- toYaml .Values.topologyUpdater.podSecurityContext | nindent 8 }}
+ hostNetwork: {{ .Values.topologyUpdater.hostNetwork }}
containers:
- name: topology-updater
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: "{{ .Values.image.pullPolicy }}"
+ livenessProbe:
+ {{- toYaml .Values.topologyUpdater.livenessProbe | nindent 10 }}
+ readinessProbe:
+ {{- toYaml .Values.topologyUpdater.readinessProbe | nindent 10 }}
env:
- name: NODE_NAME
valueFrom:
@@ -47,6 +56,9 @@ spec:
valueFrom:
fieldRef:
fieldPath: status.hostIP
+ {{- with .Values.topologyUpdater.extraEnvs }}
+ {{- toYaml . | nindent 8 }}
+ {{- end}}
command:
- "nfd-topology-updater"
args:
@@ -66,8 +78,8 @@ spec:
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
- "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
{{- end }}
- {{- if .Values.topologyUpdater.podSetFingerprint }}
- - "-pods-fingerprint"
+ {{- if not .Values.topologyUpdater.podSetFingerprint }}
+ - "-pods-fingerprint=false"
{{- end }}
{{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }}
- "-kubelet-config-uri=file:///host-var/kubelet-config"
@@ -77,9 +89,12 @@ spec:
- "-kubelet-state-dir="
{{- end }}
- -metrics={{ .Values.topologyUpdater.metricsPort | default "8081"}}
+ - "-grpc-health={{ .Values.topologyUpdater.healthPort | default "8082" }}"
ports:
- - name: metrics
- containerPort: {{ .Values.topologyUpdater.metricsPort | default "8081"}}
+ - containerPort: {{ .Values.topologyUpdater.metricsPort | default "8081"}}
+ name: metrics
+ - containerPort: {{ .Values.topologyUpdater.healthPort | default "8082" }}
+ name: health
volumeMounts:
{{- if .Values.topologyUpdater.kubeletConfigPath | empty | not }}
- name: kubelet-config
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/worker.yaml b/deployments/gpu-operator/charts/node-feature-discovery/templates/worker.yaml
index 0e56eb5d1..755466c75 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/templates/worker.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/worker.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.worker.enable }}
apiVersion: apps/v1
kind: DaemonSet
metadata:
@@ -11,6 +12,7 @@ metadata:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
+ revisionHistoryLimit: {{ .Values.worker.revisionHistoryLimit }}
selector:
matchLabels:
{{- include "node-feature-discovery.selectorLabels" . | nindent 6 }}
@@ -26,6 +28,9 @@ spec:
{{- end }}
spec:
dnsPolicy: ClusterFirstWithHostNet
+ {{- with .Values.priorityClassName }}
+ priorityClassName: {{ . }}
+ {{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
@@ -33,35 +38,57 @@ spec:
serviceAccountName: {{ include "node-feature-discovery.worker.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.worker.podSecurityContext | nindent 8 }}
+ hostNetwork: {{ .Values.worker.hostNetwork }}
containers:
- name: worker
securityContext:
{{- toYaml .Values.worker.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
+ livenessProbe:
+ {{- toYaml .Values.worker.livenessProbe | nindent 12 }}
+ readinessProbe:
+ {{- toYaml .Values.worker.readinessProbe | nindent 12 }}
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
+ - name: POD_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.name
+ - name: POD_UID
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.uid
+ {{- with .Values.worker.extraEnvs }}
+ {{- toYaml . | nindent 8 }}
+ {{- end}}
resources:
{{- toYaml .Values.worker.resources | nindent 12 }}
command:
- "nfd-worker"
args:
+{{- if not (and .Values.featureGates.NodeFeatureAPI .Values.enableNodeFeatureApi) }}
- "-server={{ include "node-feature-discovery.fullname" . }}-master:{{ .Values.master.service.port }}"
- {{- if not .Values.enableNodeFeatureApi }}
- - "-enable-nodefeature-api=false"
- {{- end }}
+{{- end }}
{{- if .Values.tls.enable }}
- "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
- "-cert-file=/etc/kubernetes/node-feature-discovery/certs/tls.crt"
+{{- end }}
+# Go over featureGates and add one -feature-gates flag per entry
+{{- range $key, $value := .Values.featureGates }}
+ - "-feature-gates={{ $key }}={{ $value }}"
{{- end }}
- "-metrics={{ .Values.worker.metricsPort | default "8081"}}"
+ - "-grpc-health={{ .Values.worker.healthPort | default "8082" }}"
ports:
- - name: metrics
- containerPort: {{ .Values.worker.metricsPort | default "8081"}}
+ - containerPort: {{ .Values.worker.metricsPort | default "8081"}}
+ name: metrics
+ - containerPort: {{ .Values.worker.healthPort | default "8082" }}
+ name: health
volumeMounts:
- name: host-boot
mountPath: "/host-boot"
@@ -78,6 +105,9 @@ spec:
- name: host-lib
mountPath: "/host-lib"
readOnly: true
+ - name: host-proc-swaps
+ mountPath: "/host-proc/swaps"
+ readOnly: true
{{- if .Values.worker.mountUsrSrc }}
- name: host-usr-src
mountPath: "/host-usr/src"
@@ -113,6 +143,9 @@ spec:
- name: host-lib
hostPath:
path: "/lib"
+ - name: host-proc-swaps
+ hostPath:
+ path: "/proc/swaps"
{{- if .Values.worker.mountUsrSrc }}
- name: host-usr-src
hostPath:
@@ -150,3 +183,4 @@ spec:
{{- with .Values.worker.priorityClassName }}
priorityClassName: {{ . | quote }}
{{- end }}
+{{- end }}
diff --git a/deployments/gpu-operator/charts/node-feature-discovery/values.yaml b/deployments/gpu-operator/charts/node-feature-discovery/values.yaml
index 2291aef4f..2d24983db 100644
--- a/deployments/gpu-operator/charts/node-feature-discovery/values.yaml
+++ b/deployments/gpu-operator/charts/node-feature-discovery/values.yaml
@@ -12,9 +12,19 @@ namespaceOverride: ""
enableNodeFeatureApi: true
+featureGates:
+ NodeFeatureAPI: true
+ NodeFeatureGroupAPI: false
+
+priorityClassName: ""
+
master:
+ enable: true
+ extraEnvs: []
+ hostNetwork: false
config: ###
# noPublish: false
+ # autoDefaultNs: true
# extraLabelNs: ["added.ns.io","added.kubernets.io"]
# denyLabelNs: ["denied.ns.io","denied.kubernetes.io"]
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
@@ -45,8 +55,11 @@ master:
# nfdApiParallelism: 10
###
# The TCP port that nfd-master listens for incoming requests. Default: 8080
+ # Deprecated: this parameter is related to the deprecated gRPC API and will
+ # be removed together with it in a future release.
port: 8080
metricsPort: 8081
+ healthPort: 8082
instance:
featureApi:
resyncPeriod:
@@ -79,6 +92,9 @@ master:
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name:
+
+ # specify how many old ReplicaSets for the Deployment to retain.
+ revisionHistoryLimit:
rbac:
create: true
@@ -87,17 +103,17 @@ master:
type: ClusterIP
port: 8080
- resources: {}
- # We usually recommend not to specify default resources and to leave this as a conscious
- # choice for the user. This also increases chances charts run on environments with little
- # resources, such as Minikube. If you do want to specify resources, uncomment the following
- # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
- # limits:
- # cpu: 100m
- # memory: 128Mi
- # requests:
- # cpu: 100m
- # memory: 128Mi
+ resources:
+ limits:
+ memory: 4Gi
+ requests:
+ cpu: 100m
+ # You may want to use the same value for `requests.memory` and `limits.memory`. The “requests” value affects scheduling to accommodate pods on nodes.
+ # If there is a large difference between “requests” and “limits” and nodes experience memory pressure, the kernel may invoke
+ # the OOM Killer, even if the memory does not exceed the “limits” threshold. This can cause unexpected pod evictions. Memory
+ # cannot be compressed and once allocated to a pod, it can only be reclaimed by killing the pod.
+ # Natan Yellin 22/09/2022 https://home.robusta.dev/blog/kubernetes-memory-limit
+ memory: 128Mi
nodeSelector: {}
@@ -128,8 +144,24 @@ master:
- key: "node-role.kubernetes.io/control-plane"
operator: In
values: [""]
+
+ livenessProbe:
+ grpc:
+ port: 8082
+ initialDelaySeconds: 10
+ # failureThreshold: 3
+ # periodSeconds: 10
+ readinessProbe:
+ grpc:
+ port: 8082
+ initialDelaySeconds: 5
+ failureThreshold: 10
+ # periodSeconds: 10
worker:
+ enable: true
+ extraEnvs: []
+ hostNetwork: false
config: ###
#core:
# labelWhiteList:
@@ -157,6 +189,7 @@ worker:
# cpuid:
## NOTE: whitelist has priority over blacklist
# attributeBlacklist:
+ # - "AVX10"
# - "BMI1"
# - "BMI2"
# - "CLMUL"
@@ -215,7 +248,7 @@ worker:
# # The following feature demonstrates the capabilities of the matchFeatures
# - name: "my custom rule"
# labels:
- # my-ng-feature: "true"
+ # "vendor.io/my-ng-feature": "true"
# # matchFeatures implements a logical AND over all matcher terms in the
# # list (i.e. all of the terms, or per-feature matchers, must match)
# matchFeatures:
@@ -286,7 +319,7 @@ worker:
# # The following feature demonstrates the capabilities of the matchAny
# - name: "my matchAny rule"
# labels:
- # my-ng-feature-2: "my-value"
+ # "vendor.io/my-ng-feature-2": "my-value"
# # matchAny implements a logical IF over all elements (sub-matchers) in
# # the list (i.e. at least one feature matcher must match)
# matchAny:
@@ -307,10 +340,17 @@ worker:
# vendor: {op: In, value: ["8086"]}
# class: {op: In, value: ["02"]}
#
+ # - name: "avx wildcard rule"
+ # labels:
+ # "my-avx-feature": "true"
+ # matchFeatures:
+ # - feature: cpu.cpuid
+ # matchName: {op: InRegexp, value: ["^AVX512"]}
+ #
# # The following features demonstreate label templating capabilities
# - name: "my template rule"
# labelsTemplate: |
- # {{ range .system.osrelease }}my-system-feature.{{ .Name }}={{ .Value }}
+ # {{ range .system.osrelease }}vendor.io/my-system-feature.{{ .Name }}={{ .Value }}
# {{ end }}
# matchFeatures:
# - feature: system.osrelease
@@ -320,7 +360,7 @@ worker:
#
# - name: "my template rule 2"
# labelsTemplate: |
- # {{ range .pci.device }}my-pci-device.{{ .class }}-{{ .device }}=with-cpuid
+ # {{ range .pci.device }}vendor.io/my-pci-device.{{ .class }}-{{ .device }}=with-cpuid
# {{ end }}
# matchFeatures:
# - feature: pci.device
@@ -335,7 +375,7 @@ worker:
# # previous labels and vars
# - name: "my dummy kernel rule"
# labels:
- # "my.kernel.feature": "true"
+ # "vendor.io/my.kernel.feature": "true"
# matchFeatures:
# - feature: kernel.version
# matchExpressions:
@@ -350,16 +390,24 @@ worker:
#
# - name: "my rule using backrefs"
# labels:
- # "my.backref.feature": "true"
+ # "vendor.io/my.backref.feature": "true"
# matchFeatures:
# - feature: rule.matched
# matchExpressions:
- # my.kernel.feature: {op: IsTrue}
+ # vendor.io/my.kernel.feature: {op: IsTrue}
# my.dummy.var: {op: Gt, value: ["0"]}
#
+ # - name: "kconfig template rule"
+ # labelsTemplate: |
+ # {{ range .kernel.config }}kconfig-{{ .Name }}={{ .Value }}
+ # {{ end }}
+ # matchFeatures:
+ # - feature: kernel.config
+ # matchName: {op: In, value: ["SWAP", "X86", "ARM"]}
###
metricsPort: 8081
+ healthPort: 8082
daemonsetAnnotations: {}
podSecurityContext: {}
# fsGroup: 2000
@@ -372,6 +420,19 @@ worker:
runAsNonRoot: true
# runAsUser: 1000
+ livenessProbe:
+ grpc:
+ port: 8082
+ initialDelaySeconds: 10
+ # failureThreshold: 3
+ # periodSeconds: 10
+ readinessProbe:
+ grpc:
+ port: 8082
+ initialDelaySeconds: 5
+ failureThreshold: 10
+ # periodSeconds: 10
+
serviceAccount:
# Specifies whether a service account should be created.
# We create this by default to make it easier for downstream users to apply PodSecurityPolicies.
@@ -382,6 +443,9 @@ worker:
# If not set and create is true, a name is generated using the fullname template
name:
+ # specify how many old ControllerRevisions for the DaemonSet to retain.
+ revisionHistoryLimit:
+
rbac:
create: true
@@ -389,17 +453,12 @@ worker:
# Does not work on systems without /usr/src AND a read-only /usr, such as Talos
mountUsrSrc: false
- resources: {}
- # We usually recommend not to specify default resources and to leave this as a conscious
- # choice for the user. This also increases chances charts run on environments with little
- # resources, such as Minikube. If you do want to specify resources, uncomment the following
- # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
- # limits:
- # cpu: 100m
- # memory: 128Mi
- # requests:
- # cpu: 100m
- # memory: 128Mi
+ resources:
+ limits:
+ memory: 512Mi
+ requests:
+ cpu: 5m
+ memory: 64Mi
nodeSelector: {}
@@ -424,15 +483,22 @@ topologyUpdater:
enable: false
createCRDs: false
+ extraEnvs: []
+ hostNetwork: false
serviceAccount:
create: true
annotations: {}
name:
+
+ # specify how many old ControllerRevisions for the DaemonSet to retain.
+ revisionHistoryLimit:
+
rbac:
create: true
metricsPort: 8081
+ healthPort: 8082
kubeletConfigPath:
kubeletPodResourcesSockPath:
updateInterval: 60s
@@ -446,18 +512,26 @@ topologyUpdater:
drop: [ "ALL" ]
readOnlyRootFilesystem: true
runAsUser: 0
-
- resources: {}
- # We usually recommend not to specify default resources and to leave this as a conscious
- # choice for the user. This also increases chances charts run on environments with little
- # resources, such as Minikube. If you do want to specify resources, uncomment the following
- # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
- # limits:
- # cpu: 100m
- # memory: 128Mi
- # requests:
- # cpu: 100m
- # memory: 128Mi
+
+ livenessProbe:
+ grpc:
+ port: 8082
+ initialDelaySeconds: 10
+ # failureThreshold: 3
+ # periodSeconds: 10
+ readinessProbe:
+ grpc:
+ port: 8082
+ initialDelaySeconds: 5
+ failureThreshold: 10
+ # periodSeconds: 10
+
+ resources:
+ limits:
+ memory: 60Mi
+ requests:
+ cpu: 50m
+ memory: 40Mi
nodeSelector: {}
tolerations: []
@@ -468,6 +542,8 @@ topologyUpdater:
gc:
enable: true
+ extraEnvs: []
+ hostNetwork: false
replicaCount: 1
serviceAccount:
@@ -481,17 +557,14 @@ gc:
podSecurityContext: {}
- resources: {}
- # We usually recommend not to specify default resources and to leave this as a conscious
- # choice for the user. This also increases chances charts run on environments with little
- # resources, such as Minikube. If you do want to specify resources, uncomment the following
- # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
- # limits:
- # cpu: 100m
- # memory: 128Mi
- # requests:
- # cpu: 100m
- # memory: 128Mi
+ resources:
+ limits:
+ memory: 1Gi
+ requests:
+ cpu: 10m
+ memory: 128Mi
+
+ metricsPort: 8081
nodeSelector: {}
tolerations: []
@@ -499,6 +572,9 @@ gc:
deploymentAnnotations: {}
affinity: {}
+ # specify how many old ReplicaSets for the Deployment to retain.
+ revisionHistoryLimit:
+
# Optionally use encryption for worker <--> master comms
# TODO: verify hostname is not yet supported
#
@@ -507,7 +583,11 @@ gc:
tls:
enable: false
certManager: false
+ certManagerCertificate:
+ issuerKind:
+ issuerName:
prometheus:
enable: false
+ scrapeInterval: 10s
labels: {}
diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
new file mode 100644
index 000000000..54e4a652b
--- /dev/null
+++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml
@@ -0,0 +1,2384 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.16.5
+ name: clusterpolicies.nvidia.com
+spec:
+ group: nvidia.com
+ names:
+ kind: ClusterPolicy
+ listKind: ClusterPolicyList
+ plural: clusterpolicies
+ singular: clusterpolicy
+ scope: Cluster
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.state
+ name: Status
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: string
+ name: v1
+ schema:
+ openAPIV3Schema:
+ description: ClusterPolicy is the Schema for the clusterpolicies API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ClusterPolicySpec defines the desired state of ClusterPolicy
+ properties:
+ ccManager:
+ description: CCManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ defaultMode:
+ description: Default CC mode setting for compatible GPUs on the
+ node
+ enum:
+ - "on"
+ - "off"
+ - devtools
+ type: string
+ enabled:
+ description: Enabled indicates if deployment of CC Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: CC Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: CC Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: CC Manager image tag
+ type: string
+ type: object
+ cdi:
+ description: CDI configures how the Container Device Interface is
+ used in the cluster
+ properties:
+ default:
+ default: false
+ description: Default indicates whether to use CDI as the default
+ mechanism for providing GPU access to containers.
+ type: boolean
+ enabled:
+ default: false
+ description: Enabled indicates whether CDI can be used to make
+ GPUs accessible to containers.
+ type: boolean
+ type: object
+ daemonsets:
+ description: Daemonset defines common configuration for all Daemonsets
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ priorityClassName:
+ type: string
+ rollingUpdate:
+ description: 'Optional: Configuration for rolling update of all
+ DaemonSet pods'
+ properties:
+ maxUnavailable:
+ type: string
+ type: object
+ tolerations:
+ description: 'Optional: Set tolerations'
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: |-
+ Key is the taint key that the toleration applies to. Empty means match all taint keys.
+ If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+ type: string
+ operator:
+ description: |-
+ Operator represents a key's relationship to the value.
+ Valid operators are Exists and Equal. Defaults to Equal.
+ Exists is equivalent to wildcard for value, so that a pod can
+ tolerate all taints of a particular category.
+ type: string
+ tolerationSeconds:
+ description: |-
+ TolerationSeconds represents the period of time the toleration (which must be
+ of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
+ it is not set, which means tolerate the taint forever (do not evict). Zero and
+ negative values will be treated as 0 (evict immediately) by the system.
+ format: int64
+ type: integer
+ value:
+ description: |-
+ Value is the taint value the toleration matches to.
+ If the operator is Exists, the value should be empty, otherwise just a regular string.
+ type: string
+ type: object
+ type: array
+ updateStrategy:
+ default: RollingUpdate
+ enum:
+ - RollingUpdate
+ - OnDelete
+ type: string
+ type: object
+ dcgm:
+ description: DCGM component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
+ as a separate pod is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ hostPort:
+ description: 'Deprecated: HostPort represents host port that needs
+ to be bound for DCGM engine (Default: 5555)'
+ format: int32
+ type: integer
+ image:
+ description: NVIDIA DCGM image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA DCGM image tag
+ type: string
+ type: object
+ dcgmExporter:
+ description: DCGMExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom metrics configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ name:
+ description: ConfigMap name with file dcgm-metrics.csv for
+ metrics to be collected by NVIDIA DCGM Exporter
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA DCGM Exporter
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA DCGM Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA DCGM Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ serviceMonitor:
+ description: 'Optional: ServiceMonitor configuration for NVIDIA
+ DCGM Exporter'
+ properties:
+ additionalLabels:
+ additionalProperties:
+ type: string
+ description: AdditionalLabels to add to ServiceMonitor instance
+ for NVIDIA DCGM Exporter
+ type: object
+ enabled:
+ description: Enabled indicates if ServiceMonitor is deployed
+ for NVIDIA DCGM Exporter
+ type: boolean
+ honorLabels:
+ description: HonorLabels chooses the metric’s labels on collisions
+ with target labels.
+ type: boolean
+ interval:
+ description: |-
+ Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
+ Supported units: y, w, d, h, m, s, ms
+ pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
+ type: string
+ relabelings:
+ description: Relabelings allows to rewrite labels on metric
+ sets for NVIDIA DCGM Exporter
+ items:
+ description: |-
+ RelabelConfig allows dynamic rewriting of the label set for targets, alerts,
+ scraped samples and remote write samples.
+
+ More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
+ properties:
+ action:
+ default: replace
+ description: |-
+ Action to perform based on the regex matching.
+
+ `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0.
+ `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0.
+
+ Default: "Replace"
+ enum:
+ - replace
+ - Replace
+ - keep
+ - Keep
+ - drop
+ - Drop
+ - hashmod
+ - HashMod
+ - labelmap
+ - LabelMap
+ - labeldrop
+ - LabelDrop
+ - labelkeep
+ - LabelKeep
+ - lowercase
+ - Lowercase
+ - uppercase
+ - Uppercase
+ - keepequal
+ - KeepEqual
+ - dropequal
+ - DropEqual
+ type: string
+ modulus:
+ description: |-
+ Modulus to take of the hash of the source label values.
+
+ Only applicable when the action is `HashMod`.
+ format: int64
+ type: integer
+ regex:
+ description: Regular expression against which the extracted
+ value is matched.
+ type: string
+ replacement:
+ description: |-
+ Replacement value against which a Replace action is performed if the
+ regular expression matches.
+
+ Regex capture groups are available.
+ type: string
+ separator:
+ description: Separator is the string between concatenated
+ SourceLabels.
+ type: string
+ sourceLabels:
+ description: |-
+ The source labels select values from existing labels. Their content is
+ concatenated using the configured Separator and matched against the
+ configured regular expression.
+ items:
+ description: |-
+ LabelName is a valid Prometheus label name which may only contain ASCII
+ letters, numbers, as well as underscores.
+ pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
+ type: string
+ type: array
+ targetLabel:
+ description: |-
+ Label to which the resulting string is written in a replacement.
+
+ It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`,
+ `KeepEqual` and `DropEqual` actions.
+
+ Regex capture groups are available.
+ type: string
+ type: object
+ type: array
+ type: object
+ version:
+ description: NVIDIA DCGM Exporter image tag
+ type: string
+ type: object
+ devicePlugin:
+ description: DevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Configuration for the NVIDIA Device Plugin
+ via the ConfigMap'
+ properties:
+ default:
+ description: Default config name within the ConfigMap for
+ the NVIDIA Device Plugin config
+ type: string
+ name:
+ description: ConfigMap name for NVIDIA Device Plugin config
+ including shared config between plugin and GFD
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Device
+ Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ mps:
+ description: 'Optional: MPS related configuration for the NVIDIA
+ Device Plugin'
+ properties:
+ root:
+ default: /run/nvidia/mps
+ description: Root defines the MPS root path on the host
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Device Plugin image tag
+ type: string
+ type: object
+ driver:
+ description: Driver component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ certConfig:
+ description: 'Optional: Custom certificates configuration for
+ NVIDIA Driver container'
+ properties:
+ name:
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Driver
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ kernelModuleConfig:
+ description: 'Optional: Kernel module configuration parameters
+ for the NVIDIA Driver'
+ properties:
+ name:
+ type: string
+ type: object
+ licensingConfig:
+ description: 'Optional: Licensing configuration for NVIDIA vGPU
+ licensing'
+ properties:
+ configMapName:
+ type: string
+ nlsEnabled:
+ description: NLSEnabled indicates if NVIDIA Licensing System
+ is used for licensing.
+ type: boolean
+ type: object
+ livenessProbe:
+ description: NVIDIA Driver container liveness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before liveness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ manager:
+ description: Manager represents configuration for NVIDIA Driver
+ Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ rdma:
+ description: GPUDirectRDMASpec defines the properties for nvidia-peermem
+ deployment
+ properties:
+ enabled:
+ description: Enabled indicates if GPUDirect RDMA is enabled
+ through GPU operator
+ type: boolean
+ useHostMofed:
+ description: UseHostMOFED indicates to use MOFED drivers directly
+ installed on the host to enable GPUDirect RDMA
+ type: boolean
+ type: object
+ readinessProbe:
+ description: NVIDIA Driver container readiness probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before readiness probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ repoConfig:
+ description: 'Optional: Custom repo configuration for NVIDIA Driver
+ container'
+ properties:
+ configMapName:
+ type: string
+ type: object
+ repository:
+ description: NVIDIA Driver image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ startupProbe:
+ description: NVIDIA Driver container startup probe settings
+ properties:
+ failureThreshold:
+ description: |-
+ Minimum consecutive failures for the probe to be considered failed after having succeeded.
+ Defaults to 3. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ initialDelaySeconds:
+ description: |-
+ Number of seconds after the container has started before startup probes are initiated.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ type: integer
+ periodSeconds:
+ description: |-
+ How often (in seconds) to perform the probe.
+ Default to 10 seconds. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ successThreshold:
+ description: |-
+ Minimum consecutive successes for the probe to be considered successful after having failed.
+ Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
+ format: int32
+ minimum: 1
+ type: integer
+ timeoutSeconds:
+ description: |-
+ Number of seconds after which the probe times out.
+ Defaults to 1 second. Minimum value is 1.
+ More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
+ format: int32
+ minimum: 1
+ type: integer
+ type: object
+ upgradePolicy:
+ description: Driver auto-upgrade settings
+ properties:
+ autoUpgrade:
+ default: false
+ description: |-
+ AutoUpgrade is a global switch for automatic upgrade feature
+ if set to false all other options are ignored
+ type: boolean
+ drain:
+ description: DrainSpec describes configuration for node drain
+ during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates whether the drain should continue even if there are pods using emptyDir
+ (local data that will be deleted when the node is drained)
+ type: boolean
+ enable:
+ default: false
+ description: Enable indicates if node draining is allowed
+ during upgrade
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force draining is allowed
+ type: boolean
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector to filter pods on the node that need to be drained
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 300
+ description: TimeoutSecond specifies the length of time
+ in seconds to wait before giving up drain, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ maxParallelUpgrades:
+ default: 1
+ description: |-
+ MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
+ 0 means no limit, all nodes will be upgraded in parallel
+ minimum: 0
+ type: integer
+ maxUnavailable:
+ anyOf:
+ - type: integer
+ - type: string
+ default: 25%
+ description: |-
+ MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
+ Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
+ Absolute number is calculated from percentage by rounding up.
+ By default, a fixed value of 25% is used.
+ x-kubernetes-int-or-string: true
+ podDeletion:
+ description: PodDeletionSpec describes configuration for deletion
+ of pods using special resources during automatic upgrade
+ properties:
+ deleteEmptyDir:
+ default: false
+ description: |-
+ DeleteEmptyDir indicates whether pod deletion should continue even if there are pods using emptyDir
+ (local data that will be deleted when the pod is deleted)
+ type: boolean
+ force:
+ default: false
+ description: Force indicates if force deletion is allowed
+ type: boolean
+ timeoutSeconds:
+ default: 300
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ waitForCompletion:
+ description: WaitForCompletionSpec describes the configuration
+ for waiting on job completions
+ properties:
+ podSelector:
+ description: |-
+ PodSelector specifies a label selector for the pods to wait for completion
+ For more details on label selectors, see:
+ https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
+ type: string
+ timeoutSeconds:
+ default: 0
+ description: |-
+ TimeoutSecond specifies the length of time in seconds to wait before giving up on pod termination, zero means
+ infinite
+ minimum: 0
+ type: integer
+ type: object
+ type: object
+ useNvidiaDriverCRD:
+ description: UseNvidiaDriverCRD indicates if the deployment of
+ NVIDIA Driver is managed by the NVIDIADriver CRD type
+ type: boolean
+ useOpenKernelModules:
+ description: UseOpenKernelModules indicates if the open GPU kernel
+ modules should be used
+ type: boolean
+ usePrecompiled:
+ description: UsePrecompiled indicates if deployment of NVIDIA
+ Driver using pre-compiled modules is enabled
+ type: boolean
+ version:
+ description: NVIDIA Driver image tag
+ type: string
+ virtualTopology:
+ description: 'Optional: Virtual Topology Daemon configuration
+ for NVIDIA vGPU drivers'
+ properties:
+ config:
+ description: 'Optional: Config name representing virtual topology
+ daemon configuration file nvidia-topologyd.conf'
+ type: string
+ type: object
+ type: object
+ gdrcopy:
+ description: GDRCopy component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GDRCopy is enabled through GPU
+ Operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GDRCopy driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GDRCopy driver image repository
+ type: string
+ version:
+ description: NVIDIA GDRCopy driver image tag
+ type: string
+ type: object
+ gds:
+ description: GPUDirectStorage defines the spec for GDS components (Experimental)
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if GPUDirect Storage is enabled
+ through GPU operator
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA GPUDirect Storage Driver image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA GPUDirect Storage Driver image repository
+ type: string
+ version:
+ description: NVIDIA GPUDirect Storage Driver image tag
+ type: string
+ type: object
+ gfd:
+ description: GPUFeatureDiscovery spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of GPU Feature Discovery
+ Plugin is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: GFD image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: GFD image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: GFD image tag
+ type: string
+ type: object
+ hostPaths:
+ description: HostPaths defines various paths on the host needed by
+ GPU Operator components
+ properties:
+ driverInstallDir:
+ description: |-
+ DriverInstallDir represents the root at which driver files including libraries,
+ config files, and executables can be found.
+ type: string
+ rootFS:
+ description: |-
+ RootFS represents the path to the root filesystem of the host.
+ This is used by components that need to interact with the host filesystem
+ and as such this must be a chroot-able filesystem.
+ Examples include the MIG Manager and Toolkit Container which may need to
+ stop, start, or restart systemd services.
+ type: string
+ type: object
+ kataManager:
+ description: KataManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: Kata Manager config
+ properties:
+ artifactsDir:
+ default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
+ description: |-
+ ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
+ are placed on the local filesystem.
+ type: string
+ runtimeClasses:
+ description: RuntimeClasses is a list of kata runtime classes
+ to configure.
+ items:
+ description: RuntimeClass defines the configuration for
+ a kata RuntimeClass
+ properties:
+ artifacts:
+ description: Artifacts are the kata artifacts associated
+ with the runtime class.
+ properties:
+ pullSecret:
+ description: PullSecret is the secret used to pull
+ the OCI artifact.
+ type: string
+ url:
+ description: |-
+ URL is the path to the OCI artifact (payload) containing all artifacts
+ associated with a kata runtime class.
+ type: string
+ required:
+ - url
+ type: object
+ name:
+ description: Name is the name of the kata runtime class.
+ type: string
+ nodeSelector:
+ additionalProperties:
+ type: string
+ description: |-
+ NodeSelector specifies the nodeSelector for the RuntimeClass object.
+ This ensures pods running with the RuntimeClass only get scheduled
+ onto nodes which support it.
+ type: object
+ required:
+ - artifacts
+ - name
+ type: object
+ type: array
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of Kata Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Kata Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Kata Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Kata Manager image tag
+ type: string
+ type: object
+ mig:
+ description: MIG spec
+ properties:
+ strategy:
+ description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
+ Device Plugin'
+ enum:
+ - none
+ - single
+ - mixed
+ type: string
+ type: object
+ migManager:
+ description: MIGManager for configuration to deploy MIG Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: 'Optional: Custom mig-parted configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ default:
+ default: all-disabled
+ description: Default MIG config to be applied on the node,
+ when there is no config specified with the node label nvidia.com/mig.config
+ enum:
+ - all-disabled
+ - ""
+ type: string
+ name:
+ default: default-mig-parted-config
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA MIG Manager
+ is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ gpuClientsConfig:
+ description: 'Optional: Custom gpu-clients configuration for NVIDIA
+ MIG Manager container'
+ properties:
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ image:
+ description: NVIDIA MIG Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA MIG Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA MIG Manager image tag
+ type: string
+ type: object
+ nodeStatusExporter:
+ description: NodeStatusExporter spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of Node Status Exporter
+ is enabled.
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Node Status Exporter image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Node Status Exporter image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: Node Status Exporter image tag
+ type: string
+ type: object
+ operator:
+ description: Operator component spec
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Annotations is an unstructured key value map stored with a resource that may be
+ set by external tools to store and retrieve arbitrary metadata. They are not
+ queryable and should be preserved when modifying objects.
+ type: object
+ defaultRuntime:
+ default: docker
+ description: Runtime defines container runtime type
+ enum:
+ - docker
+ - crio
+ - containerd
+ type: string
+ initContainer:
+ description: InitContainerSpec describes configuration for initContainer
+ image used with all components
+ properties:
+ image:
+ description: Image represents image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents image repository path
+ type: string
+ version:
+ description: Version represents image tag(version)
+ type: string
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: |-
+ Optional: Map of string keys and values that can be used to organize and categorize
+ (scope and select) objects. May match selectors of replication controllers
+ and services.
+ type: object
+ runtimeClass:
+ default: nvidia
+ type: string
+ use_ocp_driver_toolkit:
+ description: UseOpenShiftDriverToolkit indicates if DriverToolkit
+ image should be used on OpenShift to build and install driver
+ modules
+ type: boolean
+ required:
+ - defaultRuntime
+ type: object
+ psa:
+ description: PSA defines spec for PodSecurityAdmission configuration
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityAdmission configuration
+ needs to be enabled for all Pods
+ type: boolean
+ type: object
+ psp:
+ description: |-
+ Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
+ PSP defines spec for handling PodSecurityPolicies
+ properties:
+ enabled:
+ description: Enabled indicates if PodSecurityPolicies needs to
+ be enabled for all Pods
+ type: boolean
+ type: object
+ sandboxDevicePlugin:
+ description: SandboxDevicePlugin component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Sandbox
+ Device Plugin through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Sandbox Device Plugin image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA Sandbox Device Plugin image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Sandbox Device Plugin image tag
+ type: string
+ type: object
+ sandboxWorkloads:
+ description: SandboxWorkloads defines the spec for handling sandbox
+ workloads (i.e. Virtual Machines)
+ properties:
+ defaultWorkload:
+ default: container
+ description: |-
+ DefaultWorkload indicates the default GPU workload type to configure
+ worker nodes in the cluster for
+ enum:
+ - container
+ - vm-passthrough
+ - vm-vgpu
+ type: string
+ enabled:
+ description: |-
+ Enabled indicates if the GPU Operator should manage additional operands required
+ for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
+ type: boolean
+ type: object
+ toolkit:
+ description: Toolkit component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA Container
+ Toolkit through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA Container Toolkit image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ installDir:
+ default: /usr/local/nvidia
+ description: Toolkit install directory on the host
+ type: string
+ repository:
+ description: NVIDIA Container Toolkit image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA Container Toolkit image tag
+ type: string
+ type: object
+ validator:
+ description: Validator defines the spec for operator-validator daemonset
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ cuda:
+ description: CUDA validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ driver:
+ description: Driver validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Validator image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ plugin:
+ description: Plugin validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ repository:
+ description: Validator image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ toolkit:
+ description: Toolkit validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ version:
+ description: Validator image tag
+ type: string
+ vfioPCI:
+ description: VfioPCI validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuDevices:
+ description: VGPUDevices validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ vgpuManager:
+ description: VGPUManager validator spec
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ type: object
+ type: object
+ vfioManager:
+ description: VFIOManager for configuration to deploy VFIO-PCI Manager
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of VFIO Manager is
+ enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: VFIO Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: VFIO Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: VFIO Manager image tag
+ type: string
+ type: object
+ vgpuDeviceManager:
+ description: VGPUDeviceManager spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ config:
+ description: NVIDIA vGPU devices configuration for NVIDIA vGPU
+ Device Manager container
+ properties:
+ default:
+ default: default
+ description: Default config name within the ConfigMap
+ type: string
+ name:
+ description: ConfigMap name
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Device
+ Manager is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Device Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Device Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Device Manager image tag
+ type: string
+ type: object
+ vgpuManager:
+ description: VGPUManager component spec
+ properties:
+ args:
+ description: 'Optional: List of arguments'
+ items:
+ type: string
+ type: array
+ driverManager:
+ description: DriverManager represents configuration for NVIDIA
+ Driver Manager initContainer
+ properties:
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: Image represents NVIDIA Driver Manager image
+ name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: Repository represents Driver Manager repository
+ path
+ type: string
+ version:
+ description: Version represents NVIDIA Driver Manager image
+ tag(version)
+ type: string
+ type: object
+ enabled:
+ description: Enabled indicates if deployment of NVIDIA vGPU Manager
+ through operator is enabled
+ type: boolean
+ env:
+ description: 'Optional: List of environment variables'
+ items:
+ description: EnvVar represents an environment variable present
+ in a Container.
+ properties:
+ name:
+ description: Name of the environment variable.
+ type: string
+ value:
+ description: Value of the environment variable.
+ type: string
+ required:
+ - name
+ type: object
+ type: array
+ image:
+ description: NVIDIA vGPU Manager image name
+ pattern: '[a-zA-Z0-9\-]+'
+ type: string
+ imagePullPolicy:
+ description: Image pull policy
+ type: string
+ imagePullSecrets:
+ description: Image pull secrets
+ items:
+ type: string
+ type: array
+ repository:
+ description: NVIDIA vGPU Manager image repository
+ type: string
+ resources:
+ description: 'Optional: Define resources requests and limits for
+ each pod'
+ properties:
+ limits:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Limits describes the maximum amount of compute resources allowed.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ requests:
+ additionalProperties:
+ anyOf:
+ - type: integer
+ - type: string
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ description: |-
+ Requests describes the minimum amount of compute resources required.
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
+ otherwise to an implementation-defined value. Requests cannot exceed Limits.
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
+ type: object
+ type: object
+ version:
+ description: NVIDIA vGPU Manager image tag
+ type: string
+ type: object
+ required:
+ - daemonsets
+ - dcgm
+ - dcgmExporter
+ - devicePlugin
+ - driver
+ - gfd
+ - nodeStatusExporter
+ - operator
+ - toolkit
+ type: object
+ status:
+ description: ClusterPolicyStatus defines the observed state of ClusterPolicy
+ properties:
+ conditions:
+ description: Conditions is a list of conditions representing the ClusterPolicy's
+ current state.
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ namespace:
+ description: Namespace indicates a namespace in which the operator
+ is installed
+ type: string
+ state:
+ description: State indicates status of ClusterPolicy
+ enum:
+ - ignored
+ - ready
+ - notReady
+ type: string
+ required:
+ - state
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml
deleted file mode 100644
index 16e35bf4b..000000000
--- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml
+++ /dev/null
@@ -1,2357 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
- annotations:
- controller-gen.kubebuilder.io/version: v0.14.0
- name: clusterpolicies.nvidia.com
-spec:
- group: nvidia.com
- names:
- kind: ClusterPolicy
- listKind: ClusterPolicyList
- plural: clusterpolicies
- singular: clusterpolicy
- scope: Cluster
- versions:
- - additionalPrinterColumns:
- - jsonPath: .status.state
- name: Status
- type: string
- - jsonPath: .metadata.creationTimestamp
- name: Age
- type: string
- name: v1
- schema:
- openAPIV3Schema:
- description: ClusterPolicy is the Schema for the clusterpolicies API
- properties:
- apiVersion:
- description: |-
- APIVersion defines the versioned schema of this representation of an object.
- Servers should convert recognized schemas to the latest internal value, and
- may reject unrecognized values.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
- type: string
- kind:
- description: |-
- Kind is a string value representing the REST resource this object represents.
- Servers may infer this from the endpoint the client submits requests to.
- Cannot be updated.
- In CamelCase.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- metadata:
- type: object
- spec:
- description: ClusterPolicySpec defines the desired state of ClusterPolicy
- properties:
- ccManager:
- description: CCManager component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- defaultMode:
- description: Default CC mode setting for compatible GPUs on the
- node
- enum:
- - "on"
- - "off"
- - devtools
- type: string
- enabled:
- description: Enabled indicates if deployment of CC Manager is
- enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: CC Manager image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: CC Manager image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: CC Manager image tag
- type: string
- type: object
- cdi:
- description: CDI configures how the Container Device Interface is
- used in the cluster
- properties:
- default:
- default: false
- description: Default indicates whether to use CDI as the default
- mechanism for providing GPU access to containers.
- type: boolean
- enabled:
- default: false
- description: Enabled indicates whether CDI can be used to make
- GPUs accessible to containers.
- type: boolean
- type: object
- daemonsets:
- description: Daemonset defines common configuration for all Daemonsets
- properties:
- annotations:
- additionalProperties:
- type: string
- description: |-
- Optional: Annotations is an unstructured key value map stored with a resource that may be
- set by external tools to store and retrieve arbitrary metadata. They are not
- queryable and should be preserved when modifying objects.
- type: object
- labels:
- additionalProperties:
- type: string
- description: |-
- Optional: Map of string keys and values that can be used to organize and categorize
- (scope and select) objects. May match selectors of replication controllers
- and services.
- type: object
- priorityClassName:
- type: string
- rollingUpdate:
- description: 'Optional: Configuration for rolling update of all
- DaemonSet pods'
- properties:
- maxUnavailable:
- type: string
- type: object
- tolerations:
- description: 'Optional: Set tolerations'
- items:
- description: |-
- The pod this Toleration is attached to tolerates any taint that matches
- the triple using the matching operator .
- properties:
- effect:
- description: |-
- Effect indicates the taint effect to match. Empty means match all taint effects.
- When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
- type: string
- key:
- description: |-
- Key is the taint key that the toleration applies to. Empty means match all taint keys.
- If the key is empty, operator must be Exists; this combination means to match all values and all keys.
- type: string
- operator:
- description: |-
- Operator represents a key's relationship to the value.
- Valid operators are Exists and Equal. Defaults to Equal.
- Exists is equivalent to wildcard for value, so that a pod can
- tolerate all taints of a particular category.
- type: string
- tolerationSeconds:
- description: |-
- TolerationSeconds represents the period of time the toleration (which must be
- of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
- it is not set, which means tolerate the taint forever (do not evict). Zero and
- negative values will be treated as 0 (evict immediately) by the system.
- format: int64
- type: integer
- value:
- description: |-
- Value is the taint value the toleration matches to.
- If the operator is Exists, the value should be empty, otherwise just a regular string.
- type: string
- type: object
- type: array
- updateStrategy:
- default: RollingUpdate
- enum:
- - RollingUpdate
- - OnDelete
- type: string
- type: object
- dcgm:
- description: DCGM component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- enabled:
- description: Enabled indicates if deployment of NVIDIA DCGM Hostengine
- as a separate pod is enabled.
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- hostPort:
- description: 'HostPort represents host port that needs to be bound
- for DCGM engine (Default: 5555)'
- format: int32
- type: integer
- image:
- description: NVIDIA DCGM image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA DCGM image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: NVIDIA DCGM image tag
- type: string
- type: object
- dcgmExporter:
- description: DCGMExporter spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- config:
- description: 'Optional: Custom metrics configuration for NVIDIA
- DCGM Exporter'
- properties:
- name:
- description: ConfigMap name with file dcgm-metrics.csv for
- metrics to be collected by NVIDIA DCGM Exporter
- type: string
- type: object
- enabled:
- description: Enabled indicates if deployment of NVIDIA DCGM Exporter
- through operator is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA DCGM Exporter image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA DCGM Exporter image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- serviceMonitor:
- description: 'Optional: ServiceMonitor configuration for NVIDIA
- DCGM Exporter'
- properties:
- additionalLabels:
- additionalProperties:
- type: string
- description: AdditionalLabels to add to ServiceMonitor instance
- for NVIDIA DCGM Exporter
- type: object
- enabled:
- description: Enabled indicates if ServiceMonitor is deployed
- for NVIDIA DCGM Exporter
- type: boolean
- honorLabels:
- description: HonorLabels chooses the metric’s labels on collisions
- with target labels.
- type: boolean
- interval:
- description: |-
- Interval which metrics should be scraped from NVIDIA DCGM Exporter. If not specified Prometheus’ global scrape interval is used.
- Supported units: y, w, d, h, m, s, ms
- pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
- type: string
- relabelings:
- description: Relabelings allows to rewrite labels on metric
- sets for NVIDIA DCGM Exporter
- items:
- description: |-
- RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion.
- It defines ``-section of Prometheus configuration.
- More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs
- properties:
- action:
- default: replace
- description: |-
- Action to perform based on regex matching. Default is 'replace'.
- uppercase and lowercase actions require Prometheus >= 2.36.
- enum:
- - replace
- - Replace
- - keep
- - Keep
- - drop
- - Drop
- - hashmod
- - HashMod
- - labelmap
- - LabelMap
- - labeldrop
- - LabelDrop
- - labelkeep
- - LabelKeep
- - lowercase
- - Lowercase
- - uppercase
- - Uppercase
- - keepequal
- - KeepEqual
- - dropequal
- - DropEqual
- type: string
- modulus:
- description: Modulus to take of the hash of the source
- label values.
- format: int64
- type: integer
- regex:
- description: Regular expression against which the extracted
- value is matched. Default is '(.*)'
- type: string
- replacement:
- description: |-
- Replacement value against which a regex replace is performed if the
- regular expression matches. Regex capture groups are available. Default is '$1'
- type: string
- separator:
- description: Separator placed between concatenated source
- label values. default is ';'.
- type: string
- sourceLabels:
- description: |-
- The source labels select values from existing labels. Their content is concatenated
- using the configured separator and matched against the configured regular expression
- for the replace, keep, and drop actions.
- items:
- description: LabelName is a valid Prometheus label
- name which may only contain ASCII letters, numbers,
- as well as underscores.
- pattern: ^[a-zA-Z_][a-zA-Z0-9_]*$
- type: string
- type: array
- targetLabel:
- description: |-
- Label to which the resulting value is written in a replace action.
- It is mandatory for replace actions. Regex capture groups are available.
- type: string
- type: object
- type: array
- type: object
- version:
- description: NVIDIA DCGM Exporter image tag
- type: string
- type: object
- devicePlugin:
- description: DevicePlugin component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- config:
- description: 'Optional: Configuration for the NVIDIA Device Plugin
- via the ConfigMap'
- properties:
- default:
- description: Default config name within the ConfigMap for
- the NVIDIA Device Plugin config
- type: string
- name:
- description: ConfigMap name for NVIDIA Device Plugin config
- including shared config between plugin and GFD
- type: string
- type: object
- enabled:
- description: Enabled indicates if deployment of NVIDIA Device
- Plugin through operator is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA Device Plugin image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA Device Plugin image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: NVIDIA Device Plugin image tag
- type: string
- type: object
- driver:
- description: Driver component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- certConfig:
- description: 'Optional: Custom certificates configuration for
- NVIDIA Driver container'
- properties:
- name:
- type: string
- type: object
- enabled:
- description: Enabled indicates if deployment of NVIDIA Driver
- through operator is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA Driver image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- kernelModuleConfig:
- description: 'Optional: Kernel module configuration parameters
- for the NVIDIA Driver'
- properties:
- name:
- type: string
- type: object
- licensingConfig:
- description: 'Optional: Licensing configuration for NVIDIA vGPU
- licensing'
- properties:
- configMapName:
- type: string
- nlsEnabled:
- description: NLSEnabled indicates if NVIDIA Licensing System
- is used for licensing.
- type: boolean
- type: object
- livenessProbe:
- description: NVIDIA Driver container liveness probe settings
- properties:
- failureThreshold:
- description: |-
- Minimum consecutive failures for the probe to be considered failed after having succeeded.
- Defaults to 3. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- initialDelaySeconds:
- description: |-
- Number of seconds after the container has started before liveness probes are initiated.
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
- format: int32
- type: integer
- periodSeconds:
- description: |-
- How often (in seconds) to perform the probe.
- Default to 10 seconds. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- successThreshold:
- description: |-
- Minimum consecutive successes for the probe to be considered successful after having failed.
- Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- timeoutSeconds:
- description: |-
- Number of seconds after which the probe times out.
- Defaults to 1 second. Minimum value is 1.
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
- format: int32
- minimum: 1
- type: integer
- type: object
- manager:
- description: Manager represents configuration for NVIDIA Driver
- Manager initContainer
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: Image represents NVIDIA Driver Manager image
- name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: Repository represents Driver Managerrepository
- path
- type: string
- version:
- description: Version represents NVIDIA Driver Manager image
- tag(version)
- type: string
- type: object
- rdma:
- description: GPUDirectRDMASpec defines the properties for nvidia-peermem
- deployment
- properties:
- enabled:
- description: Enabled indicates if GPUDirect RDMA is enabled
- through GPU operator
- type: boolean
- useHostMofed:
- description: UseHostMOFED indicates to use MOFED drivers directly
- installed on the host to enable GPUDirect RDMA
- type: boolean
- type: object
- readinessProbe:
- description: NVIDIA Driver container readiness probe settings
- properties:
- failureThreshold:
- description: |-
- Minimum consecutive failures for the probe to be considered failed after having succeeded.
- Defaults to 3. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- initialDelaySeconds:
- description: |-
- Number of seconds after the container has started before liveness probes are initiated.
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
- format: int32
- type: integer
- periodSeconds:
- description: |-
- How often (in seconds) to perform the probe.
- Default to 10 seconds. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- successThreshold:
- description: |-
- Minimum consecutive successes for the probe to be considered successful after having failed.
- Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- timeoutSeconds:
- description: |-
- Number of seconds after which the probe times out.
- Defaults to 1 second. Minimum value is 1.
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
- format: int32
- minimum: 1
- type: integer
- type: object
- repoConfig:
- description: 'Optional: Custom repo configuration for NVIDIA Driver
- container'
- properties:
- configMapName:
- type: string
- type: object
- repository:
- description: NVIDIA Driver image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- startupProbe:
- description: NVIDIA Driver container startup probe settings
- properties:
- failureThreshold:
- description: |-
- Minimum consecutive failures for the probe to be considered failed after having succeeded.
- Defaults to 3. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- initialDelaySeconds:
- description: |-
- Number of seconds after the container has started before liveness probes are initiated.
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
- format: int32
- type: integer
- periodSeconds:
- description: |-
- How often (in seconds) to perform the probe.
- Default to 10 seconds. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- successThreshold:
- description: |-
- Minimum consecutive successes for the probe to be considered successful after having failed.
- Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.
- format: int32
- minimum: 1
- type: integer
- timeoutSeconds:
- description: |-
- Number of seconds after which the probe times out.
- Defaults to 1 second. Minimum value is 1.
- More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes
- format: int32
- minimum: 1
- type: integer
- type: object
- upgradePolicy:
- description: Driver auto-upgrade settings
- properties:
- autoUpgrade:
- default: false
- description: |-
- AutoUpgrade is a global switch for automatic upgrade feature
- if set to false all other options are ignored
- type: boolean
- drain:
- description: DrainSpec describes configuration for node drain
- during automatic upgrade
- properties:
- deleteEmptyDir:
- default: false
- description: |-
- DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
- (local data that will be deleted when the node is drained)
- type: boolean
- enable:
- default: false
- description: Enable indicates if node draining is allowed
- during upgrade
- type: boolean
- force:
- default: false
- description: Force indicates if force draining is allowed
- type: boolean
- podSelector:
- description: |-
- PodSelector specifies a label selector to filter pods on the node that need to be drained
- For more details on label selectors, see:
- https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
- type: string
- timeoutSeconds:
- default: 300
- description: TimeoutSecond specifies the length of time
- in seconds to wait before giving up drain, zero means
- infinite
- minimum: 0
- type: integer
- type: object
- maxParallelUpgrades:
- default: 1
- description: |-
- MaxParallelUpgrades indicates how many nodes can be upgraded in parallel
- 0 means no limit, all nodes will be upgraded in parallel
- minimum: 0
- type: integer
- maxUnavailable:
- anyOf:
- - type: integer
- - type: string
- default: 25%
- description: |-
- MaxUnavailable is the maximum number of nodes with the driver installed, that can be unavailable during the upgrade.
- Value can be an absolute number (ex: 5) or a percentage of total nodes at the start of upgrade (ex: 10%).
- Absolute number is calculated from percentage by rounding up.
- By default, a fixed value of 25% is used.
- x-kubernetes-int-or-string: true
- podDeletion:
- description: PodDeletionSpec describes configuration for deletion
- of pods using special resources during automatic upgrade
- properties:
- deleteEmptyDir:
- default: false
- description: |-
- DeleteEmptyDir indicates if should continue even if there are pods using emptyDir
- (local data that will be deleted when the pod is deleted)
- type: boolean
- force:
- default: false
- description: Force indicates if force deletion is allowed
- type: boolean
- timeoutSeconds:
- default: 300
- description: TimeoutSecond specifies the length of time
- in seconds to wait before giving up on pod termination,
- zero means infinite
- minimum: 0
- type: integer
- type: object
- waitForCompletion:
- description: WaitForCompletionSpec describes the configuration
- for waiting on job completions
- properties:
- podSelector:
- description: |-
- PodSelector specifies a label selector for the pods to wait for completion
- For more details on label selectors, see:
- https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors
- type: string
- timeoutSeconds:
- default: 0
- description: TimeoutSecond specifies the length of time
- in seconds to wait before giving up on pod termination,
- zero means infinite
- minimum: 0
- type: integer
- type: object
- type: object
- useNvidiaDriverCRD:
- description: UseNvidiaDriverCRD indicates if the deployment of
- NVIDIA Driver is managed by the NVIDIADriver CRD type
- type: boolean
- useOpenKernelModules:
- description: UseOpenKernelModules indicates if the open GPU kernel
- modules should be used
- type: boolean
- usePrecompiled:
- description: UsePrecompiled indicates if deployment of NVIDIA
- Driver using pre-compiled modules is enabled
- type: boolean
- version:
- description: NVIDIA Driver image tag
- type: string
- virtualTopology:
- description: 'Optional: Virtual Topology Daemon configuration
- for NVIDIA vGPU drivers'
- properties:
- config:
- description: 'Optional: Config name representing virtual topology
- daemon configuration file nvidia-topologyd.conf'
- type: string
- type: object
- type: object
- gdrcopy:
- description: GDRCopy component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- enabled:
- description: Enabled indicates if GDRCopy is enabled through GPU
- Operator
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA GDRCopy driver image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA GDRCopy driver image repository
- type: string
- version:
- description: NVIDIA GDRCopy driver image tag
- type: string
- type: object
- gds:
- description: GPUDirectStorage defines the spec for GDS components(Experimental)
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- enabled:
- description: Enabled indicates if GPUDirect Storage is enabled
- through GPU operator
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA GPUDirect Storage Driver image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA GPUDirect Storage Driver image repository
- type: string
- version:
- description: NVIDIA GPUDirect Storage Driver image tag
- type: string
- type: object
- gfd:
- description: GPUFeatureDiscovery spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- enabled:
- description: Enabled indicates if deployment of GPU Feature Discovery
- Plugin is enabled.
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: GFD image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: GFD image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: GFD image tag
- type: string
- type: object
- kataManager:
- description: KataManager component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- config:
- description: Kata Manager config
- properties:
- artifactsDir:
- default: /opt/nvidia-gpu-operator/artifacts/runtimeclasses
- description: |-
- ArtifactsDir is the directory where kata artifacts (e.g. kernel / guest images, configuration, etc.)
- are placed on the local filesystem.
- type: string
- runtimeClasses:
- description: RuntimeClasses is a list of kata runtime classes
- to configure.
- items:
- description: RuntimeClass defines the configuration for
- a kata RuntimeClass
- properties:
- artifacts:
- description: Artifacts are the kata artifacts associated
- with the runtime class.
- properties:
- pullSecret:
- description: PullSecret is the secret used to pull
- the OCI artifact.
- type: string
- url:
- description: |-
- URL is the path to the OCI artifact (payload) containing all artifacts
- associated with a kata runtime class.
- type: string
- required:
- - url
- type: object
- name:
- description: Name is the name of the kata runtime class.
- type: string
- nodeSelector:
- additionalProperties:
- type: string
- description: |-
- NodeSelector specifies the nodeSelector for the RuntimeClass object.
- This ensures pods running with the RuntimeClass only get scheduled
- onto nodes which support it.
- type: object
- required:
- - artifacts
- - name
- type: object
- type: array
- type: object
- enabled:
- description: Enabled indicates if deployment of Kata Manager is
- enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: Kata Manager image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: Kata Manager image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: Kata Manager image tag
- type: string
- type: object
- mig:
- description: MIG spec
- properties:
- strategy:
- description: 'Optional: MIGStrategy to apply for GFD and NVIDIA
- Device Plugin'
- enum:
- - none
- - single
- - mixed
- type: string
- type: object
- migManager:
- description: MIGManager for configuration to deploy MIG Manager
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- config:
- description: 'Optional: Custom mig-parted configuration for NVIDIA
- MIG Manager container'
- properties:
- default:
- default: all-disabled
- description: Default MIG config to be applied on the node,
- when there is no config specified with the node label nvidia.com/mig.config
- enum:
- - all-disabled
- - ""
- type: string
- name:
- default: default-mig-parted-config
- description: ConfigMap name
- type: string
- type: object
- enabled:
- description: Enabled indicates if deployment of NVIDIA MIG Manager
- is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- gpuClientsConfig:
- description: 'Optional: Custom gpu-clients configuration for NVIDIA
- MIG Manager container'
- properties:
- name:
- description: ConfigMap name
- type: string
- type: object
- image:
- description: NVIDIA MIG Manager image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA MIG Manager image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: NVIDIA MIG Manager image tag
- type: string
- type: object
- nodeStatusExporter:
- description: NodeStatusExporter spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- enabled:
- description: Enabled indicates if deployment of Node Status Exporter
- is enabled.
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: Node Status Exporter image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: Node Status Exporterimage repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: Node Status Exporterimage tag
- type: string
- type: object
- operator:
- description: Operator component spec
- properties:
- annotations:
- additionalProperties:
- type: string
- description: |-
- Optional: Annotations is an unstructured key value map stored with a resource that may be
- set by external tools to store and retrieve arbitrary metadata. They are not
- queryable and should be preserved when modifying objects.
- type: object
- defaultRuntime:
- default: docker
- description: Runtime defines container runtime type
- enum:
- - docker
- - crio
- - containerd
- type: string
- initContainer:
- description: InitContainerSpec describes configuration for initContainer
- image used with all components
- properties:
- image:
- description: Image represents image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: Repository represents image repository path
- type: string
- version:
- description: Version represents image tag(version)
- type: string
- type: object
- labels:
- additionalProperties:
- type: string
- description: |-
- Optional: Map of string keys and values that can be used to organize and categorize
- (scope and select) objects. May match selectors of replication controllers
- and services.
- type: object
- runtimeClass:
- default: nvidia
- type: string
- use_ocp_driver_toolkit:
- description: UseOpenShiftDriverToolkit indicates if DriverToolkit
- image should be used on OpenShift to build and install driver
- modules
- type: boolean
- required:
- - defaultRuntime
- type: object
- psa:
- description: PSA defines spec for PodSecurityAdmission configuration
- properties:
- enabled:
- description: Enabled indicates if PodSecurityAdmission configuration
- needs to be enabled for all Pods
- type: boolean
- type: object
- psp:
- description: |-
- Deprecated: Pod Security Policies are no longer supported. Please use PodSecurityAdmission instead
- PSP defines spec for handling PodSecurityPolicies
- properties:
- enabled:
- description: Enabled indicates if PodSecurityPolicies needs to
- be enabled for all Pods
- type: boolean
- type: object
- sandboxDevicePlugin:
- description: SandboxDevicePlugin component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- enabled:
- description: Enabled indicates if deployment of NVIDIA Sandbox
- Device Plugin through operator is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA Sandbox Device Plugin image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA Sandbox Device Plugin image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: NVIDIA Sandbox Device Plugin image tag
- type: string
- type: object
- sandboxWorkloads:
- description: SandboxWorkloads defines the spec for handling sandbox
- workloads (i.e. Virtual Machines)
- properties:
- defaultWorkload:
- default: container
- description: |-
- DefaultWorkload indicates the default GPU workload type to configure
- worker nodes in the cluster for
- enum:
- - container
- - vm-passthrough
- - vm-vgpu
- type: string
- enabled:
- description: |-
- Enabled indicates if the GPU Operator should manage additional operands required
- for sandbox workloads (i.e. VFIO Manager, vGPU Manager, and additional device plugins)
- type: boolean
- type: object
- toolkit:
- description: Toolkit component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- enabled:
- description: Enabled indicates if deployment of NVIDIA Container
- Toolkit through operator is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA Container Toolkit image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- installDir:
- default: /usr/local/nvidia
- description: Toolkit install directory on the host
- type: string
- repository:
- description: NVIDIA Container Toolkit image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: NVIDIA Container Toolkit image tag
- type: string
- type: object
- validator:
- description: Validator defines the spec for operator-validator daemonset
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- cuda:
- description: CUDA validator spec
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- type: object
- driver:
- description: Toolkit validator spec
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- type: object
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: Validator image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- plugin:
- description: Plugin validator spec
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- type: object
- repository:
- description: Validator image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- toolkit:
- description: Toolkit validator spec
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- type: object
- version:
- description: Validator image tag
- type: string
- vfioPCI:
- description: VfioPCI validator spec
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- type: object
- vgpuDevices:
- description: VGPUDevices validator spec
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- type: object
- vgpuManager:
- description: VGPUManager validator spec
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- type: object
- type: object
- vfioManager:
- description: VFIOManager for configuration to deploy VFIO-PCI Manager
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- driverManager:
- description: DriverManager represents configuration for NVIDIA
- Driver Manager
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: Image represents NVIDIA Driver Manager image
- name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: Repository represents Driver Managerrepository
- path
- type: string
- version:
- description: Version represents NVIDIA Driver Manager image
- tag(version)
- type: string
- type: object
- enabled:
- description: Enabled indicates if deployment of VFIO Manager is
- enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: VFIO Manager image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: VFIO Manager image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: VFIO Manager image tag
- type: string
- type: object
- vgpuDeviceManager:
- description: VGPUDeviceManager spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- config:
- description: NVIDIA vGPU devices configuration for NVIDIA vGPU
- Device Manager container
- properties:
- default:
- default: default
- description: Default config name within the ConfigMap
- type: string
- name:
- description: ConfigMap name
- type: string
- type: object
- enabled:
- description: Enabled indicates if deployment of NVIDIA vGPU Device
- Manager is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA vGPU Device Manager image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA vGPU Device Manager image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: NVIDIA vGPU Device Manager image tag
- type: string
- type: object
- vgpuManager:
- description: VGPUManager component spec
- properties:
- args:
- description: 'Optional: List of arguments'
- items:
- type: string
- type: array
- driverManager:
- description: DriverManager represents configuration for NVIDIA
- Driver Manager initContainer
- properties:
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: Image represents NVIDIA Driver Manager image
- name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: Repository represents Driver Managerrepository
- path
- type: string
- version:
- description: Version represents NVIDIA Driver Manager image
- tag(version)
- type: string
- type: object
- enabled:
- description: Enabled indicates if deployment of NVIDIA vGPU Manager
- through operator is enabled
- type: boolean
- env:
- description: 'Optional: List of environment variables'
- items:
- description: EnvVar represents an environment variable present
- in a Container.
- properties:
- name:
- description: Name of the environment variable.
- type: string
- value:
- description: Value of the environment variable.
- type: string
- required:
- - name
- type: object
- type: array
- image:
- description: NVIDIA vGPU Manager image name
- pattern: '[a-zA-Z0-9\-]+'
- type: string
- imagePullPolicy:
- description: Image pull policy
- type: string
- imagePullSecrets:
- description: Image pull secrets
- items:
- type: string
- type: array
- repository:
- description: NVIDIA vGPU Manager image repository
- type: string
- resources:
- description: 'Optional: Define resources requests and limits for
- each pod'
- properties:
- limits:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Limits describes the maximum amount of compute resources allowed.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- requests:
- additionalProperties:
- anyOf:
- - type: integer
- - type: string
- pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
- x-kubernetes-int-or-string: true
- description: |-
- Requests describes the minimum amount of compute resources required.
- If Requests is omitted for a container, it defaults to Limits if that is explicitly specified,
- otherwise to an implementation-defined value. Requests cannot exceed Limits.
- More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
- type: object
- type: object
- version:
- description: NVIDIA vGPU Manager image tag
- type: string
- type: object
- required:
- - daemonsets
- - dcgm
- - dcgmExporter
- - devicePlugin
- - driver
- - gfd
- - nodeStatusExporter
- - operator
- - toolkit
- type: object
- status:
- description: ClusterPolicyStatus defines the observed state of ClusterPolicy
- properties:
- conditions:
- description: Conditions is a list of conditions representing the ClusterPolicy's
- current state.
- items:
- description: "Condition contains details for one aspect of the current
- state of this API Resource.\n---\nThis struct is intended for
- direct use as an array at the field path .status.conditions. For
- example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
- observations of a foo's current state.\n\t // Known .status.conditions.type
- are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
- +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
- \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
- patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
- \ // other fields\n\t}"
- properties:
- lastTransitionTime:
- description: |-
- lastTransitionTime is the last time the condition transitioned from one status to another.
- This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
- format: date-time
- type: string
- message:
- description: |-
- message is a human readable message indicating details about the transition.
- This may be an empty string.
- maxLength: 32768
- type: string
- observedGeneration:
- description: |-
- observedGeneration represents the .metadata.generation that the condition was set based upon.
- For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
- with respect to the current state of the instance.
- format: int64
- minimum: 0
- type: integer
- reason:
- description: |-
- reason contains a programmatic identifier indicating the reason for the condition's last transition.
- Producers of specific condition types may define expected values and meanings for this field,
- and whether the values are considered a guaranteed API.
- The value should be a CamelCase string.
- This field may not be empty.
- maxLength: 1024
- minLength: 1
- pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
- type: string
- status:
- description: status of the condition, one of True, False, Unknown.
- enum:
- - "True"
- - "False"
- - Unknown
- type: string
- type:
- description: |-
- type of condition in CamelCase or in foo.example.com/CamelCase.
- ---
- Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
- useful (see .node.status.conditions), the ability to deconflict is important.
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
- maxLength: 316
- pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
- type: string
- required:
- - lastTransitionTime
- - message
- - reason
- - status
- - type
- type: object
- type: array
- namespace:
- description: Namespace indicates a namespace in which the operator
- is installed
- type: string
- state:
- description: State indicates status of ClusterPolicy
- enum:
- - ignored
- - ready
- - notReady
- type: string
- required:
- - state
- type: object
- type: object
- served: true
- storage: true
- subresources:
- status: {}
diff --git a/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml b/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
index 317972fd2..c49059a38 100644
--- a/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
+++ b/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
- controller-gen.kubebuilder.io/version: v0.14.0
+ controller-gen.kubebuilder.io/version: v0.16.5
name: nvidiadrivers.nvidia.com
spec:
group: nvidia.com
@@ -357,11 +357,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
@@ -389,11 +391,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
weight:
@@ -406,6 +410,7 @@ spec:
- weight
type: object
type: array
+ x-kubernetes-list-type: atomic
requiredDuringSchedulingIgnoredDuringExecution:
description: |-
If the affinity requirements specified by this field are not met at
@@ -450,11 +455,13 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
matchFields:
description: A list of node selector requirements by
node's fields.
@@ -482,14 +489,17 @@ spec:
items:
type: string
type: array
+ x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
+ x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
type: array
+ x-kubernetes-list-type: atomic
required:
- nodeSelectorTerms
type: object
@@ -709,16 +719,8 @@ spec:
description: Conditions is a list of conditions representing the NVIDIADriver's
current state.
items:
- description: "Condition contains details for one aspect of the current
- state of this API Resource.\n---\nThis struct is intended for
- direct use as an array at the field path .status.conditions. For
- example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
- observations of a foo's current state.\n\t // Known .status.conditions.type
- are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
- +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
- \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
- patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
- \ // other fields\n\t}"
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
properties:
lastTransitionTime:
description: |-
@@ -759,12 +761,7 @@ spec:
- Unknown
type: string
type:
- description: |-
- type of condition in CamelCase or in foo.example.com/CamelCase.
- ---
- Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
- useful (see .node.status.conditions), the ability to deconflict is important.
- The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
diff --git a/deployments/gpu-operator/templates/_helpers.tpl b/deployments/gpu-operator/templates/_helpers.tpl
index 305c9d1fe..8969d66e1 100644
--- a/deployments/gpu-operator/templates/_helpers.tpl
+++ b/deployments/gpu-operator/templates/_helpers.tpl
@@ -76,5 +76,5 @@ Full image name with tag
Full image name with tag
*/}}
{{- define "driver-manager.fullimage" -}}
-{{- .Values.driver.manager.repository -}}/{{- .Values.driver.manager.image -}}:{{- .Values.driver.manager.version -}}
+{{- .Values.driverManager.repository -}}/{{- .Values.driverManager.image -}}:{{- .Values.driverManager.version -}}
{{- end }}
diff --git a/deployments/gpu-operator/templates/cleanup_crd.yaml b/deployments/gpu-operator/templates/cleanup_crd.yaml
index 550525f00..fd0c1b799 100644
--- a/deployments/gpu-operator/templates/cleanup_crd.yaml
+++ b/deployments/gpu-operator/templates/cleanup_crd.yaml
@@ -26,6 +26,10 @@ spec:
- name: {{ . }}
{{- end }}
{{- end }}
+ {{- with .Values.operator.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
containers:
- name: cleanup-crd
image: {{ include "gpu-operator.fullimage" . }}
diff --git a/deployments/gpu-operator/templates/clusterpolicy.yaml b/deployments/gpu-operator/templates/clusterpolicy.yaml
index f036c936a..b6d825759 100644
--- a/deployments/gpu-operator/templates/clusterpolicy.yaml
+++ b/deployments/gpu-operator/templates/clusterpolicy.yaml
@@ -12,6 +12,9 @@ metadata:
"helm.sh/resource-policy": keep
{{- end }}
spec:
+ hostPaths:
+ rootFS: {{ .Values.hostPaths.rootFS }}
+ driverInstallDir: {{ .Values.hostPaths.driverInstallDir }}
operator:
{{- if .Values.operator.defaultRuntime }}
defaultRuntime: {{ .Values.operator.defaultRuntime }}
@@ -170,17 +173,17 @@ spec:
enabled: {{ .Values.driver.rdma.enabled }}
useHostMofed: {{ .Values.driver.rdma.useHostMofed }}
manager:
- {{- if .Values.driver.manager.repository }}
- repository: {{ .Values.driver.manager.repository }}
+ {{- if .Values.driverManager.repository }}
+ repository: {{ .Values.driverManager.repository }}
{{- end }}
- {{- if .Values.driver.manager.image }}
- image: {{ .Values.driver.manager.image }}
+ {{- if .Values.driverManager.image }}
+ image: {{ .Values.driverManager.image }}
{{- end }}
- {{- if .Values.driver.manager.version }}
- version: {{ .Values.driver.manager.version | quote }}
+ {{- if .Values.driverManager.version }}
+ version: {{ .Values.driverManager.version | quote }}
{{- end }}
- {{- if .Values.driver.manager.imagePullPolicy }}
- imagePullPolicy: {{ .Values.driver.manager.imagePullPolicy }}
+ {{- if .Values.driverManager.imagePullPolicy }}
+ imagePullPolicy: {{ .Values.driverManager.imagePullPolicy }}
{{- end }}
{{- if .Values.driver.manager.env }}
env: {{ toYaml .Values.driver.manager.env | nindent 8 }}
@@ -259,17 +262,17 @@ spec:
args: {{ toYaml .Values.vgpuManager.args | nindent 6 }}
{{- end }}
driverManager:
- {{- if .Values.vgpuManager.driverManager.repository }}
- repository: {{ .Values.vgpuManager.driverManager.repository }}
+ {{- if .Values.driverManager.repository }}
+ repository: {{ .Values.driverManager.repository }}
{{- end }}
- {{- if .Values.vgpuManager.driverManager.image }}
- image: {{ .Values.vgpuManager.driverManager.image }}
+ {{- if .Values.driverManager.image }}
+ image: {{ .Values.driverManager.image }}
{{- end }}
- {{- if .Values.vgpuManager.driverManager.version }}
- version: {{ .Values.vgpuManager.driverManager.version | quote }}
+ {{- if .Values.driverManager.version }}
+ version: {{ .Values.driverManager.version | quote }}
{{- end }}
- {{- if .Values.vgpuManager.driverManager.imagePullPolicy }}
- imagePullPolicy: {{ .Values.vgpuManager.driverManager.imagePullPolicy }}
+ {{- if .Values.driverManager.imagePullPolicy }}
+ imagePullPolicy: {{ .Values.driverManager.imagePullPolicy }}
{{- end }}
{{- if .Values.vgpuManager.driverManager.env }}
env: {{ toYaml .Values.vgpuManager.driverManager.env | nindent 8 }}
@@ -328,17 +331,17 @@ spec:
args: {{ toYaml .Values.vfioManager.args | nindent 6 }}
{{- end }}
driverManager:
- {{- if .Values.vfioManager.driverManager.repository }}
- repository: {{ .Values.vfioManager.driverManager.repository }}
+ {{- if .Values.driverManager.repository }}
+ repository: {{ .Values.driverManager.repository }}
{{- end }}
- {{- if .Values.vfioManager.driverManager.image }}
- image: {{ .Values.vfioManager.driverManager.image }}
+ {{- if .Values.driverManager.image }}
+ image: {{ .Values.driverManager.image }}
{{- end }}
- {{- if .Values.vfioManager.driverManager.version }}
- version: {{ .Values.vfioManager.driverManager.version | quote }}
+ {{- if .Values.driverManager.version }}
+ version: {{ .Values.driverManager.version | quote }}
{{- end }}
- {{- if .Values.vfioManager.driverManager.imagePullPolicy }}
- imagePullPolicy: {{ .Values.vfioManager.driverManager.imagePullPolicy }}
+ {{- if .Values.driverManager.imagePullPolicy }}
+ imagePullPolicy: {{ .Values.driverManager.imagePullPolicy }}
{{- end }}
{{- if .Values.vfioManager.driverManager.env }}
env: {{ toYaml .Values.vfioManager.driverManager.env | nindent 8 }}
@@ -482,9 +485,6 @@ spec:
{{- if .Values.dcgm.args }}
args: {{ toYaml .Values.dcgm.args | nindent 6 }}
{{- end }}
- {{- if .Values.dcgm.hostPort }}
- hostPort: {{ .Values.dcgm.hostPort }}
- {{- end }}
dcgmExporter:
enabled: {{ .Values.dcgmExporter.enabled }}
{{- if .Values.dcgmExporter.repository }}
@@ -511,8 +511,9 @@ spec:
{{- if .Values.dcgmExporter.args }}
args: {{ toYaml .Values.dcgmExporter.args | nindent 6 }}
{{- end }}
- {{- if .Values.dcgmExporter.config }}
- config: {{ toYaml .Values.dcgmExporter.config | nindent 6 }}
+ {{- if and (.Values.dcgmExporter.config) (.Values.dcgmExporter.config.name) }}
+ config:
+ name: {{ .Values.dcgmExporter.config.name }}
{{- end }}
{{- if .Values.dcgmExporter.serviceMonitor }}
serviceMonitor: {{ toYaml .Values.dcgmExporter.serviceMonitor | nindent 6 }}
@@ -570,7 +571,9 @@ spec:
args: {{ toYaml .Values.migManager.args | nindent 6 }}
{{- end }}
{{- if .Values.migManager.config }}
- config: {{ toYaml .Values.migManager.config | nindent 6 }}
+ config:
+ name: {{ .Values.migManager.config.name }}
+ default: {{ .Values.migManager.config.default }}
{{- end }}
{{- if .Values.migManager.gpuClientsConfig }}
gpuClientsConfig: {{ toYaml .Values.migManager.gpuClientsConfig | nindent 6 }}
diff --git a/deployments/gpu-operator/templates/clusterrole.yaml b/deployments/gpu-operator/templates/clusterrole.yaml
new file mode 100644
index 000000000..4acbcf29c
--- /dev/null
+++ b/deployments/gpu-operator/templates/clusterrole.yaml
@@ -0,0 +1,146 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: gpu-operator
+ labels:
+ {{- include "gpu-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: "gpu-operator"
+rules:
+- apiGroups:
+ - config.openshift.io
+ resources:
+ - clusterversions
+ - proxies
+ verbs:
+ - get
+ - list
+ - watch
+- apiGroups:
+ - image.openshift.io
+ resources:
+ - imagestreams
+ verbs:
+ - get
+ - list
+ - watch
+- apiGroups:
+ - security.openshift.io
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - use
+- apiGroups:
+ - rbac.authorization.k8s.io
+ resources:
+ - clusterroles
+ - clusterrolebindings
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+- apiGroups:
+ - ""
+ resources:
+ - nodes
+ verbs:
+ - get
+ - list
+ - watch
+ - update
+ - patch
+- apiGroups:
+ - ""
+ resources:
+ - namespaces
+ verbs:
+ - get
+ - list
+ - create
+ - watch
+ - update
+ - patch
+- apiGroups:
+ - ""
+ resources:
+ - events
+ - pods
+ - pods/eviction
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+- apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
+ - list
+ - watch
+- apiGroups:
+ - nvidia.com
+ resources:
+ - clusterpolicies
+ - clusterpolicies/finalizers
+ - clusterpolicies/status
+ - nvidiadrivers
+ - nvidiadrivers/finalizers
+ - nvidiadrivers/status
+ verbs:
+ - create
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - delete
+ - deletecollection
+- apiGroups:
+ - scheduling.k8s.io
+ resources:
+ - priorityclasses
+ verbs:
+ - get
+ - list
+ - watch
+ - create
+- apiGroups:
+ - node.k8s.io
+ resources:
+ - runtimeclasses
+ verbs:
+ - get
+ - list
+ - create
+ - update
+ - watch
+ - delete
+- apiGroups:
+ - apiextensions.k8s.io
+ resources:
+ - customresourcedefinitions
+ verbs:
+ - get
+ - list
+ - watch
+ - update
+ - patch
+ - create
+{{- if .Values.operator.cleanupCRD }}
+ - delete
+{{- end }}
diff --git a/deployments/gpu-operator/templates/clusterrolebinding.yaml b/deployments/gpu-operator/templates/clusterrolebinding.yaml
new file mode 100644
index 000000000..08b87fbce
--- /dev/null
+++ b/deployments/gpu-operator/templates/clusterrolebinding.yaml
@@ -0,0 +1,18 @@
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+ name: gpu-operator
+ labels:
+ {{- include "gpu-operator.labels" . | nindent 4 }}
+ app.kubernetes.io/component: "gpu-operator"
+subjects:
+- kind: ServiceAccount
+ name: gpu-operator
+ namespace: {{ $.Release.Namespace }}
+- kind: ServiceAccount
+ name: node-feature-discovery
+ namespace: {{ $.Release.Namespace }}
+roleRef:
+ kind: ClusterRole
+ name: gpu-operator
+ apiGroup: rbac.authorization.k8s.io
diff --git a/deployments/gpu-operator/templates/dcgm_exporter_config.yaml b/deployments/gpu-operator/templates/dcgm_exporter_config.yaml
new file mode 100644
index 000000000..c4bf6dcc8
--- /dev/null
+++ b/deployments/gpu-operator/templates/dcgm_exporter_config.yaml
@@ -0,0 +1,14 @@
+{{- if .Values.dcgmExporter.config }}
+{{- if and (.Values.dcgmExporter.config.create) (not (empty .Values.dcgmExporter.config.data)) }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ .Values.dcgmExporter.config.name }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ {{- include "gpu-operator.labels" . | nindent 4 }}
+data:
+ dcgm-metrics.csv: |
+{{- .Values.dcgmExporter.config.data | nindent 4 }}
+{{- end }}
+{{- end }}
diff --git a/deployments/gpu-operator/templates/mig_config.yaml b/deployments/gpu-operator/templates/mig_config.yaml
new file mode 100644
index 000000000..2ceb04779
--- /dev/null
+++ b/deployments/gpu-operator/templates/mig_config.yaml
@@ -0,0 +1,10 @@
+{{- if and (.Values.migManager.config.create) (not (empty .Values.migManager.config.data)) }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ .Values.migManager.config.name }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ {{- include "gpu-operator.labels" . | nindent 4 }}
+data: {{ toYaml .Values.migManager.config.data | nindent 2 }}
+{{- end }}
diff --git a/deployments/gpu-operator/templates/role.yaml b/deployments/gpu-operator/templates/role.yaml
index ef65b1af6..9e5bcede3 100644
--- a/deployments/gpu-operator/templates/role.yaml
+++ b/deployments/gpu-operator/templates/role.yaml
@@ -1,159 +1,67 @@
apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
+kind: Role
metadata:
name: gpu-operator
labels:
{{- include "gpu-operator.labels" . | nindent 4 }}
app.kubernetes.io/component: "gpu-operator"
rules:
-- apiGroups:
- - config.openshift.io
- resources:
- - proxies
- verbs:
- - get
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
- rolebindings
- - clusterroles
- - clusterrolebindings
- verbs:
- - '*'
-- apiGroups:
- - ""
- resources:
- - pods
- - services
- - endpoints
- - persistentvolumeclaims
- - events
- - configmaps
- - secrets
- - serviceaccounts
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - ""
- resources:
- - namespaces
verbs:
+ - create
- get
- list
- - create
- watch
- update
- patch
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
+ - delete
- apiGroups:
- apps
resources:
- controllerrevisions
verbs:
- - 'get'
- - 'list'
- - 'watch'
-- apiGroups:
- - monitoring.coreos.com
- resources:
- - servicemonitors
- - prometheusrules
- verbs:
- get
- list
- - create
- watch
- - update
- - delete
- apiGroups:
- - nvidia.com
- resources:
- - '*'
- verbs:
- - '*'
-- apiGroups:
- - scheduling.k8s.io
+ - apps
resources:
- - priorityclasses
+ - daemonsets
verbs:
- - get
- - list
- - watch
- create
-- apiGroups:
- - security.openshift.io
- resources:
- - securitycontextconstraints
- verbs:
- - '*'
-- apiGroups:
- - config.openshift.io
- resources:
- - clusterversions
- verbs:
- - get
- - list
- - watch
-- apiGroups:
- - ""
- - coordination.k8s.io
- resources:
- - configmaps
- - leases
- verbs:
- get
- list
- watch
- - create
- update
- patch
- delete
-- apiGroups:
- - node.k8s.io
- resources:
- - runtimeclasses
- verbs:
- - get
- - list
- - create
- - update
- - watch
- - delete
-- apiGroups:
- - image.openshift.io
- resources:
- - imagestreams
- verbs:
- - get
- - list
- - watch
- apiGroups:
- ""
resources:
+ - configmaps
+ - endpoints
- pods
- pods/eviction
+ - secrets
+ - services
+ - services/finalizers
+ - serviceaccounts
verbs:
+ - create
- get
- list
- watch
- - create
- - delete
- update
- patch
+ - delete
- apiGroups:
- - ""
+ - coordination.k8s.io
resources:
- - nodes
+ - leases
verbs:
- get
- list
@@ -161,17 +69,16 @@ rules:
- create
- update
- patch
+ - delete
- apiGroups:
- - apiextensions.k8s.io
+ - monitoring.coreos.com
resources:
- - customresourcedefinitions
+ - servicemonitors
+ - prometheusrules
verbs:
- get
- list
+ - create
- watch
- update
- - patch
- - create
-{{- if .Values.operator.cleanupCRD }}
- delete
-{{- end }}
diff --git a/deployments/gpu-operator/templates/rolebinding.yaml b/deployments/gpu-operator/templates/rolebinding.yaml
index 08b87fbce..c915a4659 100644
--- a/deployments/gpu-operator/templates/rolebinding.yaml
+++ b/deployments/gpu-operator/templates/rolebinding.yaml
@@ -1,4 +1,4 @@
-kind: ClusterRoleBinding
+kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: gpu-operator
@@ -9,10 +9,7 @@ subjects:
- kind: ServiceAccount
name: gpu-operator
namespace: {{ $.Release.Namespace }}
-- kind: ServiceAccount
- name: node-feature-discovery
- namespace: {{ $.Release.Namespace }}
roleRef:
- kind: ClusterRole
+ kind: Role
name: gpu-operator
apiGroup: rbac.authorization.k8s.io
diff --git a/deployments/gpu-operator/templates/upgrade_crd.yaml b/deployments/gpu-operator/templates/upgrade_crd.yaml
index 4fbb34847..6552558af 100644
--- a/deployments/gpu-operator/templates/upgrade_crd.yaml
+++ b/deployments/gpu-operator/templates/upgrade_crd.yaml
@@ -74,6 +74,10 @@ spec:
- name: {{ . }}
{{- end }}
{{- end }}
+ {{- with .Values.operator.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
containers:
- name: upgrade-crd
image: {{ include "gpu-operator.fullimage" . }}
@@ -82,7 +86,7 @@ spec:
- /bin/sh
- -c
- >
- kubectl apply -f /opt/gpu-operator/nvidia.com_clusterpolicies_crd.yaml;
+ kubectl apply -f /opt/gpu-operator/nvidia.com_clusterpolicies.yaml;
kubectl apply -f /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml;
{{- if .Values.nfd.enabled }}
kubectl apply -f /opt/gpu-operator/nfd-api-crds.yaml;
diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml
index f7c776fdf..90e2efd9d 100644
--- a/deployments/gpu-operator/values.yaml
+++ b/deployments/gpu-operator/values.yaml
@@ -20,6 +20,18 @@ sandboxWorkloads:
enabled: false
defaultWorkload: "container"
+hostPaths:
+ # rootFS represents the path to the root filesystem of the host.
+ # This is used by components that need to interact with the host filesystem
+ # and as such this must be a chroot-able filesystem.
+ # Examples include the MIG Manager and Toolkit Container which may need to
+ # stop, start, or restart systemd services
+ rootFS: "/"
+
+ # driverInstallDir represents the root at which driver files including libraries,
+ # config files, and executables can be found.
+ driverInstallDir: "/run/nvidia/driver"
+
daemonsets:
labels: {}
annotations: {}
@@ -67,11 +79,11 @@ operator:
cleanupCRD: false
# upgrade CRD on chart upgrade, requires --disable-openapi-validation flag
# to be passed during helm upgrade.
- upgradeCRD: false
+ upgradeCRD: true
initContainer:
image: cuda
repository: nvcr.io/nvidia
- version: 12.3.2-base-ubi8
+ version: 12.6.2-base-ubi9
imagePullPolicy: IfNotPresent
tolerations:
- key: "node-role.kubernetes.io/master"
@@ -131,7 +143,7 @@ driver:
usePrecompiled: false
repository: nvcr.io/nvidia
image: driver
- version: "550.54.14"
+ version: "550.127.08"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
startupProbe:
@@ -177,12 +189,6 @@ driver:
timeoutSeconds: 300
deleteEmptyDir: false
manager:
- image: k8s-driver-manager
- repository: nvcr.io/nvidia/cloud-native
- # When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
- # to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
- version: v0.6.5
- imagePullPolicy: IfNotPresent
env:
- name: ENABLE_GPU_POD_EVICTION
value: "true"
@@ -219,7 +225,7 @@ toolkit:
enabled: true
repository: nvcr.io/nvidia/k8s
image: container-toolkit
- version: v1.14.6-ubuntu20.04
+ version: v1.17.2-ubuntu20.04
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
@@ -230,7 +236,7 @@ devicePlugin:
enabled: true
repository: nvcr.io/nvidia
image: k8s-device-plugin
- version: v0.15.0-rc.1-ubi8
+ version: v0.17.0
imagePullPolicy: IfNotPresent
imagePullSecrets: []
args: []
@@ -271,12 +277,16 @@ devicePlugin:
config:
# Create a ConfigMap (default: false)
create: false
- # ConfigMap name (either exiting or to create a new one with create=true above)
+ # ConfigMap name (either existing or to create a new one with create=true above)
name: ""
# Default config name within the ConfigMap
default: ""
# Data section for the ConfigMap to create (i.e only applies when create=true)
data: {}
+ # MPS related configuration for the plugin
+ mps:
+ # MPS root path on the host
+ root: "/run/nvidia/mps"
# standalone dcgm hostengine
dcgm:
@@ -284,9 +294,8 @@ dcgm:
enabled: false
repository: nvcr.io/nvidia/cloud-native
image: dcgm
- version: 3.3.0-1-ubuntu22.04
+ version: 3.3.9-1-ubuntu22.04
imagePullPolicy: IfNotPresent
- hostPort: 5555
args: []
env: []
resources: {}
@@ -295,7 +304,7 @@ dcgmExporter:
enabled: true
repository: nvcr.io/nvidia/k8s
image: dcgm-exporter
- version: 3.3.0-3.2.0-ubuntu22.04
+ version: 3.3.9-3.6.1-ubuntu22.04
imagePullPolicy: IfNotPresent
env:
- name: DCGM_EXPORTER_LISTEN
@@ -317,12 +326,31 @@ dcgmExporter:
# target_label: instance
# replacement: $1
# action: replace
+ # DCGM Exporter configuration
+ # This block is used to configure DCGM Exporter to emit a customized list of metrics.
+ # Use "name" to either point to an existing ConfigMap or to create a new one with a
+ # list of configurations (i.e with create=true).
+ # When pointing to an existing ConfigMap, the ConfigMap must exist in the same namespace as the release.
+ # The metrics are expected to be listed under a key called `dcgm-metrics.csv`.
+ # Use "data" to build an integrated ConfigMap from a set of custom metrics as
+ # part of the chart. An example of some custom metrics is shown below. Note that
+ # the contents of "data" must be in CSV format and be valid DCGM Exporter metric configurations.
+ # config:
+ # name: custom-dcgm-exporter-metrics
+ # create: true
+ # data: |-
+ # Format
+ # If line starts with a '#' it is considered a comment
+ # DCGM FIELD, Prometheus metric type, help message
+ # Clocks
+ # DCGM_FI_DEV_SM_CLOCK, gauge, SM clock frequency (in MHz).
+ # DCGM_FI_DEV_MEM_CLOCK, gauge, Memory clock frequency (in MHz).
gfd:
enabled: true
repository: nvcr.io/nvidia
image: k8s-device-plugin
- version: v0.15.0-rc.1-ubi8
+ version: v0.17.0
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env:
@@ -336,16 +364,52 @@ migManager:
enabled: true
repository: nvcr.io/nvidia/cloud-native
image: k8s-mig-manager
- version: v0.6.0-ubuntu20.04
+ version: v0.10.0-ubuntu20.04
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env:
- name: WITH_REBOOT
value: "false"
resources: {}
+ # MIG configuration
+ # Use "name" to either point to an existing ConfigMap or to create a new one with a list of configurations (i.e. with create=true).
+ # Use "data" to build an integrated ConfigMap from a set of configurations as
+ # part of this helm chart. An example of setting "data" might be:
+ # config:
+ # name: custom-mig-parted-configs
+ # create: true
+ # data: |-
+ # config.yaml: |-
+ # version: v1
+ # mig-configs:
+ # all-disabled:
+ # - devices: all
+ # mig-enabled: false
+ # custom-mig:
+ # - devices: [0]
+ # mig-enabled: false
+ # - devices: [1]
+ # mig-enabled: true
+ # mig-devices:
+ # "1g.10gb": 7
+ # - devices: [2]
+ # mig-enabled: true
+ # mig-devices:
+ # "2g.20gb": 2
+ # "3g.40gb": 1
+ # - devices: [3]
+ # mig-enabled: true
+ # mig-devices:
+ # "3g.40gb": 1
+ # "4g.40gb": 1
config:
- name: "default-mig-parted-config"
default: "all-disabled"
+ # Create a ConfigMap (default: false)
+ create: false
+ # ConfigMap name (either existing or to create a new one with create=true above)
+ name: ""
+ # Data section for the ConfigMap to create (i.e. only applies when create=true)
+ data: {}
gpuClientsConfig:
name: ""
@@ -363,7 +427,7 @@ gds:
enabled: false
repository: nvcr.io/nvidia/cloud-native
image: nvidia-fs
- version: "2.17.5"
+ version: "2.20.5"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
@@ -373,7 +437,7 @@ gdrcopy:
enabled: false
repository: nvcr.io/nvidia/cloud-native
image: gdrdrv
- version: "v2.4.1"
+ version: "v2.4.1-2"
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
@@ -389,12 +453,6 @@ vgpuManager:
env: []
resources: {}
driverManager:
- image: k8s-driver-manager
- repository: nvcr.io/nvidia/cloud-native
- # When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
- # to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
- version: v0.6.4
- imagePullPolicy: IfNotPresent
env:
- name: ENABLE_GPU_POD_EVICTION
value: "false"
@@ -405,7 +463,7 @@ vgpuDeviceManager:
enabled: true
repository: nvcr.io/nvidia/cloud-native
image: vgpu-device-manager
- version: "v0.2.4"
+ version: v0.2.8
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
@@ -417,18 +475,12 @@ vfioManager:
enabled: true
repository: nvcr.io/nvidia
image: cuda
- version: 12.3.2-base-ubi8
+ version: 12.6.2-base-ubi9
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
resources: {}
driverManager:
- image: k8s-driver-manager
- repository: nvcr.io/nvidia/cloud-native
- # When choosing a different version of k8s-driver-manager, DO NOT downgrade to a version lower than v0.6.4
- # to ensure k8s-driver-manager stays compatible with gpu-operator starting from v24.3.0
- version: v0.6.5
- imagePullPolicy: IfNotPresent
env:
- name: ENABLE_GPU_POD_EVICTION
value: "false"
@@ -440,12 +492,12 @@ kataManager:
config:
artifactsDir: "/opt/nvidia-gpu-operator/artifacts/runtimeclasses"
runtimeClasses:
- - name: kata-qemu-nvidia-gpu
+ - name: kata-nvidia-gpu
nodeSelector: {}
artifacts:
url: nvcr.io/nvidia/cloud-native/kata-gpu-artifacts:ubuntu22.04-535.54.03
pullSecret: ""
- - name: kata-qemu-nvidia-gpu-snp
+ - name: kata-nvidia-gpu-snp
nodeSelector:
"nvidia.com/cc.capable": "true"
artifacts:
@@ -453,7 +505,7 @@ kataManager:
pullSecret: ""
repository: nvcr.io/nvidia/cloud-native
image: k8s-kata-manager
- version: v0.1.2
+ version: v0.2.2
imagePullPolicy: IfNotPresent
imagePullSecrets: []
env: []
@@ -463,7 +515,7 @@ sandboxDevicePlugin:
enabled: true
repository: nvcr.io/nvidia
image: kubevirt-gpu-device-plugin
- version: v1.2.4
+ version: v1.2.10
imagePullPolicy: IfNotPresent
imagePullSecrets: []
args: []
@@ -485,6 +537,7 @@ ccManager:
node-feature-discovery:
enableNodeFeatureApi: true
+ priorityClassName: system-node-critical
gc:
enable: true
replicaCount: 1
@@ -529,3 +582,10 @@ node-feature-discovery:
# resourceLabels: ["nvidia.com/feature-1","nvidia.com/feature-2"]
# enableTaints: false
# labelWhiteList: "nvidia.com/gpu"
+
+# All components that use the driver manager share the same image version
+driverManager:
+ image: k8s-driver-manager
+ repository: nvcr.io/nvidia/cloud-native
+ version: v0.7.0
+ imagePullPolicy: IfNotPresent
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 471285567..9678b5a2a 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -12,15 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG CUDA_IMAGE=nvcr.io/nvidia/cuda
-ARG CUDA_VERSION=undefined
-
-ARG BASE_DIST=ubi8
ARG GOLANG_VERSION=x.x.x
-FROM ${CUDA_IMAGE}:${CUDA_VERSION}-base-${BASE_DIST} as builder
+FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9 as builder
-RUN yum install -y wget make git gcc
+RUN dnf install -y wget make git gcc
ARG GOLANG_VERSION=0.0.0
RUN set -eux; \
@@ -29,7 +25,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
- aarch64) ARCH='arm64' ;; \
+ aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -59,7 +55,7 @@ ARG VERSION="unknown"
ARG GIT_COMMIT="unknown"
RUN make gpu-operator
-FROM ${CUDA_IMAGE}:${CUDA_VERSION}-base-${BASE_DIST}
+FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9
# Remove CUDA libs(compat etc) in favor of libs installed by the NVIDIA driver
RUN dnf remove -y cuda-*
@@ -94,14 +90,14 @@ RUN chmod +x ./kubectl
RUN mv ./kubectl /usr/local/bin
# Add CRD resource into the image for helm upgrades
-COPY deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml /opt/gpu-operator/nvidia.com_clusterpolicies_crd.yaml
+COPY deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml /opt/gpu-operator/nvidia.com_clusterpolicies.yaml
COPY deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml
COPY deployments/gpu-operator/charts/node-feature-discovery/crds/nfd-api-crds.yaml /opt/gpu-operator/nfd-api-crds.yaml
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
- yum update -y ${CVE_UPDATES} && \
+ dnf update -y ${CVE_UPDATES} && \
rm -rf /var/cache/yum/*; \
fi
diff --git a/docker/Dockerfile.devel b/docker/Dockerfile.devel
index 5deb5be52..33d0abc15 100644
--- a/docker/Dockerfile.devel
+++ b/docker/Dockerfile.devel
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
ARG GOLANG_VERSION=x.x.x
-ARG GOLANGCI_LINT_VERSION=1.55.2
+ARG GOLANGCI_LINT_VERSION=1.60.3
FROM golang:${GOLANG_VERSION}
diff --git a/go.mod b/go.mod
index 75a9fbfe2..c306ecd65 100644
--- a/go.mod
+++ b/go.mod
@@ -1,91 +1,92 @@
module github.com/NVIDIA/gpu-operator
-go 1.21
+go 1.23
+
+toolchain go1.23.3
require (
- github.com/Masterminds/sprig/v3 v3.2.3
- github.com/NVIDIA/go-nvlib v0.1.0
- github.com/NVIDIA/k8s-kata-manager v0.0.0-20230620232711-08b57feb9b5a
- github.com/NVIDIA/k8s-operator-libs v0.0.0-20240214071211-ea58a3ada15c
- github.com/NVIDIA/nvidia-container-toolkit v1.14.6
- github.com/davecgh/go-spew v1.1.1
- github.com/go-logr/logr v1.4.1
- github.com/mitchellh/hashstructure v1.1.0
- github.com/mittwald/go-helm-client v0.12.7
- github.com/onsi/ginkgo/v2 v2.14.0
- github.com/onsi/gomega v1.30.0
- github.com/openshift/api v0.0.0-20240306072808-610cbc77dbab
- github.com/openshift/client-go v0.0.0-20240215090359-b71f6f2731f5
- github.com/operator-framework/api v0.17.6
- github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.65.2
- github.com/prometheus/client_golang v1.18.0
- github.com/regclient/regclient v0.4.8
+ github.com/Masterminds/sprig/v3 v3.3.0
+ github.com/NVIDIA/go-nvlib v0.7.0
+ github.com/NVIDIA/k8s-kata-manager v0.2.2
+ github.com/NVIDIA/k8s-operator-libs v0.0.0-20240826221728-249ba446fa35
+ github.com/NVIDIA/nvidia-container-toolkit v1.17.2
+ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
+ github.com/go-logr/logr v1.4.2
+ github.com/mittwald/go-helm-client v0.12.14
+ github.com/onsi/ginkgo/v2 v2.22.0
+ github.com/onsi/gomega v1.35.1
+ github.com/openshift/api v0.0.0-20241001152557-e415140e5d5f
+ github.com/openshift/client-go v0.0.0-20241001162912-da6d55e4611f
+ github.com/operator-framework/api v0.27.0
+ github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.1
+ github.com/prometheus/client_golang v1.20.5
+ github.com/regclient/regclient v0.7.2
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.9.0
- github.com/urfave/cli/v2 v2.27.1
- go.uber.org/zap v1.26.0
- golang.org/x/mod v0.15.0
- k8s.io/api v0.29.1
- k8s.io/apiextensions-apiserver v0.29.1
- k8s.io/apimachinery v0.29.1
- k8s.io/client-go v0.29.1
- k8s.io/klog/v2 v2.110.1
- sigs.k8s.io/controller-runtime v0.17.1
- sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd
+ github.com/urfave/cli/v2 v2.27.5
+ go.uber.org/zap v1.27.0
+ golang.org/x/mod v0.22.0
+ k8s.io/api v0.31.2
+ k8s.io/apiextensions-apiserver v0.31.2
+ k8s.io/apimachinery v0.31.2
+ k8s.io/client-go v0.31.2
+ k8s.io/klog/v2 v2.130.1
+ sigs.k8s.io/controller-runtime v0.19.1
sigs.k8s.io/yaml v1.4.0
)
require (
+ dario.cat/mergo v1.0.1 // indirect
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
- github.com/BurntSushi/toml v1.3.2 // indirect
+ github.com/BurntSushi/toml v1.4.0 // indirect
github.com/MakeNowJust/heredoc v1.0.0 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
- github.com/Masterminds/semver/v3 v3.2.1 // indirect
+ github.com/Masterminds/semver/v3 v3.3.0 // indirect
github.com/Masterminds/squirrel v1.5.4 // indirect
github.com/Microsoft/hcsshim v0.11.4 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
- github.com/cespare/xxhash/v2 v2.2.0 // indirect
+ github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
- github.com/containerd/containerd v1.7.11 // indirect
+ github.com/containerd/containerd v1.7.12 // indirect
github.com/containerd/log v0.1.0 // indirect
- github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect
- github.com/cyphar/filepath-securejoin v0.2.4 // indirect
+ github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
+ github.com/cyphar/filepath-securejoin v0.3.1 // indirect
github.com/distribution/reference v0.5.0 // indirect
- github.com/docker/cli v24.0.7+incompatible // indirect
+ github.com/docker/cli v25.0.1+incompatible // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
- github.com/docker/docker v24.0.7+incompatible // indirect
+ github.com/docker/docker v25.0.6+incompatible // indirect
github.com/docker/docker-credential-helpers v0.8.0 // indirect
- github.com/docker/go-connections v0.4.0 // indirect
+ github.com/docker/go-connections v0.5.0 // indirect
github.com/docker/go-metrics v0.0.1 // indirect
- github.com/docker/go-units v0.5.0 // indirect
github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 // indirect
- github.com/emicklei/go-restful/v3 v3.11.1 // indirect
- github.com/evanphx/json-patch v5.7.0+incompatible // indirect
- github.com/evanphx/json-patch/v5 v5.8.0 // indirect
+ github.com/emicklei/go-restful/v3 v3.11.2 // indirect
+ github.com/evanphx/json-patch v5.9.0+incompatible // indirect
+ github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
+ github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-errors/errors v1.5.1 // indirect
github.com/go-gorp/gorp/v3 v3.1.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.20.2 // indirect
github.com/go-openapi/jsonreference v0.20.4 // indirect
- github.com/go-openapi/swag v0.22.7 // indirect
- github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
+ github.com/go-openapi/swag v0.22.9 // indirect
+ github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/gobwas/glob v0.2.3 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
- github.com/golang/protobuf v1.5.3 // indirect
+ github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.2 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
- github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
+ github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/mux v1.8.1 // indirect
@@ -94,13 +95,13 @@ require (
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
- github.com/huandu/xstrings v1.4.0 // indirect
+ github.com/huandu/xstrings v1.5.0 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
- github.com/jmoiron/sqlx v1.3.5 // indirect
+ github.com/jmoiron/sqlx v1.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
- github.com/klauspost/compress v1.17.4 // indirect
+ github.com/klauspost/compress v1.17.11 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/lib/pq v1.10.9 // indirect
@@ -109,74 +110,73 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.15 // indirect
- github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/moby/locker v1.0.1 // indirect
- github.com/moby/spdystream v0.2.0 // indirect
+ github.com/moby/spdystream v0.4.0 // indirect
github.com/moby/term v0.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
- github.com/morikuni/aec v1.0.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
- github.com/opencontainers/image-spec v1.1.0-rc5 // indirect
+ github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
- github.com/pmezard/go-difflib v1.0.0 // indirect
- github.com/prometheus/client_model v0.5.0 // indirect
- github.com/prometheus/common v0.45.0 // indirect
- github.com/prometheus/procfs v0.12.0 // indirect
+ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
+ github.com/prometheus/client_model v0.6.1 // indirect
+ github.com/prometheus/common v0.55.0 // indirect
+ github.com/prometheus/procfs v0.15.1 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
- github.com/rubenv/sql-migrate v1.6.0 // indirect
+ github.com/rubenv/sql-migrate v1.7.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
- github.com/shopspring/decimal v1.3.1 // indirect
- github.com/spf13/cast v1.6.0 // indirect
- github.com/spf13/cobra v1.8.0 // indirect
+ github.com/shopspring/decimal v1.4.0 // indirect
+ github.com/spf13/cast v1.7.0 // indirect
+ github.com/spf13/cobra v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
+ github.com/ulikunitz/xz v0.5.12 // indirect
+ github.com/x448/float16 v0.8.4 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
- github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
- gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 // indirect
- go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect
- go.opentelemetry.io/otel v1.21.0 // indirect
- go.opentelemetry.io/otel/metric v1.21.0 // indirect
- go.opentelemetry.io/otel/trace v1.21.0 // indirect
+ github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
+ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
+ go.opentelemetry.io/otel v1.28.0 // indirect
+ go.opentelemetry.io/otel/metric v1.28.0 // indirect
+ go.opentelemetry.io/otel/trace v1.28.0 // indirect
go.starlark.net v0.0.0-20231121155337-90ade8b19d09 // indirect
go.uber.org/multierr v1.11.0 // indirect
- golang.org/x/crypto v0.17.0 // indirect
- golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc // indirect
- golang.org/x/net v0.19.0 // indirect
- golang.org/x/oauth2 v0.15.0 // indirect
- golang.org/x/sync v0.5.0 // indirect
- golang.org/x/sys v0.17.0 // indirect
- golang.org/x/term v0.15.0 // indirect
- golang.org/x/text v0.14.0 // indirect
+ golang.org/x/crypto v0.28.0 // indirect
+ golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
+ golang.org/x/net v0.30.0 // indirect
+ golang.org/x/oauth2 v0.21.0 // indirect
+ golang.org/x/sync v0.8.0 // indirect
+ golang.org/x/sys v0.26.0 // indirect
+ golang.org/x/term v0.25.0 // indirect
+ golang.org/x/text v0.19.0 // indirect
golang.org/x/time v0.5.0 // indirect
- golang.org/x/tools v0.16.1 // indirect
+ golang.org/x/tools v0.26.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
- google.golang.org/appengine v1.6.8 // indirect
- google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect
- google.golang.org/grpc v1.60.1 // indirect
- google.golang.org/protobuf v1.32.0 // indirect
- gopkg.in/evanphx/json-patch.v5 v5.7.0 // indirect
+ google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
+ google.golang.org/grpc v1.65.0 // indirect
+ google.golang.org/protobuf v1.35.1 // indirect
+ gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
- helm.sh/helm/v3 v3.13.3 // indirect
- k8s.io/apiserver v0.29.1 // indirect
- k8s.io/cli-runtime v0.29.1 // indirect
- k8s.io/component-base v0.29.1 // indirect
- k8s.io/kube-openapi v0.0.0-20240103195357-a9f8850cb432 // indirect
- k8s.io/kubectl v0.29.1 // indirect
- k8s.io/utils v0.0.0-20240102154912-e7106e64919e // indirect
- oras.land/oras-go v1.2.4 // indirect
- sigs.k8s.io/kustomize/api v0.16.0 // indirect
- sigs.k8s.io/kustomize/kyaml v0.16.0 // indirect
+ helm.sh/helm/v3 v3.16.1 // indirect
+ k8s.io/apiserver v0.31.2 // indirect
+ k8s.io/cli-runtime v0.31.1 // indirect
+ k8s.io/component-base v0.31.2 // indirect
+ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
+ k8s.io/kubectl v0.31.0 // indirect
+ k8s.io/utils v0.0.0-20240921022957-49e7df575cb6 // indirect
+ oras.land/oras-go v1.2.5 // indirect
+ sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
+ sigs.k8s.io/kustomize/api v0.17.2 // indirect
+ sigs.k8s.io/kustomize/kyaml v0.17.1 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)
diff --git a/go.sum b/go.sum
index aa60d3945..3c2dda11a 100644
--- a/go.sum
+++ b/go.sum
@@ -1,34 +1,37 @@
+dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
+dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
+filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
+filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
-github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
-github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
-github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60=
-github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
+github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0=
+github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
+github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
+github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
-github.com/Masterminds/semver/v3 v3.2.0/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
-github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0=
-github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
-github.com/Masterminds/sprig/v3 v3.2.3 h1:eL2fZNezLomi0uOLqjQoN6BfsDD+fyLtgbJMAj9n6YA=
-github.com/Masterminds/sprig/v3 v3.2.3/go.mod h1:rXcFaZ2zZbLRJv/xSysmlgIM1u11eBaRMhvYXJNkGuM=
+github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
+github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
+github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
+github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8=
github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w=
-github.com/NVIDIA/go-nvlib v0.1.0 h1:VYNqzGRaE5zrku1ysS9J+hSkuuwTpYuSLqDF1BaCNUs=
-github.com/NVIDIA/go-nvlib v0.1.0/go.mod h1:lDrLM77CNdwfCN5ySYpuyzBQLQR6pGC+rHri1T4l+l4=
-github.com/NVIDIA/k8s-kata-manager v0.0.0-20230620232711-08b57feb9b5a h1:3nyTp1cXzZMHoUuhMwHdz9QDzl100ECvIDYFxdjWk6o=
-github.com/NVIDIA/k8s-kata-manager v0.0.0-20230620232711-08b57feb9b5a/go.mod h1:K7HCLTndSwBEZwBu6sU7daVeryV1Qt/DtKH8nONJj4o=
-github.com/NVIDIA/k8s-operator-libs v0.0.0-20240214071211-ea58a3ada15c h1:nt9jPM6K7DCYydMKhlfMrZ9aFasdNU4WKUZvO4cN2us=
-github.com/NVIDIA/k8s-operator-libs v0.0.0-20240214071211-ea58a3ada15c/go.mod h1:m9Xr+fGiGWTxyCYnbby7a91cDF1GpMH4PSiDwoDp5FA=
-github.com/NVIDIA/nvidia-container-toolkit v1.14.6 h1:42PccGiwrz2K5KTEOOO3X023ToBqNdd0xnnJVCh+Mqs=
-github.com/NVIDIA/nvidia-container-toolkit v1.14.6/go.mod h1:SD4zQVx3nyNeh1JKewquKoGd5i+nzJwjRCTu9Xmh5H4=
+github.com/NVIDIA/go-nvlib v0.7.0 h1:Z/J7skMdLbTiHvomKVsGYsttfQMZj5FwNYIFXhZ4i/c=
+github.com/NVIDIA/go-nvlib v0.7.0/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY=
+github.com/NVIDIA/k8s-kata-manager v0.2.2 h1:+xVIp4yLfCjZ31Dfrm9LOKo4T47b4g+DV6XkwAqalns=
+github.com/NVIDIA/k8s-kata-manager v0.2.2/go.mod h1:UGjGQUcpXTegwyOc5IwcyLTzPKwO9lOIkqw/qUzk8Q0=
+github.com/NVIDIA/k8s-operator-libs v0.0.0-20240826221728-249ba446fa35 h1:w9DXPTJCq9k2PVpdBQJrWE4vAmZcFaSHKLpM/xos9WI=
+github.com/NVIDIA/k8s-operator-libs v0.0.0-20240826221728-249ba446fa35/go.mod h1:sw6XRI5wq0Q+nSgaWa1Pyo/ZKxQebc70x6VIznDAxtM=
+github.com/NVIDIA/nvidia-container-toolkit v1.17.2 h1:iE6PK9SQH3HyDrOolu27xn3CJgURR3bDtnbfFrxdML8=
+github.com/NVIDIA/nvidia-container-toolkit v1.17.2/go.mod h1:R6bNf6ca0IjjACa0ncKGvsrx6zSjsgz8QkFyBDk5szU=
github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d h1:UrqY+r/OJnIp5u0s1SbQ8dVfLCZJsnvazdBP5hS4iRs=
github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
@@ -51,70 +54,69 @@ github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b h1:otBG+dV+YK+Soembj
github.com/bugsnag/osext v0.0.0-20130617224835-0dd3f918b21b/go.mod h1:obH5gd0BsqsP2LwDJ9aOkm/6J86V6lyAXCoQWGw3K50=
github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0 h1:nvj0OLI3YqYXer/kZD8Ri1aaunCxIEsOst1BVJswV0o=
github.com/bugsnag/panicwrap v0.0.0-20151223152923-e2c28503fcd0/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE=
-github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
-github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk=
github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA=
-github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
-github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
-github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
-github.com/containerd/containerd v1.7.11 h1:lfGKw3eU35sjV0aG2eYZTiwFEY1pCzxdzicHP3SZILw=
-github.com/containerd/containerd v1.7.11/go.mod h1:5UluHxHTX2rdvYuZ5OJTC5m/KJNs0Zs9wVoJm9zf5ZE=
+github.com/containerd/containerd v1.7.12 h1:+KQsnv4VnzyxWcfO9mlxxELaoztsDEjOuCMPAuPqgU0=
+github.com/containerd/containerd v1.7.12/go.mod h1:/5OMpE1p0ylxtEUGY8kuCYkDRzJm9NO1TFMWjUpdevk=
github.com/containerd/continuity v0.4.2 h1:v3y/4Yz5jwnvqPKJJ+7Wf93fyWoCB3F5EclWG023MDM=
github.com/containerd/continuity v0.4.2/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
-github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM=
-github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
+github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
-github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
-github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
+github.com/cyphar/filepath-securejoin v0.3.1 h1:1V7cHiaW+C+39wEfpH6XlLBQo3j/PciWFrgfCLS8XrE=
+github.com/cyphar/filepath-securejoin v0.3.1/go.mod h1:F7i41x/9cBF7lzCrVsYs9fuzwRZm4NQsGTBdpp6mETc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2 h1:aBfCb7iqHmDEIp6fBvC/hQUddQfg+3qdYjwzaiP9Hnc=
github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2/go.mod h1:WHNsWjnIn2V1LYOrME7e8KxSeKunYHsxEm4am0BUtcI=
github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0=
github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
-github.com/docker/cli v24.0.7+incompatible h1:wa/nIwYFW7BVTGa7SWPVyyXU9lgORqUb1xfI36MSkFg=
-github.com/docker/cli v24.0.7+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
+github.com/docker/cli v25.0.1+incompatible h1:mFpqnrS6Hsm3v1k7Wa/BO23oz0k121MTbTO1lpcGSkU=
+github.com/docker/cli v25.0.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
-github.com/docker/docker v24.0.7+incompatible h1:Wo6l37AuwP3JaMnZa226lzVXGA3F9Ig1seQen0cKYlM=
-github.com/docker/docker v24.0.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/docker v25.0.6+incompatible h1:5cPwbwriIcsua2REJe8HqQV+6WlWc1byg2QSXzBxBGg=
+github.com/docker/docker v25.0.6+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker-credential-helpers v0.8.0 h1:YQFtbBQb4VrpoPxhFuzEBPQ9E16qz5SpHLS+uswaCp8=
github.com/docker/docker-credential-helpers v0.8.0/go.mod h1:UGFXcuoQ5TxPiB54nHOZ32AWRqQdECoh/Mg0AlEYb40=
-github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=
-github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
+github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
+github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8=
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=
github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8=
github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw=
-github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
-github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 h1:UhxFibDNY/bfvqU5CAUmr9zpesgbU6SWc8/B4mflAE4=
github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7/go.mod h1:cyGadeNEkKy96OOhEzfZl+yxihPEzKnqJwvfuSUqbZE=
-github.com/emicklei/go-restful/v3 v3.11.1 h1:S+9bSbua1z3FgCnV0KKOSSZ3mDthb5NyEPL5gEpCvyk=
-github.com/emicklei/go-restful/v3 v3.11.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
-github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ2tG6yudJd8LBksgI=
-github.com/evanphx/json-patch v5.7.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
-github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1u0KQro=
-github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
+github.com/emicklei/go-restful/v3 v3.11.2 h1:1onLa9DcsMYO9P+CXaL0dStDqQ2EHHXLiz+BtnqkLAU=
+github.com/emicklei/go-restful/v3 v3.11.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls=
+github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
+github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg=
+github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4=
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
-github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI=
-github.com/foxcpp/go-mockdns v1.0.0/go.mod h1:lgRN6+KxQBawyIghpnl5CezHFGS9VLzvtVlwxvzXTQ4=
+github.com/foxcpp/go-mockdns v1.1.0 h1:jI0rD8M0wuYAxL7r/ynTrCQQq0BVqfB99Vgk7DlmewI=
+github.com/foxcpp/go-mockdns v1.1.0/go.mod h1:IhLeSFGed3mJIAXPH2aiRQB+kqz7oqu8ld2qVbOu7Wk=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
+github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk=
github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs=
@@ -123,9 +125,8 @@ github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
-github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
-github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
-github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
@@ -134,13 +135,13 @@ github.com/go-openapi/jsonpointer v0.20.2 h1:mQc3nmndL8ZBzStEo3JYF8wzmeWffDH4VbX
github.com/go-openapi/jsonpointer v0.20.2/go.mod h1:bHen+N0u1KEO3YlmqOjTT9Adn1RfD91Ar825/PuiRVs=
github.com/go-openapi/jsonreference v0.20.4 h1:bKlDxQxQJgwpUSgOENiMPzCTBVuc7vTdXSSgNeAhojU=
github.com/go-openapi/jsonreference v0.20.4/go.mod h1:5pZJyJP2MnYCpoeoMAql78cCHauHj0V9Lhc506VOpw4=
-github.com/go-openapi/swag v0.22.7 h1:JWrc1uc/P9cSomxfnsFSVWoE1FW6bNbrVPmpQYpCcR8=
-github.com/go-openapi/swag v0.22.7/go.mod h1:Gl91UqO+btAM0plGGxHqJcQZ1ZTy6jbmridBTsDy8A0=
-github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE=
-github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
+github.com/go-openapi/swag v0.22.9 h1:XX2DssF+mQKM2DHsbgZK74y/zj4mo9I99+89xUmuZCE=
+github.com/go-openapi/swag v0.22.9/go.mod h1:3/OXnFfnMAwBD099SwYRk7GD3xOrr1iL7d/XNLXVVwE=
+github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
+github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
-github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
+github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
+github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
@@ -151,10 +152,8 @@ github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4er
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
-github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
-github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
-github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
-github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/gomodule/redigo v1.8.2 h1:H5XSIre1MB5NbPYFp+i1NBbb5qN1W8Y8YAQoAYbkm8k=
github.com/gomodule/redigo v1.8.2/go.mod h1:P9dn9mFrCBvWhGE1wpxx6fgq7BAeLBk+UUUzlpkBYO0=
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
@@ -162,25 +161,22 @@ github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl76
github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
-github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec=
-github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
+github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
-github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
-github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY=
github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY=
github.com/gosuri/uitable v0.0.4 h1:IG2xLKRvErL3uhY6e1BylFzG+aJiwQviDDTfOKeKTpY=
@@ -194,17 +190,14 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
-github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
-github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU=
-github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
-github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
-github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
+github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
+github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
-github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g=
-github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ=
+github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
+github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
@@ -214,19 +207,20 @@ github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHm
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
-github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0/go.mod h1:dXGbAdH5GtBTC4WfIxhKZfyBF/HBFgRZSWwZ9g/He9o=
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 h1:P6pPBnrTSX3DEVR4fDembhRWSsG5rVo6hYhAB/ADZrk=
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6FmdpVm2joNMFikkuWg0EoCKLGUMNw=
-github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0=
@@ -240,30 +234,23 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
-github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
-github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
-github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg=
-github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k=
-github.com/miekg/dns v1.1.25 h1:dFwPR6SfLtrSwgDcIq2bcU/gVutB4sNApq2HBdqcakg=
-github.com/miekg/dns v1.1.25/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
-github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
+github.com/miekg/dns v1.1.57 h1:Jzi7ApEIzwEPLHWRcafCN9LZSBbqQpxjt/wpgvg7wcM=
+github.com/miekg/dns v1.1.57/go.mod h1:uqRjCRUuEAA6qsOiJvDd+CFo/vW+y5WR6SNmHE55hZk=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0=
github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0=
-github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0=
-github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA=
-github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
-github.com/mittwald/go-helm-client v0.12.7 h1:+rhDcLP96d+IavqTORDsY8tc6BNzukwzka85HX5hBzw=
-github.com/mittwald/go-helm-client v0.12.7/go.mod h1:ipUXxirGzXl7hfMV4lY3SYXhwudYYb6T/dnNfKHDQiM=
+github.com/mittwald/go-helm-client v0.12.14 h1:az3GJ4kRmFK609Ic3iHXveNtg92n9jWG0YpKKTIK4oo=
+github.com/mittwald/go-helm-client v0.12.14/go.mod h1:2VogAupgnV7FiuoPqtpCYKS/RrMh9fFA3/pD/OmTaLc=
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc=
-github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8=
-github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c=
+github.com/moby/spdystream v0.4.0 h1:Vy79D6mHeJJjiPdFEL2yku1kl0chZpJfZcPpb16BRl8=
+github.com/moby/spdystream v0.4.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78=
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
@@ -277,27 +264,27 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0=
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4=
-github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
-github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
-github.com/onsi/ginkgo/v2 v2.14.0 h1:vSmGj2Z5YPb9JwCWT6z6ihcUvDhuXLc3sJiqd3jMKAY=
-github.com/onsi/ginkgo/v2 v2.14.0/go.mod h1:JkUdW7JkN0V6rFvsHcJ478egV3XH9NxpD27Hal/PhZw=
-github.com/onsi/gomega v1.30.0 h1:hvMK7xYz4D3HapigLTeGdId/NcfQx1VHMJc60ew99+8=
-github.com/onsi/gomega v1.30.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ=
+github.com/olareg/olareg v0.1.1 h1:Ui7q93zjcoF+U9U71sgqgZWByDoZOpqHitUXEu2xV+g=
+github.com/olareg/olareg v0.1.1/go.mod h1:w8NP4SWrHHtxsFaUiv1lnCnYPm4sN1seCd2h7FK/dc0=
+github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
+github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
+github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4=
+github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
-github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI=
-github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8=
-github.com/openshift/api v0.0.0-20240306072808-610cbc77dbab h1:L3k198pZJhluvJZzD/ySpkKqJzQh5MgWlsT4U6NOYUY=
-github.com/openshift/api v0.0.0-20240306072808-610cbc77dbab/go.mod h1:CxgbWAlvu2iQB0UmKTtRu1YfepRg1/vJ64n2DlIEVz4=
-github.com/openshift/client-go v0.0.0-20240215090359-b71f6f2731f5 h1:iQ2Y1LUX7FbBm6ddaSVz/KeWXUMkqrBP/C5yt0DvBgI=
-github.com/openshift/client-go v0.0.0-20240215090359-b71f6f2731f5/go.mod h1:Y5Hp789dTrF6Fq8cA5YQlpwffmlLy8mc2un/CY0cg7Q=
-github.com/operator-framework/api v0.17.6 h1:E6+vlvYUKafvoXYtCuHlDZrXX4vl8AT+r93OxNlzjpU=
-github.com/operator-framework/api v0.17.6/go.mod h1:l/cuwtPxkVUY7fzYgdust2m9tlmb8I4pOvbsUufRb24=
+github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
+github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
+github.com/openshift/api v0.0.0-20241001152557-e415140e5d5f h1:ya1OmyZm3LIIxI3U9VE9Nyx3ehCHgBwxyFUPflYPWls=
+github.com/openshift/api v0.0.0-20241001152557-e415140e5d5f/go.mod h1:Shkl4HanLwDiiBzakv+con/aMGnVE2MAGvoKp5oyYUo=
+github.com/openshift/client-go v0.0.0-20241001162912-da6d55e4611f h1:FRc0bVNWprihWS0GqQWzb3dY4dkCwpOP3mDw5NwSoR4=
+github.com/openshift/client-go v0.0.0-20241001162912-da6d55e4611f/go.mod h1:KiZi2mJRH1TOJ3FtBDYS6YvUL30s/iIXaGSUrSa36mo=
+github.com/operator-framework/api v0.27.0 h1:OrVaGKZJvbZo58HTv2guz7aURkhVKYhFqZ/6VpifiXI=
+github.com/operator-framework/api v0.27.0/go.mod h1:lg2Xx+S8NQWGYlEOvFwQvH46E5EK5IrAIL7HWfAhciM=
github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI=
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI=
@@ -305,54 +292,53 @@ github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rK
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/poy/onpar v1.1.2 h1:QaNrNiZx0+Nar5dLgTVp5mXkyoVFIbepjyEoGSnhbAY=
github.com/poy/onpar v1.1.2/go.mod h1:6X8FLNoxyr9kkmnlqpK6LSoiOtrO6MICtWwEuWkLjzg=
-github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.65.2 h1:DZzMjhqxx3+kAPpwWdng3ktO6NErh1wGuW5tXJamak8=
-github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.65.2/go.mod h1:xcfWyzl4BpEe5jnVkw7D1yCHU7GHjfjCERJsEfGbpSU=
+github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.1 h1:Fm9Z+FabnB+6EoGq15j+pyLmaK6hYrYOpBlTzOLTQ+E=
+github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.1/go.mod h1:SvsRXw4m1F2vk7HquU5h475bFpke27mIUswfyw9u3ug=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g=
-github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk=
-github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA=
+github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
+github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw=
-github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI=
+github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
+github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc=
-github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM=
-github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY=
+github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
+github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
-github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
-github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
-github.com/regclient/regclient v0.4.8 h1:h4uZRR4OT4oO+50qWu4bj+rzqRs/JwD3erb6lHIkYK4=
-github.com/regclient/regclient v0.4.8/go.mod h1:UC6i29I09h9KHyABGLGvsvGi7KYRY8ZKLyt7fzvW4oE=
+github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
+github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
+github.com/regclient/regclient v0.7.2 h1:vcldDAwBMLtighYVMeb6qNt5+0hKg3AN2IkCc0JIJNM=
+github.com/regclient/regclient v0.7.2/go.mod h1:QlA7W9/pvmbblOXM4d49JgfuOTwVXcUMKt3bFuOSVIQ=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
-github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
-github.com/rubenv/sql-migrate v1.6.0 h1:IZpcTlAx/VKXphWEpwWJ7BaMq05tYtE80zYz+8a5Il8=
-github.com/rubenv/sql-migrate v1.6.0/go.mod h1:m3ilnKP7sNb4eYkLsp6cGdPOl4OBcXM6rcbzU+Oqc5k=
+github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
+github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/rubenv/sql-migrate v1.7.0 h1:HtQq1xyTN2ISmQDggnh0c9U3JlP8apWh8YO2jzlXpTI=
+github.com/rubenv/sql-migrate v1.7.0/go.mod h1:S4wtDEG1CKn+0ShpTtzWhFpHHI5PvCUtiGI+C+Z2THE=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
-github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
-github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
-github.com/shopspring/decimal v1.3.1 h1:2Usl1nmF/WZucqkFZhnfFYxxxu8LG21F6nPQBE5gKV8=
-github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
+github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
+github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
+github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
+github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
-github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0=
-github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
-github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
-github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
+github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
+github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
+github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -361,13 +347,16 @@ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
-github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
-github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
-github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
+github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
+github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w=
+github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -377,140 +366,111 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
-github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
-github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
+github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
+github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
-github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43 h1:+lm10QQTNSBd8DVTNGHx7o/IKu9HYDvLMffDhbyLccI=
github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs=
github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50 h1:hlE8//ciYMztlGpl/VA+Zm1AcTPHYkHJPbHqE6WJUXE=
github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA=
github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f h1:ERexzlUfuTvpE74urLSbIQW0Z/6hF9t8U4NsJLaioAY=
github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg=
-gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884 h1:V0LUbfm4kVA1CPG8FgG9AGZqa3ykE5U12Gd3PZgoItA=
-gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20230818092907-09424fdc8884/go.mod h1:/x5Ky1ZJNyCjDkgSL1atII0EFKQF5WaIHKeP5nkaQfk=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 h1:aFJWCqJMNjENlcleuuOkGAPH82y0yULBScfXcIEdS24=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1/go.mod h1:sEGXWArGqc3tVa+ekntsN65DmVbVeW+7lTKTjZF3/Fo=
-go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc=
-go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo=
-go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4=
-go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM=
-go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc=
-go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg=
+go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo=
+go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4=
+go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q=
+go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s=
+go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g=
+go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI=
go.starlark.net v0.0.0-20231121155337-90ade8b19d09 h1:hzy3LFnSN8kuQK8h9tHl4ndF6UruMj47OqwqsS+/Ai4=
go.starlark.net v0.0.0-20231121155337-90ade8b19d09/go.mod h1:LcLNIzVOMp4oV+uusnpk+VU+SzXaJakUuBjoCSWH5dM=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
-go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo=
-go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so=
+go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
+go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
-golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
-golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
-golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
-golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc h1:ao2WRsKSzW6KuUY9IWPwWahcHCgR0s52IfwutMfEbdM=
-golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI=
+golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw=
+golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U=
+golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
+golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
-golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8=
-golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4=
+golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
-golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
-golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
-golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ=
-golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM=
+golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
+golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
+golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
+golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
-golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
+golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
-golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
-golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4=
-golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0=
+golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
+golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24=
+golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
-golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
-golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
-golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
-golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
+golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
-golang.org/x/tools v0.16.1 h1:TLyB3WofjdOEepBHAU20JdNC1Zbg87elYofWYAY5oZA=
-golang.org/x/tools v0.16.1/go.mod h1:kYVVN6I1mBNoB1OX+noeBjbRk4IUEPa7JJ+TJMEooJ0=
+golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
+golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
-google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
-google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 h1:6G8oQ016D88m1xAKljMlBOOGWDZkes4kMhgGFlf8WcQ=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917/go.mod h1:xtjpI3tXFPP051KaWnhvxkiubL/6dJ18vLVf7q2pTOU=
-google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU=
-google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM=
-google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
-google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY=
+google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc=
+google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ=
+google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA=
+google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/evanphx/json-patch.v5 v5.7.0 h1:dGKGylPlZ/jus2g1YqhhyzfH0gPy2R8/MYUpW/OslTY=
-gopkg.in/evanphx/json-patch.v5 v5.7.0/go.mod h1:/kvTRh1TVm5wuM6OkHxqXtE/1nUZZpihg29RtuIyfvk=
+gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
+gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -518,40 +478,40 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o=
gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g=
-helm.sh/helm/v3 v3.13.3 h1:0zPEdGqHcubehJHP9emCtzRmu8oYsJFRrlVF3TFj8xY=
-helm.sh/helm/v3 v3.13.3/go.mod h1:3OKO33yI3p4YEXtTITN2+4oScsHeQe71KuzhlZ+aPfg=
-k8s.io/api v0.29.1 h1:DAjwWX/9YT7NQD4INu49ROJuZAAAP/Ijki48GUPzxqw=
-k8s.io/api v0.29.1/go.mod h1:7Kl10vBRUXhnQQI8YR/R327zXC8eJ7887/+Ybta+RoQ=
-k8s.io/apiextensions-apiserver v0.29.1 h1:S9xOtyk9M3Sk1tIpQMu9wXHm5O2MX6Y1kIpPMimZBZw=
-k8s.io/apiextensions-apiserver v0.29.1/go.mod h1:zZECpujY5yTW58co8V2EQR4BD6A9pktVgHhvc0uLfeU=
-k8s.io/apimachinery v0.29.1 h1:KY4/E6km/wLBguvCZv8cKTeOwwOBqFNjwJIdMkMbbRc=
-k8s.io/apimachinery v0.29.1/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU=
-k8s.io/apiserver v0.29.1 h1:e2wwHUfEmMsa8+cuft8MT56+16EONIEK8A/gpBSco+g=
-k8s.io/apiserver v0.29.1/go.mod h1:V0EpkTRrJymyVT3M49we8uh2RvXf7fWC5XLB0P3SwRw=
-k8s.io/cli-runtime v0.29.1 h1:By3WVOlEWYfyxhGko0f/IuAOLQcbBSMzwSaDren2JUs=
-k8s.io/cli-runtime v0.29.1/go.mod h1:vjEY9slFp8j8UoMhV5AlO8uulX9xk6ogfIesHobyBDU=
-k8s.io/client-go v0.29.1 h1:19B/+2NGEwnFLzt0uB5kNJnfTsbV8w6TgQRz9l7ti7A=
-k8s.io/client-go v0.29.1/go.mod h1:TDG/psL9hdet0TI9mGyHJSgRkW3H9JZk2dNEUS7bRks=
-k8s.io/component-base v0.29.1 h1:MUimqJPCRnnHsskTTjKD+IC1EHBbRCVyi37IoFBrkYw=
-k8s.io/component-base v0.29.1/go.mod h1:fP9GFjxYrLERq1GcWWZAE3bqbNcDKDytn2srWuHTtKc=
-k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0=
-k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo=
-k8s.io/kube-openapi v0.0.0-20240103195357-a9f8850cb432 h1:+XYBQU3ZKUu60H6fEnkitTTabGoKfIG8zczhZBENu9o=
-k8s.io/kube-openapi v0.0.0-20240103195357-a9f8850cb432/go.mod h1:Pa1PvrP7ACSkuX6I7KYomY6cmMA0Tx86waBhDUgoKPw=
-k8s.io/kubectl v0.29.1 h1:rWnW3hi/rEUvvg7jp4iYB68qW5un/urKbv7fu3Vj0/s=
-k8s.io/kubectl v0.29.1/go.mod h1:SZzvLqtuOJYSvZzPZR9weSuP0wDQ+N37CENJf0FhDF4=
-k8s.io/utils v0.0.0-20240102154912-e7106e64919e h1:eQ/4ljkx21sObifjzXwlPKpdGLrCfRziVtos3ofG/sQ=
-k8s.io/utils v0.0.0-20240102154912-e7106e64919e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
-oras.land/oras-go v1.2.4 h1:djpBY2/2Cs1PV87GSJlxv4voajVOMZxqqtq9AB8YNvY=
-oras.land/oras-go v1.2.4/go.mod h1:DYcGfb3YF1nKjcezfX2SNlDAeQFKSXmf+qrFmrh4324=
-sigs.k8s.io/controller-runtime v0.17.1 h1:V1dQELMGVk46YVXXQUbTFujU7u4DQj6YUj9Rb6cuzz8=
-sigs.k8s.io/controller-runtime v0.17.1/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s=
+helm.sh/helm/v3 v3.16.1 h1:cER6tI/8PgUAsaJaQCVBUg3VI9KN4oVaZJgY60RIc0c=
+helm.sh/helm/v3 v3.16.1/go.mod h1:r+xBHHP20qJeEqtvBXMf7W35QDJnzY/eiEBzt+TfHps=
+k8s.io/api v0.31.2 h1:3wLBbL5Uom/8Zy98GRPXpJ254nEFpl+hwndmk9RwmL0=
+k8s.io/api v0.31.2/go.mod h1:bWmGvrGPssSK1ljmLzd3pwCQ9MgoTsRCuK35u6SygUk=
+k8s.io/apiextensions-apiserver v0.31.2 h1:W8EwUb8+WXBLu56ser5IudT2cOho0gAKeTOnywBLxd0=
+k8s.io/apiextensions-apiserver v0.31.2/go.mod h1:i+Geh+nGCJEGiCGR3MlBDkS7koHIIKWVfWeRFiOsUcM=
+k8s.io/apimachinery v0.31.2 h1:i4vUt2hPK56W6mlT7Ry+AO8eEsyxMD1U44NR22CLTYw=
+k8s.io/apimachinery v0.31.2/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
+k8s.io/apiserver v0.31.2 h1:VUzOEUGRCDi6kX1OyQ801m4A7AUPglpsmGvdsekmcI4=
+k8s.io/apiserver v0.31.2/go.mod h1:o3nKZR7lPlJqkU5I3Ove+Zx3JuoFjQobGX1Gctw6XuE=
+k8s.io/cli-runtime v0.31.1 h1:/ZmKhmZ6hNqDM+yf9s3Y4KEYakNXUn5sod2LWGGwCuk=
+k8s.io/cli-runtime v0.31.1/go.mod h1:pKv1cDIaq7ehWGuXQ+A//1OIF+7DI+xudXtExMCbe9U=
+k8s.io/client-go v0.31.2 h1:Y2F4dxU5d3AQj+ybwSMqQnpZH9F30//1ObxOKlTI9yc=
+k8s.io/client-go v0.31.2/go.mod h1:NPa74jSVR/+eez2dFsEIHNa+3o09vtNaWwWwb1qSxSs=
+k8s.io/component-base v0.31.2 h1:Z1J1LIaC0AV+nzcPRFqfK09af6bZ4D1nAOpWsy9owlA=
+k8s.io/component-base v0.31.2/go.mod h1:9PeyyFN/drHjtJZMCTkSpQJS3U9OXORnHQqMLDz0sUQ=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
+k8s.io/kubectl v0.31.0 h1:kANwAAPVY02r4U4jARP/C+Q1sssCcN/1p9Nk+7BQKVg=
+k8s.io/kubectl v0.31.0/go.mod h1:pB47hhFypGsaHAPjlwrNbvhXgmuAr01ZBvAIIUaI8d4=
+k8s.io/utils v0.0.0-20240921022957-49e7df575cb6 h1:MDF6h2H/h4tbzmtIKTuctcwZmY0tY9mD9fNT47QO6HI=
+k8s.io/utils v0.0.0-20240921022957-49e7df575cb6/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+oras.land/oras-go v1.2.5 h1:XpYuAwAb0DfQsunIyMfeET92emK8km3W4yEzZvUbsTo=
+oras.land/oras-go v1.2.5/go.mod h1:PuAwRShRZCsZb7g8Ar3jKKQR/2A/qN+pkYxIOd/FAoo=
+sigs.k8s.io/controller-runtime v0.19.1 h1:Son+Q40+Be3QWb+niBXAg2vFiYWolDjjRfO8hn/cxOk=
+sigs.k8s.io/controller-runtime v0.19.1/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
-sigs.k8s.io/kustomize/api v0.16.0 h1:/zAR4FOQDCkgSDmVzV2uiFbuy9bhu3jEzthrHCuvm1g=
-sigs.k8s.io/kustomize/api v0.16.0/go.mod h1:MnFZ7IP2YqVyVwMWoRxPtgl/5hpA+eCCrQR/866cm5c=
-sigs.k8s.io/kustomize/kyaml v0.16.0 h1:6J33uKSoATlKZH16unr2XOhDI+otoe2sR3M8PDzW3K0=
-sigs.k8s.io/kustomize/kyaml v0.16.0/go.mod h1:xOK/7i+vmE14N2FdFyugIshB8eF6ALpy7jI87Q2nRh4=
+sigs.k8s.io/kustomize/api v0.17.2 h1:E7/Fjk7V5fboiuijoZHgs4aHuexi5Y2loXlVOAVAG5g=
+sigs.k8s.io/kustomize/api v0.17.2/go.mod h1:UWTz9Ct+MvoeQsHcJ5e+vziRRkwimm3HytpZgIYqye0=
+sigs.k8s.io/kustomize/kyaml v0.17.1 h1:TnxYQxFXzbmNG6gOINgGWQt09GghzgTP6mIurOgrLCQ=
+sigs.k8s.io/kustomize/kyaml v0.17.1/go.mod h1:9V0mCjIEYjlXuCdYsSXvyoy2BTsLESH7TlGV81S282U=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
diff --git a/hack/must-gather.sh b/hack/must-gather.sh
index cb9e0b3e5..c66b172dc 100755
--- a/hack/must-gather.sh
+++ b/hack/must-gather.sh
@@ -110,7 +110,7 @@ for node in $(echo "$gpu_pci_nodes"); do
done
echo "Get the GPU nodes (status)"
-$K get nodes -l nvidia.com/gpu.present=true > $ARTIFACT_DIR/gpu_nodes.status
+$K get nodes -l nvidia.com/gpu.present=true -o wide > $ARTIFACT_DIR/gpu_nodes.status
echo "Get the GPU nodes (description)"
$K describe nodes -l nvidia.com/gpu.present=true > $ARTIFACT_DIR/gpu_nodes.descr
diff --git a/internal/conditions/clusterpolicy.go b/internal/conditions/clusterpolicy.go
index 4782a634d..b1d89ba13 100644
--- a/internal/conditions/clusterpolicy.go
+++ b/internal/conditions/clusterpolicy.go
@@ -25,7 +25,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
- nvidiav1 "github.com/NVIDIA/gpu-operator/api/v1"
+ nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
)
// Specific implementation of the Updater interface for one of our controllers
diff --git a/internal/conditions/conditions_test.go b/internal/conditions/conditions_test.go
index 515683e53..6d811d09e 100644
--- a/internal/conditions/conditions_test.go
+++ b/internal/conditions/conditions_test.go
@@ -26,7 +26,7 @@ import (
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
)
func TestConditionsUpdater_SetConditionsReady(t *testing.T) {
diff --git a/internal/conditions/nvidiadriver.go b/internal/conditions/nvidiadriver.go
index 749a202a5..c3ff538fd 100644
--- a/internal/conditions/nvidiadriver.go
+++ b/internal/conditions/nvidiadriver.go
@@ -25,7 +25,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
)
const (
diff --git a/internal/consts/consts.go b/internal/consts/consts.go
index a65c3027d..c2850f419 100644
--- a/internal/consts/consts.go
+++ b/internal/consts/consts.go
@@ -39,6 +39,9 @@ const (
// Containerd runtime
Containerd = "containerd"
+ // OpenshiftNamespace indicates the main namespace of an Openshift cluster
+ OpenshiftNamespace = "openshift"
+
OcpDriverToolkitVersionLabel = "openshift.driver-toolkit.rhcos"
OcpDriverToolkitIdentificationLabel = "openshift.driver-toolkit"
NfdOSTreeVersionLabelKey = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
diff --git a/internal/state/driver.go b/internal/state/driver.go
index 8a00216d3..34e19ef71 100644
--- a/internal/state/driver.go
+++ b/internal/state/driver.go
@@ -29,15 +29,20 @@ import (
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+ "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
+ "sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/source"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/controllers/clusterinfo"
"github.com/NVIDIA/gpu-operator/internal/consts"
"github.com/NVIDIA/gpu-operator/internal/image"
@@ -48,6 +53,11 @@ import (
const (
nfdOSReleaseIDLabelKey = "feature.node.kubernetes.io/system-os_release.ID"
nfdOSVersionIDLabelKey = "feature.node.kubernetes.io/system-os_release.VERSION_ID"
+
+ // AppComponentLabelKey indicates the label key of the component
+ AppComponentLabelKey = "app.kubernetes.io/component"
+ // AppComponentLabelValue indicates the label value of the nvidia-driver component
+ AppComponentLabelValue = "nvidia-driver"
)
type stateDriver struct {
@@ -90,6 +100,7 @@ type driverRenderData struct {
Openshift *openshiftSpec
Precompiled *precompiledSpec
AdditionalConfigs *additionalConfigs
+ HostRoot string
}
func NewStateDriver(
@@ -121,23 +132,12 @@ func (s *stateDriver) Sync(ctx context.Context, customResource interface{}, info
return SyncStateError, fmt.Errorf("NVIDIADriver CR not provided as input to Sync()")
}
- info := infoCatalog.Get(InfoTypeClusterPolicyCR)
- if info == nil {
- return SyncStateError, fmt.Errorf("failed to get ClusterPolicy CR from info catalog")
- }
-
- info = infoCatalog.Get(InfoTypeClusterInfo)
- if info == nil {
- return SyncStateNotReady, fmt.Errorf("failed to get cluster info from info catalog")
- }
- clusterInfo := info.(clusterinfo.Interface)
-
err := s.cleanupStaleDriverDaemonsets(ctx, cr)
if err != nil {
return SyncStateNotReady, fmt.Errorf("failed to cleanup stale driver DaemonSets: %w", err)
}
- objs, err := s.getManifestObjects(ctx, cr, clusterInfo)
+ objs, err := s.getManifestObjects(ctx, cr, infoCatalog)
if err != nil {
return SyncStateNotReady, fmt.Errorf("failed to create k8s objects from manifests: %v", err)
}
@@ -163,9 +163,17 @@ func (s *stateDriver) Sync(ctx context.Context, customResource interface{}, info
func (s *stateDriver) GetWatchSources(mgr ctrlManager) map[string]SyncingSource {
wr := make(map[string]SyncingSource)
+ nvDriverPredicate := predicate.NewTypedPredicateFuncs(func(ds *appsv1.DaemonSet) bool {
+ ls := metav1.LabelSelector{MatchLabels: map[string]string{AppComponentLabelKey: AppComponentLabelValue}}
+ selector, _ := metav1.LabelSelectorAsSelector(&ls)
+ return selector.Matches(labels.Set(ds.GetLabels()))
+ })
wr["DaemonSet"] = source.Kind(
mgr.GetCache(),
&appsv1.DaemonSet{},
+ handler.TypedEnqueueRequestForOwner[*appsv1.DaemonSet](mgr.GetScheme(), mgr.GetRESTMapper(),
+ &nvidiav1alpha1.NVIDIADriver{}, handler.OnlyControllerOwner()),
+ nvDriverPredicate,
)
return wr
}
@@ -183,7 +191,10 @@ func (s *stateDriver) cleanupStaleDriverDaemonsets(ctx context.Context, cr *nvid
for _, ds := range list.Items {
ds := ds
- if ds.Status.DesiredNumberScheduled == 0 {
+ // We consider a daemonset to be stale only if it has no desired pods and no pods currently mis-scheduled.
+ // As per the Kubernetes docs, a daemonset pod is mis-scheduled when an already scheduled pod no longer satisfies
+ // node affinity constraints or has un-tolerated taints, e.g. "node.kubernetes.io/unreachable:NoSchedule"
+ if ds.Status.DesiredNumberScheduled == 0 && ds.Status.NumberMisscheduled == 0 {
logger.V(consts.LogLevelInfo).Info("Deleting inactive driver DaemonSet", "Name", ds.Name)
err = s.client.Delete(ctx, &ds)
if err != nil {
@@ -197,9 +208,21 @@ func (s *stateDriver) cleanupStaleDriverDaemonsets(ctx context.Context, cr *nvid
return nil
}
-func (s *stateDriver) getManifestObjects(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, clusterInfo clusterinfo.Interface) ([]*unstructured.Unstructured, error) {
+func (s *stateDriver) getManifestObjects(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, infoCatalog InfoCatalog) ([]*unstructured.Unstructured, error) {
logger := log.FromContext(ctx)
+ info := infoCatalog.Get(InfoTypeClusterPolicyCR)
+ if info == nil {
+ return nil, fmt.Errorf("failed to get ClusterPolicy CR from info catalog")
+ }
+ clusterPolicy := info.(gpuv1.ClusterPolicy)
+
+ info = infoCatalog.Get(InfoTypeClusterInfo)
+ if info == nil {
+ return nil, fmt.Errorf("failed to get cluster info from info catalog")
+ }
+ clusterInfo := info.(clusterinfo.Interface)
+
runtimeSpec, err := getRuntimeSpec(ctx, s.client, clusterInfo, &cr.Spec)
if err != nil {
return nil, fmt.Errorf("failed to construct cluster runtime spec: %w", err)
@@ -210,6 +233,7 @@ func (s *stateDriver) getManifestObjects(ctx context.Context, cr *nvidiav1alpha1
renderData := &driverRenderData{
GPUDirectRDMA: gpuDirectRDMASpec,
Runtime: runtimeSpec,
+ HostRoot: clusterPolicy.Spec.HostPaths.RootFS,
}
if len(runtimeSpec.NodePools) == 0 {
diff --git a/internal/state/driver_test.go b/internal/state/driver_test.go
index bb1b354d1..a591fa9d3 100644
--- a/internal/state/driver_test.go
+++ b/internal/state/driver_test.go
@@ -18,7 +18,6 @@ package state
import (
"bytes"
- "fmt"
"os"
"path/filepath"
"strings"
@@ -27,17 +26,15 @@ import (
configv1 "github.com/openshift/api/config/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
- appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
- "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer/json"
apitypes "k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/scheme"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/internal/render"
"github.com/NVIDIA/gpu-operator/internal/utils"
)
@@ -105,6 +102,8 @@ func TestDriverRenderRDMA(t *testing.T) {
renderData := getMinimalDriverRenderData()
+ renderData.AdditionalConfigs = getSampleAdditionalConfigs()
+
renderData.GPUDirectRDMA = &nvidiav1alpha1.GPUDirectRDMASpec{
Enabled: utils.BoolPtr(true),
}
@@ -116,50 +115,6 @@ func TestDriverRenderRDMA(t *testing.T) {
require.Nil(t, err)
require.NotEmpty(t, objs)
- ds, err := getDaemonSetObj(objs)
- require.Nil(t, err)
- require.NotNil(t, ds)
-
- nvidiaDriverCtr, err := getContainerObj(ds.Spec.Template.Spec.Containers, "nvidia-driver-ctr")
- require.Nil(t, err, "nvidia-driver-ctr should be in the list of containers")
-
- driverEnvars := []corev1.EnvVar{
- {
- Name: "NVIDIA_VISIBLE_DEVICES",
- Value: "void",
- },
- {
- Name: "GPU_DIRECT_RDMA_ENABLED",
- Value: "true",
- },
- }
- checkEnv(t, driverEnvars, nvidiaDriverCtr.Env)
-
- nvidiaPeermemCtr, err := getContainerObj(ds.Spec.Template.Spec.Containers, "nvidia-peermem-ctr")
- require.Nil(t, err, "nvidia-peermem-ctr should be in the list of containers")
-
- peermemEnvars := []corev1.EnvVar{
- {
- Name: "NVIDIA_VISIBLE_DEVICES",
- Value: "void",
- },
- }
-
- checkEnv(t, peermemEnvars, nvidiaPeermemCtr.Env)
-
- expectedVolumes := getDriverVolumes()
- expectedVolumes = append(expectedVolumes, corev1.Volume{
- Name: "mlnx-ofed-usr-src",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/run/mellanox/drivers/usr/src",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
- },
- },
- })
-
- checkVolumes(t, expectedVolumes, ds.Spec.Template.Spec.Volumes)
-
actual, err := getYAMLString(objs)
require.Nil(t, err)
@@ -180,6 +135,8 @@ func TestDriverRDMAHostMOFED(t *testing.T) {
renderData := getMinimalDriverRenderData()
+ renderData.AdditionalConfigs = getSampleAdditionalConfigs()
+
renderData.GPUDirectRDMA = &nvidiav1alpha1.GPUDirectRDMASpec{
Enabled: utils.BoolPtr(true),
UseHostMOFED: utils.BoolPtr(true),
@@ -300,6 +257,8 @@ func TestDriverGDS(t *testing.T) {
renderData := getMinimalDriverRenderData()
+ renderData.AdditionalConfigs = getSampleAdditionalConfigs()
+
renderData.GDS = &gdsDriverSpec{
ImagePath: "nvcr.io/nvidia/cloud-native/nvidia-fs:2.16.1",
Spec: &nvidiav1alpha1.GPUDirectStorageSpec{
@@ -336,6 +295,8 @@ func TestDriverGDRCopy(t *testing.T) {
renderData := getMinimalDriverRenderData()
+ renderData.AdditionalConfigs = getSampleAdditionalConfigs()
+
renderData.GDRCopy = &gdrcopyDriverSpec{
ImagePath: "nvcr.io/nvidia/cloud-native/gdrdrv:v2.4.1",
Spec: &nvidiav1alpha1.GDRCopySpec{
@@ -373,6 +334,7 @@ func TestDriverGDRCopyOpenShift(t *testing.T) {
require.True(t, ok)
renderData := getMinimalDriverRenderData()
+ renderData.AdditionalConfigs = getSampleAdditionalConfigs()
renderData.Driver.Name = "nvidia-gpu-driver-openshift"
renderData.Driver.AppName = "nvidia-gpu-driver-openshift-79d6bd954f"
renderData.Driver.ImagePath = "nvcr.io/nvidia/driver:525.85.03-rhel8.0"
@@ -428,61 +390,7 @@ func TestDriverAdditionalConfigs(t *testing.T) {
renderData := getMinimalDriverRenderData()
- renderData.AdditionalConfigs = &additionalConfigs{
- VolumeMounts: []corev1.VolumeMount{
- {
- Name: "test-cm",
- ReadOnly: true,
- MountPath: "/opt/config/test-file",
- SubPath: "test-file",
- },
- {
- Name: "test-host-path",
- MountPath: "/opt/config/test-host-path",
- },
- {
- Name: "test-host-path-ro",
- MountPath: "/opt/config/test-host-path-ro",
- ReadOnly: true,
- },
- },
- Volumes: []corev1.Volume{
- {
- Name: "test-cm",
- VolumeSource: corev1.VolumeSource{
- ConfigMap: &corev1.ConfigMapVolumeSource{
- LocalObjectReference: corev1.LocalObjectReference{
- Name: "test-cm",
- },
- Items: []corev1.KeyToPath{
- {
- Key: "test-file",
- Path: "test-file",
- },
- },
- },
- },
- },
- {
- Name: "test-host-path",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/opt/config/test-host-path",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
- },
- },
- },
- {
- Name: "test-host-path-ro",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/opt/config/test-host-path-ro",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
- },
- },
- },
- },
- }
+ renderData.AdditionalConfigs = getSampleAdditionalConfigs()
objs, err := stateDriver.renderer.RenderObjects(
&render.TemplatingData{
@@ -668,29 +576,43 @@ func TestVGPUHostManagerDaemonset(t *testing.T) {
require.Equal(t, string(o), actual)
}
-func getDaemonSetObj(objs []*unstructured.Unstructured) (*appsv1.DaemonSet, error) {
- ds := &appsv1.DaemonSet{}
+func TestVGPUHostManagerDaemonsetOpenShift(t *testing.T) {
+ const (
+ testName = "driver-vgpu-host-manager-openshift"
+ rhcosVersion = "413.92.202304252344-0"
+ toolkitImage = "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:7fecaebc1d51b28bc3548171907e4d91823a031d7a6a694ab686999be2b4d867"
+ )
+ state, err := NewStateDriver(nil, nil, manifestDir)
+ require.Nil(t, err)
+ stateDriver, ok := state.(*stateDriver)
+ require.True(t, ok)
- for _, obj := range objs {
- if obj.GetKind() == "DaemonSet" {
- err := runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, ds)
- if err != nil {
- return nil, err
- }
- return ds, nil
- }
+ renderData := getMinimalDriverRenderData()
+ renderData.Driver.Spec.DriverType = nvidiav1alpha1.VGPUHostManager
+ renderData.Driver.Name = "nvidia-vgpu-manager-openshift"
+ renderData.Driver.AppName = "nvidia-vgpu-manager-openshift-7c6d7bd86b"
+ renderData.Driver.ImagePath = "nvcr.io/nvidia/vgpu-manager:525.85.03-rhel8.0"
+ renderData.Driver.OSVersion = "rhel8.0"
+ renderData.Openshift = &openshiftSpec{
+ ToolkitImage: toolkitImage,
+ RHCOSVersion: rhcosVersion,
}
+ renderData.Runtime.OpenshiftDriverToolkitEnabled = true
+ renderData.Runtime.OpenshiftVersion = "4.13"
- return nil, fmt.Errorf("could not find object of kind 'DaemonSet'")
-}
+ objs, err := stateDriver.renderer.RenderObjects(
+ &render.TemplatingData{
+ Data: renderData,
+ })
+ require.Nil(t, err)
-func getContainerObj(containers []corev1.Container, name string) (corev1.Container, error) {
- for _, c := range containers {
- if c.Name == name {
- return c, nil
- }
- }
- return corev1.Container{}, fmt.Errorf("failed to find container with name '%s'", name)
+ actual, err := getYAMLString(objs)
+ require.Nil(t, err)
+
+ o, err := os.ReadFile(filepath.Join(manifestResultDir, testName+".yaml"))
+ require.Nil(t, err)
+
+ require.Equal(t, string(o), actual)
}
func getMinimalDriverRenderData() *driverRenderData {
@@ -712,6 +634,7 @@ func getMinimalDriverRenderData() *driverRenderData {
Namespace: "test-operator",
KubernetesVersion: "1.28.0",
},
+ HostRoot: "",
}
}
@@ -725,150 +648,58 @@ func getDefaultContainerProbeSpec() *nvidiav1alpha1.ContainerProbeSpec {
}
}
-func checkEnv(t *testing.T, input []corev1.EnvVar, output []corev1.EnvVar) {
- inputMap := map[string]string{}
- for _, env := range input {
- inputMap[env.Name] = env.Value
- }
-
- outputMap := map[string]string{}
- for _, env := range output {
- outputMap[env.Name] = env.Value
- }
-
- for key, value := range inputMap {
- outputValue, exists := outputMap[key]
- require.True(t, exists)
- require.Equal(t, value, outputValue)
- }
-}
-
-func checkVolumes(t *testing.T, expected []corev1.Volume, actual []corev1.Volume) {
- expectedMap := volumeSliceToMap(expected)
- actualMap := volumeSliceToMap(actual)
-
- require.Equal(t, len(expectedMap), len(actualMap))
-
- for k, vol := range expectedMap {
- expectedVol, exists := actualMap[k]
- require.True(t, exists)
- require.Equal(t, expectedVol.HostPath.Path, vol.HostPath.Path,
- "Mismatch in Host Path value for volume %s", vol.Name)
- require.Equal(t, expectedVol.HostPath.Type, vol.HostPath.Type,
- "Mismatch in Host Path type for volume %s", vol.Name)
- }
-}
-
-func volumeSliceToMap(volumes []corev1.Volume) map[string]corev1.Volume {
- volumeMap := map[string]corev1.Volume{}
- for _, v := range volumes {
- volumeMap[v.Name] = v
- }
-
- return volumeMap
-}
-
-func getDriverVolumes() []corev1.Volume {
- return []corev1.Volume{
- {
- Name: "run-nvidia",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/run/nvidia",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
- },
- },
- },
- {
- Name: "var-log",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/var/log",
- },
- },
- },
- {
- Name: "dev-log",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/dev/log",
- },
- },
- },
- {
- Name: "host-os-release",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/etc/os-release",
- },
- },
- },
- {
- Name: "run-nvidia-topologyd",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/run/nvidia-topologyd",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
- },
- },
- },
- {
- Name: "run-mellanox-drivers",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/run/mellanox/drivers",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
- },
- },
- },
- {
- Name: "run-nvidia-validations",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/run/nvidia/validations",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
- },
+func getSampleAdditionalConfigs() *additionalConfigs {
+ return &additionalConfigs{
+ VolumeMounts: []corev1.VolumeMount{
+ {
+ Name: "test-cm",
+ ReadOnly: true,
+ MountPath: "/opt/config/test-file",
+ SubPath: "test-file",
},
- },
- {
- Name: "host-root",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/",
- },
+ {
+ Name: "test-host-path",
+ MountPath: "/opt/config/test-host-path",
},
- },
- {
- Name: "host-sys",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/sys",
- Type: newHostPathType(corev1.HostPathDirectory),
- },
+ {
+ Name: "test-host-path-ro",
+ MountPath: "/opt/config/test-host-path-ro",
+ ReadOnly: true,
},
},
- {
- Name: "firmware-search-path",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/sys/module/firmware_class/parameters/path",
+ Volumes: []corev1.Volume{
+ {
+ Name: "test-cm",
+ VolumeSource: corev1.VolumeSource{
+ ConfigMap: &corev1.ConfigMapVolumeSource{
+ LocalObjectReference: corev1.LocalObjectReference{
+ Name: "test-cm",
+ },
+ Items: []corev1.KeyToPath{
+ {
+ Key: "test-file",
+ Path: "test-file",
+ },
+ },
+ },
},
},
- },
- {
- Name: "sysfs-memory-online",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/sys/devices/system/memory/auto_online_blocks",
+ {
+ Name: "test-host-path",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/opt/config/test-host-path",
+ Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
+ },
},
},
- },
- {
- Name: "nv-firmware",
- VolumeSource: corev1.VolumeSource{
- HostPath: &corev1.HostPathVolumeSource{
- Path: "/run/nvidia/driver/lib/firmware",
- Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
+ {
+ Name: "test-host-path-ro",
+ VolumeSource: corev1.VolumeSource{
+ HostPath: &corev1.HostPathVolumeSource{
+ Path: "/opt/config/test-host-path-ro",
+ Type: newHostPathType(corev1.HostPathDirectoryOrCreate),
+ },
},
},
},
diff --git a/internal/state/driver_volumes.go b/internal/state/driver_volumes.go
index 66ccf56a9..b11ad512b 100644
--- a/internal/state/driver_volumes.go
+++ b/internal/state/driver_volumes.go
@@ -25,7 +25,7 @@ import (
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/log"
- "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/controllers/clusterinfo"
"github.com/NVIDIA/gpu-operator/internal/consts"
)
diff --git a/internal/state/manager.go b/internal/state/manager.go
index 89acc5799..b61eb77b5 100644
--- a/internal/state/manager.go
+++ b/internal/state/manager.go
@@ -24,7 +24,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/internal/consts"
)
diff --git a/internal/state/testdata/golden/driver-additional-configs.yaml b/internal/state/testdata/golden/driver-additional-configs.yaml
index f4eb1a5d4..2200df4f4 100644
--- a/internal/state/testdata/golden/driver-additional-configs.yaml
+++ b/internal/state/testdata/golden/driver-additional-configs.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
image: nvcr.io/nvidia/driver:525.85.03-ubuntu22.04
imagePullPolicy: IfNotPresent
lifecycle:
diff --git a/internal/state/testdata/golden/driver-full-spec.yaml b/internal/state/testdata/golden/driver-full-spec.yaml
index c3df3c898..60065333e 100644
--- a/internal/state/testdata/golden/driver-full-spec.yaml
+++ b/internal/state/testdata/golden/driver-full-spec.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -158,6 +138,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
- name: OPEN_KERNEL_MODULES_ENABLED
value: "true"
- name: FOO
diff --git a/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml b/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml
index 67e1f93fe..0324944d0 100644
--- a/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml
+++ b/internal/state/testdata/golden/driver-gdrcopy-openshift.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -208,6 +188,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
- name: OPENSHIFT_VERSION
value: "4.13"
- name: HTTP_PROXY
@@ -272,6 +260,15 @@ spec:
name: sysfs-memory-online
- mountPath: /lib/firmware
name: nv-firmware
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
- mountPath: /mnt/shared-nvidia-driver-toolkit
name: shared-nvidia-driver-toolkit
- mountPath: /etc/pki/ca-trust/extracted/pem
@@ -321,6 +318,15 @@ spec:
readOnly: true
- mountPath: /mnt/shared-nvidia-driver-toolkit
name: shared-nvidia-driver-toolkit
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
- args:
- until [ -f /mnt/shared-nvidia-driver-toolkit/dir_prepared ]; do echo Waiting
for nvidia-driver-ctr container to prepare the shared directory ...; sleep
@@ -353,6 +359,10 @@ spec:
- mountPath: /host-etc/os-release
name: host-os-release
readOnly: true
+ - mountPath: /sys/module/firmware_class/parameters/path
+ name: firmware-search-path
+ - mountPath: /lib/firmware
+ name: nv-firmware
hostPID: true
imagePullSecrets:
- name: ngc-secret
@@ -458,6 +468,20 @@ spec:
path: /run/nvidia/driver/lib/firmware
type: DirectoryOrCreate
name: nv-firmware
+ - configMap:
+ items:
+ - key: test-file
+ path: test-file
+ name: test-cm
+ name: test-cm
+ - hostPath:
+ path: /opt/config/test-host-path
+ type: DirectoryOrCreate
+ name: test-host-path
+ - hostPath:
+ path: /opt/config/test-host-path-ro
+ type: DirectoryOrCreate
+ name: test-host-path-ro
- emptyDir: {}
name: shared-nvidia-driver-toolkit
- configMap:
diff --git a/internal/state/testdata/golden/driver-gdrcopy.yaml b/internal/state/testdata/golden/driver-gdrcopy.yaml
index ae5f98395..eedde2670 100644
--- a/internal/state/testdata/golden/driver-gdrcopy.yaml
+++ b/internal/state/testdata/golden/driver-gdrcopy.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
image: nvcr.io/nvidia/driver:525.85.03-ubuntu22.04
imagePullPolicy: IfNotPresent
lifecycle:
@@ -202,6 +190,15 @@ spec:
name: sysfs-memory-online
- mountPath: /lib/firmware
name: nv-firmware
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
- args:
- until [ -d /run/nvidia/driver/usr/src ] && lsmod | grep nvidia; do echo Waiting
for nvidia-driver to be installed...; sleep 10; done; exec nvidia-gdrcopy-driver
@@ -247,6 +244,15 @@ spec:
- mountPath: /dev/log
name: dev-log
readOnly: true
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
hostPID: true
imagePullSecrets:
- name: ngc-secrets
@@ -351,6 +357,20 @@ spec:
path: /run/nvidia/driver/lib/firmware
type: DirectoryOrCreate
name: nv-firmware
+ - configMap:
+ items:
+ - key: test-file
+ path: test-file
+ name: test-cm
+ name: test-cm
+ - hostPath:
+ path: /opt/config/test-host-path
+ type: DirectoryOrCreate
+ name: test-host-path
+ - hostPath:
+ path: /opt/config/test-host-path-ro
+ type: DirectoryOrCreate
+ name: test-host-path-ro
updateStrategy:
type: OnDelete
---
diff --git a/internal/state/testdata/golden/driver-gds.yaml b/internal/state/testdata/golden/driver-gds.yaml
index 934c7d095..1ebf80c70 100644
--- a/internal/state/testdata/golden/driver-gds.yaml
+++ b/internal/state/testdata/golden/driver-gds.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
image: nvcr.io/nvidia/driver:525.85.03-ubuntu22.04
imagePullPolicy: IfNotPresent
lifecycle:
@@ -202,6 +190,15 @@ spec:
name: sysfs-memory-online
- mountPath: /lib/firmware
name: nv-firmware
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
- args:
- until [ -d /run/nvidia/driver/usr/src ] && lsmod | grep nvidia; do echo Waiting
for nvidia-driver to be installed...; sleep 10; done; exec nvidia-gds-driver
@@ -247,6 +244,15 @@ spec:
- mountPath: /dev/log
name: dev-log
readOnly: true
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
hostPID: true
imagePullSecrets:
- name: ngc-secrets
@@ -351,6 +357,20 @@ spec:
path: /run/nvidia/driver/lib/firmware
type: DirectoryOrCreate
name: nv-firmware
+ - configMap:
+ items:
+ - key: test-file
+ path: test-file
+ name: test-cm
+ name: test-cm
+ - hostPath:
+ path: /opt/config/test-host-path
+ type: DirectoryOrCreate
+ name: test-host-path
+ - hostPath:
+ path: /opt/config/test-host-path-ro
+ type: DirectoryOrCreate
+ name: test-host-path-ro
updateStrategy:
type: OnDelete
---
diff --git a/internal/state/testdata/golden/driver-minimal.yaml b/internal/state/testdata/golden/driver-minimal.yaml
index 4feebe5ef..f834e2474 100644
--- a/internal/state/testdata/golden/driver-minimal.yaml
+++ b/internal/state/testdata/golden/driver-minimal.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
image: nvcr.io/nvidia/driver:525.85.03-ubuntu22.04
imagePullPolicy: IfNotPresent
lifecycle:
diff --git a/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml b/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml
index ab29eca7d..ad63e44a9 100644
--- a/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml
+++ b/internal/state/testdata/golden/driver-openshift-drivertoolkit.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -208,6 +188,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
- name: OPENSHIFT_VERSION
value: "4.13"
- name: HTTP_PROXY
@@ -307,6 +295,10 @@ spec:
- mountPath: /host-etc/os-release
name: host-os-release
readOnly: true
+ - mountPath: /sys/module/firmware_class/parameters/path
+ name: firmware-search-path
+ - mountPath: /lib/firmware
+ name: nv-firmware
hostPID: true
initContainers:
- args:
diff --git a/internal/state/testdata/golden/driver-precompiled.yaml b/internal/state/testdata/golden/driver-precompiled.yaml
index ed9f6b6ff..2b0728407 100644
--- a/internal/state/testdata/golden/driver-precompiled.yaml
+++ b/internal/state/testdata/golden/driver-precompiled.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -154,6 +134,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
image: nvcr.io/nvidia/driver:535-5.4.0-150-generic-ubuntu22.04
imagePullPolicy: IfNotPresent
lifecycle:
diff --git a/internal/state/testdata/golden/driver-rdma-hostmofed.yaml b/internal/state/testdata/golden/driver-rdma-hostmofed.yaml
index 679d8cca3..29a712342 100644
--- a/internal/state/testdata/golden/driver-rdma-hostmofed.yaml
+++ b/internal/state/testdata/golden/driver-rdma-hostmofed.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
- name: GPU_DIRECT_RDMA_ENABLED
value: "true"
- name: USE_HOST_MOFED
@@ -206,6 +194,15 @@ spec:
name: sysfs-memory-online
- mountPath: /lib/firmware
name: nv-firmware
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
- args:
- reload_nvidia_peermem
command:
@@ -256,6 +253,15 @@ spec:
- mountPath: /run/mellanox/drivers
mountPropagation: HostToContainer
name: run-mellanox-drivers
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
hostPID: true
initContainers:
- args:
@@ -362,6 +368,20 @@ spec:
path: /run/nvidia/driver/lib/firmware
type: DirectoryOrCreate
name: nv-firmware
+ - configMap:
+ items:
+ - key: test-file
+ path: test-file
+ name: test-cm
+ name: test-cm
+ - hostPath:
+ path: /opt/config/test-host-path
+ type: DirectoryOrCreate
+ name: test-host-path
+ - hostPath:
+ path: /opt/config/test-host-path-ro
+ type: DirectoryOrCreate
+ name: test-host-path-ro
updateStrategy:
type: OnDelete
---
diff --git a/internal/state/testdata/golden/driver-rdma.yaml b/internal/state/testdata/golden/driver-rdma.yaml
index b33e779f7..b43d212c5 100644
--- a/internal/state/testdata/golden/driver-rdma.yaml
+++ b/internal/state/testdata/golden/driver-rdma.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
- name: GPU_DIRECT_RDMA_ENABLED
value: "true"
image: nvcr.io/nvidia/driver:525.85.03-ubuntu22.04
@@ -204,6 +192,15 @@ spec:
name: sysfs-memory-online
- mountPath: /lib/firmware
name: nv-firmware
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
- args:
- reload_nvidia_peermem
command:
@@ -252,6 +249,15 @@ spec:
- mountPath: /run/mellanox/drivers
mountPropagation: HostToContainer
name: run-mellanox-drivers
+ - mountPath: /opt/config/test-file
+ name: test-cm
+ readOnly: true
+ subPath: test-file
+ - mountPath: /opt/config/test-host-path
+ name: test-host-path
+ - mountPath: /opt/config/test-host-path-ro
+ name: test-host-path-ro
+ readOnly: true
hostPID: true
initContainers:
- args:
@@ -356,6 +362,20 @@ spec:
path: /run/nvidia/driver/lib/firmware
type: DirectoryOrCreate
name: nv-firmware
+ - configMap:
+ items:
+ - key: test-file
+ path: test-file
+ name: test-cm
+ name: test-cm
+ - hostPath:
+ path: /opt/config/test-host-path
+ type: DirectoryOrCreate
+ name: test-host-path
+ - hostPath:
+ path: /opt/config/test-host-path-ro
+ type: DirectoryOrCreate
+ name: test-host-path-ro
updateStrategy:
type: OnDelete
---
diff --git a/internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml b/internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml
new file mode 100644
index 000000000..f21a74423
--- /dev/null
+++ b/internal/state/testdata/golden/driver-vgpu-host-manager-openshift.yaml
@@ -0,0 +1,376 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: nvidia-vgpu-manager-openshift
+ namespace: test-operator
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+ name: nvidia-vgpu-manager-openshift
+ namespace: test-operator
+rules:
+- apiGroups:
+ - security.openshift.io
+ resourceNames:
+ - privileged
+ resources:
+ - securitycontextconstraints
+ verbs:
+ - use
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: nvidia-vgpu-manager-openshift
+rules:
+- apiGroups:
+ - config.openshift.io
+ resources:
+ - clusterversions
+ verbs:
+ - get
+ - list
+- apiGroups:
+ - ""
+ resources:
+ - nodes
+ verbs:
+ - get
+ - list
+ - patch
+ - update
+ - watch
+- apiGroups:
+ - ""
+ resources:
+ - pods
+ - pods/eviction
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+- apiGroups:
+ - apps
+ resources:
+ - daemonsets
+ verbs:
+ - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+ name: nvidia-vgpu-manager-openshift
+ namespace: test-operator
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: Role
+ name: nvidia-vgpu-manager-openshift
+subjects:
+- kind: ServiceAccount
+ name: nvidia-vgpu-manager-openshift
+ namespace: test-operator
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: nvidia-vgpu-manager-openshift
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: nvidia-vgpu-manager-openshift
+subjects:
+- kind: ServiceAccount
+ name: nvidia-vgpu-manager-openshift
+ namespace: test-operator
+---
+allowHostDirVolumePlugin: true
+allowHostIPC: false
+allowHostNetwork: false
+allowHostPID: true
+allowHostPorts: false
+allowPrivilegeEscalation: true
+allowPrivilegedContainer: true
+allowedCapabilities:
+- '*'
+allowedUnsafeSysctls:
+- '*'
+apiVersion: security.openshift.io/v1
+defaultAddCapabilities: null
+fsGroup:
+ type: RunAsAny
+groups:
+- system:cluster-admins
+- system:nodes
+- system:masters
+kind: SecurityContextConstraints
+metadata:
+ annotations:
+ kubernetes.io/description: 'privileged allows access to all privileged and host
+ features and the ability to run as any user, any group, any fsGroup, and with
+ any SELinux context. WARNING: this is the most relaxed SCC and should be used
+ only for cluster administration. Grant with caution.'
+ name: nvidia-vgpu-manager-openshift
+priority: null
+readOnlyRootFilesystem: false
+requiredDropCapabilities: null
+runAsUser:
+ type: RunAsAny
+seLinuxContext:
+ type: RunAsAny
+seccompProfiles:
+- '*'
+supplementalGroups:
+ type: RunAsAny
+users:
+- system:serviceaccount:test-operator:nvidia-vgpu-manager-openshift
+volumes:
+- '*'
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+ annotations:
+ openshift.io/scc: nvidia-vgpu-manager-openshift
+ labels:
+ app: nvidia-vgpu-manager-openshift-7c6d7bd86b
+ app.kubernetes.io/component: nvidia-vgpu-host-manager
+ nvidia.com/node.os-version: rhel8.0
+ nvidia.com/precompiled: "false"
+ openshift.driver-toolkit: "true"
+ openshift.driver-toolkit.rhcos: 413.92.202304252344-0
+ name: nvidia-vgpu-manager-openshift-7c6d7bd86b
+ namespace: test-operator
+spec:
+ selector:
+ matchLabels:
+ app: nvidia-vgpu-manager-openshift-7c6d7bd86b
+ template:
+ metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: nvidia-driver-ctr
+ labels:
+ app: nvidia-vgpu-manager-openshift-7c6d7bd86b
+ app.kubernetes.io/component: nvidia-vgpu-host-manager
+ nvidia.com/node.os-version: rhel8.0
+ nvidia.com/precompiled: "false"
+ openshift.driver-toolkit: "true"
+ spec:
+ affinity:
+ podAntiAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ - labelSelector:
+ matchExpressions:
+ - key: app.kubernetes.io/component
+ operator: In
+ values:
+ - nvidia-driver
+ - nvidia-vgpu-manager
+ topologyKey: kubernetes.io/hostname
+ containers:
+ - args:
+ - nv-ctr-run-with-dtk
+ command:
+ - ocp_dtk_entrypoint
+ env:
+ - name: NVIDIA_VISIBLE_DEVICES
+ value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
+ - name: OPENSHIFT_VERSION
+ value: "4.13"
+ image: nvcr.io/nvidia/vgpu-manager:525.85.03-rhel8.0
+ imagePullPolicy: IfNotPresent
+ name: nvidia-driver-ctr
+ securityContext:
+ privileged: true
+ seLinuxOptions:
+ level: s0
+ volumeMounts:
+ - mountPath: /run/nvidia
+ mountPropagation: Bidirectional
+ name: run-nvidia
+ - mountPath: /run/nvidia-topologyd
+ name: run-nvidia-topologyd
+ - mountPath: /var/log
+ name: var-log
+ - mountPath: /dev/log
+ name: dev-log
+ - mountPath: /host-etc/os-release
+ name: host-os-release
+ readOnly: true
+ - mountPath: /sys/fs/cgroup
+ name: cgroup
+ - mountPath: /dev/vfio
+ name: vfio
+ - mountPath: /run/mellanox/drivers/usr/src
+ mountPropagation: HostToContainer
+ name: mlnx-ofed-usr-src
+ - mountPath: /run/mellanox/drivers
+ mountPropagation: HostToContainer
+ name: run-mellanox-drivers
+ - mountPath: /sys/module/firmware_class/parameters/path
+ name: firmware-search-path
+ - mountPath: /sys/devices/system/memory/auto_online_blocks
+ name: sysfs-memory-online
+ - mountPath: /lib/firmware
+ name: nv-firmware
+ - mountPath: /mnt/shared-nvidia-driver-toolkit
+ name: shared-nvidia-driver-toolkit
+ - args:
+ - until [ -f /mnt/shared-nvidia-driver-toolkit/dir_prepared ]; do echo Waiting
+ for nvidia-driver-ctr container to prepare the shared directory ...; sleep
+ 10; done; exec /mnt/shared-nvidia-driver-toolkit/ocp_dtk_entrypoint dtk-build-driver
+ command:
+ - bash
+ - -xc
+ env:
+ - name: RHCOS_VERSION
+ value: 413.92.202304252344-0
+ - name: NVIDIA_VISIBLE_DEVICES
+ value: void
+ image: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:7fecaebc1d51b28bc3548171907e4d91823a031d7a6a694ab686999be2b4d867
+ imagePullPolicy: IfNotPresent
+ name: openshift-driver-toolkit-ctr
+ securityContext:
+ privileged: true
+ seLinuxOptions:
+ level: s0
+ volumeMounts:
+ - mountPath: /mnt/shared-nvidia-driver-toolkit
+ name: shared-nvidia-driver-toolkit
+ - mountPath: /var/log
+ name: var-log
+ - mountPath: /run/mellanox/drivers/usr/src
+ mountPropagation: HostToContainer
+ name: mlnx-ofed-usr-src
+ - mountPath: /host-etc/os-release
+ name: host-os-release
+ readOnly: true
+ - mountPath: /sys/module/firmware_class/parameters/path
+ name: firmware-search-path
+ - mountPath: /lib/firmware
+ name: nv-firmware
+ hostPID: true
+ initContainers:
+ - args:
+ - uninstall_driver
+ command:
+ - driver-manager
+ env:
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NVIDIA_VISIBLE_DEVICES
+ value: void
+ - name: ENABLE_GPU_POD_EVICTION
+ value: "true"
+ - name: ENABLE_AUTO_DRAIN
+ value: "false"
+ - name: DRAIN_USE_FORCE
+ value: "false"
+ - name: DRAIN_POD_SELECTOR_LABEL
+ value: ""
+ - name: DRAIN_TIMEOUT_SECONDS
+ value: 0s
+ - name: DRAIN_DELETE_EMPTYDIR_DATA
+ value: "false"
+ - name: OPERATOR_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ image: nvcr.io/nvidia/cloud-native/k8s-driver-manager:devel
+ imagePullPolicy: IfNotPresent
+ name: k8s-driver-manager
+ securityContext:
+ privileged: true
+ volumeMounts:
+ - mountPath: /run/nvidia
+ mountPropagation: Bidirectional
+ name: run-nvidia
+ - mountPath: /host
+ mountPropagation: HostToContainer
+ name: host-root
+ readOnly: true
+ - mountPath: /sys
+ name: host-sys
+ - mountPath: /run/mellanox/drivers
+ mountPropagation: HostToContainer
+ name: run-mellanox-drivers
+ nodeSelector:
+ feature.node.kubernetes.io/system-os_release.OSTREE_VERSION: 413.92.202304252344-0
+ nvidia.com/gpu.deploy.vgpu-manager: "true"
+ priorityClassName: system-node-critical
+ serviceAccountName: nvidia-vgpu-manager-openshift
+ tolerations:
+ - effect: NoSchedule
+ key: nvidia.com/gpu
+ operator: Exists
+ volumes:
+ - hostPath:
+ path: /run/nvidia
+ type: DirectoryOrCreate
+ name: run-nvidia
+ - hostPath:
+ path: /var/log
+ name: var-log
+ - hostPath:
+ path: /dev/log
+ name: dev-log
+ - hostPath:
+ path: /etc/os-release
+ name: host-os-release
+ - hostPath:
+ path: /sys/fs/cgroup
+ name: cgroup
+ - hostPath:
+ path: /dev/vfio
+ name: vfio
+ - hostPath:
+ path: /run/nvidia-topologyd
+ type: DirectoryOrCreate
+ name: run-nvidia-topologyd
+ - hostPath:
+ path: /run/mellanox/drivers/usr/src
+ type: DirectoryOrCreate
+ name: mlnx-ofed-usr-src
+ - hostPath:
+ path: /run/mellanox/drivers
+ type: DirectoryOrCreate
+ name: run-mellanox-drivers
+ - hostPath:
+ path: /run/nvidia/validations
+ type: DirectoryOrCreate
+ name: run-nvidia-validations
+ - hostPath:
+ path: /
+ name: host-root
+ - hostPath:
+ path: /sys
+ type: Directory
+ name: host-sys
+ - hostPath:
+ path: /sys/module/firmware_class/parameters/path
+ name: firmware-search-path
+ - hostPath:
+ path: /sys/devices/system/memory/auto_online_blocks
+ name: sysfs-memory-online
+ - hostPath:
+ path: /run/nvidia/driver/lib/firmware
+ type: DirectoryOrCreate
+ name: nv-firmware
+ - emptyDir: {}
+ name: shared-nvidia-driver-toolkit
+ updateStrategy:
+ type: OnDelete
+---
diff --git a/internal/state/testdata/golden/driver-vgpu-host-manager.yaml b/internal/state/testdata/golden/driver-vgpu-host-manager.yaml
index 10498a256..c2145ee41 100644
--- a/internal/state/testdata/golden/driver-vgpu-host-manager.yaml
+++ b/internal/state/testdata/golden/driver-vgpu-host-manager.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
image: nvcr.io/nvidia/vgpu-manager:525.85.03-ubuntu22.04
imagePullPolicy: IfNotPresent
name: nvidia-driver-ctr
diff --git a/internal/state/testdata/golden/driver-vgpu-licensing.yaml b/internal/state/testdata/golden/driver-vgpu-licensing.yaml
index 90d29b7be..96e505895 100644
--- a/internal/state/testdata/golden/driver-vgpu-licensing.yaml
+++ b/internal/state/testdata/golden/driver-vgpu-licensing.yaml
@@ -18,23 +18,6 @@ rules:
- securitycontextconstraints
verbs:
- use
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -73,12 +56,9 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
@@ -152,6 +132,14 @@ spec:
env:
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
image: nvcr.io/nvidia/driver:525.85.03-ubuntu22.04
imagePullPolicy: IfNotPresent
lifecycle:
diff --git a/internal/state/types.go b/internal/state/types.go
index d4af979f5..000eb10f2 100644
--- a/internal/state/types.go
+++ b/internal/state/types.go
@@ -20,7 +20,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/source"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
)
type ctrlManager ctrl.Manager
diff --git a/internal/utils/utils.go b/internal/utils/utils.go
index ae51d1468..87cc70bce 100644
--- a/internal/utils/utils.go
+++ b/internal/utils/utils.go
@@ -24,7 +24,6 @@ import (
"strings"
"github.com/davecgh/go-spew/spew"
- "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/rand"
)
@@ -64,7 +63,7 @@ func BoolPtr(v bool) *bool {
}
// GetObjectHash invokes Sum32 Hash function to return hash value of an unstructured Object
-func GetObjectHash(obj *unstructured.Unstructured) string {
+func GetObjectHash(obj interface{}) string {
hasher := fnv.New32a()
printer := spew.ConfigState{
Indent: " ",
diff --git a/internal/validator/validator.go b/internal/validator/validator.go
index ca3a19ded..e43c8127c 100644
--- a/internal/validator/validator.go
+++ b/internal/validator/validator.go
@@ -24,7 +24,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"sigs.k8s.io/controller-runtime/pkg/client"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
)
// Validator provides interface to validate NVIDIADriver fields
@@ -51,21 +51,21 @@ func (nsv *nodeSelectorValidator) Validate(ctx context.Context, cr *nvidiav1alph
return err
}
- names := []string{}
- for _, driver := range drivers.Items {
- driver := driver
- nodeList, err := nsv.getNVIDIADriverSelectedNodes(ctx, &driver)
+ names := map[string]struct{}{}
+ for di := range drivers.Items {
+ nodeList, err := nsv.getNVIDIADriverSelectedNodes(ctx, &drivers.Items[di])
if err != nil {
return err
}
- for _, n := range nodeList.Items {
- names = append(names, n.Name)
+ for ni := range nodeList.Items {
+ if _, ok := names[nodeList.Items[ni].Name]; ok {
+ return fmt.Errorf("conflicting NVIDIADriver NodeSelectors found for resource: %s, nodeSelector: %q", cr.Name, cr.Spec.NodeSelector)
+ }
+
+ names[nodeList.Items[ni].Name] = struct{}{}
}
- }
- if containsDuplicates(names) {
- return fmt.Errorf("conflicting NVIDIADriver NodeSelectors found for resource: %s, nodeSelector: %q", cr.Name, cr.Spec.NodeSelector)
}
return nil
@@ -88,14 +88,3 @@ func (nsv *nodeSelectorValidator) getNVIDIADriverSelectedNodes(ctx context.Conte
return nodeList, err
}
-
-func containsDuplicates(arr []string) bool {
- visited := make(map[string]bool, 0)
- for _, e := range arr {
- if _, exists := visited[e]; exists {
- return true
- }
- visited[e] = true
- }
- return false
-}
diff --git a/internal/validator/validator_test.go b/internal/validator/validator_test.go
index 5d926bf3c..8171f6cf5 100644
--- a/internal/validator/validator_test.go
+++ b/internal/validator/validator_test.go
@@ -28,7 +28,7 @@ import (
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
- nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/v1alpha1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
)
const (
@@ -51,7 +51,7 @@ func makeTestDriver(opts ...driverOptions) *nvidiav1alpha1.NVIDIADriver {
c.Kind = reflect.TypeOf(nvidiav1alpha1.NVIDIADriver{}).Name()
- gvk := nvidiav1alpha1.GroupVersion.WithKind(c.Kind)
+ gvk := nvidiav1alpha1.SchemeGroupVersion.WithKind(c.Kind)
c.APIVersion = gvk.GroupVersion().String()
@@ -128,17 +128,3 @@ func TestCheckNodeSelector(t *testing.T) {
}
}
}
-
-func TestContainsDuplicates(t *testing.T) {
- tests := []struct {
- arr []string
- shouldReturnTrue bool
- }{
- {arr: []string{"foo", "bar"}, shouldReturnTrue: false},
- {arr: []string{"foo", "foo"}, shouldReturnTrue: true},
- }
-
- for _, tc := range tests {
- assert.Equal(t, tc.shouldReturnTrue, containsDuplicates(tc.arr))
- }
-}
diff --git a/manifests/state-driver/0200_role.yaml b/manifests/state-driver/0200_role.yaml
index ec2d6434f..e3a8a3287 100644
--- a/manifests/state-driver/0200_role.yaml
+++ b/manifests/state-driver/0200_role.yaml
@@ -12,20 +12,3 @@ rules:
- use
resourceNames:
- privileged
-- apiGroups:
- - ""
- resources:
- - pods
- - pods/eviction
- - nodes
- verbs:
- - '*'
-- apiGroups:
- - apps
- resources:
- - deployments
- - daemonsets
- - replicasets
- - statefulsets
- verbs:
- - '*'
diff --git a/manifests/state-driver/0210_clusterrole.yaml b/manifests/state-driver/0210_clusterrole.yaml
index 47c56af59..c0db68d62 100644
--- a/manifests/state-driver/0210_clusterrole.yaml
+++ b/manifests/state-driver/0210_clusterrole.yaml
@@ -35,9 +35,6 @@ rules:
- apiGroups:
- apps
resources:
- - deployments
- daemonsets
- - replicasets
- - statefulsets
verbs:
- - '*'
+ - get
diff --git a/manifests/state-driver/0500_daemonset.yaml b/manifests/state-driver/0500_daemonset.yaml
index d553681d9..26dfecf15 100644
--- a/manifests/state-driver/0500_daemonset.yaml
+++ b/manifests/state-driver/0500_daemonset.yaml
@@ -205,6 +205,14 @@ spec:
# always use runc for driver containers
- name: NVIDIA_VISIBLE_DEVICES
value: void
+ - name: NODE_NAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ - name: NODE_IP
+ valueFrom:
+ fieldRef:
+ fieldPath: status.hostIP
{{- if deref .Driver.Spec.UseOpenKernelModules }}
- name: OPEN_KERNEL_MODULES_ENABLED
value: "true"
@@ -427,8 +435,12 @@ spec:
{{- range .AdditionalConfigs.VolumeMounts }}
- name: {{ .Name }}
mountPath: {{ .MountPath }}
+ {{- if .SubPath }}
subPath: {{ .SubPath }}
+ {{- end }}
+ {{- if .ReadOnly }}
readOnly: {{ .ReadOnly }}
+ {{- end}}
{{- end }}
{{- end }}
startupProbe:
@@ -490,10 +502,14 @@ spec:
{{- end}}
{{- if and .AdditionalConfigs .AdditionalConfigs.VolumeMounts }}
{{- range .AdditionalConfigs.VolumeMounts }}
- - name: {{ .Name }}
- mountPath: {{ .MountPath }}
- subPath: {{ .SubPath }}
- readOnly: {{ .ReadOnly }}
+ - name: {{ .Name }}
+ mountPath: {{ .MountPath }}
+ {{- if .SubPath }}
+ subPath: {{ .SubPath }}
+ {{- end }}
+ {{- if .ReadOnly }}
+ readOnly: {{ .ReadOnly }}
+ {{- end }}
{{- end }}
{{- end }}
startupProbe:
@@ -564,6 +580,10 @@ spec:
- name: host-os-release
mountPath: /host-etc/os-release
readOnly: true
+ - name: firmware-search-path
+ mountPath: /sys/module/firmware_class/parameters/path
+ - name: nv-firmware
+ mountPath: /lib/firmware
{{- end }}
volumes:
- name: run-nvidia
@@ -609,7 +629,7 @@ spec:
type: DirectoryOrCreate
- name: host-root
hostPath:
- path: "/"
+ path: {{ .HostRoot | default "/" }}
- name: host-sys
hostPath:
path: /sys
diff --git a/multi-arch.mk b/multi-arch.mk
index 4f199ed5d..d0f4f06ac 100644
--- a/multi-arch.mk
+++ b/multi-arch.mk
@@ -13,7 +13,8 @@
# limitations under the License.
PUSH_ON_BUILD ?= false
-DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
+ATTACH_ATTESTATIONS ?= false
+DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD) --provenance=$(ATTACH_ATTESTATIONS) --sbom=$(ATTACH_ATTESTATIONS)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
REGCTL ?= regctl
@@ -21,8 +22,3 @@ $(PUSH_TARGETS): push-%:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE)
-
-push-short:
- $(REGCTL) \
- image copy \
- $(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
\ No newline at end of file
diff --git a/native-only.mk b/native-only.mk
index d541b8186..19dd4bd04 100644
--- a/native-only.mk
+++ b/native-only.mk
@@ -18,8 +18,3 @@ $(PUSH_TARGETS): OUT_IMAGE ?= $(IMAGE_NAME):$(IMAGE_TAG)
$(PUSH_TARGETS): push-%:
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(OUT_IMAGE)"
$(DOCKER) push "$(OUT_IMAGE)"
-
-push-short: OUT_IMAGE ?= $(IMAGE_NAME):$(VERSION)
-push-short:
- $(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(OUT_IMAGE)"
- $(DOCKER) push "$(OUT_IMAGE)"
diff --git a/tests/e2e/framework/framework.go b/tests/e2e/framework/framework.go
index 2e19eea32..969e674bb 100644
--- a/tests/e2e/framework/framework.go
+++ b/tests/e2e/framework/framework.go
@@ -190,7 +190,7 @@ func (f *Framework) AfterEach(ctx context.Context) {
for namespaceKey, namespaceErr := range nsDeletionErrors {
messages = append(messages, fmt.Sprintf("Couldn't delete ns: %q: %s (%#v)", namespaceKey, namespaceErr, namespaceErr))
}
- e2elog.Failf(strings.Join(messages, ","))
+ ginkgo.Fail(strings.Join(messages, ","))
}
}()
diff --git a/tests/holodeck.yaml b/tests/holodeck.yaml
new file mode 100644
index 000000000..47efd2dd7
--- /dev/null
+++ b/tests/holodeck.yaml
@@ -0,0 +1,34 @@
+apiVersion: holodeck.nvidia.com/v1alpha1
+kind: Environment
+metadata:
+ name: HOLODECK_NAME
+ description: "end-to-end test infrastructure"
+spec:
+ provider: aws
+ auth:
+ keyName: cnt-ci
+ privateKey: HOLODECK_PRIVATE_KEY
+ instance:
+ type: g4dn.xlarge
+ region: us-west-1
+ ingressIpRanges:
+ - 18.190.12.32/32
+ - 3.143.46.93/32
+ - 52.15.119.136/32
+ - 35.155.108.162/32
+ - 35.162.190.51/32
+ - 54.201.61.24/32
+ - 52.24.205.48/32
+ - 44.235.4.62/32
+ - 44.230.241.223/32
+ image:
+ architecture: amd64
+ imageId: ami-0ce2cb35386fc22e9
+ containerRuntime:
+ install: true
+ name: containerd
+ kubernetes:
+ install: true
+ installer: kubeadm
+ version: v1.31.0
+ crictlVersion: v1.31.1
diff --git a/tests/scripts/.definitions.sh b/tests/scripts/.definitions.sh
index 5655c8a3d..5843232dd 100644
--- a/tests/scripts/.definitions.sh
+++ b/tests/scripts/.definitions.sh
@@ -17,7 +17,7 @@ TERRAFORM="terraform -chdir=${TERRAFORM_DIR}"
: ${LOG_DIR:="/tmp/logs"}
: ${PROJECT:="$(basename "${PROJECT_DIR}")"}
: ${TEST_NAMESPACE:="test-operator"}
-: ${TARGET_DRIVER_VERSION:="535.104.05"}
+: ${TARGET_DRIVER_VERSION:="565.57.01"}
: ${OPERATOR_IMAGE:="nvcr.io/nvidia/gpu-operator"}
diff --git a/tests/scripts/.rsync-excludes b/tests/scripts/.rsync-excludes
index 962cde65f..3a945297f 100644
--- a/tests/scripts/.rsync-excludes
+++ b/tests/scripts/.rsync-excludes
@@ -2,3 +2,4 @@ vendor/
.git
aws-kube-ci
cnt-ci
+key.pem
diff --git a/tests/scripts/update-clusterpolicy.sh b/tests/scripts/update-clusterpolicy.sh
index 0b8b14cc9..220cb363d 100755
--- a/tests/scripts/update-clusterpolicy.sh
+++ b/tests/scripts/update-clusterpolicy.sh
@@ -112,6 +112,13 @@ test_enable_dcgm() {
# Verify that standalone nvidia-dcgm and exporter pods are running successfully after update
check_pod_ready "nvidia-dcgm"
check_pod_ready "nvidia-dcgm-exporter"
+
+ # Test that nvidia-dcgm service is created with internalTrafficPolicy set to "Local"
+ trafficPolicy=$(kubectl get service nvidia-dcgm -n $TEST_NAMESPACE -o json | jq -r '.spec.internalTrafficPolicy')
+ if [ "$trafficPolicy" != "Local" ]; then
+ echo "service nvidia-dcgm is missing or internal traffic policy is not set to local"
+ exit 1
+ fi
}
test_gpu_sharing() {
diff --git a/tools/go.mod b/tools/go.mod
new file mode 100644
index 000000000..bf6e6ece1
--- /dev/null
+++ b/tools/go.mod
@@ -0,0 +1,68 @@
+module github.com/NVIDIA/gpu-operator/tools
+
+go 1.22.7
+
+toolchain go1.23.2
+
+require (
+ k8s.io/code-generator v0.31.2
+ sigs.k8s.io/controller-tools v0.16.5
+ sigs.k8s.io/kustomize/kustomize/v5 v5.5.0
+)
+
+require (
+ github.com/blang/semver/v4 v4.0.0 // indirect
+ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+ github.com/fatih/color v1.18.0 // indirect
+ github.com/fxamacker/cbor/v2 v2.7.0 // indirect
+ github.com/go-errors/errors v1.4.2 // indirect
+ github.com/go-logr/logr v1.4.2 // indirect
+ github.com/go-openapi/jsonpointer v0.19.6 // indirect
+ github.com/go-openapi/jsonreference v0.20.2 // indirect
+ github.com/go-openapi/swag v0.22.4 // indirect
+ github.com/gobuffalo/flect v1.0.3 // indirect
+ github.com/gogo/protobuf v1.3.2 // indirect
+ github.com/golang/protobuf v1.5.4 // indirect
+ github.com/google/gnostic-models v0.6.8 // indirect
+ github.com/google/gofuzz v1.2.0 // indirect
+ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
+ github.com/inconshreveable/mousetrap v1.1.0 // indirect
+ github.com/josharian/intern v1.0.0 // indirect
+ github.com/json-iterator/go v1.1.12 // indirect
+ github.com/mailru/easyjson v0.7.7 // indirect
+ github.com/mattn/go-colorable v0.1.13 // indirect
+ github.com/mattn/go-isatty v0.0.20 // indirect
+ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+ github.com/modern-go/reflect2 v1.0.2 // indirect
+ github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
+ github.com/pkg/errors v0.9.1 // indirect
+ github.com/sergi/go-diff v1.2.0 // indirect
+ github.com/spf13/cobra v1.8.1 // indirect
+ github.com/spf13/pflag v1.0.5 // indirect
+ github.com/x448/float16 v0.8.4 // indirect
+ github.com/xlab/treeprint v1.2.0 // indirect
+ golang.org/x/mod v0.21.0 // indirect
+ golang.org/x/net v0.30.0 // indirect
+ golang.org/x/sync v0.8.0 // indirect
+ golang.org/x/sys v0.26.0 // indirect
+ golang.org/x/text v0.19.0 // indirect
+ golang.org/x/tools v0.26.0 // indirect
+ google.golang.org/protobuf v1.34.2 // indirect
+ gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
+ gopkg.in/inf.v0 v0.9.1 // indirect
+ gopkg.in/yaml.v2 v2.4.0 // indirect
+ gopkg.in/yaml.v3 v3.0.1 // indirect
+ k8s.io/api v0.31.2 // indirect
+ k8s.io/apiextensions-apiserver v0.31.2 // indirect
+ k8s.io/apimachinery v0.31.2 // indirect
+ k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 // indirect
+ k8s.io/klog/v2 v2.130.1 // indirect
+ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
+ k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
+ sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
+ sigs.k8s.io/kustomize/api v0.18.0 // indirect
+ sigs.k8s.io/kustomize/cmd/config v0.15.0 // indirect
+ sigs.k8s.io/kustomize/kyaml v0.18.1 // indirect
+ sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
+ sigs.k8s.io/yaml v1.4.0 // indirect
+)
diff --git a/tools/go.sum b/tools/go.sum
new file mode 100644
index 000000000..6e947638e
--- /dev/null
+++ b/tools/go.sum
@@ -0,0 +1,203 @@
+github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
+github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM=
+github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU=
+github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
+github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
+github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
+github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA=
+github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
+github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
+github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
+github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
+github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
+github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
+github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU=
+github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
+github.com/gobuffalo/flect v1.0.3 h1:xeWBM2nui+qnVvNM4S3foBhCAL2XgPU+a7FdpelbTq4=
+github.com/gobuffalo/flect v1.0.3/go.mod h1:A5msMlrHtLqh9umBSnvabjsMrCcCpAyzglnDvkbYKHs=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
+github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
+github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0=
+github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4=
+github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
+github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
+github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
+github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
+github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8=
+github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
+github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
+github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
+github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
+github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
+github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
+github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
+golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
+golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
+golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
+golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
+golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
+golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
+gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
+gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
+gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
+gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
+gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+k8s.io/api v0.31.2 h1:3wLBbL5Uom/8Zy98GRPXpJ254nEFpl+hwndmk9RwmL0=
+k8s.io/api v0.31.2/go.mod h1:bWmGvrGPssSK1ljmLzd3pwCQ9MgoTsRCuK35u6SygUk=
+k8s.io/apiextensions-apiserver v0.31.2 h1:W8EwUb8+WXBLu56ser5IudT2cOho0gAKeTOnywBLxd0=
+k8s.io/apiextensions-apiserver v0.31.2/go.mod h1:i+Geh+nGCJEGiCGR3MlBDkS7koHIIKWVfWeRFiOsUcM=
+k8s.io/apimachinery v0.31.2 h1:i4vUt2hPK56W6mlT7Ry+AO8eEsyxMD1U44NR22CLTYw=
+k8s.io/apimachinery v0.31.2/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo=
+k8s.io/code-generator v0.31.2 h1:xLWxG0HEpMSHfcM//3u3Ro2Hmc6AyyLINQS//Z2GEOI=
+k8s.io/code-generator v0.31.2/go.mod h1:eEQHXgBU/m7LDaToDoiz3t97dUUVyOblQdwOr8rivqc=
+k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo=
+k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8=
+k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
+k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=
+k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
+k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+sigs.k8s.io/controller-tools v0.16.5 h1:5k9FNRqziBPwqr17AMEPPV/En39ZBplLAdOwwQHruP4=
+sigs.k8s.io/controller-tools v0.16.5/go.mod h1:8vztuRVzs8IuuJqKqbXCSlXcw+lkAv/M2sTpg55qjMY=
+sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
+sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
+sigs.k8s.io/kustomize/api v0.18.0 h1:hTzp67k+3NEVInwz5BHyzc9rGxIauoXferXyjv5lWPo=
+sigs.k8s.io/kustomize/api v0.18.0/go.mod h1:f8isXnX+8b+SGLHQ6yO4JG1rdkZlvhaCf/uZbLVMb0U=
+sigs.k8s.io/kustomize/cmd/config v0.15.0 h1:WkdY8V2+8J+W00YbImXa2ke9oegfrHH79e+kywW7EdU=
+sigs.k8s.io/kustomize/cmd/config v0.15.0/go.mod h1:Jq57b0nPaoYUlOqg//0JtAh6iibboqMcfbtCYoWPM00=
+sigs.k8s.io/kustomize/kustomize/v5 v5.5.0 h1:o1mtt6vpxsxDYaZKrw3BnEtc+pAjLz7UffnIvHNbvW0=
+sigs.k8s.io/kustomize/kustomize/v5 v5.5.0/go.mod h1:AeFCmgCrXzmvjWWaeZCyBp6XzG1Y0w1svYus8GhJEOE=
+sigs.k8s.io/kustomize/kyaml v0.18.1 h1:WvBo56Wzw3fjS+7vBjN6TeivvpbW9GmRaWZ9CIVmt4E=
+sigs.k8s.io/kustomize/kyaml v0.18.1/go.mod h1:C3L2BFVU1jgcddNBE1TxuVLgS46TjObMwW5FT9FcjYo=
+sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
+sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/tools/tools.go b/tools/tools.go
new file mode 100644
index 000000000..f16e2e217
--- /dev/null
+++ b/tools/tools.go
@@ -0,0 +1,26 @@
+//go:build tools
+// +build tools
+
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package tools
+
+import (
+ _ "k8s.io/code-generator/cmd/client-gen"
+ _ "sigs.k8s.io/controller-tools/cmd/controller-gen"
+ _ "sigs.k8s.io/kustomize/kustomize/v5"
+)
diff --git a/validator/Dockerfile b/validator/Dockerfile
index f35c96c82..877928861 100644
--- a/validator/Dockerfile
+++ b/validator/Dockerfile
@@ -12,16 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG CUDA_IMAGE=nvcr.io/nvidia/cuda
-ARG CUDA_VERSION=undefined
ARG CUDA_SAMPLE_IMAGE=undefined
-
-ARG BASE_DIST=ubi8
ARG GOLANG_VERSION=x.x.x
-FROM ${CUDA_IMAGE}:${CUDA_VERSION}-base-${BASE_DIST} as build
+FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9 as build
-RUN yum install -y wget make git gcc
+RUN dnf install -y wget make git gcc
ARG GOLANG_VERSION=0.0.0
RUN set -eux; \
@@ -30,7 +26,7 @@ RUN set -eux; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
- aarch64) ARCH='arm64' ;; \
+ aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
@@ -55,7 +51,7 @@ FROM ${CUDA_SAMPLE_IMAGE} AS sample-builder
RUN mkdir /artifacts
RUN cp /cuda-samples/vectorAdd /artifacts/vectorAdd
-FROM ${CUDA_IMAGE}:${CUDA_VERSION}-base-${BASE_DIST}
+FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9
# Remove CUDA libs(compat etc) in favor of libs installed by the NVIDIA driver
RUN dnf remove -y cuda-*
@@ -91,7 +87,8 @@ LABEL vsc-ref=${GIT_COMMIT}
# Install / upgrade packages here that are required to resolve CVEs
+# dnf keeps its package cache under /var/cache/dnf, so that path is cleaned too
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
- yum update -y ${CVE_UPDATES} && \
- rm -rf /var/cache/yum/*; \
+ dnf update -y ${CVE_UPDATES} && \
+ rm -rf /var/cache/dnf/* /var/cache/yum/*; \
fi
diff --git a/validator/Makefile b/validator/Makefile
index 381bff12b..4d7474ed4 100644
--- a/validator/Makefile
+++ b/validator/Makefile
@@ -22,7 +22,6 @@ endif
##### Global variables #####
include $(CURDIR)/versions.mk
-CUDA_IMAGE ?= nvcr.io/nvidia/cuda
BUILDER_IMAGE ?= golang:$(GOLANG_VERSION)
ifeq ($(IMAGE_NAME),)
@@ -32,18 +31,16 @@ endif
BUILD_DIR ?= ../
-IMAGE_VERSION := $(VERSION)
-IMAGE_TAG ?= $(IMAGE_VERSION)-$(DIST)
+IMAGE_TAG ?= $(VERSION)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
-OUT_IMAGE_VERSION ?= $(VERSION)
-OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
+OUT_IMAGE_TAG = $(VERSION)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
-DISTRIBUTIONS := ubi8 ubuntu20.04
-DEFAULT_PUSH_TARGET := ubi8
+DISTRIBUTIONS := ubi9
+DEFAULT_PUSH_TARGET := ubi9
PUSH_TARGETS := $(patsubst %,push-%, $(DISTRIBUTIONS))
BUILD_TARGETS := $(patsubst %,build-%, $(DISTRIBUTIONS))
@@ -65,17 +62,6 @@ validator:
CGO_ENABLED=0 GOOS=$(GOOS) \
go build -ldflags "-s -w -X $(VERSION_PKG).gitCommit=$(GIT_COMMIT) -X $(VERSION_PKG).version=$(VERSION)" -o validator .
-# For the default push target we also push a short tag equal to the version.
-# We skip this for the development release
-DEVEL_RELEASE_IMAGE_VERSION ?= devel
-ifneq ($(strip $(VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
-push-$(DEFAULT_PUSH_TARGET): push-short
-endif
-
-push-%: DIST = $(*)
-push-short: DIST = $(DEFAULT_PUSH_TARGET)
-
-build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/Dockerfile
$(DISTRIBUTIONS): %: build-%:
@@ -85,13 +71,10 @@ $(BUILD_TARGETS): build-%:
$(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
--tag $(IMAGE) \
- --build-arg BASE_DIST="$(DIST)" \
- --build-arg CUDA_IMAGE="$(CUDA_IMAGE)" \
- --build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg VERSION="$(VERSION)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--build-arg BUILDER_IMAGE="$(BUILDER_IMAGE)" \
- --build-arg CUDA_SAMPLE_IMAGE=nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda$(CUDA_SAMPLES_VERSION)-$(*) \
+ --build-arg CUDA_SAMPLE_IMAGE=nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda$(CUDA_SAMPLES_VERSION)-ubi8 \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
--file $(DOCKERFILE) $(BUILD_DIR)
@@ -101,4 +84,3 @@ $(BUILD_TARGETS): build-%:
docker-image: OUT_IMAGE ?= $(IMAGE_NAME):$(IMAGE_TAG)
docker-image: $(DEFAULT_PUSH_TARGET)
$(DOCKER) tag $(IMAGE_NAME):$(IMAGE_TAG) $(OUT_IMAGE)
-
diff --git a/validator/driver.go b/validator/driver.go
new file mode 100644
index 000000000..6c25fada3
--- /dev/null
+++ b/validator/driver.go
@@ -0,0 +1,73 @@
+/*
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/
+
+package main
+
+// driverInfo contains information about an NVIDIA driver installation. Host
+// paths are absolute paths on the host; the *CtrPath fields are the matching
+// paths in the management containers that require them, like the Toolkit
+// Container, the Device Plugin, and MIG Manager.
+type driverInfo struct {
+ // isHostDriver indicates whether the driver is installed directly on the
+ // host, at the host's root filesystem.
+ isHostDriver bool
+ // hostRoot represents the host's root filesystem (typically '/').
+ hostRoot string
+ // driverRoot is the absolute path of the driver install on the host.
+ driverRoot string
+ // driverRootCtrPath is the path of the driver install in the containers;
+ // getDriverInfo sets it to "/host" or "/driver-root".
+ driverRootCtrPath string
+ // devRoot is the absolute path on the host holding the NVIDIA device nodes.
+ devRoot string
+ // devRootCtrPath is the path of the NVIDIA device nodes in the containers;
+ // getDriverInfo sets it to "/host" or "/driver-root".
+ devRootCtrPath string
+}
+
+// getDriverInfo builds the driverInfo for a host-installed or containerized
+// driver. A host driver uses hostRoot (seen at "/host") for every path;
+// otherwise the driver root is driverInstallDir (seen at "/driver-root").
+func getDriverInfo(isHostDriver bool, hostRoot string, driverInstallDir string, driverInstallDirCtrPath string) driverInfo {
+	const (
+		hostCtrPath       = "/host"
+		driverRootCtrPath = "/driver-root"
+	)
+
+	info := driverInfo{
+		isHostDriver: isHostDriver,
+		hostRoot:     hostRoot,
+	}
+
+	if isHostDriver {
+		info.driverRoot, info.driverRootCtrPath = hostRoot, hostCtrPath
+		info.devRoot, info.devRootCtrPath = hostRoot, hostCtrPath
+		return info
+	}
+
+	info.driverRoot, info.driverRootCtrPath = driverInstallDir, driverRootCtrPath
+
+	// The device nodes of a containerized driver are either under hostRoot
+	// or under driverInstallDir. getDevRoot yields "/" when the mounted
+	// install dir is not a dev root; the host's dev root is used in that case.
+	if root(driverInstallDirCtrPath).getDevRoot() == "/" {
+		info.devRoot, info.devRootCtrPath = hostRoot, hostCtrPath
+	} else {
+		info.devRoot, info.devRootCtrPath = driverInstallDir, driverRootCtrPath
+	}
+
+	return info
+}
diff --git a/validator/find.go b/validator/find.go
new file mode 100644
index 000000000..0d0d39697
--- /dev/null
+++ b/validator/find.go
@@ -0,0 +1,109 @@
+/*
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+*/
+
+package main
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+)
+
+type root string // base directory that the lookups defined on it are resolved against
+
+// getDriverLibraryPath locates `libnvidia-ml.so.1` beneath the driver root and
+// returns its path as reported by findFile. The folder holding this library is
+// also expected to be the location of the other driver files.
+func (r root) getDriverLibraryPath() (string, error) {
+	const library = "libnvidia-ml.so.1"
+
+	// findFile probes the root itself first, then each of these folders in
+	// order. On failure it already returns an empty path alongside the error,
+	// so its result is passed through unchanged.
+	return r.findFile(
+		library,
+		"/usr/lib64",
+		"/usr/lib/x86_64-linux-gnu",
+		"/usr/lib/aarch64-linux-gnu",
+		"/lib64",
+		"/lib/x86_64-linux-gnu",
+		"/lib/aarch64-linux-gnu",
+	)
+}
+
+// getNvidiaSMIPath locates the `nvidia-smi` executable beneath the driver root
+// and returns its path as reported by findFile.
+func (r root) getNvidiaSMIPath() (string, error) {
+	const binary = "nvidia-smi"
+
+	// The root itself is probed first, followed by these folders in order.
+	// findFile's result (an empty path on error) is passed through unchanged,
+	// which matches returning "" together with the error explicitly.
+	return r.findFile(
+		binary,
+		"/usr/bin",
+		"/usr/sbin",
+		"/bin",
+		"/sbin",
+	)
+}
+
+// isDevRoot reports whether the root is a dev root, i.e. whether it contains
+// a /dev folder.
+// A failing stat (for instance because dev does not exist) counts as "no".
+func (r root) isDevRoot() bool {
+	devDir := filepath.Join(string(r), "dev")
+	info, err := os.Stat(devDir)
+	// Only a successful stat of a directory qualifies.
+	return err == nil && info.IsDir()
+}
+
+// getDevRoot returns the dev root associated with the root: the root itself
+// when it is a dev root, and "/" otherwise.
+func (r root) getDevRoot() string {
+	if !r.isDevRoot() {
+		return "/"
+	}
+	return string(r)
+}
+
+// findFile looks for the named file under the root and returns the first hit.
+// The root itself is tried first, then each folder in searchIn, in order.
+// Hits are passed through resolveLink, so a symlink yields its final target.
+func (r root) findFile(name string, searchIn ...string) (string, error) {
+	folders := make([]string, 0, len(searchIn)+1)
+	folders = append(folders, "/")
+	folders = append(folders, searchIn...)
+
+	for _, folder := range folders {
+		// A folder where the file cannot be resolved is simply skipped.
+		if found, err := resolveLink(filepath.Join(string(r), folder, name)); err == nil {
+			return found, nil
+		}
+	}
+	return "", fmt.Errorf("error locating %q", name)
+}
+
+// resolveLink returns the target of a symlink, or the path of the file itself
+// for a regular file, using filepath.EvalSymlinks.
+// This is close to `readlink -f ${l}`, except that the path has to exist.
+func resolveLink(l string) (string, error) {
+	target, evalErr := filepath.EvalSymlinks(l)
+	if evalErr == nil {
+		return target, nil
+	}
+	return "", fmt.Errorf("error resolving link '%s': %w", l, evalErr)
+}
diff --git a/validator/main.go b/validator/main.go
index c95894803..df92cd4d4 100644
--- a/validator/main.go
+++ b/validator/main.go
@@ -33,7 +33,6 @@ import (
devchar "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
-
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -44,6 +43,8 @@ import (
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
+ nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
+ nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1"
"github.com/NVIDIA/gpu-operator/internal/info"
)
@@ -55,7 +56,9 @@ type Component interface {
}
// Driver component
-type Driver struct{}
+type Driver struct {
+ ctx context.Context
+}
// NvidiaFs GDS Driver component
type NvidiaFs struct{}
@@ -121,6 +124,9 @@ var (
metricsPort int
defaultGPUWorkloadConfigFlag string
disableDevCharSymlinkCreation bool
+ hostRootFlag string
+ driverInstallDirFlag string
+ driverInstallDirCtrPathFlag string
)
// defaultGPUWorkloadConfig is "vm-passthrough" unless
@@ -136,12 +142,12 @@ const (
defaultMetricsPort = 0
// hostDevCharPath indicates the path in the container where the host '/dev/char' directory is mounted to
hostDevCharPath = "/host-dev-char"
- // driverContainerRoot indicates the path on the host where driver container mounts it's root filesystem
- driverContainerRoot = "/run/nvidia/driver"
+ // defaultDriverInstallDir indicates the default path on the host where the driver container installation is made available
+ defaultDriverInstallDir = "/run/nvidia/driver"
+ // defaultDriverInstallDirCtrPath indicates the default path where the NVIDIA driver install dir is mounted in the container
+ defaultDriverInstallDirCtrPath = "/run/nvidia/driver"
// driverStatusFile indicates status file for containerizeddriver readiness
driverStatusFile = "driver-ready"
- // hostDriverStatusFile indicates status file for host driver readiness
- hostDriverStatusFile = "host-driver-ready"
// nvidiaFsStatusFile indicates status file for nvidia-fs driver readiness
nvidiaFsStatusFile = "nvidia-fs-ready"
// toolkitStatusFile indicates status file for toolkit readiness
@@ -207,6 +213,8 @@ const (
gpuWorkloadConfigVMVgpu = "vm-vgpu"
// CCCapableLabelKey represents NFD label name to indicate if the node is capable to run CC workloads
CCCapableLabelKey = "nvidia.com/cc.capable"
+ // appComponentLabelKey indicates the label key of the component
+ appComponentLabelKey = "app.kubernetes.io/component"
)
func main() {
@@ -318,6 +326,27 @@ func main() {
Destination: &disableDevCharSymlinkCreation,
EnvVars: []string{"DISABLE_DEV_CHAR_SYMLINK_CREATION"},
},
+ &cli.StringFlag{
+ Name: "host-root",
+ Value: "/",
+ Usage: "root path of the underlying host",
+ Destination: &hostRootFlag,
+ EnvVars: []string{"HOST_ROOT"},
+ },
+ &cli.StringFlag{
+ Name: "driver-install-dir",
+ Value: defaultDriverInstallDir,
+ Usage: "the path on the host where a containerized NVIDIA driver installation is made available",
+ Destination: &driverInstallDirFlag,
+ EnvVars: []string{"DRIVER_INSTALL_DIR"},
+ },
+ &cli.StringFlag{
+ Name: "driver-install-dir-ctr-path",
+ Value: defaultDriverInstallDirCtrPath,
+ Usage: "the path where the NVIDIA driver install dir is mounted in the container",
+ Destination: &driverInstallDirCtrPathFlag,
+ EnvVars: []string{"DRIVER_INSTALL_DIR_CTR_PATH"},
+ },
}
// Log version info
@@ -426,12 +455,12 @@ func getWorkloadConfig(ctx context.Context) (string, error) {
kubeClient, err := kubernetes.NewForConfig(kubeConfig)
if err != nil {
- return "", fmt.Errorf("Error getting k8s client - %s", err.Error())
+ return "", fmt.Errorf("error getting k8s client - %w", err)
}
node, err := getNode(ctx, kubeClient)
if err != nil {
- return "", fmt.Errorf("Error getting node labels - %s", err.Error())
+ return "", fmt.Errorf("error getting node labels - %w", err)
}
labels := node.GetLabels()
@@ -467,24 +496,26 @@ func start(c *cli.Context) error {
switch componentFlag {
case "driver":
- driver := &Driver{}
+ driver := &Driver{
+ ctx: c.Context,
+ }
err := driver.validate()
if err != nil {
- return fmt.Errorf("error validating driver installation: %s", err)
+ return fmt.Errorf("error validating driver installation: %w", err)
}
return nil
case "nvidia-fs":
nvidiaFs := &NvidiaFs{}
err := nvidiaFs.validate()
if err != nil {
- return fmt.Errorf("error validating nvidia-fs driver installation: %s", err)
+ return fmt.Errorf("error validating nvidia-fs driver installation: %w", err)
}
return nil
case "toolkit":
toolkit := &Toolkit{}
err := toolkit.validate()
if err != nil {
- return fmt.Errorf("error validating toolkit installation: %s", err)
+ return fmt.Errorf("error validating toolkit installation: %w", err)
}
return nil
case "cuda":
@@ -493,7 +524,7 @@ func start(c *cli.Context) error {
}
err := cuda.validate()
if err != nil {
- return fmt.Errorf("error validating cuda workload: %s", err)
+ return fmt.Errorf("error validating cuda workload: %w", err)
}
return nil
case "plugin":
@@ -502,7 +533,7 @@ func start(c *cli.Context) error {
}
err := plugin.validate()
if err != nil {
- return fmt.Errorf("error validating plugin installation: %s", err)
+ return fmt.Errorf("error validating plugin installation: %w", err)
}
return nil
case "mofed":
@@ -529,7 +560,7 @@ func start(c *cli.Context) error {
}
err := vfioPCI.validate()
if err != nil {
- return fmt.Errorf("error validating vfio-pci driver installation: %s", err)
+ return fmt.Errorf("error validating vfio-pci driver installation: %w", err)
}
return nil
case "vgpu-manager":
@@ -538,7 +569,7 @@ func start(c *cli.Context) error {
}
err := vGPUManager.validate()
if err != nil {
- return fmt.Errorf("error validating vGPU Manager installation: %s", err)
+ return fmt.Errorf("error validating vGPU Manager installation: %w", err)
}
return nil
case "vgpu-devices":
@@ -556,7 +587,7 @@ func start(c *cli.Context) error {
}
err := CCManager.validate()
if err != nil {
- return fmt.Errorf("error validating CC Manager installation: %s", err)
+ return fmt.Errorf("error validating CC Manager installation: %w", err)
}
return nil
default:
@@ -591,19 +622,31 @@ func runCommandWithWait(command string, args []string, sleepSeconds int, silent
}
}
-func getDriverRoot() (string, bool) {
- // check if driver is pre-installed on the host and use host path for validation
- if fileInfo, err := os.Lstat("/host/usr/bin/nvidia-smi"); err == nil && fileInfo.Size() != 0 {
- log.Infof("Detected pre-installed driver on the host")
- return "/host", true
+// prependPathListEnvvar prepends a specified list of strings to a specified envvar and returns its value.
+func prependPathListEnvvar(envvar string, prepend ...string) string {
+ if len(prepend) == 0 {
+ return os.Getenv(envvar)
}
+ current := filepath.SplitList(os.Getenv(envvar))
+ return strings.Join(append(prepend, current...), string(filepath.ListSeparator))
+}
- return driverContainerRoot, false
+// setEnvVar adds or updates an envvar in the list of specified envvars and returns the resulting list.
+func setEnvVar(envvars []string, key, value string) []string {
+ var updated []string
+ for _, envvar := range envvars {
+ pair := strings.SplitN(envvar, "=", 2)
+ if pair[0] == key {
+ continue
+ }
+ updated = append(updated, envvar)
+ }
+ return append(updated, fmt.Sprintf("%s=%s", key, value))
}
// For driver container installs, check existence of .driver-ctr-ready to confirm running driver
// container has completed and is in Ready state.
-func assertDriverContainerReady(silent, withWaitFlag bool) error {
+func assertDriverContainerReady(silent bool) error {
command := "bash"
args := []string{"-c", "stat /run/nvidia/validations/.driver-ctr-ready"}
@@ -614,24 +657,117 @@ func assertDriverContainerReady(silent, withWaitFlag bool) error {
return runCommand(command, args, silent)
}
-func (d *Driver) runValidation(silent bool) (string, bool, error) {
- driverRoot, isHostDriver := getDriverRoot()
- if !isHostDriver {
- log.Infof("Driver is not pre-installed on the host. Checking driver container status.")
- if err := assertDriverContainerReady(silent, withWaitFlag); err != nil {
- return "", false, fmt.Errorf("error checking driver container status: %v", err)
+// isDriverManagedByOperator determines if the NVIDIA driver is managed by the GPU Operator.
+// We check if at least one driver DaemonSet exists in the operator namespace that is
+// owned by the ClusterPolicy or NVIDIADriver controllers.
+func isDriverManagedByOperator(ctx context.Context) (bool, error) {
+ kubeConfig, err := rest.InClusterConfig()
+ if err != nil {
+ return false, fmt.Errorf("error getting cluster config: %w", err)
+ }
+
+ kubeClient, err := kubernetes.NewForConfig(kubeConfig)
+ if err != nil {
+ return false, fmt.Errorf("error getting k8s client: %w", err)
+ }
+
+ opts := meta_v1.ListOptions{LabelSelector: labels.Set{appComponentLabelKey: "nvidia-driver"}.AsSelector().String()}
+ dsList, err := kubeClient.AppsV1().DaemonSets(namespaceFlag).List(ctx, opts)
+ if err != nil {
+ return false, fmt.Errorf("error listing daemonsets: %w", err)
+ }
+
+ for i := range dsList.Items {
+ ds := dsList.Items[i]
+ owner := meta_v1.GetControllerOf(&ds)
+ if owner == nil {
+ continue
+ }
+ if strings.HasPrefix(owner.APIVersion, "nvidia.com/") && (owner.Kind == nvidiav1.ClusterPolicyCRDName || owner.Kind == nvidiav1alpha1.NVIDIADriverCRDName) {
+ return true, nil
}
}
- // invoke validation command
+ return false, nil
+}
+
+func validateHostDriver(silent bool) error {
+ log.Info("Attempting to validate a pre-installed driver on the host")
+ fileInfo, err := os.Lstat("/host/usr/bin/nvidia-smi")
+ if err != nil {
+ return fmt.Errorf("no 'nvidia-smi' file present on the host: %w", err)
+ }
+ if fileInfo.Size() == 0 {
+ return fmt.Errorf("empty 'nvidia-smi' file found on the host")
+ }
command := "chroot"
- args := []string{driverRoot, "nvidia-smi"}
+ args := []string{"/host", "nvidia-smi"}
- if withWaitFlag {
- return driverRoot, isHostDriver, runCommandWithWait(command, args, sleepIntervalSecondsFlag, silent)
+ return runCommand(command, args, silent)
+}
+
+func validateDriverContainer(silent bool, ctx context.Context) error {
+ driverManagedByOperator, err := isDriverManagedByOperator(ctx)
+ if err != nil {
+ return fmt.Errorf("error checking if driver is managed by GPU Operator: %w", err)
+ }
+
+ if driverManagedByOperator {
+ log.Infof("Driver is not pre-installed on the host and is managed by GPU Operator. Checking driver container status.")
+ if err := assertDriverContainerReady(silent); err != nil {
+ return fmt.Errorf("error checking driver container status: %w", err)
+ }
+ }
+
+ driverRoot := root(driverInstallDirCtrPathFlag)
+
+ validateDriver := func(silent bool) error {
+ driverLibraryPath, err := driverRoot.getDriverLibraryPath()
+ if err != nil {
+ return fmt.Errorf("failed to locate driver libraries: %w", err)
+ }
+
+ nvidiaSMIPath, err := driverRoot.getNvidiaSMIPath()
+ if err != nil {
+ return fmt.Errorf("failed to locate nvidia-smi: %w", err)
+ }
+ cmd := exec.Command(nvidiaSMIPath)
+ // In order for nvidia-smi to run, we need to update LD_PRELOAD to include the path to libnvidia-ml.so.1.
+ cmd.Env = setEnvVar(os.Environ(), "LD_PRELOAD", prependPathListEnvvar("LD_PRELOAD", driverLibraryPath))
+ if !silent {
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ }
+ return cmd.Run()
+ }
+
+ for {
+ log.Info("Attempting to validate a driver container installation")
+ err := validateDriver(silent)
+ if err != nil {
+ if !withWaitFlag {
+ return fmt.Errorf("error validating driver: %w", err)
+ }
+ log.Warningf("failed to validate the driver, retrying after %d seconds\n", sleepIntervalSecondsFlag)
+ time.Sleep(time.Duration(sleepIntervalSecondsFlag) * time.Second)
+ continue
+ }
+ return nil
+ }
+}
+
+func (d *Driver) runValidation(silent bool) (driverInfo, error) {
+ err := validateHostDriver(silent)
+ if err == nil {
+ log.Info("Detected a pre-installed driver on the host")
+ return getDriverInfo(true, hostRootFlag, hostRootFlag, "/host"), nil
}
- return driverRoot, isHostDriver, runCommand(command, args, silent)
+ err = validateDriverContainer(silent, d.ctx)
+ if err != nil {
+ return driverInfo{}, err
+ }
+ return getDriverInfo(false, hostRootFlag, driverInstallDirFlag, driverInstallDirCtrPathFlag), nil
}
func (d *Driver) validate() error {
@@ -641,77 +777,86 @@ func (d *Driver) validate() error {
return err
}
- // delete host driver status file is already present
- err = deleteStatusFile(outputDirFlag + "/" + hostDriverStatusFile)
+ driverInfo, err := d.runValidation(false)
if err != nil {
+ log.Errorf("driver is not ready: %v", err)
return err
}
- driverRoot, isHostDriver, err := d.runValidation(false)
+ err = createDevCharSymlinks(driverInfo, disableDevCharSymlinkCreation)
if err != nil {
- log.Error("driver is not ready")
- return err
+ msg := strings.Join([]string{
+ "Failed to create symlinks under /dev/char that point to all possible NVIDIA character devices.",
+ "The existence of these symlinks is required to address the following bug:",
+ "",
+ " https://github.com/NVIDIA/gpu-operator/issues/430",
+ "",
+ "This bug impacts container runtimes configured with systemd cgroup management enabled.",
+ "To disable the symlink creation, set the following envvar in ClusterPolicy:",
+ "",
+ " validator:",
+ " driver:",
+ " env:",
+ " - name: DISABLE_DEV_CHAR_SYMLINK_CREATION",
+ " value: \"true\""}, "\n")
+ return fmt.Errorf("%w\n\n%s", err, msg)
}
- if !disableDevCharSymlinkCreation {
- log.Info("creating symlinks under /dev/char that correspond to NVIDIA character devices")
- err = createDevCharSymlinks(driverRoot, isHostDriver)
- if err != nil {
- msg := strings.Join([]string{
- "Failed to create symlinks under /dev/char that point to all possible NVIDIA character devices.",
- "The existence of these symlinks is required to address the following bug:",
- "",
- " https://github.com/NVIDIA/gpu-operator/issues/430",
- "",
- "This bug impacts container runtimes configured with systemd cgroup management enabled.",
- "To disable the symlink creation, set the following envvar in ClusterPolicy:",
- "",
- " validator:",
- " driver:",
- " env:",
- " - name: DISABLE_DEV_CHAR_SYMLINK_CREATION",
- " value: \"true\""}, "\n")
- return fmt.Errorf("%v\n\n%s", err, msg)
- }
- }
+ return d.createStatusFile(driverInfo)
+}
- statusFile := driverStatusFile
- if isHostDriver {
- statusFile = hostDriverStatusFile
- }
+func (d *Driver) createStatusFile(driverInfo driverInfo) error {
+ statusFileContent := strings.Join([]string{
+ fmt.Sprintf("IS_HOST_DRIVER=%t", driverInfo.isHostDriver),
+ fmt.Sprintf("NVIDIA_DRIVER_ROOT=%s", driverInfo.driverRoot),
+ fmt.Sprintf("DRIVER_ROOT_CTR_PATH=%s", driverInfo.driverRootCtrPath),
+ fmt.Sprintf("NVIDIA_DEV_ROOT=%s", driverInfo.devRoot),
+ fmt.Sprintf("DEV_ROOT_CTR_PATH=%s", driverInfo.devRootCtrPath),
+ }, "\n") + "\n"
// create driver status file
- err = createStatusFile(outputDirFlag + "/" + statusFile)
- if err != nil {
- return err
- }
- return nil
+ return createStatusFileWithContent(outputDirFlag+"/"+driverStatusFile, statusFileContent)
}
// createDevCharSymlinks creates symlinks in /host-dev-char that point to all possible NVIDIA devices nodes.
-func createDevCharSymlinks(driverRoot string, isHostDriver bool) error {
- // If the host driver is being used, we rely on the fact that we are running a privileged container and as such
- // have access to /dev
- devRoot := driverRoot
- if isHostDriver {
- devRoot = "/"
+func createDevCharSymlinks(driverInfo driverInfo, disableDevCharSymlinkCreation bool) error {
+ if disableDevCharSymlinkCreation {
+ log.WithField("disableDevCharSymlinkCreation", true).
+ Info("skipping the creation of symlinks under /dev/char that correspond to NVIDIA character devices")
+ return nil
+ }
+
+ log.Info("creating symlinks under /dev/char that correspond to NVIDIA character devices")
+
+ // Only attempt to load NVIDIA kernel modules when we can chroot into driverRoot
+ loadKernelModules := driverInfo.isHostDriver || (driverInfo.devRoot == driverInfo.driverRoot)
+
+ // driverRootCtrPath is the path of the driver install dir in the container. This will either be
+ // driverInstallDirCtrPathFlag or '/host'.
+ // Note, if we always mounted the driver install dir to '/driver-root' in the validation container
+ // instead, then we could simplify to always use driverInfo.driverRootCtrPath -- which would be
+ // either '/host' or '/driver-root', both paths would exist in the validation container.
+ driverRootCtrPath := driverInstallDirCtrPathFlag
+ if driverInfo.isHostDriver {
+ driverRootCtrPath = "/host"
}
+
// We now create the symlinks in /dev/char.
creator, err := devchar.NewSymlinkCreator(
- devchar.WithDriverRoot(driverRoot),
- devchar.WithDevRoot(devRoot),
+ devchar.WithDriverRoot(driverRootCtrPath),
+ devchar.WithDevRoot(driverInfo.devRoot),
devchar.WithDevCharPath(hostDevCharPath),
devchar.WithCreateAll(true),
devchar.WithCreateDeviceNodes(true),
- devchar.WithLoadKernelModules(true),
+ devchar.WithLoadKernelModules(loadKernelModules),
)
if err != nil {
- return fmt.Errorf("error creating symlink creator: %v", err)
+ return fmt.Errorf("error creating symlink creator: %w", err)
}
err = creator.CreateLinks()
if err != nil {
- return fmt.Errorf("error creating symlinks: %v", err)
+ return fmt.Errorf("error creating symlinks: %w", err)
}
return nil
@@ -726,16 +871,23 @@ func createStatusFile(statusFile string) error {
}
func createStatusFileWithContent(statusFile string, content string) error {
- f, err := os.Create(statusFile)
+ dir := filepath.Dir(statusFile)
+ tmpFile, err := os.CreateTemp(dir, filepath.Base(statusFile)+".*.tmp")
if err != nil {
- return fmt.Errorf("unable to create status file %s: %s", statusFile, err)
+ return fmt.Errorf("failed to create temporary status file: %w", err)
}
-
- _, err = f.WriteString(content)
+ defer func() { // registered before the write so a failed write cannot leave the temp file behind
+ _ = os.Remove(tmpFile.Name()) // error ignored: after a successful rename the temp name no longer exists
+ }()
+ _, err = tmpFile.WriteString(content)
+ tmpFile.Close() // close before the rename; a write failure is reported through err below
if err != nil {
- return fmt.Errorf("unable to write contents of status file %s: %s", statusFile, err)
+ return fmt.Errorf("failed to write temporary status file: %w", err)
}
+ if err := os.Rename(tmpFile.Name(), statusFile); err != nil { // temp file is in statusFile's directory
+ return fmt.Errorf("error moving temporary file to '%s': %w", statusFile, err)
+ }
return nil
}
@@ -743,7 +895,7 @@ func deleteStatusFile(statusFile string) error {
err := os.Remove(statusFile)
if err != nil {
if !os.IsNotExist(err) {
- return fmt.Errorf("unable to remove driver status file %s: %s", statusFile, err)
+ return fmt.Errorf("unable to remove driver status file %s: %w", statusFile, err)
}
// status file already removed
}
@@ -879,7 +1031,7 @@ func (m *MOFED) validate() error {
present, err := m.isMellanoxDevicePresent()
if err != nil {
- log.Errorf(err.Error())
+ log.Errorf("Error trying to retrieve Mellanox device - %s\n", err.Error())
return err
}
if !present {
@@ -968,19 +1120,16 @@ func (p *Plugin) runWorkload() error {
pod.Spec.RuntimeClassName = &runtimeClass
}
- // update owner reference
- err = setOwnerReference(ctx, p.kubeClient, pod)
+ validatorDaemonset, err := p.kubeClient.AppsV1().DaemonSets(namespaceFlag).Get(ctx, "nvidia-operator-validator", meta_v1.GetOptions{})
if err != nil {
- return fmt.Errorf("unable to set ownerReference for validator pod: %s", err)
+ return fmt.Errorf("unable to retrieve the operator validator daemonset: %w", err)
}
+ // update owner reference
+ pod.SetOwnerReferences(validatorDaemonset.ObjectMeta.OwnerReferences)
// set pod tolerations
- err = setTolerations(ctx, p.kubeClient, pod)
- if err != nil {
- return fmt.Errorf("unable to set tolerations for validator pod: %s", err)
- }
-
- // update podSpec with node name so it will just run on current node
+ pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations
+ // update podSpec with node name, so it will just run on current node
pod.Spec.NodeName = nodeNameFlag
resourceName, err := p.getGPUResourceName()
@@ -1000,7 +1149,7 @@ func (p *Plugin) runWorkload() error {
// check if plugin validation pod is already running and cleanup.
podList, err := p.kubeClient.CoreV1().Pods(namespaceFlag).List(ctx, opts)
if err != nil {
- return fmt.Errorf("cannot list existing validation pods: %s", err)
+ return fmt.Errorf("cannot list existing validation pods: %w", err)
}
if podList != nil && len(podList.Items) > 0 {
@@ -1009,14 +1158,14 @@ func (p *Plugin) runWorkload() error {
options := meta_v1.DeleteOptions{PropagationPolicy: &propagation, GracePeriodSeconds: &gracePeriod}
err = p.kubeClient.CoreV1().Pods(namespaceFlag).Delete(ctx, podList.Items[0].ObjectMeta.Name, options)
if err != nil {
- return fmt.Errorf("cannot delete previous validation pod: %s", err)
+ return fmt.Errorf("cannot delete previous validation pod: %w", err)
}
}
// wait for plugin validation pod to be ready.
newPod, err := p.kubeClient.CoreV1().Pods(namespaceFlag).Create(ctx, pod, meta_v1.CreateOptions{})
if err != nil {
- return fmt.Errorf("failed to create plugin validation pod %s, err %+v", pod.ObjectMeta.Name, err)
+ return fmt.Errorf("failed to create plugin validation pod %s, err %w", pod.ObjectMeta.Name, err)
}
// make sure its available
@@ -1027,37 +1176,13 @@ func (p *Plugin) runWorkload() error {
return nil
}
-func setOwnerReference(ctx context.Context, kubeClient kubernetes.Interface, pod *corev1.Pod) error {
- // get owner of validator daemonset (which is ClusterPolicy)
- validatorDaemonset, err := kubeClient.AppsV1().DaemonSets(namespaceFlag).Get(ctx, "nvidia-operator-validator", meta_v1.GetOptions{})
- if err != nil {
- return err
- }
-
- // update owner reference of plugin workload validation pod as ClusterPolicy for cleanup
- pod.SetOwnerReferences(validatorDaemonset.ObjectMeta.OwnerReferences)
- return nil
-}
-
-func setTolerations(ctx context.Context, kubeClient kubernetes.Interface, pod *corev1.Pod) error {
- // get tolerations of validator daemonset
- validatorDaemonset, err := kubeClient.AppsV1().DaemonSets(namespaceFlag).Get(ctx, "nvidia-operator-validator", meta_v1.GetOptions{})
- if err != nil {
- return err
- }
-
- // set same tolerations for individual validator pods
- pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations
- return nil
-}
-
// waits for the pod to be created
func waitForPod(ctx context.Context, kubeClient kubernetes.Interface, name string, namespace string) error {
for i := 0; i < podCreationWaitRetries; i++ {
// check for the existence of the resource
pod, err := kubeClient.CoreV1().Pods(namespace).Get(ctx, name, meta_v1.GetOptions{})
if err != nil {
- return fmt.Errorf("failed to get pod %s, err %+v", name, err)
+ return fmt.Errorf("failed to get pod %s, err %w", name, err)
}
if pod.Status.Phase != "Succeeded" {
log.Infof("pod %s is curently in %s phase", name, pod.Status.Phase)
@@ -1097,7 +1222,7 @@ func (p *Plugin) countGPUResources() (int64, error) {
// get node info to check discovered GPU resources
node, err := getNode(p.ctx, p.kubeClient)
if err != nil {
- return -1, fmt.Errorf("unable to fetch node by name %s to check for GPU resources: %s", nodeNameFlag, err)
+ return -1, fmt.Errorf("unable to fetch node by name %s to check for GPU resources: %w", nodeNameFlag, err)
}
count := int64(0)
@@ -1170,7 +1295,7 @@ func (p *Plugin) getGPUResourceName() (corev1.ResourceName, error) {
return resourceName, nil
}
- return "", fmt.Errorf("Unable to find any allocatable GPU resource")
+ return "", fmt.Errorf("unable to find any allocatable GPU resource")
}
func (p *Plugin) setKubeClient(kubeClient kubernetes.Interface) {
@@ -1180,7 +1305,7 @@ func (p *Plugin) setKubeClient(kubeClient kubernetes.Interface) {
func getNode(ctx context.Context, kubeClient kubernetes.Interface) (*corev1.Node, error) {
node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeNameFlag, meta_v1.GetOptions{})
if err != nil {
- log.Errorf("unable to get node with name %s, err %s", nodeNameFlag, err.Error())
+ log.Errorf("unable to get node with name %s, err %v", nodeNameFlag, err)
return nil, err
}
return node, nil
@@ -1259,19 +1384,16 @@ func (c *CUDA) runWorkload() error {
pod.Spec.RuntimeClassName = &runtimeClass
}
- // update owner reference
- err = setOwnerReference(ctx, c.kubeClient, pod)
+ validatorDaemonset, err := c.kubeClient.AppsV1().DaemonSets(namespaceFlag).Get(ctx, "nvidia-operator-validator", meta_v1.GetOptions{})
if err != nil {
- return fmt.Errorf("unable to set owner reference for validator pod: %s", err)
+ return fmt.Errorf("unable to retrieve the operator validator daemonset: %w", err)
}
+ // update owner reference
+ pod.SetOwnerReferences(validatorDaemonset.ObjectMeta.OwnerReferences)
// set pod tolerations
- err = setTolerations(ctx, c.kubeClient, pod)
- if err != nil {
- return fmt.Errorf("unable to set tolerations for validator pod: %s", err)
- }
-
- // update podSpec with node name so it will just run on current node
+ pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations
+ // update podSpec with node name, so it will just run on current node
pod.Spec.NodeName = nodeNameFlag
opts := meta_v1.ListOptions{LabelSelector: labels.Set{"app": cudaValidatorLabelValue}.AsSelector().String(),
@@ -1299,7 +1421,7 @@ func (c *CUDA) runWorkload() error {
return fmt.Errorf("failed to create cuda validation pod %s, err %+v", pod.ObjectMeta.Name, err)
}
- // make sure its available
+ // make sure it's available
err = waitForPod(ctx, c.kubeClient, newPod.ObjectMeta.Name, namespaceFlag)
if err != nil {
return err
@@ -1318,13 +1440,13 @@ func (v *VfioPCI) validate() error {
gpuWorkloadConfig, err := getWorkloadConfig(ctx)
if err != nil {
- return fmt.Errorf("Error getting gpu workload config: %s", err.Error())
+ return fmt.Errorf("error getting gpu workload config: %w", err)
}
log.Infof("GPU workload configuration: %s", gpuWorkloadConfig)
err = createStatusFileWithContent(filepath.Join(outputDirFlag, workloadTypeStatusFile), gpuWorkloadConfig+"\n")
if err != nil {
- return fmt.Errorf("Error updating %s status file: %v", workloadTypeStatusFile, err)
+ return fmt.Errorf("error updating %s status file: %w", workloadTypeStatusFile, err)
}
if gpuWorkloadConfig != gpuWorkloadConfigVMPassthrough {
@@ -1340,7 +1462,7 @@ func (v *VfioPCI) validate() error {
return err
}
- err = v.runValidation(false)
+ err = v.runValidation()
if err != nil {
return err
}
@@ -1354,11 +1476,11 @@ func (v *VfioPCI) validate() error {
return nil
}
-func (v *VfioPCI) runValidation(silent bool) error {
+func (v *VfioPCI) runValidation() error {
nvpci := nvpci.New()
nvdevices, err := nvpci.GetGPUs()
if err != nil {
- return fmt.Errorf("error getting NVIDIA PCI devices: %v", err)
+ return fmt.Errorf("error getting NVIDIA PCI devices: %w", err)
}
for _, dev := range nvdevices {
@@ -1375,13 +1497,13 @@ func (v *VGPUManager) validate() error {
gpuWorkloadConfig, err := getWorkloadConfig(ctx)
if err != nil {
- return fmt.Errorf("Error getting gpu workload config: %s", err.Error())
+ return fmt.Errorf("error getting gpu workload config: %w", err)
}
log.Infof("GPU workload configuration: %s", gpuWorkloadConfig)
err = createStatusFileWithContent(filepath.Join(outputDirFlag, workloadTypeStatusFile), gpuWorkloadConfig+"\n")
if err != nil {
- return fmt.Errorf("Error updating %s status file: %v", workloadTypeStatusFile, err)
+ return fmt.Errorf("error updating %s status file: %w", workloadTypeStatusFile, err)
}
if gpuWorkloadConfig != gpuWorkloadConfigVMVgpu {
@@ -1449,12 +1571,12 @@ func (c *CCManager) validate() error {
kubeConfig, err := rest.InClusterConfig()
if err != nil {
- return fmt.Errorf("Error getting cluster config - %s", err.Error())
+ return fmt.Errorf("error getting cluster config - %w", err)
}
kubeClient, err := kubernetes.NewForConfig(kubeConfig)
if err != nil {
- log.Errorf("Error getting k8s client - %s\n", err.Error())
+ log.Errorf("Error getting k8s client - %v\n", err)
return err
}
@@ -1478,7 +1600,8 @@ func (c *CCManager) validate() error {
func (c *CCManager) runValidation(silent bool) error {
node, err := getNode(c.ctx, c.kubeClient)
if err != nil {
- return fmt.Errorf("unable to fetch node by name %s to check for %s label: %s", nodeNameFlag, CCCapableLabelKey, err)
+ return fmt.Errorf("unable to fetch node by name %s to check for %s label: %w",
+ nodeNameFlag, CCCapableLabelKey, err)
}
// make sure this is a CC capable node
@@ -1517,13 +1640,13 @@ func (v *VGPUDevices) validate() error {
gpuWorkloadConfig, err := getWorkloadConfig(ctx)
if err != nil {
- return fmt.Errorf("Error getting gpu workload config: %s", err.Error())
+ return fmt.Errorf("error getting gpu workload config: %w", err)
}
log.Infof("GPU workload configuration: %s", gpuWorkloadConfig)
err = createStatusFileWithContent(filepath.Join(outputDirFlag, workloadTypeStatusFile), gpuWorkloadConfig+"\n")
if err != nil {
- return fmt.Errorf("Error updating %s status file: %v", workloadTypeStatusFile, err)
+ return fmt.Errorf("error updating %s status file: %w", workloadTypeStatusFile, err)
}
if gpuWorkloadConfig != gpuWorkloadConfigVMVgpu {
@@ -1539,7 +1662,7 @@ func (v *VGPUDevices) validate() error {
return err
}
- err = v.runValidation(false)
+ err = v.runValidation()
if err != nil {
return err
}
@@ -1554,17 +1677,17 @@ func (v *VGPUDevices) validate() error {
return nil
}
-func (v *VGPUDevices) runValidation(silent bool) error {
+func (v *VGPUDevices) runValidation() error {
nvmdev := nvmdev.New()
vGPUDevices, err := nvmdev.GetAllDevices()
if err != nil {
- return fmt.Errorf("Error checking for vGPU devices on the host: %v", err)
+ return fmt.Errorf("error checking for vGPU devices on the host: %w", err)
}
if !withWaitFlag {
numDevices := len(vGPUDevices)
if numDevices == 0 {
- return fmt.Errorf("No vGPU devices found")
+ return fmt.Errorf("no vGPU devices found")
}
log.Infof("Found %d vGPU devices", numDevices)
@@ -1582,7 +1705,7 @@ func (v *VGPUDevices) runValidation(silent bool) error {
vGPUDevices, err = nvmdev.GetAllDevices()
if err != nil {
- return fmt.Errorf("Error checking for vGPU devices on the host: %v", err)
+ return fmt.Errorf("error checking for vGPU devices on the host: %w", err)
}
}
}
diff --git a/validator/metrics.go b/validator/metrics.go
index d9ac75e5e..4105dd166 100644
--- a/validator/metrics.go
+++ b/validator/metrics.go
@@ -163,11 +163,6 @@ func (nm *NodeMetrics) watchStatusFile(statusFile *promcli.Gauge, statusFileFile
for {
_, err := os.Stat(outputDirFlag + "/" + statusFileFilename)
ready = !os.IsNotExist(err)
- if !ready && statusFileFilename == driverStatusFile {
- // check if the driver status file for pre-installed driver exists
- _, err = os.Stat(outputDirFlag + "/" + hostDriverStatusFile)
- ready = !os.IsNotExist(err)
- }
if ready != prevReady {
prevReady = ready
@@ -233,14 +228,17 @@ func (nm *NodeMetrics) watchDevicePluginValidation() {
}
func (nm *NodeMetrics) watchDriverValidation() {
- driver := &Driver{}
+ driver := &Driver{
+ ctx: nm.ctx,
+ }
for {
- _, _, err := driver.runValidation(true)
+ _, err := driver.runValidation(true)
if err == nil {
nm.driverValidation.Set(1)
nm.driverValidationLastSuccess.Set(float64(time.Now().Unix()))
} else {
+ log.Errorf("failed to validate driver: %v", err)
nm.driverValidation.Set(0)
}
time.Sleep(driverValidationCheckDelaySeconds * time.Second)
diff --git a/validator/multi-arch.mk b/validator/multi-arch.mk
index 4f199ed5d..d0f4f06ac 100644
--- a/validator/multi-arch.mk
+++ b/validator/multi-arch.mk
@@ -13,7 +13,8 @@
# limitations under the License.
PUSH_ON_BUILD ?= false
-DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
+ATTACH_ATTESTATIONS ?= false
+DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD) --provenance=$(ATTACH_ATTESTATIONS) --sbom=$(ATTACH_ATTESTATIONS)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
REGCTL ?= regctl
@@ -21,8 +22,3 @@ $(PUSH_TARGETS): push-%:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE)
-
-push-short:
- $(REGCTL) \
- image copy \
- $(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
\ No newline at end of file
diff --git a/validator/versions.mk b/validator/versions.mk
index c8294d44b..b61bc782b 100644
--- a/validator/versions.mk
+++ b/validator/versions.mk
@@ -16,4 +16,4 @@
include $(CURDIR)/../versions.mk
CUDA_SAMPLES_VERSION ?= 11.7.1
-GOLANG_VERSION ?= 1.21.1
+GOLANG_VERSION ?= 1.23.3
diff --git a/vendor/dario.cat/mergo/.deepsource.toml b/vendor/dario.cat/mergo/.deepsource.toml
new file mode 100644
index 000000000..a8bc979e0
--- /dev/null
+++ b/vendor/dario.cat/mergo/.deepsource.toml
@@ -0,0 +1,12 @@
+version = 1
+
+test_patterns = [
+ "*_test.go"
+]
+
+[[analyzers]]
+name = "go"
+enabled = true
+
+ [analyzers.meta]
+ import_path = "dario.cat/mergo"
\ No newline at end of file
diff --git a/vendor/dario.cat/mergo/.gitignore b/vendor/dario.cat/mergo/.gitignore
new file mode 100644
index 000000000..45ad0f1ae
--- /dev/null
+++ b/vendor/dario.cat/mergo/.gitignore
@@ -0,0 +1,36 @@
+#### joe made this: http://goel.io/joe
+
+#### go ####
+# Binaries for programs and plugins
+*.exe
+*.dll
+*.so
+*.dylib
+
+# Test binary, build with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Golang/Intellij
+.idea
+
+# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
+.glide/
+
+#### vim ####
+# Swap
+[._]*.s[a-v][a-z]
+[._]*.sw[a-p]
+[._]s[a-v][a-z]
+[._]sw[a-p]
+
+# Session
+Session.vim
+
+# Temporary
+.netrwhist
+*~
+# Auto-generated tag files
+tags
diff --git a/vendor/dario.cat/mergo/.travis.yml b/vendor/dario.cat/mergo/.travis.yml
new file mode 100644
index 000000000..d324c43ba
--- /dev/null
+++ b/vendor/dario.cat/mergo/.travis.yml
@@ -0,0 +1,12 @@
+language: go
+arch:
+ - amd64
+ - ppc64le
+install:
+ - go get -t
+ - go get golang.org/x/tools/cmd/cover
+ - go get github.com/mattn/goveralls
+script:
+ - go test -race -v ./...
+after_script:
+ - $HOME/gopath/bin/goveralls -service=travis-ci -repotoken $COVERALLS_TOKEN
diff --git a/vendor/dario.cat/mergo/CODE_OF_CONDUCT.md b/vendor/dario.cat/mergo/CODE_OF_CONDUCT.md
new file mode 100644
index 000000000..469b44907
--- /dev/null
+++ b/vendor/dario.cat/mergo/CODE_OF_CONDUCT.md
@@ -0,0 +1,46 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at i@dario.im. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
+
+[homepage]: http://contributor-covenant.org
+[version]: http://contributor-covenant.org/version/1/4/
diff --git a/vendor/dario.cat/mergo/CONTRIBUTING.md b/vendor/dario.cat/mergo/CONTRIBUTING.md
new file mode 100644
index 000000000..0a1ff9f94
--- /dev/null
+++ b/vendor/dario.cat/mergo/CONTRIBUTING.md
@@ -0,0 +1,112 @@
+
+# Contributing to mergo
+
+First off, thanks for taking the time to contribute! ❤️
+
+All types of contributions are encouraged and valued. See the [Table of Contents](#table-of-contents) for different ways to help and details about how this project handles them. Please make sure to read the relevant section before making your contribution. It will make it a lot easier for us maintainers and smooth out the experience for all involved. The community looks forward to your contributions. 🎉
+
+> And if you like the project, but just don't have time to contribute, that's fine. There are other easy ways to support the project and show your appreciation, which we would also be very happy about:
+> - Star the project
+> - Tweet about it
+> - Refer this project in your project's readme
+> - Mention the project at local meetups and tell your friends/colleagues
+
+
+## Table of Contents
+
+- [Code of Conduct](#code-of-conduct)
+- [I Have a Question](#i-have-a-question)
+- [I Want To Contribute](#i-want-to-contribute)
+- [Reporting Bugs](#reporting-bugs)
+- [Suggesting Enhancements](#suggesting-enhancements)
+
+## Code of Conduct
+
+This project and everyone participating in it is governed by the
+[mergo Code of Conduct](https://github.com/imdario/mergo/blob/master/CODE_OF_CONDUCT.md).
+By participating, you are expected to uphold this code. Please report unacceptable behavior
+to <i@dario.im>.
+
+
+## I Have a Question
+
+> If you want to ask a question, we assume that you have read the available [Documentation](https://pkg.go.dev/github.com/imdario/mergo).
+
+Before you ask a question, it is best to search for existing [Issues](https://github.com/imdario/mergo/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first.
+
+If you then still feel the need to ask a question and need clarification, we recommend the following:
+
+- Open an [Issue](https://github.com/imdario/mergo/issues/new).
+- Provide as much context as you can about what you're running into.
+- Provide project and platform versions (Go toolchain, operating system, mergo release, etc.), depending on what seems relevant.
+
+We will then take care of the issue as soon as possible.
+
+## I Want To Contribute
+
+> ### Legal Notice
+> When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license.
+
+### Reporting Bugs
+
+
+#### Before Submitting a Bug Report
+
+A good bug report shouldn't leave others needing to chase you up for more information. Therefore, we ask you to investigate carefully, collect information and describe the issue in detail in your report. Please complete the following steps in advance to help us fix any potential bug as fast as possible.
+
+- Make sure that you are using the latest version.
+- Determine if your bug is really a bug and not an error on your side e.g. using incompatible environment components/versions (Make sure that you have read the [documentation](https://pkg.go.dev/github.com/imdario/mergo). If you are looking for support, you might want to check [this section](#i-have-a-question)).
+- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/imdario/mergo/issues?q=label%3Abug).
+- Also make sure to search the internet (including Stack Overflow) to see if users outside of the GitHub community have discussed the issue.
+- Collect information about the bug:
+- Stack trace (Traceback)
+- OS, Platform and Version (Windows, Linux, macOS, x86, ARM)
+- Version of the interpreter, compiler, SDK, runtime environment, package manager, depending on what seems relevant.
+- Possibly your input and the output
+- Can you reliably reproduce the issue? And can you also reproduce it with older versions?
+
+
+#### How Do I Submit a Good Bug Report?
+
+> You must never report security-related issues, vulnerabilities or bugs including sensitive information to the issue tracker, or elsewhere in public. Instead, sensitive bugs must be reported privately through the [Tidelift security contact](https://tidelift.com/security).
+
+
+We use GitHub issues to track bugs and errors. If you run into an issue with the project:
+
+- Open an [Issue](https://github.com/imdario/mergo/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.)
+- Explain the behavior you would expect and the actual behavior.
+- Please provide as much context as possible and describe the *reproduction steps* that someone else can follow to recreate the issue on their own. This usually includes your code. For good bug reports you should isolate the problem and create a reduced test case.
+- Provide the information you collected in the previous section.
+
+Once it's filed:
+
+- The project team will label the issue accordingly.
+- A team member will try to reproduce the issue with your provided steps. If there are no reproduction steps or no obvious way to reproduce the issue, the team will ask you for those steps and mark the issue as `needs-repro`. Bugs with the `needs-repro` tag will not be addressed until they are reproduced.
+- If the team is able to reproduce the issue, it will be marked `needs-fix`, as well as possibly other tags (such as `critical`), and the issue will be left to be implemented by someone.
+
+### Suggesting Enhancements
+
+This section guides you through submitting an enhancement suggestion for mergo, **including completely new features and minor improvements to existing functionality**. Following these guidelines will help maintainers and the community to understand your suggestion and find related suggestions.
+
+
+#### Before Submitting an Enhancement
+
+- Make sure that you are using the latest version.
+- Read the [documentation](https://pkg.go.dev/github.com/imdario/mergo) carefully and find out if the functionality is already covered, maybe by an individual configuration.
+- Perform a [search](https://github.com/imdario/mergo/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one.
+- Find out whether your idea fits with the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library.
+
+
+#### How Do I Submit a Good Enhancement Suggestion?
+
+Enhancement suggestions are tracked as [GitHub issues](https://github.com/imdario/mergo/issues).
+
+- Use a **clear and descriptive title** for the issue to identify the suggestion.
+- Provide a **step-by-step description of the suggested enhancement** in as many details as possible.
+- **Describe the current behavior** and **explain which behavior you expected to see instead** and why. At this point you can also tell which alternatives do not work for you.
+- You may want to **include screenshots and animated GIFs** which help you demonstrate the steps or point out the part which the suggestion is related to. You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux.
+- **Explain why this enhancement would be useful** to most mergo users. You may also want to point out the other projects that solved it better and which could serve as inspiration.
+
+
+## Attribution
+This guide is based on the **contributing-gen**. [Make your own](https://github.com/bttger/contributing-gen)!
diff --git a/vendor/dario.cat/mergo/LICENSE b/vendor/dario.cat/mergo/LICENSE
new file mode 100644
index 000000000..686680298
--- /dev/null
+++ b/vendor/dario.cat/mergo/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2013 Dario Castañé. All rights reserved.
+Copyright (c) 2012 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/dario.cat/mergo/README.md b/vendor/dario.cat/mergo/README.md
new file mode 100644
index 000000000..0b3c48889
--- /dev/null
+++ b/vendor/dario.cat/mergo/README.md
@@ -0,0 +1,258 @@
+# Mergo
+
+[![GitHub release][5]][6]
+[![GoCard][7]][8]
+[![Test status][1]][2]
+[![OpenSSF Scorecard][21]][22]
+[![OpenSSF Best Practices][19]][20]
+[![Coverage status][9]][10]
+[![Sourcegraph][11]][12]
+[![FOSSA status][13]][14]
+
+[![GoDoc][3]][4]
+[![Become my sponsor][15]][16]
+[![Tidelift][17]][18]
+
+[1]: https://github.com/imdario/mergo/workflows/tests/badge.svg?branch=master
+[2]: https://github.com/imdario/mergo/actions/workflows/tests.yml
+[3]: https://godoc.org/github.com/imdario/mergo?status.svg
+[4]: https://godoc.org/github.com/imdario/mergo
+[5]: https://img.shields.io/github/release/imdario/mergo.svg
+[6]: https://github.com/imdario/mergo/releases
+[7]: https://goreportcard.com/badge/imdario/mergo
+[8]: https://goreportcard.com/report/github.com/imdario/mergo
+[9]: https://coveralls.io/repos/github/imdario/mergo/badge.svg?branch=master
+[10]: https://coveralls.io/github/imdario/mergo?branch=master
+[11]: https://sourcegraph.com/github.com/imdario/mergo/-/badge.svg
+[12]: https://sourcegraph.com/github.com/imdario/mergo?badge
+[13]: https://app.fossa.io/api/projects/git%2Bgithub.com%2Fimdario%2Fmergo.svg?type=shield
+[14]: https://app.fossa.io/projects/git%2Bgithub.com%2Fimdario%2Fmergo?ref=badge_shield
+[15]: https://img.shields.io/github/sponsors/imdario
+[16]: https://github.com/sponsors/imdario
+[17]: https://tidelift.com/badges/package/go/github.com%2Fimdario%2Fmergo
+[18]: https://tidelift.com/subscription/pkg/go-github.com-imdario-mergo
+[19]: https://bestpractices.coreinfrastructure.org/projects/7177/badge
+[20]: https://bestpractices.coreinfrastructure.org/projects/7177
+[21]: https://api.securityscorecards.dev/projects/github.com/imdario/mergo/badge
+[22]: https://api.securityscorecards.dev/projects/github.com/imdario/mergo
+
+A helper to merge structs and maps in Golang. Useful for configuration default values, avoiding messy if-statements.
+
+Mergo merges same-type structs and maps by setting default values in zero-value fields. Mergo won't merge unexported (private) fields. It will do recursively any exported one. It also won't merge structs inside maps (because they are not addressable using Go reflection).
+
+Also a lovely [comune](http://en.wikipedia.org/wiki/Mergo) (municipality) in the Province of Ancona in the Italian region of Marche.
+
+## Status
+
+Mergo is stable and frozen, ready for production. Check a short list of the projects using it at large scale [here](https://github.com/imdario/mergo#mergo-in-the-wild).
+
+No new features are accepted. They will be considered for a future v2 that improves the implementation and fixes bugs for corner cases.
+
+### Important notes
+
+#### 1.0.0
+
+In [1.0.0](//github.com/imdario/mergo/releases/tag/1.0.0) Mergo moves to a vanity URL `dario.cat/mergo`. No more v1 versions will be released.
+
+If the vanity URL is causing issues in your project due to a dependency pulling Mergo - it isn't a direct dependency in your project - it is recommended to use [replace](https://github.com/golang/go/wiki/Modules#when-should-i-use-the-replace-directive) to pin the version to the last one with the old import URL:
+
+```
+replace github.com/imdario/mergo => github.com/imdario/mergo v0.3.16
+```
+
+#### 0.3.9
+
+Please keep in mind that a problematic PR broke [0.3.9](//github.com/imdario/mergo/releases/tag/0.3.9). I reverted it in [0.3.10](//github.com/imdario/mergo/releases/tag/0.3.10), and I consider it stable but not bug-free. Also, this version adds support for go modules.
+
+Keep in mind that in [0.3.2](//github.com/imdario/mergo/releases/tag/0.3.2), Mergo changed `Merge()` and `Map()` signatures to support [transformers](#transformers). I added an optional/variadic argument so that it won't break the existing code.
+
+If you were using Mergo before April 6th, 2015, please check your project works as intended after updating your local copy with ```go get -u dario.cat/mergo```. I apologize for any issue caused by its previous behavior and any future bug that Mergo could cause in existing projects after the change (release 0.2.0).
+
+### Donations
+
+If Mergo is useful to you, consider buying me a coffee, a beer, or making a monthly donation to allow me to keep building great free software. :heart_eyes:
+
+
+
+
+### Mergo in the wild
+
+Mergo is used by [thousands](https://deps.dev/go/dario.cat%2Fmergo/v1.0.0/dependents) [of](https://deps.dev/go/github.com%2Fimdario%2Fmergo/v0.3.16/dependents) [projects](https://deps.dev/go/github.com%2Fimdario%2Fmergo/v0.3.12), including:
+
+* [containerd/containerd](https://github.com/containerd/containerd)
+* [datadog/datadog-agent](https://github.com/datadog/datadog-agent)
+* [docker/cli/](https://github.com/docker/cli/)
+* [goreleaser/goreleaser](https://github.com/goreleaser/goreleaser)
+* [go-micro/go-micro](https://github.com/go-micro/go-micro)
+* [grafana/loki](https://github.com/grafana/loki)
+* [kubernetes/kubernetes](https://github.com/kubernetes/kubernetes)
+* [masterminds/sprig](https://github.com/Masterminds/sprig)
+* [moby/moby](https://github.com/moby/moby)
+* [slackhq/nebula](https://github.com/slackhq/nebula)
+* [volcano-sh/volcano](https://github.com/volcano-sh/volcano)
+
+## Install
+
+ go get dario.cat/mergo
+
+ // use in your .go code
+ import (
+ "dario.cat/mergo"
+ )
+
+## Usage
+
+You can only merge same-type structs with exported fields initialized as zero value of their type and same-types maps. Mergo won't merge unexported (private) fields but will do recursively any exported one. It won't merge empty structs value as [they are zero values](https://golang.org/ref/spec#The_zero_value) too. Also, maps will be merged recursively except for structs inside maps (because they are not addressable using Go reflection).
+
+```go
+if err := mergo.Merge(&dst, src); err != nil {
+ // ...
+}
+```
+
+Also, you can merge overwriting values using the transformer `WithOverride`.
+
+```go
+if err := mergo.Merge(&dst, src, mergo.WithOverride); err != nil {
+ // ...
+}
+```
+
+If you need to override pointers, so the source pointer's value is assigned to the destination's pointer, you must use `WithoutDereference`:
+
+```go
+package main
+
+import (
+ "fmt"
+
+ "dario.cat/mergo"
+)
+
+type Foo struct {
+ A *string
+ B int64
+}
+
+func main() {
+ first := "first"
+ second := "second"
+ src := Foo{
+ A: &first,
+ B: 2,
+ }
+
+ dest := Foo{
+ A: &second,
+ B: 1,
+ }
+
+ mergo.Merge(&dest, src, mergo.WithOverride, mergo.WithoutDereference)
+}
+```
+
+Additionally, you can map a `map[string]interface{}` to a struct (and otherwise, from struct to map), following the same restrictions as in `Merge()`. Keys are capitalized to find each corresponding exported field.
+
+```go
+if err := mergo.Map(&dst, srcMap); err != nil {
+ // ...
+}
+```
+
+Warning: if you map a struct to map, it won't do it recursively. Don't expect Mergo to map struct members of your struct as `map[string]interface{}`. They will be just assigned as values.
+
+Here is a nice example:
+
+```go
+package main
+
+import (
+ "fmt"
+ "dario.cat/mergo"
+)
+
+type Foo struct {
+ A string
+ B int64
+}
+
+func main() {
+ src := Foo{
+ A: "one",
+ B: 2,
+ }
+ dest := Foo{
+ A: "two",
+ }
+ mergo.Merge(&dest, src)
+ fmt.Println(dest)
+ // Will print
+ // {two 2}
+}
+```
+
+Note: if tests are failing due to a missing package, please execute:
+
+ go get gopkg.in/yaml.v3
+
+### Transformers
+
+Transformers allow you to merge specific types differently than in the default behavior. In other words, now you can customize how some types are merged. For example, `time.Time` is a struct; it doesn't have zero value but IsZero can return true because it has fields with zero value. How can we merge a non-zero `time.Time`?
+
+```go
+package main
+
+import (
+ "fmt"
+ "dario.cat/mergo"
+ "reflect"
+ "time"
+)
+
+type timeTransformer struct {
+}
+
+func (t timeTransformer) Transformer(typ reflect.Type) func(dst, src reflect.Value) error {
+ if typ == reflect.TypeOf(time.Time{}) {
+ return func(dst, src reflect.Value) error {
+ if dst.CanSet() {
+ isZero := dst.MethodByName("IsZero")
+ result := isZero.Call([]reflect.Value{})
+ if result[0].Bool() {
+ dst.Set(src)
+ }
+ }
+ return nil
+ }
+ }
+ return nil
+}
+
+type Snapshot struct {
+ Time time.Time
+ // ...
+}
+
+func main() {
+ src := Snapshot{time.Now()}
+ dest := Snapshot{}
+ mergo.Merge(&dest, src, mergo.WithTransformers(timeTransformer{}))
+ fmt.Println(dest)
+ // Will print
+ // { 2018-01-12 01:15:00 +0000 UTC m=+0.000000001 }
+}
+```
+
+## Contact me
+
+If I can help you, you have an idea or you are using Mergo in your projects, don't hesitate to drop me a line (or a pull request): [@im_dario](https://twitter.com/im_dario)
+
+## About
+
+Written by [Dario Castañé](http://dario.im).
+
+## License
+
+[BSD 3-Clause](http://opensource.org/licenses/BSD-3-Clause) license, as [Go language](http://golang.org/LICENSE).
+
+[FOSSA license scan report](https://app.fossa.io/projects/git%2Bgithub.com%2Fimdario%2Fmergo?ref=badge_large)
diff --git a/vendor/dario.cat/mergo/SECURITY.md b/vendor/dario.cat/mergo/SECURITY.md
new file mode 100644
index 000000000..a5de61f77
--- /dev/null
+++ b/vendor/dario.cat/mergo/SECURITY.md
@@ -0,0 +1,14 @@
+# Security Policy
+
+## Supported Versions
+
+| Version | Supported |
+| ------- | ------------------ |
+| 0.3.x | :white_check_mark: |
+| < 0.3 | :x: |
+
+## Security contact information
+
+To report a security vulnerability, please use the
+[Tidelift security contact](https://tidelift.com/security).
+Tidelift will coordinate the fix and disclosure.
diff --git a/vendor/dario.cat/mergo/doc.go b/vendor/dario.cat/mergo/doc.go
new file mode 100644
index 000000000..7d96ec054
--- /dev/null
+++ b/vendor/dario.cat/mergo/doc.go
@@ -0,0 +1,148 @@
+// Copyright 2013 Dario Castañé. All rights reserved.
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+A helper to merge structs and maps in Golang. Useful for configuration default values, avoiding messy if-statements.
+
+Mergo merges same-type structs and maps by setting default values in zero-value fields. Mergo won't merge unexported (private) fields. It will do recursively any exported one. It also won't merge structs inside maps (because they are not addressable using Go reflection).
+
+# Status
+
+It is ready for production use. It is used in several projects by Docker, Google, The Linux Foundation, VMWare, Shopify, etc.
+
+# Important notes
+
+1.0.0
+
+In 1.0.0 Mergo moves to a vanity URL `dario.cat/mergo`.
+
+0.3.9
+
+Please keep in mind that a problematic PR broke 0.3.9. We reverted it in 0.3.10. We consider 0.3.10 as stable but not bug-free. Also, this version adds support for go modules.
+
+Keep in mind that in 0.3.2, Mergo changed Merge() and Map() signatures to support transformers. We added an optional/variadic argument so that it won't break the existing code.
+
+If you were using Mergo before April 6th, 2015, please check your project works as intended after updating your local copy with go get -u dario.cat/mergo. I apologize for any issue caused by its previous behavior and any future bug that Mergo could cause in existing projects after the change (release 0.2.0).
+
+# Install
+
+Do your usual installation procedure:
+
+ go get dario.cat/mergo
+
+ // use in your .go code
+ import (
+ "dario.cat/mergo"
+ )
+
+# Usage
+
+You can only merge same-type structs with exported fields initialized as zero value of their type and same-types maps. Mergo won't merge unexported (private) fields but will do recursively any exported one. It won't merge empty structs value as they are zero values too. Also, maps will be merged recursively except for structs inside maps (because they are not addressable using Go reflection).
+
+ if err := mergo.Merge(&dst, src); err != nil {
+ // ...
+ }
+
+Also, you can merge overwriting values using the transformer WithOverride.
+
+ if err := mergo.Merge(&dst, src, mergo.WithOverride); err != nil {
+ // ...
+ }
+
+Additionally, you can map a map[string]interface{} to a struct (and otherwise, from struct to map), following the same restrictions as in Merge(). Keys are capitalized to find each corresponding exported field.
+
+ if err := mergo.Map(&dst, srcMap); err != nil {
+ // ...
+ }
+
+Warning: if you map a struct to map, it won't do it recursively. Don't expect Mergo to map struct members of your struct as map[string]interface{}. They will be just assigned as values.
+
+Here is a nice example:
+
+ package main
+
+ import (
+ "fmt"
+ "dario.cat/mergo"
+ )
+
+ type Foo struct {
+ A string
+ B int64
+ }
+
+ func main() {
+ src := Foo{
+ A: "one",
+ B: 2,
+ }
+ dest := Foo{
+ A: "two",
+ }
+ mergo.Merge(&dest, src)
+ fmt.Println(dest)
+ // Will print
+ // {two 2}
+ }
+
+# Transformers
+
+Transformers allow you to merge specific types differently than in the default behavior. In other words, now you can customize how some types are merged. For example, time.Time is a struct; it doesn't have zero value but IsZero can return true because it has fields with zero value. How can we merge a non-zero time.Time?
+
+ package main
+
+ import (
+ "fmt"
+ "dario.cat/mergo"
+ "reflect"
+ "time"
+ )
+
+ type timeTransformer struct {
+ }
+
+ func (t timeTransformer) Transformer(typ reflect.Type) func(dst, src reflect.Value) error {
+ if typ == reflect.TypeOf(time.Time{}) {
+ return func(dst, src reflect.Value) error {
+ if dst.CanSet() {
+ isZero := dst.MethodByName("IsZero")
+ result := isZero.Call([]reflect.Value{})
+ if result[0].Bool() {
+ dst.Set(src)
+ }
+ }
+ return nil
+ }
+ }
+ return nil
+ }
+
+ type Snapshot struct {
+ Time time.Time
+ // ...
+ }
+
+ func main() {
+ src := Snapshot{time.Now()}
+ dest := Snapshot{}
+ mergo.Merge(&dest, src, mergo.WithTransformers(timeTransformer{}))
+ fmt.Println(dest)
+ // Will print
+ // { 2018-01-12 01:15:00 +0000 UTC m=+0.000000001 }
+ }
+
+# Contact me
+
+If I can help you, you have an idea or you are using Mergo in your projects, don't hesitate to drop me a line (or a pull request): https://twitter.com/im_dario
+
+# About
+
+Written by Dario Castañé: https://da.rio.hn
+
+# License
+
+BSD 3-Clause license, as Go language.
+*/
+package mergo
diff --git a/vendor/dario.cat/mergo/map.go b/vendor/dario.cat/mergo/map.go
new file mode 100644
index 000000000..759b4f74f
--- /dev/null
+++ b/vendor/dario.cat/mergo/map.go
@@ -0,0 +1,178 @@
+// Copyright 2014 Dario Castañé. All rights reserved.
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Based on src/pkg/reflect/deepequal.go from official
+// golang's stdlib.
+
+package mergo
+
+import (
+ "fmt"
+ "reflect"
+ "unicode"
+ "unicode/utf8"
+)
+
+// changeInitialCase returns s with its first rune passed through mapper; an empty s is returned unchanged.
+func changeInitialCase(s string, mapper func(rune) rune) string {
+	if first, width := utf8.DecodeRuneInString(s); width > 0 {
+		return string(mapper(first)) + s[width:]
+	}
+	return s
+}
+
+// isExported reports whether field is exported; reflect leaves PkgPath empty only for exported fields, whatever script the name uses.
+func isExported(field reflect.StructField) bool {
+	return field.PkgPath == ""
+}
+
+// deepMap maps src onto dst: the exported fields of a struct into a map[string]interface{},
+// or the entries of a map[string]interface{} into the fields of a struct (or pointer to struct).
+// visited records addressable dst values that were already handled, so recursive types terminate.
+func deepMap(dst, src reflect.Value, visited map[uintptr]*visit, depth int, config *Config) (err error) {
+	overwrite := config.Overwrite
+	if dst.CanAddr() {
+		addr := dst.UnsafeAddr()
+		h := 17 * addr
+		seen := visited[h]
+		typ := dst.Type()
+		for p := seen; p != nil; p = p.next {
+			if p.ptr == addr && p.typ == typ {
+				return nil // this exact destination (address and type) was already processed
+			}
+		}
+		// Record this destination in front of the entries already stored under h.
+		visited[h] = &visit{typ, seen, addr}
+	}
+	switch dst.Kind() {
+	case reflect.Map: // struct -> map; the assertion below panics unless dst is a map[string]interface{}
+		dstMap := dst.Interface().(map[string]interface{})
+		for i, n := 0, src.NumField(); i < n; i++ {
+			srcType := src.Type()
+			field := srcType.Field(i)
+			if !isExported(field) {
+				continue
+			}
+			fieldName := changeInitialCase(field.Name, unicode.ToLower) // key = field name with a lower-cased initial
+			// Write when the key is absent, when overwriting with a non-empty value, or when empty values may overwrite.
+			if _, ok := dstMap[fieldName]; !ok || (!isEmptyValue(reflect.ValueOf(src.Field(i).Interface()), !config.ShouldNotDereference) && overwrite) || config.overwriteWithEmptyValue {
+				dstMap[fieldName] = src.Field(i).Interface()
+			}
+		}
+	case reflect.Ptr: // allocate a nil pointer target, then treat the pointee as the struct case
+		if dst.IsNil() {
+			v := reflect.New(dst.Type().Elem())
+			dst.Set(v)
+		}
+		dst = dst.Elem()
+		fallthrough
+	case reflect.Struct: // map -> struct; the assertion below panics unless src is a map[string]interface{}
+		srcMap := src.Interface().(map[string]interface{})
+		for key := range srcMap {
+			config.overwriteWithEmptyValue = true // note: mutates the shared config for the rest of the run
+			srcValue := srcMap[key]
+			fieldName := changeInitialCase(key, unicode.ToUpper)
+			dstElement := dst.FieldByName(fieldName)
+			if !dstElement.IsValid() {
+				// We discard it because the field doesn't exist.
+				continue
+			}
+			srcElement := reflect.ValueOf(srcValue)
+			dstKind := dstElement.Kind()
+			srcKind := srcElement.Kind()
+			if srcKind == reflect.Ptr && dstKind != reflect.Ptr {
+				if srcElement = srcElement.Elem(); !srcElement.IsValid() {
+					continue // nil pointer: nothing to map, and Interface() on the zero Value would panic
+				}
+				srcKind = reflect.TypeOf(srcElement.Interface()).Kind()
+			} else if dstKind == reflect.Ptr {
+				// A Value obtained from reflect.ValueOf is not addressable, so this branch is not expected to fire.
+				if srcKind != reflect.Ptr && srcElement.CanAddr() {
+					srcPtr := srcElement.Addr()
+					srcElement = reflect.ValueOf(srcPtr)
+					srcKind = reflect.Ptr
+				}
+			}
+
+			if !srcElement.IsValid() {
+				continue
+			}
+			if srcKind == dstKind {
+				if err = deepMerge(dstElement, srcElement, visited, depth+1, config); err != nil {
+					return
+				}
+			} else if dstKind == reflect.Interface && dstElement.Kind() == reflect.Interface {
+				if err = deepMerge(dstElement, srcElement, visited, depth+1, config); err != nil {
+					return
+				}
+			} else if srcKind == reflect.Map {
+				if err = deepMap(dstElement, srcElement, visited, depth+1, config); err != nil {
+					return
+				}
+			} else {
+				return fmt.Errorf("type mismatch on %s field: found %v, expected %v", fieldName, srcKind, dstKind)
+			}
+		}
+	}
+	return
+}
+
+// Map sets fields' values in dst from src.
+// src can be a map with string keys or a struct. dst must be the opposite:
+// if src is a map, dst must be a valid pointer to struct. If src is a struct,
+// dst must refer to a map[string]interface{}. A non-nil dst that is not a
+// pointer is rejected with ErrNonPointerArgument.
+// It won't map unexported (private) fields.
+// If dst is a map, keys are the src field names with the first letter
+// lower-cased. If dst is a struct, src keys that match no field are skipped.
+// When src and dst resolve to the same kind, the call is forwarded to
+// deepMerge, i.e. it behaves like a merge rather than a mapping.
+// Each function in opts is applied to a fresh Config before the mapping starts.
+func Map(dst, src interface{}, opts ...func(*Config)) error {
+ return _map(dst, src, opts...)
+}
+
+// MapWithOverwrite does the same as Map, with the WithOverride option appended to opts.
+//
+// Deprecated: Use Map(…) with WithOverride
+func MapWithOverwrite(dst, src interface{}, opts ...func(*Config)) error {
+ return _map(dst, src, append(opts, WithOverride)...)
+}
+
+func _map(dst, src interface{}, opts ...func(*Config)) error {
+ if dst != nil && reflect.ValueOf(dst).Kind() != reflect.Ptr {
+ return ErrNonPointerArgument
+ }
+ var (
+ vDst, vSrc reflect.Value
+ err error
+ )
+ config := &Config{}
+
+ for _, opt := range opts {
+ opt(config)
+ }
+
+ if vDst, vSrc, err = resolveValues(dst, src); err != nil {
+ return err
+ }
+ // To be friction-less, we redirect equal-type arguments
+ // to deepMerge. Only because arguments can be anything.
+ if vSrc.Kind() == vDst.Kind() {
+ return deepMerge(vDst, vSrc, make(map[uintptr]*visit), 0, config)
+ }
+ switch vSrc.Kind() {
+ case reflect.Struct:
+ if vDst.Kind() != reflect.Map {
+ return ErrExpectedMapAsDestination
+ }
+ case reflect.Map:
+ if vDst.Kind() != reflect.Struct {
+ return ErrExpectedStructAsDestination
+ }
+ default:
+ return ErrNotSupported
+ }
+ return deepMap(vDst, vSrc, make(map[uintptr]*visit), 0, config)
+}
diff --git a/vendor/dario.cat/mergo/merge.go b/vendor/dario.cat/mergo/merge.go
new file mode 100644
index 000000000..fd47c95b2
--- /dev/null
+++ b/vendor/dario.cat/mergo/merge.go
@@ -0,0 +1,409 @@
+// Copyright 2013 Dario Castañé. All rights reserved.
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Based on src/pkg/reflect/deepequal.go from official
+// golang's stdlib.
+
+package mergo
+
+import (
+ "fmt"
+ "reflect"
+)
+
+// hasMergeableFields reports whether the struct value dst contains at least
+// one field accepted by isExportedComponent, looking recursively into embedded
+// (anonymous) fields whose kind is struct.
+func hasMergeableFields(dst reflect.Value) (exported bool) {
+	dstType := dst.Type()
+	for idx := 0; idx < dst.NumField(); idx++ {
+		sf := dstType.Field(idx)
+		if sf.Anonymous && dst.Field(idx).Kind() == reflect.Struct {
+			// Embedded struct: it counts only if something inside it does.
+			if hasMergeableFields(dst.Field(idx)) {
+				return true
+			}
+			continue
+		}
+		if isExportedComponent(&sf) {
+			return true
+		}
+	}
+	return false
+}
+
+// isExportedComponent reports whether field is exported: its PkgPath must be
+// empty and its name must not start with an ASCII lower-case letter or an
+// underscore.
+func isExportedComponent(field *reflect.StructField) bool {
+	if field.PkgPath != "" {
+		return false
+	}
+	switch first := field.Name[0]; {
+	case first == '_', 'a' <= first && first <= 'z':
+		return false
+	}
+	return true
+}
+
+// Config collects the settings that control a merge. merge and _map start from
+// an empty Config and let every option function passed by the caller modify it.
+type Config struct {
+ // Transformers, when non-nil, is asked by deepMerge for a custom merge
+ // function for the destination type.
+ Transformers Transformers
+ // Overwrite allows non-empty dst values to be replaced by src values.
+ // Set by WithOverride, WithOverwriteWithEmptyValue and WithSliceDeepCopy.
+ Overwrite bool
+ // ShouldNotDereference disables pointer dereferencing when checking for
+ // empty values. Set by WithoutDereference.
+ ShouldNotDereference bool
+ // AppendSlice makes src slices get appended to dst slices. Set by WithAppendSlice.
+ AppendSlice bool
+ // TypeCheck makes overriding a slice with a slice of a different type an
+ // error. Set by WithTypeCheck.
+ TypeCheck bool
+ // overwriteWithEmptyValue lets empty src values replace dst values.
+ // Set by WithOverwriteWithEmptyValue.
+ overwriteWithEmptyValue bool
+ // overwriteSliceWithEmptyValue lets an empty src slice take part in slice
+ // overriding. Set by WithOverrideEmptySlice.
+ overwriteSliceWithEmptyValue bool
+ // sliceDeepCopy makes slices get merged element by element.
+ // Set by WithSliceDeepCopy.
+ sliceDeepCopy bool
+ // NOTE(review): debug is not read by merge or deepMerge in this file;
+ // confirm its use elsewhere in the package.
+ debug bool
+}
+
+// Transformers supplies custom merge functions per type. deepMerge calls
+// Transformer with the type of the destination value; when the returned
+// function is non-nil it is invoked with (dst, src) in place of the default
+// merge logic, and its error becomes the result for that value.
+type Transformers interface {
+ Transformer(reflect.Type) func(dst, src reflect.Value) error
+}
+
+// deepMerge traverses both values recursively, assigning src's field values to
+// dst according to the options in config. An invalid src is a no-op.
+// The visited map records the address and type of every addressable dst that
+// is already being merged, which allows short circuiting on recursive types.
+// depth is incremented on every recursive call and otherwise only passed along.
+func deepMerge(dst, src reflect.Value, visited map[uintptr]*visit, depth int, config *Config) (err error) {
+ overwrite := config.Overwrite
+ typeCheck := config.TypeCheck
+ overwriteWithEmptySrc := config.overwriteWithEmptyValue
+ overwriteSliceWithEmptySrc := config.overwriteSliceWithEmptyValue
+ sliceDeepCopy := config.sliceDeepCopy
+
+ if !src.IsValid() {
+ return
+ }
+ // Cycle guard: stop if this dst address was already visited with the same type.
+ if dst.CanAddr() {
+ addr := dst.UnsafeAddr()
+ h := 17 * addr
+ seen := visited[h]
+ typ := dst.Type()
+ for p := seen; p != nil; p = p.next {
+ if p.ptr == addr && p.typ == typ {
+ return nil
+ }
+ }
+ // Remember, remember...
+ visited[h] = &visit{typ, seen, addr}
+ }
+
+ // A transformer registered for dst's type replaces the default merge logic.
+ if config.Transformers != nil && !isReflectNil(dst) && dst.IsValid() {
+ if fn := config.Transformers.Transformer(dst.Type()); fn != nil {
+ err = fn(dst, src)
+ return
+ }
+ }
+
+ switch dst.Kind() {
+ case reflect.Struct:
+ if hasMergeableFields(dst) {
+ for i, n := 0, dst.NumField(); i < n; i++ {
+ if err = deepMerge(dst.Field(i), src.Field(i), visited, depth+1, config); err != nil {
+ return
+ }
+ }
+ } else {
+ if dst.CanSet() && (isReflectNil(dst) || overwrite) && (!isEmptyValue(src, !config.ShouldNotDereference) || overwriteWithEmptySrc) {
+ dst.Set(src)
+ }
+ }
+ case reflect.Map:
+ if dst.IsNil() && !src.IsNil() {
+ if dst.CanSet() {
+ dst.Set(reflect.MakeMap(dst.Type()))
+ } else {
+ // NOTE(review): this only rebinds the local dst before returning;
+ // the caller's value is not modified.
+ dst = src
+ return
+ }
+ }
+
+ if src.Kind() != reflect.Map {
+ if overwrite && dst.CanSet() {
+ dst.Set(src)
+ }
+ return
+ }
+
+ for _, key := range src.MapKeys() {
+ srcElement := src.MapIndex(key)
+ if !srcElement.IsValid() {
+ continue
+ }
+ dstElement := dst.MapIndex(key)
+ switch srcElement.Kind() {
+ case reflect.Chan, reflect.Func, reflect.Map, reflect.Interface, reflect.Slice:
+ if srcElement.IsNil() {
+ if overwrite {
+ dst.SetMapIndex(key, srcElement)
+ }
+ continue
+ }
+ fallthrough
+ default:
+ if !srcElement.CanInterface() {
+ continue
+ }
+ // Dispatch on the dynamic type held by the element, not its static kind.
+ switch reflect.TypeOf(srcElement.Interface()).Kind() {
+ case reflect.Struct:
+ fallthrough
+ case reflect.Ptr:
+ fallthrough
+ case reflect.Map:
+ srcMapElm := srcElement
+ dstMapElm := dstElement
+ if srcMapElm.CanInterface() {
+ srcMapElm = reflect.ValueOf(srcMapElm.Interface())
+ if dstMapElm.IsValid() {
+ dstMapElm = reflect.ValueOf(dstMapElm.Interface())
+ }
+ }
+ if err = deepMerge(dstMapElm, srcMapElm, visited, depth+1, config); err != nil {
+ return
+ }
+ case reflect.Slice:
+ srcSlice := reflect.ValueOf(srcElement.Interface())
+
+ var dstSlice reflect.Value
+ if !dstElement.IsValid() || dstElement.IsNil() {
+ dstSlice = reflect.MakeSlice(srcSlice.Type(), 0, srcSlice.Len())
+ } else {
+ dstSlice = reflect.ValueOf(dstElement.Interface())
+ }
+
+ if (!isEmptyValue(src, !config.ShouldNotDereference) || overwriteWithEmptySrc || overwriteSliceWithEmptySrc) && (overwrite || isEmptyValue(dst, !config.ShouldNotDereference)) && !config.AppendSlice && !sliceDeepCopy {
+ if typeCheck && srcSlice.Type() != dstSlice.Type() {
+ return fmt.Errorf("cannot override two slices with different type (%s, %s)", srcSlice.Type(), dstSlice.Type())
+ }
+ dstSlice = srcSlice
+ } else if config.AppendSlice {
+ if srcSlice.Type() != dstSlice.Type() {
+ return fmt.Errorf("cannot append two slices with different type (%s, %s)", srcSlice.Type(), dstSlice.Type())
+ }
+ dstSlice = reflect.AppendSlice(dstSlice, srcSlice)
+ } else if sliceDeepCopy {
+ // Merge element by element, up to the length of the shorter slice.
+ i := 0
+ for ; i < srcSlice.Len() && i < dstSlice.Len(); i++ {
+ srcElement := srcSlice.Index(i)
+ dstElement := dstSlice.Index(i)
+
+ if srcElement.CanInterface() {
+ srcElement = reflect.ValueOf(srcElement.Interface())
+ }
+ if dstElement.CanInterface() {
+ dstElement = reflect.ValueOf(dstElement.Interface())
+ }
+
+ if err = deepMerge(dstElement, srcElement, visited, depth+1, config); err != nil {
+ return
+ }
+ }
+
+ }
+ dst.SetMapIndex(key, dstSlice)
+ }
+ }
+
+ // Slices, and maps merged into maps, were already handled above when
+ // dst holds a non-empty element for this key.
+ if dstElement.IsValid() && !isEmptyValue(dstElement, !config.ShouldNotDereference) {
+ if reflect.TypeOf(srcElement.Interface()).Kind() == reflect.Slice {
+ continue
+ }
+ if reflect.TypeOf(srcElement.Interface()).Kind() == reflect.Map && reflect.TypeOf(dstElement.Interface()).Kind() == reflect.Map {
+ continue
+ }
+ }
+
+ if srcElement.IsValid() && ((srcElement.Kind() != reflect.Ptr && overwrite) || !dstElement.IsValid() || isEmptyValue(dstElement, !config.ShouldNotDereference)) {
+ if dst.IsNil() {
+ dst.Set(reflect.MakeMap(dst.Type()))
+ }
+ dst.SetMapIndex(key, srcElement)
+ }
+ }
+
+ // Ensure that all keys in dst are deleted if they are not in src.
+ if overwriteWithEmptySrc {
+ for _, key := range dst.MapKeys() {
+ srcElement := src.MapIndex(key)
+ if !srcElement.IsValid() {
+ dst.SetMapIndex(key, reflect.Value{})
+ }
+ }
+ }
+ case reflect.Slice:
+ if !dst.CanSet() {
+ break
+ }
+ if (!isEmptyValue(src, !config.ShouldNotDereference) || overwriteWithEmptySrc || overwriteSliceWithEmptySrc) && (overwrite || isEmptyValue(dst, !config.ShouldNotDereference)) && !config.AppendSlice && !sliceDeepCopy {
+ dst.Set(src)
+ } else if config.AppendSlice {
+ if src.Type() != dst.Type() {
+ return fmt.Errorf("cannot append two slice with different type (%s, %s)", src.Type(), dst.Type())
+ }
+ dst.Set(reflect.AppendSlice(dst, src))
+ } else if sliceDeepCopy {
+ // Merge element by element, up to the length of the shorter slice.
+ for i := 0; i < src.Len() && i < dst.Len(); i++ {
+ srcElement := src.Index(i)
+ dstElement := dst.Index(i)
+ if srcElement.CanInterface() {
+ srcElement = reflect.ValueOf(srcElement.Interface())
+ }
+ if dstElement.CanInterface() {
+ dstElement = reflect.ValueOf(dstElement.Interface())
+ }
+
+ if err = deepMerge(dstElement, srcElement, visited, depth+1, config); err != nil {
+ return
+ }
+ }
+ }
+ case reflect.Ptr:
+ fallthrough
+ case reflect.Interface:
+ if isReflectNil(src) {
+ if overwriteWithEmptySrc && dst.CanSet() && src.Type().AssignableTo(dst.Type()) {
+ dst.Set(src)
+ }
+ break
+ }
+
+ // src is a concrete (non-interface) value here.
+ if src.Kind() != reflect.Interface {
+ if dst.IsNil() || (src.Kind() != reflect.Ptr && overwrite) {
+ if dst.CanSet() && (overwrite || isEmptyValue(dst, !config.ShouldNotDereference)) {
+ dst.Set(src)
+ }
+ } else if src.Kind() == reflect.Ptr {
+ if !config.ShouldNotDereference {
+ if err = deepMerge(dst.Elem(), src.Elem(), visited, depth+1, config); err != nil {
+ return
+ }
+ } else if src.Elem().Kind() != reflect.Struct {
+ if overwriteWithEmptySrc || (overwrite && !src.IsNil()) || dst.IsNil() {
+ dst.Set(src)
+ }
+ }
+ } else if dst.Elem().Type() == src.Type() {
+ if err = deepMerge(dst.Elem(), src, visited, depth+1, config); err != nil {
+ return
+ }
+ } else {
+ return ErrDifferentArgumentsTypes
+ }
+ break
+ }
+
+ // src is an interface value from here on.
+ if dst.IsNil() || overwrite {
+ if dst.CanSet() && (overwrite || isEmptyValue(dst, !config.ShouldNotDereference)) {
+ dst.Set(src)
+ }
+ break
+ }
+
+ if dst.Elem().Kind() == src.Elem().Kind() {
+ if err = deepMerge(dst.Elem(), src.Elem(), visited, depth+1, config); err != nil {
+ return
+ }
+ break
+ }
+ default:
+ mustSet := (isEmptyValue(dst, !config.ShouldNotDereference) || overwrite) && (!isEmptyValue(src, !config.ShouldNotDereference) || overwriteWithEmptySrc)
+ if mustSet {
+ if dst.CanSet() {
+ dst.Set(src)
+ } else {
+ // NOTE(review): this only rebinds the local dst; the caller's
+ // value is not modified.
+ dst = src
+ }
+ }
+ }
+
+ return
+}
+
+// Merge fills any empty attributes of dst using the corresponding src
+// attributes if they themselves are not empty. dst must be a pointer, and the
+// value it points to must have the same type as src (a pointer src is
+// dereferenced first).
+// It won't merge unexported (private) fields and will recursively merge any exported field.
+func Merge(dst, src interface{}, opts ...func(*Config)) error {
+ return merge(dst, src, opts...)
+}
+
+// MergeWithOverwrite will do the same as Merge except that non-empty dst attributes will be overridden by
+// non-empty src attribute values.
+//
+// Deprecated: use Merge(…) with WithOverride.
+func MergeWithOverwrite(dst, src interface{}, opts ...func(*Config)) error {
+	// Clip opts to its own length before appending so that append always
+	// allocates a fresh backing array. A caller may pass an existing slice via
+	// opts..., and a plain append could otherwise write WithOverride into that
+	// slice's spare capacity.
+	return merge(dst, src, append(opts[:len(opts):len(opts)], WithOverride)...)
+}
+
+// WithTransformers adds transformers to merge, allowing customization of how some types are merged.
+// During the merge, a non-nil function returned by the transformers for a destination type is
+// called instead of the default merge logic for values of that type.
+func WithTransformers(transformers Transformers) func(*Config) {
+ return func(config *Config) {
+ config.Transformers = transformers
+ }
+}
+
+// WithOverride will make merge override non-empty dst attributes with non-empty src attribute values.
+// It sets Config.Overwrite.
+func WithOverride(config *Config) {
+ config.Overwrite = true
+}
+
+// WithOverwriteWithEmptyValue will make merge override non-empty dst attributes with empty src attribute values.
+// It also enables Config.Overwrite.
+func WithOverwriteWithEmptyValue(config *Config) {
+ config.Overwrite = true
+ config.overwriteWithEmptyValue = true
+}
+
+// WithOverrideEmptySlice will make merge override an empty dst slice with an empty src slice.
+// On its own it does not replace a non-empty dst slice: the slice branches of deepMerge
+// additionally require Overwrite to be set or dst to be empty.
+func WithOverrideEmptySlice(config *Config) {
+ config.overwriteSliceWithEmptyValue = true
+}
+
+// WithoutDereference prevents dereferencing pointers when evaluating whether they are empty
+// (i.e. a non-nil pointer is never considered empty). With this option deepMerge also does not
+// recurse into the values that non-nil pointers point to.
+func WithoutDereference(config *Config) {
+ config.ShouldNotDereference = true
+}
+
+// WithAppendSlice will make merge append src slices to dst slices instead of overwriting them.
+// Appending slices of different types is reported as an error.
+func WithAppendSlice(config *Config) {
+ config.AppendSlice = true
+}
+
+// WithTypeCheck will make merge check types while overwriting (must be used with WithOverride).
+// In deepMerge the flag is consulted when a slice stored in a map would be replaced by a src
+// slice of a different type; that case then returns an error.
+func WithTypeCheck(config *Config) {
+ config.TypeCheck = true
+}
+
+// WithSliceDeepCopy will merge slices element by element and also sets the Overwrite flag.
+// Only positions present in both slices (up to the length of the shorter one) are merged.
+func WithSliceDeepCopy(config *Config) {
+ config.sliceDeepCopy = true
+ config.Overwrite = true
+}
+
+// merge is the shared implementation behind Merge and MergeWithOverwrite.
+//
+// dst must be nil or a pointer (ErrNonPointerArgument otherwise). The options
+// are applied to a fresh Config, dst and src are resolved to reflect values,
+// and the two resolved values must have the same type
+// (ErrDifferentArgumentsTypes otherwise) before deepMerge is run.
+// A nil src, including a typed nil pointer, yields ErrNilArguments.
+func merge(dst, src interface{}, opts ...func(*Config)) error {
+	if dst != nil && reflect.ValueOf(dst).Kind() != reflect.Ptr {
+		return ErrNonPointerArgument
+	}
+
+	config := &Config{}
+	for _, opt := range opts {
+		opt(config)
+	}
+
+	vDst, vSrc, err := resolveValues(dst, src)
+	if err != nil {
+		return err
+	}
+	// A typed nil pointer src passes the nil-interface check in resolveValues
+	// but dereferences to the zero reflect.Value, and calling Type() on that
+	// panics. Report it as a nil argument instead.
+	if !vSrc.IsValid() {
+		return ErrNilArguments
+	}
+	if vDst.Type() != vSrc.Type() {
+		return ErrDifferentArgumentsTypes
+	}
+	return deepMerge(vDst, vSrc, make(map[uintptr]*visit), 0, config)
+}
+
+// isReflectNil reports whether v is a nil value of a kind that can be nil
+// (chan, func, interface, map, pointer or slice). Every other kind, including
+// the invalid zero Value, yields false.
+func isReflectNil(v reflect.Value) bool {
+	switch v.Kind() {
+	case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Ptr, reflect.Slice:
+		return v.IsNil()
+	}
+	return false
+}
diff --git a/vendor/dario.cat/mergo/mergo.go b/vendor/dario.cat/mergo/mergo.go
new file mode 100644
index 000000000..0a721e2d8
--- /dev/null
+++ b/vendor/dario.cat/mergo/mergo.go
@@ -0,0 +1,81 @@
+// Copyright 2013 Dario Castañé. All rights reserved.
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Based on src/pkg/reflect/deepequal.go from official
+// golang's stdlib.
+
+package mergo
+
+import (
+ "errors"
+ "reflect"
+)
+
+// Errors reported by Mergo when it finds invalid arguments.
+var (
+ // ErrNilArguments is returned when dst or src is nil.
+ ErrNilArguments = errors.New("src and dst must not be nil")
+ // ErrDifferentArgumentsTypes is returned when the merged values do not share a type.
+ ErrDifferentArgumentsTypes = errors.New("src and dst must be of same type")
+ // ErrNotSupported is returned when dst does not point to a struct, map or
+ // slice, or when a mapping is requested from an unsupported source kind.
+ ErrNotSupported = errors.New("only structs, maps, and slices are supported")
+ // ErrExpectedMapAsDestination is returned when mapping from a struct into something that is not a map.
+ ErrExpectedMapAsDestination = errors.New("dst was expected to be a map")
+ // ErrExpectedStructAsDestination is returned when mapping from a map into something that is not a struct.
+ ErrExpectedStructAsDestination = errors.New("dst was expected to be a struct")
+ // ErrNonPointerArgument is returned when a non-nil dst is not a pointer.
+ ErrNonPointerArgument = errors.New("dst must be a pointer")
+)
+
+// During deepMerge, we must keep track of the destination values whose merge
+// is already in progress, so that recursive values are not merged again when
+// they are re-encountered.
+// A visit records one such value by its address (ptr) and type (typ). Visits
+// are stored in a map indexed by 17 * address; entries sharing an index are
+// chained through next.
+type visit struct {
+ typ reflect.Type
+ next *visit
+ ptr uintptr
+}
+
+// isEmptyValue reports whether v is empty: an invalid Value, false, a numeric
+// zero, a nil func, or a zero-length array, map, slice or string. A nil
+// pointer or interface is empty; a non-nil one is empty only when
+// shouldDereference is set and the value it holds is itself empty. All other
+// kinds are never empty.
+// From src/pkg/encoding/json/encode.go.
+func isEmptyValue(v reflect.Value, shouldDereference bool) bool {
+	switch v.Kind() {
+	case reflect.Invalid:
+		return true
+	case reflect.Bool:
+		return !v.Bool()
+	case reflect.String, reflect.Array, reflect.Slice, reflect.Map:
+		return v.Len() == 0
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return v.Int() == 0
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return v.Uint() == 0
+	case reflect.Float32, reflect.Float64:
+		return v.Float() == 0
+	case reflect.Func:
+		return v.IsNil()
+	case reflect.Ptr, reflect.Interface:
+		if v.IsNil() {
+			return true
+		}
+		// Non-nil: look through the pointer/interface only when asked to.
+		return shouldDereference && isEmptyValue(v.Elem(), shouldDereference)
+	default:
+		return false
+	}
+}
+
+// resolveValues turns the dst and src arguments into reflect values.
+// It returns ErrNilArguments when either argument is nil, and ErrNotSupported
+// when dst does not point to a struct, map or slice. dst is always
+// dereferenced once; src is dereferenced only when it is a pointer.
+func resolveValues(dst, src interface{}) (vDst, vSrc reflect.Value, err error) {
+	if dst == nil || src == nil {
+		return reflect.Value{}, reflect.Value{}, ErrNilArguments
+	}
+
+	vDst = reflect.ValueOf(dst).Elem()
+	switch vDst.Kind() {
+	case reflect.Struct, reflect.Map, reflect.Slice:
+		// Supported destination kinds.
+	default:
+		return vDst, reflect.Value{}, ErrNotSupported
+	}
+
+	// We check if vSrc is a pointer to dereference it.
+	if vSrc = reflect.ValueOf(src); vSrc.Kind() == reflect.Ptr {
+		vSrc = vSrc.Elem()
+	}
+	return vDst, vSrc, nil
+}
diff --git a/vendor/github.com/BurntSushi/toml/README.md b/vendor/github.com/BurntSushi/toml/README.md
index 3651cfa96..639e6c399 100644
--- a/vendor/github.com/BurntSushi/toml/README.md
+++ b/vendor/github.com/BurntSushi/toml/README.md
@@ -9,7 +9,7 @@ See the [releases page](https://github.com/BurntSushi/toml/releases) for a
changelog; this information is also in the git tag annotations (e.g. `git show
v0.4.0`).
-This library requires Go 1.13 or newer; add it to your go.mod with:
+This library requires Go 1.18 or newer; add it to your go.mod with:
% go get github.com/BurntSushi/toml@latest
diff --git a/vendor/github.com/BurntSushi/toml/decode.go b/vendor/github.com/BurntSushi/toml/decode.go
index 4d38f3bfc..7aaf462c9 100644
--- a/vendor/github.com/BurntSushi/toml/decode.go
+++ b/vendor/github.com/BurntSushi/toml/decode.go
@@ -6,7 +6,7 @@ import (
"encoding/json"
"fmt"
"io"
- "io/ioutil"
+ "io/fs"
"math"
"os"
"reflect"
@@ -18,13 +18,13 @@ import (
// Unmarshaler is the interface implemented by objects that can unmarshal a
// TOML description of themselves.
type Unmarshaler interface {
- UnmarshalTOML(interface{}) error
+ UnmarshalTOML(any) error
}
// Unmarshal decodes the contents of data in TOML format into a pointer v.
//
// See [Decoder] for a description of the decoding process.
-func Unmarshal(data []byte, v interface{}) error {
+func Unmarshal(data []byte, v any) error {
_, err := NewDecoder(bytes.NewReader(data)).Decode(v)
return err
}
@@ -32,12 +32,12 @@ func Unmarshal(data []byte, v interface{}) error {
// Decode the TOML data in to the pointer v.
//
// See [Decoder] for a description of the decoding process.
-func Decode(data string, v interface{}) (MetaData, error) {
+func Decode(data string, v any) (MetaData, error) {
return NewDecoder(strings.NewReader(data)).Decode(v)
}
// DecodeFile reads the contents of a file and decodes it with [Decode].
-func DecodeFile(path string, v interface{}) (MetaData, error) {
+func DecodeFile(path string, v any) (MetaData, error) {
fp, err := os.Open(path)
if err != nil {
return MetaData{}, err
@@ -46,6 +46,17 @@ func DecodeFile(path string, v interface{}) (MetaData, error) {
return NewDecoder(fp).Decode(v)
}
+// DecodeFS reads the contents of a file from [fs.FS] and decodes it with
+// [Decode].
+//
+// If the file cannot be opened, the error from fsys.Open is returned together
+// with an empty MetaData. The file is closed before DecodeFS returns.
+func DecodeFS(fsys fs.FS, path string, v any) (MetaData, error) {
+ fp, err := fsys.Open(path)
+ if err != nil {
+ return MetaData{}, err
+ }
+ defer fp.Close()
+ return NewDecoder(fp).Decode(v)
+}
+
// Primitive is a TOML value that hasn't been decoded into a Go value.
//
// This type can be used for any value, which will cause decoding to be delayed.
@@ -58,7 +69,7 @@ func DecodeFile(path string, v interface{}) (MetaData, error) {
// overhead of reflection. They can be useful when you don't know the exact type
// of TOML data until runtime.
type Primitive struct {
- undecoded interface{}
+ undecoded any
context Key
}
@@ -122,7 +133,7 @@ var (
)
// Decode TOML data in to the pointer `v`.
-func (dec *Decoder) Decode(v interface{}) (MetaData, error) {
+func (dec *Decoder) Decode(v any) (MetaData, error) {
rv := reflect.ValueOf(v)
if rv.Kind() != reflect.Ptr {
s := "%q"
@@ -136,8 +147,8 @@ func (dec *Decoder) Decode(v interface{}) (MetaData, error) {
return MetaData{}, fmt.Errorf("toml: cannot decode to nil value of %q", reflect.TypeOf(v))
}
- // Check if this is a supported type: struct, map, interface{}, or something
- // that implements UnmarshalTOML or UnmarshalText.
+ // Check if this is a supported type: struct, map, any, or something that
+ // implements UnmarshalTOML or UnmarshalText.
rv = indirect(rv)
rt := rv.Type()
if rv.Kind() != reflect.Struct && rv.Kind() != reflect.Map &&
@@ -148,7 +159,7 @@ func (dec *Decoder) Decode(v interface{}) (MetaData, error) {
// TODO: parser should read from io.Reader? Or at the very least, make it
// read from []byte rather than string
- data, err := ioutil.ReadAll(dec.r)
+ data, err := io.ReadAll(dec.r)
if err != nil {
return MetaData{}, err
}
@@ -179,7 +190,7 @@ func (dec *Decoder) Decode(v interface{}) (MetaData, error) {
// will only reflect keys that were decoded. Namely, any keys hidden behind a
// Primitive will be considered undecoded. Executing this method will update the
// undecoded keys in the meta data. (See the example.)
-func (md *MetaData) PrimitiveDecode(primValue Primitive, v interface{}) error {
+func (md *MetaData) PrimitiveDecode(primValue Primitive, v any) error {
md.context = primValue.context
defer func() { md.context = nil }()
return md.unify(primValue.undecoded, rvalue(v))
@@ -190,7 +201,7 @@ func (md *MetaData) PrimitiveDecode(primValue Primitive, v interface{}) error {
//
// Any type mismatch produces an error. Finding a type that we don't know
// how to handle produces an unsupported type error.
-func (md *MetaData) unify(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unify(data any, rv reflect.Value) error {
// Special case. Look for a `Primitive` value.
// TODO: #76 would make this superfluous after implemented.
if rv.Type() == primitiveType {
@@ -207,7 +218,11 @@ func (md *MetaData) unify(data interface{}, rv reflect.Value) error {
rvi := rv.Interface()
if v, ok := rvi.(Unmarshaler); ok {
- return v.UnmarshalTOML(data)
+ err := v.UnmarshalTOML(data)
+ if err != nil {
+ return md.parseErr(err)
+ }
+ return nil
}
if v, ok := rvi.(encoding.TextUnmarshaler); ok {
return md.unifyText(data, v)
@@ -227,14 +242,6 @@ func (md *MetaData) unify(data interface{}, rv reflect.Value) error {
return md.unifyInt(data, rv)
}
switch k {
- case reflect.Ptr:
- elem := reflect.New(rv.Type().Elem())
- err := md.unify(data, reflect.Indirect(elem))
- if err != nil {
- return err
- }
- rv.Set(elem)
- return nil
case reflect.Struct:
return md.unifyStruct(data, rv)
case reflect.Map:
@@ -258,14 +265,13 @@ func (md *MetaData) unify(data interface{}, rv reflect.Value) error {
return md.e("unsupported type %s", rv.Kind())
}
-func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error {
- tmap, ok := mapping.(map[string]interface{})
+func (md *MetaData) unifyStruct(mapping any, rv reflect.Value) error {
+ tmap, ok := mapping.(map[string]any)
if !ok {
if mapping == nil {
return nil
}
- return md.e("type mismatch for %s: expected table but found %T",
- rv.Type().String(), mapping)
+ return md.e("type mismatch for %s: expected table but found %s", rv.Type().String(), fmtType(mapping))
}
for key, datum := range tmap {
@@ -304,14 +310,14 @@ func (md *MetaData) unifyStruct(mapping interface{}, rv reflect.Value) error {
return nil
}
-func (md *MetaData) unifyMap(mapping interface{}, rv reflect.Value) error {
+func (md *MetaData) unifyMap(mapping any, rv reflect.Value) error {
keyType := rv.Type().Key().Kind()
if keyType != reflect.String && keyType != reflect.Interface {
return fmt.Errorf("toml: cannot decode to a map with non-string key type (%s in %q)",
keyType, rv.Type())
}
- tmap, ok := mapping.(map[string]interface{})
+ tmap, ok := mapping.(map[string]any)
if !ok {
if tmap == nil {
return nil
@@ -347,7 +353,7 @@ func (md *MetaData) unifyMap(mapping interface{}, rv reflect.Value) error {
return nil
}
-func (md *MetaData) unifyArray(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unifyArray(data any, rv reflect.Value) error {
datav := reflect.ValueOf(data)
if datav.Kind() != reflect.Slice {
if !datav.IsValid() {
@@ -361,7 +367,7 @@ func (md *MetaData) unifyArray(data interface{}, rv reflect.Value) error {
return md.unifySliceArray(datav, rv)
}
-func (md *MetaData) unifySlice(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unifySlice(data any, rv reflect.Value) error {
datav := reflect.ValueOf(data)
if datav.Kind() != reflect.Slice {
if !datav.IsValid() {
@@ -388,7 +394,7 @@ func (md *MetaData) unifySliceArray(data, rv reflect.Value) error {
return nil
}
-func (md *MetaData) unifyString(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unifyString(data any, rv reflect.Value) error {
_, ok := rv.Interface().(json.Number)
if ok {
if i, ok := data.(int64); ok {
@@ -408,7 +414,7 @@ func (md *MetaData) unifyString(data interface{}, rv reflect.Value) error {
return md.badtype("string", data)
}
-func (md *MetaData) unifyFloat64(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unifyFloat64(data any, rv reflect.Value) error {
rvk := rv.Kind()
if num, ok := data.(float64); ok {
@@ -429,7 +435,7 @@ func (md *MetaData) unifyFloat64(data interface{}, rv reflect.Value) error {
if num, ok := data.(int64); ok {
if (rvk == reflect.Float32 && (num < -maxSafeFloat32Int || num > maxSafeFloat32Int)) ||
(rvk == reflect.Float64 && (num < -maxSafeFloat64Int || num > maxSafeFloat64Int)) {
- return md.parseErr(errParseRange{i: num, size: rvk.String()})
+ return md.parseErr(errUnsafeFloat{i: num, size: rvk.String()})
}
rv.SetFloat(float64(num))
return nil
@@ -438,7 +444,7 @@ func (md *MetaData) unifyFloat64(data interface{}, rv reflect.Value) error {
return md.badtype("float", data)
}
-func (md *MetaData) unifyInt(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unifyInt(data any, rv reflect.Value) error {
_, ok := rv.Interface().(time.Duration)
if ok {
// Parse as string duration, and fall back to regular integer parsing
@@ -481,7 +487,7 @@ func (md *MetaData) unifyInt(data interface{}, rv reflect.Value) error {
return nil
}
-func (md *MetaData) unifyBool(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unifyBool(data any, rv reflect.Value) error {
if b, ok := data.(bool); ok {
rv.SetBool(b)
return nil
@@ -489,12 +495,12 @@ func (md *MetaData) unifyBool(data interface{}, rv reflect.Value) error {
return md.badtype("boolean", data)
}
-func (md *MetaData) unifyAnything(data interface{}, rv reflect.Value) error {
+func (md *MetaData) unifyAnything(data any, rv reflect.Value) error {
rv.Set(reflect.ValueOf(data))
return nil
}
-func (md *MetaData) unifyText(data interface{}, v encoding.TextUnmarshaler) error {
+func (md *MetaData) unifyText(data any, v encoding.TextUnmarshaler) error {
var s string
switch sdata := data.(type) {
case Marshaler:
@@ -523,13 +529,13 @@ func (md *MetaData) unifyText(data interface{}, v encoding.TextUnmarshaler) erro
return md.badtype("primitive (string-like)", data)
}
if err := v.UnmarshalText([]byte(s)); err != nil {
- return err
+ return md.parseErr(err)
}
return nil
}
-func (md *MetaData) badtype(dst string, data interface{}) error {
- return md.e("incompatible types: TOML value has type %T; destination has type %s", data, dst)
+func (md *MetaData) badtype(dst string, data any) error {
+ return md.e("incompatible types: TOML value has type %s; destination has type %s", fmtType(data), dst)
}
func (md *MetaData) parseErr(err error) error {
@@ -543,7 +549,7 @@ func (md *MetaData) parseErr(err error) error {
}
}
-func (md *MetaData) e(format string, args ...interface{}) error {
+func (md *MetaData) e(format string, args ...any) error {
f := "toml: "
if len(md.context) > 0 {
f = fmt.Sprintf("toml: (last key %q): ", md.context)
@@ -556,7 +562,7 @@ func (md *MetaData) e(format string, args ...interface{}) error {
}
// rvalue returns a reflect.Value of `v`. All pointers are resolved.
-func rvalue(v interface{}) reflect.Value {
+func rvalue(v any) reflect.Value {
return indirect(reflect.ValueOf(v))
}
@@ -600,3 +606,8 @@ func isUnifiable(rv reflect.Value) bool {
}
return false
}
+
+// fmtType formats the type of t like fmt's %T verb, but with every
+// "interface {}" replaced by "any", which is far more readable.
+func fmtType(t any) string {
+	name := fmt.Sprintf("%T", t)
+	return strings.ReplaceAll(name, "interface {}", "any")
+}
diff --git a/vendor/github.com/BurntSushi/toml/decode_go116.go b/vendor/github.com/BurntSushi/toml/decode_go116.go
deleted file mode 100644
index 086d0b686..000000000
--- a/vendor/github.com/BurntSushi/toml/decode_go116.go
+++ /dev/null
@@ -1,19 +0,0 @@
-//go:build go1.16
-// +build go1.16
-
-package toml
-
-import (
- "io/fs"
-)
-
-// DecodeFS reads the contents of a file from [fs.FS] and decodes it with
-// [Decode].
-func DecodeFS(fsys fs.FS, path string, v interface{}) (MetaData, error) {
- fp, err := fsys.Open(path)
- if err != nil {
- return MetaData{}, err
- }
- defer fp.Close()
- return NewDecoder(fp).Decode(v)
-}
diff --git a/vendor/github.com/BurntSushi/toml/deprecated.go b/vendor/github.com/BurntSushi/toml/deprecated.go
index b9e309717..155709a80 100644
--- a/vendor/github.com/BurntSushi/toml/deprecated.go
+++ b/vendor/github.com/BurntSushi/toml/deprecated.go
@@ -15,15 +15,15 @@ type TextMarshaler encoding.TextMarshaler
// Deprecated: use encoding.TextUnmarshaler
type TextUnmarshaler encoding.TextUnmarshaler
+// DecodeReader is an alias for NewDecoder(r).Decode(v).
+//
+// Deprecated: use NewDecoder(reader).Decode(&value).
+func DecodeReader(r io.Reader, v any) (MetaData, error) { return NewDecoder(r).Decode(v) }
+
// PrimitiveDecode is an alias for MetaData.PrimitiveDecode().
//
// Deprecated: use MetaData.PrimitiveDecode.
-func PrimitiveDecode(primValue Primitive, v interface{}) error {
+func PrimitiveDecode(primValue Primitive, v any) error {
md := MetaData{decoded: make(map[string]struct{})}
return md.unify(primValue.undecoded, rvalue(v))
}
-
-// DecodeReader is an alias for NewDecoder(r).Decode(v).
-//
-// Deprecated: use NewDecoder(reader).Decode(&value).
-func DecodeReader(r io.Reader, v interface{}) (MetaData, error) { return NewDecoder(r).Decode(v) }
diff --git a/vendor/github.com/BurntSushi/toml/doc.go b/vendor/github.com/BurntSushi/toml/doc.go
index 81a7c0fe9..82c90a905 100644
--- a/vendor/github.com/BurntSushi/toml/doc.go
+++ b/vendor/github.com/BurntSushi/toml/doc.go
@@ -2,9 +2,6 @@
//
// This package supports TOML v1.0.0, as specified at https://toml.io
//
-// There is also support for delaying decoding with the Primitive type, and
-// querying the set of keys in a TOML document with the MetaData type.
-//
// The github.com/BurntSushi/toml/cmd/tomlv package implements a TOML validator,
// and can be used to verify if TOML document is valid. It can also be used to
// print the type of each key.
diff --git a/vendor/github.com/BurntSushi/toml/encode.go b/vendor/github.com/BurntSushi/toml/encode.go
index 9cd25d757..73366c0d9 100644
--- a/vendor/github.com/BurntSushi/toml/encode.go
+++ b/vendor/github.com/BurntSushi/toml/encode.go
@@ -2,6 +2,7 @@ package toml
import (
"bufio"
+ "bytes"
"encoding"
"encoding/json"
"errors"
@@ -76,6 +77,17 @@ type Marshaler interface {
MarshalTOML() ([]byte, error)
}
+// Marshal encodes the Go value v as TOML and returns the resulting bytes.
+// If encoding fails, the error is returned and the byte slice is nil.
+//
+// See [Encoder] for a description of the encoding process.
+func Marshal(v any) ([]byte, error) {
+	var out bytes.Buffer
+	err := NewEncoder(&out).Encode(v)
+	if err != nil {
+		return nil, err
+	}
+	return out.Bytes(), nil
+}
+
// Encoder encodes a Go to a TOML document.
//
// The mapping between Go values and TOML values should be precisely the same as
@@ -115,26 +127,21 @@ type Marshaler interface {
// NOTE: only exported keys are encoded due to the use of reflection. Unexported
// keys are silently discarded.
type Encoder struct {
- // String to use for a single indentation level; default is two spaces.
- Indent string
-
+ Indent string // string for a single indentation level; default is two spaces.
+ hasWritten bool // written any output to w yet?
w *bufio.Writer
- hasWritten bool // written any output to w yet?
}
// NewEncoder create a new Encoder.
func NewEncoder(w io.Writer) *Encoder {
- return &Encoder{
- w: bufio.NewWriter(w),
- Indent: " ",
- }
+ return &Encoder{w: bufio.NewWriter(w), Indent: " "}
}
// Encode writes a TOML representation of the Go value to the [Encoder]'s writer.
//
// An error is returned if the value given cannot be encoded to a valid TOML
// document.
-func (enc *Encoder) Encode(v interface{}) error {
+func (enc *Encoder) Encode(v any) error {
rv := eindirect(reflect.ValueOf(v))
err := enc.safeEncode(Key([]string{}), rv)
if err != nil {
@@ -280,18 +287,30 @@ func (enc *Encoder) eElement(rv reflect.Value) {
case reflect.Float32:
f := rv.Float()
if math.IsNaN(f) {
+ if math.Signbit(f) {
+ enc.wf("-")
+ }
enc.wf("nan")
} else if math.IsInf(f, 0) {
- enc.wf("%cinf", map[bool]byte{true: '-', false: '+'}[math.Signbit(f)])
+ if math.Signbit(f) {
+ enc.wf("-")
+ }
+ enc.wf("inf")
} else {
enc.wf(floatAddDecimal(strconv.FormatFloat(f, 'f', -1, 32)))
}
case reflect.Float64:
f := rv.Float()
if math.IsNaN(f) {
+ if math.Signbit(f) {
+ enc.wf("-")
+ }
enc.wf("nan")
} else if math.IsInf(f, 0) {
- enc.wf("%cinf", map[bool]byte{true: '-', false: '+'}[math.Signbit(f)])
+ if math.Signbit(f) {
+ enc.wf("-")
+ }
+ enc.wf("inf")
} else {
enc.wf(floatAddDecimal(strconv.FormatFloat(f, 'f', -1, 64)))
}
@@ -304,7 +323,7 @@ func (enc *Encoder) eElement(rv reflect.Value) {
case reflect.Interface:
enc.eElement(rv.Elem())
default:
- encPanic(fmt.Errorf("unexpected type: %T", rv.Interface()))
+ encPanic(fmt.Errorf("unexpected type: %s", fmtType(rv.Interface())))
}
}
@@ -712,7 +731,7 @@ func (enc *Encoder) writeKeyValue(key Key, val reflect.Value, inline bool) {
}
}
-func (enc *Encoder) wf(format string, v ...interface{}) {
+func (enc *Encoder) wf(format string, v ...any) {
_, err := fmt.Fprintf(enc.w, format, v...)
if err != nil {
encPanic(err)
diff --git a/vendor/github.com/BurntSushi/toml/error.go b/vendor/github.com/BurntSushi/toml/error.go
index efd68865b..b45a3f45f 100644
--- a/vendor/github.com/BurntSushi/toml/error.go
+++ b/vendor/github.com/BurntSushi/toml/error.go
@@ -114,13 +114,22 @@ func (pe ParseError) ErrorWithPosition() string {
msg, pe.Position.Line, col, col+pe.Position.Len)
}
if pe.Position.Line > 2 {
- fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-2, lines[pe.Position.Line-3])
+ fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-2, expandTab(lines[pe.Position.Line-3]))
}
if pe.Position.Line > 1 {
- fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-1, lines[pe.Position.Line-2])
+ fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line-1, expandTab(lines[pe.Position.Line-2]))
}
- fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line, lines[pe.Position.Line-1])
- fmt.Fprintf(b, "% 10s%s%s\n", "", strings.Repeat(" ", col), strings.Repeat("^", pe.Position.Len))
+
+ /// Expand tabs, so that the ^^^s are at the correct position, but leave
+ /// "column 10-13" intact. Adjusting this to the visual column would be
+ /// better, but we don't know the tabsize of the user in their editor, which
+ /// can be 8, 4, 2, or something else. We can't know. So leaving it as the
+ /// character index is probably the "most correct".
+ expanded := expandTab(lines[pe.Position.Line-1])
+ diff := len(expanded) - len(lines[pe.Position.Line-1])
+
+ fmt.Fprintf(b, "% 7d | %s\n", pe.Position.Line, expanded)
+ fmt.Fprintf(b, "% 10s%s%s\n", "", strings.Repeat(" ", col+diff), strings.Repeat("^", pe.Position.Len))
return b.String()
}
@@ -159,17 +168,47 @@ func (pe ParseError) column(lines []string) int {
return col
}
+func expandTab(s string) string {
+ var (
+ b strings.Builder
+ l int
+ fill = func(n int) string {
+ b := make([]byte, n)
+ for i := range b {
+ b[i] = ' '
+ }
+ return string(b)
+ }
+ )
+ b.Grow(len(s))
+ for _, r := range s {
+ switch r {
+ case '\t':
+ tw := 8 - l%8
+ b.WriteString(fill(tw))
+ l += tw
+ default:
+ b.WriteRune(r)
+ l += 1
+ }
+ }
+ return b.String()
+}
+
type (
errLexControl struct{ r rune }
errLexEscape struct{ r rune }
errLexUTF8 struct{ b byte }
- errLexInvalidNum struct{ v string }
- errLexInvalidDate struct{ v string }
+ errParseDate struct{ v string }
errLexInlineTableNL struct{}
errLexStringNL struct{}
errParseRange struct {
- i interface{} // int or float
- size string // "int64", "uint16", etc.
+ i any // int or float
+ size string // "int64", "uint16", etc.
+ }
+ errUnsafeFloat struct {
+ i interface{} // float32 or float64
+ size string // "float32" or "float64"
}
errParseDuration struct{ d string }
)
@@ -183,18 +222,20 @@ func (e errLexEscape) Error() string { return fmt.Sprintf(`invalid escape
func (e errLexEscape) Usage() string { return usageEscape }
func (e errLexUTF8) Error() string { return fmt.Sprintf("invalid UTF-8 byte: 0x%02x", e.b) }
func (e errLexUTF8) Usage() string { return "" }
-func (e errLexInvalidNum) Error() string { return fmt.Sprintf("invalid number: %q", e.v) }
-func (e errLexInvalidNum) Usage() string { return "" }
-func (e errLexInvalidDate) Error() string { return fmt.Sprintf("invalid date: %q", e.v) }
-func (e errLexInvalidDate) Usage() string { return "" }
+func (e errParseDate) Error() string { return fmt.Sprintf("invalid datetime: %q", e.v) }
+func (e errParseDate) Usage() string { return usageDate }
func (e errLexInlineTableNL) Error() string { return "newlines not allowed within inline tables" }
func (e errLexInlineTableNL) Usage() string { return usageInlineNewline }
func (e errLexStringNL) Error() string { return "strings cannot contain newlines" }
func (e errLexStringNL) Usage() string { return usageStringNewline }
func (e errParseRange) Error() string { return fmt.Sprintf("%v is out of range for %s", e.i, e.size) }
func (e errParseRange) Usage() string { return usageIntOverflow }
-func (e errParseDuration) Error() string { return fmt.Sprintf("invalid duration: %q", e.d) }
-func (e errParseDuration) Usage() string { return usageDuration }
+func (e errUnsafeFloat) Error() string {
+ return fmt.Sprintf("%v is out of the safe %s range", e.i, e.size)
+}
+func (e errUnsafeFloat) Usage() string { return usageUnsafeFloat }
+func (e errParseDuration) Error() string { return fmt.Sprintf("invalid duration: %q", e.d) }
+func (e errParseDuration) Usage() string { return usageDuration }
const usageEscape = `
A '\' inside a "-delimited string is interpreted as an escape character.
@@ -251,19 +292,35 @@ bug in the program that uses too small of an integer.
The maximum and minimum values are:
size │ lowest │ highest
- ───────┼────────────────┼──────────
+ ───────┼────────────────┼──────────────
int8 │ -128 │ 127
int16 │ -32,768 │ 32,767
int32 │ -2,147,483,648 │ 2,147,483,647
int64 │ -9.2 × 10¹⁷ │ 9.2 × 10¹⁷
uint8 │ 0 │ 255
- uint16 │ 0 │ 65535
- uint32 │ 0 │ 4294967295
+ uint16 │ 0 │ 65,535
+ uint32 │ 0 │ 4,294,967,295
uint64 │ 0 │ 1.8 × 10¹⁸
int refers to int32 on 32-bit systems and int64 on 64-bit systems.
`
+const usageUnsafeFloat = `
+This number is outside of the "safe" range for floating point numbers; whole
+(non-fractional) numbers outside the below range can not always be represented
+accurately in a float, leading to some loss of accuracy.
+
+Explicitly mark a number as a fractional unit by adding ".0", which will incur
+some loss of accuracy; for example:
+
+ f = 2_000_000_000.0
+
+Accuracy ranges:
+
+ float32 = 16,777,215
+ float64 = 9,007,199,254,740,991
+`
+
const usageDuration = `
A duration must be as "number", without any spaces. Valid units are:
@@ -277,3 +334,23 @@ A duration must be as "number", without any spaces. Valid units are:
You can combine multiple units; for example "5m10s" for 5 minutes and 10
seconds.
`
+
+const usageDate = `
+A TOML datetime must be in one of the following formats:
+
+ 2006-01-02T15:04:05Z07:00 Date and time, with timezone.
+ 2006-01-02T15:04:05 Date and time, but without timezone.
+ 2006-01-02 Date without a time or timezone.
+ 15:04:05 Just a time, without any timezone.
+
+Seconds may optionally have a fraction, up to nanosecond precision:
+
+ 15:04:05.123
+ 15:04:05.856018510
+`
+
+// TOML 1.1:
+// The seconds part in times is optional, and may be omitted:
+// 2006-01-02T15:04Z07:00
+// 2006-01-02T15:04
+// 15:04
diff --git a/vendor/github.com/BurntSushi/toml/lex.go b/vendor/github.com/BurntSushi/toml/lex.go
index 3545a6ad6..a1016d98a 100644
--- a/vendor/github.com/BurntSushi/toml/lex.go
+++ b/vendor/github.com/BurntSushi/toml/lex.go
@@ -17,6 +17,7 @@ const (
itemEOF
itemText
itemString
+ itemStringEsc
itemRawString
itemMultilineString
itemRawMultilineString
@@ -53,6 +54,7 @@ type lexer struct {
state stateFn
items chan item
tomlNext bool
+ esc bool
// Allow for backing up up to 4 runes. This is necessary because TOML
// contains 3-rune tokens (""" and ''').
@@ -164,7 +166,7 @@ func (lx *lexer) next() (r rune) {
}
r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
- if r == utf8.RuneError {
+ if r == utf8.RuneError && w == 1 {
lx.error(errLexUTF8{lx.input[lx.pos]})
return utf8.RuneError
}
@@ -270,7 +272,7 @@ func (lx *lexer) errorPos(start, length int, err error) stateFn {
}
// errorf is like error, and creates a new error.
-func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
+func (lx *lexer) errorf(format string, values ...any) stateFn {
if lx.atEOF {
pos := lx.getPos()
pos.Line--
@@ -333,9 +335,7 @@ func lexTopEnd(lx *lexer) stateFn {
lx.emit(itemEOF)
return nil
}
- return lx.errorf(
- "expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
- r)
+ return lx.errorf("expected a top-level item to end with a newline, comment, or EOF, but got %q instead", r)
}
// lexTable lexes the beginning of a table. Namely, it makes sure that
@@ -698,7 +698,12 @@ func lexString(lx *lexer) stateFn {
return lexStringEscape
case r == '"':
lx.backup()
- lx.emit(itemString)
+ if lx.esc {
+ lx.esc = false
+ lx.emit(itemStringEsc)
+ } else {
+ lx.emit(itemString)
+ }
lx.next()
lx.ignore()
return lx.pop()
@@ -748,6 +753,7 @@ func lexMultilineString(lx *lexer) stateFn {
lx.backup() /// backup: don't include the """ in the item.
lx.backup()
lx.backup()
+ lx.esc = false
lx.emit(itemMultilineString)
lx.next() /// Read over ''' again and discard it.
lx.next()
@@ -837,6 +843,7 @@ func lexMultilineStringEscape(lx *lexer) stateFn {
}
func lexStringEscape(lx *lexer) stateFn {
+ lx.esc = true
r := lx.next()
switch r {
case 'e':
@@ -879,10 +886,8 @@ func lexHexEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 2; i++ {
r = lx.next()
- if !isHexadecimal(r) {
- return lx.errorf(
- `expected two hexadecimal digits after '\x', but got %q instead`,
- lx.current())
+ if !isHex(r) {
+ return lx.errorf(`expected two hexadecimal digits after '\x', but got %q instead`, lx.current())
}
}
return lx.pop()
@@ -892,10 +897,8 @@ func lexShortUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 4; i++ {
r = lx.next()
- if !isHexadecimal(r) {
- return lx.errorf(
- `expected four hexadecimal digits after '\u', but got %q instead`,
- lx.current())
+ if !isHex(r) {
+ return lx.errorf(`expected four hexadecimal digits after '\u', but got %q instead`, lx.current())
}
}
return lx.pop()
@@ -905,10 +908,8 @@ func lexLongUnicodeEscape(lx *lexer) stateFn {
var r rune
for i := 0; i < 8; i++ {
r = lx.next()
- if !isHexadecimal(r) {
- return lx.errorf(
- `expected eight hexadecimal digits after '\U', but got %q instead`,
- lx.current())
+ if !isHex(r) {
+ return lx.errorf(`expected eight hexadecimal digits after '\U', but got %q instead`, lx.current())
}
}
return lx.pop()
@@ -975,7 +976,7 @@ func lexDatetime(lx *lexer) stateFn {
// lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
func lexHexInteger(lx *lexer) stateFn {
r := lx.next()
- if isHexadecimal(r) {
+ if isHex(r) {
return lexHexInteger
}
switch r {
@@ -1109,7 +1110,7 @@ func lexBaseNumberOrDate(lx *lexer) stateFn {
return lexOctalInteger
case 'x':
r = lx.peek()
- if !isHexadecimal(r) {
+ if !isHex(r) {
lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
}
return lexHexInteger
@@ -1207,7 +1208,7 @@ func (itype itemType) String() string {
return "EOF"
case itemText:
return "Text"
- case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
+ case itemString, itemStringEsc, itemRawString, itemMultilineString, itemRawMultilineString:
return "String"
case itemBool:
return "Bool"
@@ -1240,7 +1241,7 @@ func (itype itemType) String() string {
}
func (item item) String() string {
- return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
+ return fmt.Sprintf("(%s, %s)", item.typ, item.val)
}
func isWhitespace(r rune) bool { return r == '\t' || r == ' ' }
@@ -1256,10 +1257,7 @@ func isControl(r rune) bool { // Control characters except \t, \r, \n
func isDigit(r rune) bool { return r >= '0' && r <= '9' }
func isBinary(r rune) bool { return r == '0' || r == '1' }
func isOctal(r rune) bool { return r >= '0' && r <= '7' }
-func isHexadecimal(r rune) bool {
- return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')
-}
-
+func isHex(r rune) bool { return (r >= '0' && r <= '9') || (r|0x20 >= 'a' && r|0x20 <= 'f') }
func isBareKeyChar(r rune, tomlNext bool) bool {
if tomlNext {
return (r >= 'A' && r <= 'Z') ||
diff --git a/vendor/github.com/BurntSushi/toml/meta.go b/vendor/github.com/BurntSushi/toml/meta.go
index 2e78b24e9..e61453730 100644
--- a/vendor/github.com/BurntSushi/toml/meta.go
+++ b/vendor/github.com/BurntSushi/toml/meta.go
@@ -13,7 +13,7 @@ type MetaData struct {
context Key // Used only during decoding.
keyInfo map[string]keyInfo
- mapping map[string]interface{}
+ mapping map[string]any
keys []Key
decoded map[string]struct{}
data []byte // Input file; for errors.
@@ -31,12 +31,12 @@ func (md *MetaData) IsDefined(key ...string) bool {
}
var (
- hash map[string]interface{}
+ hash map[string]any
ok bool
- hashOrVal interface{} = md.mapping
+ hashOrVal any = md.mapping
)
for _, k := range key {
- if hash, ok = hashOrVal.(map[string]interface{}); !ok {
+ if hash, ok = hashOrVal.(map[string]any); !ok {
return false
}
if hashOrVal, ok = hash[k]; !ok {
@@ -94,28 +94,55 @@ func (md *MetaData) Undecoded() []Key {
type Key []string
func (k Key) String() string {
- ss := make([]string, len(k))
- for i := range k {
- ss[i] = k.maybeQuoted(i)
+ // This is called quite often, so it's a bit funky to make it faster.
+ var b strings.Builder
+ b.Grow(len(k) * 25)
+outer:
+ for i, kk := range k {
+ if i > 0 {
+ b.WriteByte('.')
+ }
+ if kk == "" {
+ b.WriteString(`""`)
+ } else {
+ for _, r := range kk {
+ // "Inline" isBareKeyChar
+ if !((r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-') {
+ b.WriteByte('"')
+ b.WriteString(dblQuotedReplacer.Replace(kk))
+ b.WriteByte('"')
+ continue outer
+ }
+ }
+ b.WriteString(kk)
+ }
}
- return strings.Join(ss, ".")
+ return b.String()
}
func (k Key) maybeQuoted(i int) string {
if k[i] == "" {
return `""`
}
- for _, c := range k[i] {
- if !isBareKeyChar(c, false) {
- return `"` + dblQuotedReplacer.Replace(k[i]) + `"`
+ for _, r := range k[i] {
+ if (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-' {
+ continue
}
+ return `"` + dblQuotedReplacer.Replace(k[i]) + `"`
}
return k[i]
}
+// Like append(), but only increase the cap by 1.
func (k Key) add(piece string) Key {
+ if cap(k) > len(k) {
+ return append(k, piece)
+ }
newKey := make(Key, len(k)+1)
copy(newKey, k)
newKey[len(k)] = piece
return newKey
}
+
+func (k Key) parent() Key { return k[:len(k)-1] } // all except the last piece.
+func (k Key) last() string { return k[len(k)-1] } // last piece of this key.
diff --git a/vendor/github.com/BurntSushi/toml/parse.go b/vendor/github.com/BurntSushi/toml/parse.go
index 9c1915369..11ac3108b 100644
--- a/vendor/github.com/BurntSushi/toml/parse.go
+++ b/vendor/github.com/BurntSushi/toml/parse.go
@@ -2,6 +2,7 @@ package toml
import (
"fmt"
+ "math"
"os"
"strconv"
"strings"
@@ -20,9 +21,9 @@ type parser struct {
ordered []Key // List of keys in the order that they appear in the TOML data.
- keyInfo map[string]keyInfo // Map keyname → info about the TOML key.
- mapping map[string]interface{} // Map keyname → key value.
- implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names").
+ keyInfo map[string]keyInfo // Map keyname → info about the TOML key.
+ mapping map[string]any // Map keyname → key value.
+ implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names").
}
type keyInfo struct {
@@ -49,6 +50,7 @@ func parse(data string) (p *parser, err error) {
// it anyway.
if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
data = data[2:]
+ //lint:ignore S1017 https://github.com/dominikh/go-tools/issues/1447
} else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
data = data[3:]
}
@@ -71,7 +73,7 @@ func parse(data string) (p *parser, err error) {
p = &parser{
keyInfo: make(map[string]keyInfo),
- mapping: make(map[string]interface{}),
+ mapping: make(map[string]any),
lx: lex(data, tomlNext),
ordered: make([]Key, 0),
implicits: make(map[string]struct{}),
@@ -97,7 +99,7 @@ func (p *parser) panicErr(it item, err error) {
})
}
-func (p *parser) panicItemf(it item, format string, v ...interface{}) {
+func (p *parser) panicItemf(it item, format string, v ...any) {
panic(ParseError{
Message: fmt.Sprintf(format, v...),
Position: it.pos,
@@ -106,7 +108,7 @@ func (p *parser) panicItemf(it item, format string, v ...interface{}) {
})
}
-func (p *parser) panicf(format string, v ...interface{}) {
+func (p *parser) panicf(format string, v ...any) {
panic(ParseError{
Message: fmt.Sprintf(format, v...),
Position: p.pos,
@@ -139,7 +141,7 @@ func (p *parser) nextPos() item {
return it
}
-func (p *parser) bug(format string, v ...interface{}) {
+func (p *parser) bug(format string, v ...any) {
panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
}
@@ -194,11 +196,11 @@ func (p *parser) topLevel(item item) {
p.assertEqual(itemKeyEnd, k.typ)
/// The current key is the last part.
- p.currentKey = key[len(key)-1]
+ p.currentKey = key.last()
/// All the other parts (if any) are the context; need to set each part
/// as implicit.
- context := key[:len(key)-1]
+ context := key.parent()
for i := range context {
p.addImplicitContext(append(p.context, context[i:i+1]...))
}
@@ -207,7 +209,8 @@ func (p *parser) topLevel(item item) {
/// Set value.
vItem := p.next()
val, typ := p.value(vItem, false)
- p.set(p.currentKey, val, typ, vItem.pos)
+ p.setValue(p.currentKey, val)
+ p.setType(p.currentKey, typ, vItem.pos)
/// Remove the context we added (preserving any context from [tbl] lines).
p.context = outerContext
@@ -222,7 +225,7 @@ func (p *parser) keyString(it item) string {
switch it.typ {
case itemText:
return it.val
- case itemString, itemMultilineString,
+ case itemString, itemStringEsc, itemMultilineString,
itemRawString, itemRawMultilineString:
s, _ := p.value(it, false)
return s.(string)
@@ -239,9 +242,11 @@ var datetimeRepl = strings.NewReplacer(
// value translates an expected value from the lexer into a Go value wrapped
// as an empty interface.
-func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
+func (p *parser) value(it item, parentIsArray bool) (any, tomlType) {
switch it.typ {
case itemString:
+ return it.val, p.typeOfPrimitive(it)
+ case itemStringEsc:
return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
case itemMultilineString:
return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
@@ -274,7 +279,7 @@ func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
panic("unreachable")
}
-func (p *parser) valueInteger(it item) (interface{}, tomlType) {
+func (p *parser) valueInteger(it item) (any, tomlType) {
if !numUnderscoresOK(it.val) {
p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
}
@@ -298,7 +303,7 @@ func (p *parser) valueInteger(it item) (interface{}, tomlType) {
return num, p.typeOfPrimitive(it)
}
-func (p *parser) valueFloat(it item) (interface{}, tomlType) {
+func (p *parser) valueFloat(it item) (any, tomlType) {
parts := strings.FieldsFunc(it.val, func(r rune) bool {
switch r {
case '.', 'e', 'E':
@@ -322,7 +327,9 @@ func (p *parser) valueFloat(it item) (interface{}, tomlType) {
p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
}
val := strings.Replace(it.val, "_", "", -1)
- if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does.
+ signbit := false
+ if val == "+nan" || val == "-nan" {
+ signbit = val == "-nan"
val = "nan"
}
num, err := strconv.ParseFloat(val, 64)
@@ -333,6 +340,9 @@ func (p *parser) valueFloat(it item) (interface{}, tomlType) {
p.panicItemf(it, "Invalid float value: %q", it.val)
}
}
+ if signbit {
+ num = math.Copysign(num, -1)
+ }
return num, p.typeOfPrimitive(it)
}
@@ -352,7 +362,7 @@ var dtTypes = []struct {
{"15:04", internal.LocalTime, true},
}
-func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
+func (p *parser) valueDatetime(it item) (any, tomlType) {
it.val = datetimeRepl.Replace(it.val)
var (
t time.Time
@@ -365,26 +375,44 @@ func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
}
t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
if err == nil {
+ if missingLeadingZero(it.val, dt.fmt) {
+ p.panicErr(it, errParseDate{it.val})
+ }
ok = true
break
}
}
if !ok {
- p.panicItemf(it, "Invalid TOML Datetime: %q.", it.val)
+ p.panicErr(it, errParseDate{it.val})
}
return t, p.typeOfPrimitive(it)
}
-func (p *parser) valueArray(it item) (interface{}, tomlType) {
+// Go's time.Parse() will accept numbers without a leading zero; there isn't any
+// way to require it. https://github.com/golang/go/issues/29911
+//
+// Depend on the fact that the separators (- and :) should always be at the same
+// location.
+func missingLeadingZero(d, l string) bool {
+ for i, c := range []byte(l) {
+ if c == '.' || c == 'Z' {
+ return false
+ }
+ if (c < '0' || c > '9') && d[i] != c {
+ return true
+ }
+ }
+ return false
+}
+
+func (p *parser) valueArray(it item) (any, tomlType) {
p.setType(p.currentKey, tomlArray, it.pos)
var (
- types []tomlType
-
- // Initialize to a non-nil empty slice. This makes it consistent with
- // how S = [] decodes into a non-nil slice inside something like struct
- // { S []string }. See #338
- array = []interface{}{}
+ // Initialize to a non-nil slice to make it consistent with how S = []
+ // decodes into a non-nil slice inside something like struct { S
+ // []string }. See #338
+ array = make([]any, 0, 2)
)
for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
if it.typ == itemCommentStart {
@@ -394,21 +422,20 @@ func (p *parser) valueArray(it item) (interface{}, tomlType) {
val, typ := p.value(it, true)
array = append(array, val)
- types = append(types, typ)
- // XXX: types isn't used here, we need it to record the accurate type
+ // XXX: type isn't used here, we need it to record the accurate type
// information.
//
// Not entirely sure how to best store this; could use "key[0]",
// "key[1]" notation, or maybe store it on the Array type?
- _ = types
+ _ = typ
}
return array, tomlArray
}
-func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
+func (p *parser) valueInlineTable(it item, parentIsArray bool) (any, tomlType) {
var (
- hash = make(map[string]interface{})
+ topHash = make(map[string]any)
outerContext = p.context
outerKey = p.currentKey
)
@@ -436,11 +463,11 @@ func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tom
p.assertEqual(itemKeyEnd, k.typ)
/// The current key is the last part.
- p.currentKey = key[len(key)-1]
+ p.currentKey = key.last()
/// All the other parts (if any) are the context; need to set each part
/// as implicit.
- context := key[:len(key)-1]
+ context := key.parent()
for i := range context {
p.addImplicitContext(append(p.context, context[i:i+1]...))
}
@@ -448,7 +475,21 @@ func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tom
/// Set the value.
val, typ := p.value(p.next(), false)
- p.set(p.currentKey, val, typ, it.pos)
+ p.setValue(p.currentKey, val)
+ p.setType(p.currentKey, typ, it.pos)
+
+ hash := topHash
+ for _, c := range context {
+ h, ok := hash[c]
+ if !ok {
+ h = make(map[string]any)
+ hash[c] = h
+ }
+ hash, ok = h.(map[string]any)
+ if !ok {
+ p.panicf("%q is not a table", p.context)
+ }
+ }
hash[p.currentKey] = val
/// Restore context.
@@ -456,7 +497,7 @@ func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tom
}
p.context = outerContext
p.currentKey = outerKey
- return hash, tomlHash
+ return topHash, tomlHash
}
// numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
@@ -486,9 +527,9 @@ func numUnderscoresOK(s string) bool {
}
}
- // isHexadecimal is a superset of all the permissable characters
- // surrounding an underscore.
- accept = isHexadecimal(r)
+ // isHex is a superset of all the permissible characters surrounding an
+ // underscore.
+ accept = isHex(r)
}
return accept
}
@@ -511,21 +552,19 @@ func numPeriodsOK(s string) bool {
// Establishing the context also makes sure that the key isn't a duplicate, and
// will create implicit hashes automatically.
func (p *parser) addContext(key Key, array bool) {
- var ok bool
-
- // Always start at the top level and drill down for our context.
+ /// Always start at the top level and drill down for our context.
hashContext := p.mapping
- keyContext := make(Key, 0)
+ keyContext := make(Key, 0, len(key)-1)
- // We only need implicit hashes for key[0:-1]
- for _, k := range key[0 : len(key)-1] {
- _, ok = hashContext[k]
+ /// We only need implicit hashes for the parents.
+ for _, k := range key.parent() {
+ _, ok := hashContext[k]
keyContext = append(keyContext, k)
// No key? Make an implicit hash and move on.
if !ok {
p.addImplicit(keyContext)
- hashContext[k] = make(map[string]interface{})
+ hashContext[k] = make(map[string]any)
}
// If the hash context is actually an array of tables, then set
@@ -534,9 +573,9 @@ func (p *parser) addContext(key Key, array bool) {
// Otherwise, it better be a table, since this MUST be a key group (by
// virtue of it not being the last element in a key).
switch t := hashContext[k].(type) {
- case []map[string]interface{}:
+ case []map[string]any:
hashContext = t[len(t)-1]
- case map[string]interface{}:
+ case map[string]any:
hashContext = t
default:
p.panicf("Key '%s' was already created as a hash.", keyContext)
@@ -547,39 +586,33 @@ func (p *parser) addContext(key Key, array bool) {
if array {
// If this is the first element for this array, then allocate a new
// list of tables for it.
- k := key[len(key)-1]
+ k := key.last()
if _, ok := hashContext[k]; !ok {
- hashContext[k] = make([]map[string]interface{}, 0, 4)
+ hashContext[k] = make([]map[string]any, 0, 4)
}
// Add a new table. But make sure the key hasn't already been used
// for something else.
- if hash, ok := hashContext[k].([]map[string]interface{}); ok {
- hashContext[k] = append(hash, make(map[string]interface{}))
+ if hash, ok := hashContext[k].([]map[string]any); ok {
+ hashContext[k] = append(hash, make(map[string]any))
} else {
p.panicf("Key '%s' was already created and cannot be used as an array.", key)
}
} else {
- p.setValue(key[len(key)-1], make(map[string]interface{}))
+ p.setValue(key.last(), make(map[string]any))
}
- p.context = append(p.context, key[len(key)-1])
-}
-
-// set calls setValue and setType.
-func (p *parser) set(key string, val interface{}, typ tomlType, pos Position) {
- p.setValue(key, val)
- p.setType(key, typ, pos)
+ p.context = append(p.context, key.last())
}
// setValue sets the given key to the given value in the current context.
// It will make sure that the key hasn't already been defined, account for
// implicit key groups.
-func (p *parser) setValue(key string, value interface{}) {
+func (p *parser) setValue(key string, value any) {
var (
- tmpHash interface{}
+ tmpHash any
ok bool
hash = p.mapping
- keyContext Key
+ keyContext = make(Key, 0, len(p.context)+1)
)
for _, k := range p.context {
keyContext = append(keyContext, k)
@@ -587,11 +620,11 @@ func (p *parser) setValue(key string, value interface{}) {
p.bug("Context for key '%s' has not been established.", keyContext)
}
switch t := tmpHash.(type) {
- case []map[string]interface{}:
+ case []map[string]any:
// The context is a table of hashes. Pick the most recent table
// defined as the current hash.
hash = t[len(t)-1]
- case map[string]interface{}:
+ case map[string]any:
hash = t
default:
p.panicf("Key '%s' has already been defined.", keyContext)
@@ -618,9 +651,8 @@ func (p *parser) setValue(key string, value interface{}) {
p.removeImplicit(keyContext)
return
}
-
- // Otherwise, we have a concrete key trying to override a previous
- // key, which is *always* wrong.
+ // Otherwise, we have a concrete key trying to override a previous key,
+ // which is *always* wrong.
p.panicf("Key '%s' has already been defined.", keyContext)
}
@@ -683,8 +715,11 @@ func stripFirstNewline(s string) string {
// the next newline. After a line-ending backslash, all whitespace is removed
// until the next non-whitespace character.
func (p *parser) stripEscapedNewlines(s string) string {
- var b strings.Builder
- var i int
+ var (
+ b strings.Builder
+ i int
+ )
+ b.Grow(len(s))
for {
ix := strings.Index(s[i:], `\`)
if ix < 0 {
@@ -714,9 +749,8 @@ func (p *parser) stripEscapedNewlines(s string) string {
continue
}
if !strings.Contains(s[i:j], "\n") {
- // This is not a line-ending backslash.
- // (It's a bad escape sequence, but we can let
- // replaceEscapes catch it.)
+ // This is not a line-ending backslash. (It's a bad escape sequence,
+ // but we can let replaceEscapes catch it.)
i++
continue
}
@@ -727,79 +761,78 @@ func (p *parser) stripEscapedNewlines(s string) string {
}
func (p *parser) replaceEscapes(it item, str string) string {
- replaced := make([]rune, 0, len(str))
- s := []byte(str)
- r := 0
- for r < len(s) {
- if s[r] != '\\' {
- c, size := utf8.DecodeRune(s[r:])
- r += size
- replaced = append(replaced, c)
+ var (
+ b strings.Builder
+ skip = 0
+ )
+ b.Grow(len(str))
+ for i, c := range str {
+ if skip > 0 {
+ skip--
continue
}
- r += 1
- if r >= len(s) {
+ if c != '\\' {
+ b.WriteRune(c)
+ continue
+ }
+
+ if i+1 >= len(str) { // the escape code is read from str[i+1] below, so that index must exist
p.bug("Escape sequence at end of string.")
return ""
}
- switch s[r] {
+ switch str[i+1] {
default:
- p.bug("Expected valid escape code after \\, but got %q.", s[r])
+ p.bug("Expected valid escape code after \\, but got %q.", str[i+1])
case ' ', '\t':
- p.panicItemf(it, "invalid escape: '\\%c'", s[r])
+ p.panicItemf(it, "invalid escape: '\\%c'", str[i+1])
case 'b':
- replaced = append(replaced, rune(0x0008))
- r += 1
+ b.WriteByte(0x08)
+ skip = 1
case 't':
- replaced = append(replaced, rune(0x0009))
- r += 1
+ b.WriteByte(0x09)
+ skip = 1
case 'n':
- replaced = append(replaced, rune(0x000A))
- r += 1
+ b.WriteByte(0x0a)
+ skip = 1
case 'f':
- replaced = append(replaced, rune(0x000C))
- r += 1
+ b.WriteByte(0x0c)
+ skip = 1
case 'r':
- replaced = append(replaced, rune(0x000D))
- r += 1
+ b.WriteByte(0x0d)
+ skip = 1
case 'e':
if p.tomlNext {
- replaced = append(replaced, rune(0x001B))
- r += 1
+ b.WriteByte(0x1b)
+ skip = 1
}
case '"':
- replaced = append(replaced, rune(0x0022))
- r += 1
+ b.WriteByte(0x22)
+ skip = 1
case '\\':
- replaced = append(replaced, rune(0x005C))
- r += 1
+ b.WriteByte(0x5c)
+ skip = 1
+ // The lexer guarantees the correct number of characters are present;
+ // don't need to check here.
case 'x':
if p.tomlNext {
- escaped := p.asciiEscapeToUnicode(it, s[r+1:r+3])
- replaced = append(replaced, escaped)
- r += 3
+ escaped := p.asciiEscapeToUnicode(it, str[i+2:i+4])
+ b.WriteRune(escaped)
+ skip = 3
}
case 'u':
- // At this point, we know we have a Unicode escape of the form
- // `uXXXX` at [r, r+5). (Because the lexer guarantees this
- // for us.)
- escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5])
- replaced = append(replaced, escaped)
- r += 5
+ escaped := p.asciiEscapeToUnicode(it, str[i+2:i+6])
+ b.WriteRune(escaped)
+ skip = 5
case 'U':
- // At this point, we know we have a Unicode escape of the form
- // `uXXXX` at [r, r+9). (Because the lexer guarantees this
- // for us.)
- escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9])
- replaced = append(replaced, escaped)
- r += 9
+ escaped := p.asciiEscapeToUnicode(it, str[i+2:i+10])
+ b.WriteRune(escaped)
+ skip = 9
}
}
- return string(replaced)
+ return b.String()
}
-func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune {
- s := string(bs)
+func (p *parser) asciiEscapeToUnicode(it item, s string) rune {
hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
if err != nil {
p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
diff --git a/vendor/github.com/BurntSushi/toml/type_fields.go b/vendor/github.com/BurntSushi/toml/type_fields.go
index 254ca82e5..10c51f7ee 100644
--- a/vendor/github.com/BurntSushi/toml/type_fields.go
+++ b/vendor/github.com/BurntSushi/toml/type_fields.go
@@ -25,10 +25,8 @@ type field struct {
// breaking ties with index sequence.
type byName []field
-func (x byName) Len() int { return len(x) }
-
+func (x byName) Len() int { return len(x) }
func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
-
func (x byName) Less(i, j int) bool {
if x[i].name != x[j].name {
return x[i].name < x[j].name
@@ -45,10 +43,8 @@ func (x byName) Less(i, j int) bool {
// byIndex sorts field by index sequence.
type byIndex []field
-func (x byIndex) Len() int { return len(x) }
-
+func (x byIndex) Len() int { return len(x) }
func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
-
func (x byIndex) Less(i, j int) bool {
for k, xik := range x[i].index {
if k >= len(x[j].index) {
diff --git a/vendor/github.com/BurntSushi/toml/type_toml.go b/vendor/github.com/BurntSushi/toml/type_toml.go
index 4e90d7737..1c090d331 100644
--- a/vendor/github.com/BurntSushi/toml/type_toml.go
+++ b/vendor/github.com/BurntSushi/toml/type_toml.go
@@ -22,13 +22,8 @@ func typeIsTable(t tomlType) bool {
type tomlBaseType string
-func (btype tomlBaseType) typeString() string {
- return string(btype)
-}
-
-func (btype tomlBaseType) String() string {
- return btype.typeString()
-}
+func (btype tomlBaseType) typeString() string { return string(btype) }
+func (btype tomlBaseType) String() string { return btype.typeString() }
var (
tomlInteger tomlBaseType = "Integer"
@@ -54,7 +49,7 @@ func (p *parser) typeOfPrimitive(lexItem item) tomlType {
return tomlFloat
case itemDatetime:
return tomlDatetime
- case itemString:
+ case itemString, itemStringEsc:
return tomlString
case itemMultilineString:
return tomlString
diff --git a/vendor/github.com/Masterminds/semver/v3/CHANGELOG.md b/vendor/github.com/Masterminds/semver/v3/CHANGELOG.md
index f12626423..f95a504fe 100644
--- a/vendor/github.com/Masterminds/semver/v3/CHANGELOG.md
+++ b/vendor/github.com/Masterminds/semver/v3/CHANGELOG.md
@@ -1,5 +1,33 @@
# Changelog
+## 3.3.0 (2024-08-27)
+
+### Added
+
+- #238: Add LessThanEqual and GreaterThanEqual functions (thanks @grosser)
+- #213: nil version equality checking (thanks @KnutZuidema)
+
+### Changed
+
+- #241: Simplify StrictNewVersion parsing (thanks @grosser)
+- Testing support up through Go 1.23
+- Minimum version set to 1.21 as this is what's tested now
+- Fuzz testing now supports caching
+
+## 3.2.1 (2023-04-10)
+
+### Changed
+
+- #198: Improved testing around pre-release names
+- #200: Improved code scanning with addition of CodeQL
+- #201: Testing now includes Go 1.20. Go 1.17 has been dropped
+- #202: Migrated Fuzz testing to Go built-in Fuzzing. CI runs daily
+- #203: Docs updated for security details
+
+### Fixed
+
+- #199: Fixed issue with range transformations
+
## 3.2.0 (2022-11-28)
### Added
diff --git a/vendor/github.com/Masterminds/semver/v3/Makefile b/vendor/github.com/Masterminds/semver/v3/Makefile
index 0e7b5c713..9ca87a2c7 100644
--- a/vendor/github.com/Masterminds/semver/v3/Makefile
+++ b/vendor/github.com/Masterminds/semver/v3/Makefile
@@ -19,6 +19,7 @@ test-cover:
.PHONY: fuzz
fuzz:
@echo "==> Running Fuzz Tests"
+ go env GOCACHE
go test -fuzz=FuzzNewVersion -fuzztime=15s .
go test -fuzz=FuzzStrictNewVersion -fuzztime=15s .
go test -fuzz=FuzzNewConstraint -fuzztime=15s .
@@ -27,4 +28,4 @@ $(GOLANGCI_LINT):
# Install golangci-lint. The configuration for it is in the .golangci.yml
# file in the root of the repository
echo ${GOPATH}
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(GOPATH)/bin v1.17.1
+ curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(GOPATH)/bin v1.56.2
diff --git a/vendor/github.com/Masterminds/semver/v3/README.md b/vendor/github.com/Masterminds/semver/v3/README.md
index eab8cac3b..ed5693608 100644
--- a/vendor/github.com/Masterminds/semver/v3/README.md
+++ b/vendor/github.com/Masterminds/semver/v3/README.md
@@ -13,12 +13,9 @@ Active](https://masterminds.github.io/stability/active.svg)](https://masterminds
[](https://pkg.go.dev/github.com/Masterminds/semver/v3)
[](https://goreportcard.com/report/github.com/Masterminds/semver)
-If you are looking for a command line tool for version comparisons please see
-[vert](https://github.com/Masterminds/vert) which uses this library.
-
## Package Versions
-Note, import `github.com/github.com/Masterminds/semver/v3` to use the latest version.
+Note, import `github.com/Masterminds/semver/v3` to use the latest version.
There are three major versions fo the `semver` package.
@@ -80,12 +77,12 @@ There are two methods for comparing versions. One uses comparison methods on
differences to notes between these two methods of comparison.
1. When two versions are compared using functions such as `Compare`, `LessThan`,
- and others it will follow the specification and always include prereleases
+ and others it will follow the specification and always include pre-releases
within the comparison. It will provide an answer that is valid with the
comparison section of the spec at https://semver.org/#spec-item-11
2. When constraint checking is used for checks or validation it will follow a
different set of rules that are common for ranges with tools like npm/js
- and Rust/Cargo. This includes considering prereleases to be invalid if the
+ and Rust/Cargo. This includes considering pre-releases to be invalid if the
ranges does not include one. If you want to have it include pre-releases a
simple solution is to include `-0` in your range.
3. Constraint ranges can have some complex rules including the shorthand use of
@@ -113,7 +110,7 @@ v, err := semver.NewVersion("1.3")
if err != nil {
// Handle version not being parsable.
}
-// Check if the version meets the constraints. The a variable will be true.
+// Check if the version meets the constraints. The variable a will be true.
a := c.Check(v)
```
@@ -137,20 +134,20 @@ The basic comparisons are:
### Working With Prerelease Versions
Pre-releases, for those not familiar with them, are used for software releases
-prior to stable or generally available releases. Examples of prereleases include
-development, alpha, beta, and release candidate releases. A prerelease may be
+prior to stable or generally available releases. Examples of pre-releases include
+development, alpha, beta, and release candidate releases. A pre-release may be
a version such as `1.2.3-beta.1` while the stable release would be `1.2.3`. In the
-order of precedence, prereleases come before their associated releases. In this
+order of precedence, pre-releases come before their associated releases. In this
example `1.2.3-beta.1 < 1.2.3`.
-According to the Semantic Version specification prereleases may not be
+According to the Semantic Version specification, pre-releases may not be
API compliant with their release counterpart. It says,
> A pre-release version indicates that the version is unstable and might not satisfy the intended compatibility requirements as denoted by its associated normal version.
-SemVer comparisons using constraints without a prerelease comparator will skip
-prerelease versions. For example, `>=1.2.3` will skip prereleases when looking
-at a list of releases while `>=1.2.3-0` will evaluate and find prereleases.
+SemVer's comparisons using constraints without a pre-release comparator will skip
+pre-release versions. For example, `>=1.2.3` will skip pre-releases when looking
+at a list of releases while `>=1.2.3-0` will evaluate and find pre-releases.
The reason for the `0` as a pre-release version in the example comparison is
because pre-releases can only contain ASCII alphanumerics and hyphens (along with
@@ -171,6 +168,9 @@ These look like:
* `1.2 - 1.4.5` which is equivalent to `>= 1.2 <= 1.4.5`
* `2.3.4 - 4.5` which is equivalent to `>= 2.3.4 <= 4.5`
+Note that `1.2-1.4.5` without whitespace is parsed completely differently; it's
+parsed as a single constraint `1.2.0` with _prerelease_ `1.4.5`.
+
### Wildcards In Comparisons
The `x`, `X`, and `*` characters can be used as a wildcard character. This works
diff --git a/vendor/github.com/Masterminds/semver/v3/version.go b/vendor/github.com/Masterminds/semver/v3/version.go
index 7c4bed334..ff499fb66 100644
--- a/vendor/github.com/Masterminds/semver/v3/version.go
+++ b/vendor/github.com/Masterminds/semver/v3/version.go
@@ -83,22 +83,23 @@ func StrictNewVersion(v string) (*Version, error) {
original: v,
}
- // check for prerelease or build metadata
- var extra []string
- if strings.ContainsAny(parts[2], "-+") {
- // Start with the build metadata first as it needs to be on the right
- extra = strings.SplitN(parts[2], "+", 2)
- if len(extra) > 1 {
- // build metadata found
- sv.metadata = extra[1]
- parts[2] = extra[0]
+ // Extract build metadata
+ if strings.Contains(parts[2], "+") {
+ extra := strings.SplitN(parts[2], "+", 2)
+ sv.metadata = extra[1]
+ parts[2] = extra[0]
+ if err := validateMetadata(sv.metadata); err != nil {
+ return nil, err
}
+ }
- extra = strings.SplitN(parts[2], "-", 2)
- if len(extra) > 1 {
- // prerelease found
- sv.pre = extra[1]
- parts[2] = extra[0]
+ // Extract prerelease
+ if strings.Contains(parts[2], "-") {
+ extra := strings.SplitN(parts[2], "-", 2)
+ sv.pre = extra[1]
+ parts[2] = extra[0]
+ if err := validatePrerelease(sv.pre); err != nil {
+ return nil, err
}
}
@@ -114,7 +115,7 @@ func StrictNewVersion(v string) (*Version, error) {
}
}
- // Extract the major, minor, and patch elements onto the returned Version
+ // Extract major, minor, and patch
var err error
sv.major, err = strconv.ParseUint(parts[0], 10, 64)
if err != nil {
@@ -131,23 +132,6 @@ func StrictNewVersion(v string) (*Version, error) {
return nil, err
}
- // No prerelease or build metadata found so returning now as a fastpath.
- if sv.pre == "" && sv.metadata == "" {
- return sv, nil
- }
-
- if sv.pre != "" {
- if err = validatePrerelease(sv.pre); err != nil {
- return nil, err
- }
- }
-
- if sv.metadata != "" {
- if err = validateMetadata(sv.metadata); err != nil {
- return nil, err
- }
- }
-
return sv, nil
}
@@ -381,15 +365,31 @@ func (v *Version) LessThan(o *Version) bool {
return v.Compare(o) < 0
}
+// LessThanEqual tests if one version is less than or equal to another one.
+func (v *Version) LessThanEqual(o *Version) bool {
+ return v.Compare(o) <= 0
+}
+
// GreaterThan tests if one version is greater than another one.
func (v *Version) GreaterThan(o *Version) bool {
return v.Compare(o) > 0
}
+// GreaterThanEqual tests if one version is greater than or equal to another one.
+func (v *Version) GreaterThanEqual(o *Version) bool {
+ return v.Compare(o) >= 0
+}
+
// Equal tests if two versions are equal to each other.
// Note, versions can be equal with different metadata since metadata
// is not considered part of the comparable version.
func (v *Version) Equal(o *Version) bool {
+ if v == o {
+ return true
+ }
+ if v == nil || o == nil {
+ return false
+ }
return v.Compare(o) == 0
}
diff --git a/vendor/github.com/Masterminds/sprig/v3/CHANGELOG.md b/vendor/github.com/Masterminds/sprig/v3/CHANGELOG.md
index 2ce45dd4e..b5ef766a7 100644
--- a/vendor/github.com/Masterminds/sprig/v3/CHANGELOG.md
+++ b/vendor/github.com/Masterminds/sprig/v3/CHANGELOG.md
@@ -1,5 +1,23 @@
# Changelog
+## Release 3.3.0 (2024-08-29)
+
+### Added
+
+- #400: added sha512sum function (thanks @itzik-elayev)
+
+### Changed
+
+- #407: Removed duplicate documentation (functions were documented in 2 places)
+- #290: Corrected copy/paste oops in math documentation (thanks @zzhu41)
+- #369: Corrected template reference in docs (thanks @chey)
+- #375: Added link to URL documentation (thanks @carlpett)
+- #406: Updated the mergo dependency which had a breaking change (which was accounted for)
+- #376: Fixed documentation error (thanks @jheyduk)
+- #404: Updated dependency tree
+- #391: Fixed misspelling (thanks @chrishalbert)
+- #405: Updated Go versions used in testing
+
## Release 3.2.3 (2022-11-29)
### Changed
@@ -307,7 +325,7 @@ This release adds new functions, including:
- Added `semver` and `semverCompare` for Semantic Versions
- `list` replaces `tuple`
- Fixed issue with `join`
-- Added `first`, `last`, `intial`, `rest`, `prepend`, `append`, `toString`, `toStrings`, `sortAlpha`, `reverse`, `coalesce`, `pluck`, `pick`, `compact`, `keys`, `omit`, `uniq`, `has`, `without`
+- Added `first`, `last`, `initial`, `rest`, `prepend`, `append`, `toString`, `toStrings`, `sortAlpha`, `reverse`, `coalesce`, `pluck`, `pick`, `compact`, `keys`, `omit`, `uniq`, `has`, `without`
## Release 2.9.0 (2017-02-23)
@@ -361,7 +379,7 @@ Because we switched from `int` to `int64` as the return value for all integer ma
- `min` complements `max` (formerly `biggest`)
- `empty` indicates that a value is the empty value for its type
- `tuple` creates a tuple inside of a template: `{{$t := tuple "a", "b" "c"}}`
-- `dict` creates a dictionary inside of a template `{{$d := dict "key1" "val1" "key2" "val2"}}`
+- `dict` creates a dictionary inside of a template `{{$d := dict "key1" "val1" "key2" "val2"}}`
- Date formatters have been added for HTML dates (as used in `date` input fields)
- Integer math functions can convert from a number of types, including `string` (via `strconv.ParseInt`).
diff --git a/vendor/github.com/Masterminds/sprig/v3/crypto.go b/vendor/github.com/Masterminds/sprig/v3/crypto.go
index 13a5cd559..75fe027e4 100644
--- a/vendor/github.com/Masterminds/sprig/v3/crypto.go
+++ b/vendor/github.com/Masterminds/sprig/v3/crypto.go
@@ -14,6 +14,7 @@ import (
"crypto/rsa"
"crypto/sha1"
"crypto/sha256"
+ "crypto/sha512"
"crypto/x509"
"crypto/x509/pkix"
"encoding/asn1"
@@ -36,6 +37,11 @@ import (
"golang.org/x/crypto/scrypt"
)
+func sha512sum(input string) string {
+ hash := sha512.Sum512([]byte(input))
+ return hex.EncodeToString(hash[:])
+}
+
func sha256sum(input string) string {
hash := sha256.Sum256([]byte(input))
return hex.EncodeToString(hash[:])
diff --git a/vendor/github.com/Masterminds/sprig/v3/dict.go b/vendor/github.com/Masterminds/sprig/v3/dict.go
index ade889698..4315b3542 100644
--- a/vendor/github.com/Masterminds/sprig/v3/dict.go
+++ b/vendor/github.com/Masterminds/sprig/v3/dict.go
@@ -1,7 +1,7 @@
package sprig
import (
- "github.com/imdario/mergo"
+ "dario.cat/mergo"
"github.com/mitchellh/copystructure"
)
diff --git a/vendor/github.com/Masterminds/sprig/v3/doc.go b/vendor/github.com/Masterminds/sprig/v3/doc.go
index aabb9d448..91031d6d1 100644
--- a/vendor/github.com/Masterminds/sprig/v3/doc.go
+++ b/vendor/github.com/Masterminds/sprig/v3/doc.go
@@ -6,7 +6,7 @@ inside of Go `html/template` and `text/template` files.
To add these functions, use the `template.Funcs()` method:
- t := templates.New("foo").Funcs(sprig.FuncMap())
+ t := template.New("foo").Funcs(sprig.FuncMap())
Note that you should add the function map before you parse any template files.
diff --git a/vendor/github.com/Masterminds/sprig/v3/functions.go b/vendor/github.com/Masterminds/sprig/v3/functions.go
index 57fcec1d9..cda47d26f 100644
--- a/vendor/github.com/Masterminds/sprig/v3/functions.go
+++ b/vendor/github.com/Masterminds/sprig/v3/functions.go
@@ -22,8 +22,7 @@ import (
//
// Use this to pass the functions into the template engine:
//
-// tpl := template.New("foo").Funcs(sprig.FuncMap()))
-//
+// tpl := template.New("foo").Funcs(sprig.FuncMap())
func FuncMap() template.FuncMap {
return HtmlFuncMap()
}
@@ -142,10 +141,13 @@ var genericMap = map[string]interface{}{
"swapcase": util.SwapCase,
"shuffle": xstrings.Shuffle,
"snakecase": xstrings.ToSnakeCase,
- "camelcase": xstrings.ToCamelCase,
- "kebabcase": xstrings.ToKebabCase,
- "wrap": func(l int, s string) string { return util.Wrap(s, l) },
- "wrapWith": func(l int, sep, str string) string { return util.WrapCustom(str, l, sep, true) },
+ // camelcase used to call xstrings.ToCamelCase, but that function had a breaking change in version
+ // 1.5 that moved it from upper camel case to lower camel case. This is a breaking change for sprig.
+ // A new xstrings.ToPascalCase function was added that provided upper camel case.
+ "camelcase": xstrings.ToPascalCase,
+ "kebabcase": xstrings.ToKebabCase,
+ "wrap": func(l int, s string) string { return util.Wrap(s, l) },
+ "wrapWith": func(l int, sep, str string) string { return util.WrapCustom(str, l, sep, true) },
// Switch order so that "foobar" | contains "foo"
"contains": func(substr string, str string) bool { return strings.Contains(str, substr) },
"hasPrefix": func(substr string, str string) bool { return strings.HasPrefix(str, substr) },
@@ -159,6 +161,7 @@ var genericMap = map[string]interface{}{
"plural": plural,
"sha1sum": sha1sum,
"sha256sum": sha256sum,
+ "sha512sum": sha512sum,
"adler32sum": adler32sum,
"toString": strval,
@@ -336,20 +339,20 @@ var genericMap = map[string]interface{}{
"mustChunk": mustChunk,
// Crypto:
- "bcrypt": bcrypt,
- "htpasswd": htpasswd,
- "genPrivateKey": generatePrivateKey,
- "derivePassword": derivePassword,
- "buildCustomCert": buildCustomCertificate,
- "genCA": generateCertificateAuthority,
- "genCAWithKey": generateCertificateAuthorityWithPEMKey,
- "genSelfSignedCert": generateSelfSignedCertificate,
+ "bcrypt": bcrypt,
+ "htpasswd": htpasswd,
+ "genPrivateKey": generatePrivateKey,
+ "derivePassword": derivePassword,
+ "buildCustomCert": buildCustomCertificate,
+ "genCA": generateCertificateAuthority,
+ "genCAWithKey": generateCertificateAuthorityWithPEMKey,
+ "genSelfSignedCert": generateSelfSignedCertificate,
"genSelfSignedCertWithKey": generateSelfSignedCertificateWithPEMKey,
- "genSignedCert": generateSignedCertificate,
- "genSignedCertWithKey": generateSignedCertificateWithPEMKey,
- "encryptAES": encryptAES,
- "decryptAES": decryptAES,
- "randBytes": randBytes,
+ "genSignedCert": generateSignedCertificate,
+ "genSignedCertWithKey": generateSignedCertificateWithPEMKey,
+ "encryptAES": encryptAES,
+ "decryptAES": decryptAES,
+ "randBytes": randBytes,
// UUIDs:
"uuidv4": uuidv4,
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/mock.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/mock.go
index 2a53d1741..78192f88c 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/mock.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/mock.go
@@ -25,14 +25,15 @@ import (
"github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes"
)
-// MockNvmdev mock mdev device
+// MockNvmdev mock mdev device.
type MockNvmdev struct {
*nvmdev
+ pciDevicesRoot string
}
var _ Interface = (*MockNvmdev)(nil)
-// NewMock creates new mock mediated (vGPU) and parent PCI devices and removes old devices
+// NewMock creates new mock mediated (vGPU) and parent PCI devices and removes old devices.
func NewMock() (mock *MockNvmdev, rerr error) {
mdevParentsRootDir, err := os.MkdirTemp(os.TempDir(), "")
if err != nil {
@@ -53,23 +54,47 @@ func NewMock() (mock *MockNvmdev, rerr error) {
}
}()
+ pciRootDir, err := os.MkdirTemp(os.TempDir(), "")
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if rerr != nil {
+ os.RemoveAll(pciRootDir)
+ }
+ }()
+
+ nvpciLib := nvpci.New(nvpci.WithPCIDevicesRoot(pciRootDir))
mock = &MockNvmdev{
- &nvmdev{mdevParentsRootDir, mdevDevicesRootDir},
+ nvmdev: &nvmdev{
+ mdevParentsRoot: mdevParentsRootDir,
+ mdevDevicesRoot: mdevDevicesRootDir,
+ nvpci: nvpciLib,
+ },
+ pciDevicesRoot: pciRootDir,
}
return mock, nil
}
-// Cleanup removes the mocked mediated (vGPU) and parent PCI devices root folders
+// Cleanup removes the mocked mediated (vGPU) and parent PCI devices root folders.
func (m *MockNvmdev) Cleanup() {
os.RemoveAll(m.mdevParentsRoot)
os.RemoveAll(m.mdevDevicesRoot)
+ os.RemoveAll(m.pciDevicesRoot)
}
-// AddMockA100Parent creates an A100 like parent GPU mock device
+// AddMockA100Parent creates an A100 like parent GPU mock device.
func (m *MockNvmdev) AddMockA100Parent(address string, numaNode int) error {
+ pciDeviceDir := filepath.Join(m.pciDevicesRoot, address)
+ err := os.MkdirAll(pciDeviceDir, 0755)
+ if err != nil {
+ return err
+ }
+
+ // /sys/class/mdev_bus/ is a symlink to /sys/bus/pci/devices/
deviceDir := filepath.Join(m.mdevParentsRoot, address)
- err := os.MkdirAll(deviceDir, 0755)
+ err = os.Symlink(pciDeviceDir, deviceDir)
if err != nil {
return err
}
@@ -220,6 +245,9 @@ func (m *MockNvmdev) AddMockA100Mdev(uuid string, mdevType string, mdevTypeDir s
return err
}
err = os.Symlink(filepath.Join(mdevDeviceDir, "vfio_mdev"), filepath.Join(mdevDeviceDir, "driver"))
+ if err != nil {
+ return err
+ }
_, err = os.Create(filepath.Join(mdevDeviceDir, "200"))
if err != nil {
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/nvmdev.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/nvmdev.go
index 926125821..c85d79d4b 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/nvmdev.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvmdev/nvmdev.go
@@ -33,7 +33,7 @@ const (
mdevDevicesRoot = "/sys/bus/mdev/devices"
)
-// Interface allows us to get a list of NVIDIA MDEV (vGPU) and parent devices
+// Interface allows us to get a list of NVIDIA MDEV (vGPU) and parent devices.
type Interface interface {
GetAllDevices() ([]*Device, error)
GetAllParentDevices() ([]*ParentDevice, error)
@@ -42,17 +42,18 @@ type Interface interface {
type nvmdev struct {
mdevParentsRoot string
mdevDevicesRoot string
+ nvpci nvpci.Interface
}
var _ Interface = (*nvmdev)(nil)
-// ParentDevice represents an NVIDIA parent PCI device
+// ParentDevice represents an NVIDIA parent PCI device.
type ParentDevice struct {
*nvpci.NvidiaPCIDevice
mdevPaths map[string]string
}
-// Device represents an NVIDIA MDEV (vGPU) device
+// Device represents an NVIDIA MDEV (vGPU) device.
type Device struct {
Path string
UUID string
@@ -62,12 +63,29 @@ type Device struct {
Parent *ParentDevice
}
-// New interface that allows us to get a list of all NVIDIA parent and MDEV (vGPU) devices
-func New() Interface {
- return &nvmdev{mdevParentsRoot, mdevDevicesRoot}
+// New interface that allows us to get a list of all NVIDIA parent and MDEV (vGPU) devices.
+func New(opts ...Option) Interface {
+ n := &nvmdev{mdevParentsRoot: mdevParentsRoot, mdevDevicesRoot: mdevDevicesRoot}
+ for _, opt := range opts {
+ opt(n)
+ }
+ if n.nvpci == nil {
+ n.nvpci = nvpci.New()
+ }
+ return n
}
-// GetAllParentDevices returns all NVIDIA Parent PCI devices on the system
+// Option defines a function for passing options to the New() call.
+type Option func(*nvmdev)
+
+// WithNvpciLib provides an Option to set the nvpci library.
+func WithNvpciLib(nvpciLib nvpci.Interface) Option {
+ return func(n *nvmdev) {
+ n.nvpci = nvpciLib
+ }
+}
+
+// GetAllParentDevices returns all NVIDIA Parent PCI devices on the system.
func (m *nvmdev) GetAllParentDevices() ([]*ParentDevice, error) {
deviceDirs, err := os.ReadDir(m.mdevParentsRoot)
if err != nil {
@@ -77,7 +95,7 @@ func (m *nvmdev) GetAllParentDevices() ([]*ParentDevice, error) {
var nvdevices []*ParentDevice
for _, deviceDir := range deviceDirs {
devicePath := path.Join(m.mdevParentsRoot, deviceDir.Name())
- nvdevice, err := NewParentDevice(devicePath)
+ nvdevice, err := m.NewParentDevice(devicePath)
if err != nil {
return nil, fmt.Errorf("error constructing NVIDIA parent device: %v", err)
}
@@ -101,7 +119,7 @@ func (m *nvmdev) GetAllParentDevices() ([]*ParentDevice, error) {
return nvdevices, nil
}
-// GetAllDevices returns all NVIDIA mdev (vGPU) devices on the system
+// GetAllDevices returns all NVIDIA mdev (vGPU) devices on the system.
func (m *nvmdev) GetAllDevices() ([]*Device, error) {
deviceDirs, err := os.ReadDir(m.mdevDevicesRoot)
if err != nil {
@@ -110,7 +128,7 @@ func (m *nvmdev) GetAllDevices() ([]*Device, error) {
var nvdevices []*Device
for _, deviceDir := range deviceDirs {
- nvdevice, err := NewDevice(m.mdevDevicesRoot, deviceDir.Name())
+ nvdevice, err := m.NewDevice(m.mdevDevicesRoot, deviceDir.Name())
if err != nil {
return nil, fmt.Errorf("error constructing MDEV device: %v", err)
}
@@ -123,8 +141,8 @@ func (m *nvmdev) GetAllDevices() ([]*Device, error) {
return nvdevices, nil
}
-// NewDevice constructs a Device, which represents an NVIDIA mdev (vGPU) device
-func NewDevice(root string, uuid string) (*Device, error) {
+// NewDevice constructs a Device, which represents an NVIDIA mdev (vGPU) device.
+func (n *nvmdev) NewDevice(root string, uuid string) (*Device, error) {
path := path.Join(root, uuid)
m, err := newMdev(path)
@@ -132,7 +150,7 @@ func NewDevice(root string, uuid string) (*Device, error) {
return nil, err
}
- parent, err := NewParentDevice(m.parentDevicePath())
+ parent, err := n.NewParentDevice(m.parentDevicePath())
if err != nil {
return nil, fmt.Errorf("error constructing NVIDIA PCI device: %v", err)
}
@@ -240,14 +258,15 @@ func (m mdev) iommuGroup() (int, error) {
return int(iommuGroup), nil
}
-// NewParentDevice constructs a ParentDevice
-func NewParentDevice(devicePath string) (*ParentDevice, error) {
- nvdevice, err := newNvidiaPCIDeviceFromPath(devicePath)
+// NewParentDevice constructs a ParentDevice.
+func (m *nvmdev) NewParentDevice(devicePath string) (*ParentDevice, error) {
+ address := filepath.Base(devicePath)
+ nvdevice, err := m.nvpci.GetGPUByPciBusID(address)
if err != nil {
return nil, fmt.Errorf("failed to construct NVIDIA PCI device: %v", err)
}
if nvdevice == nil {
- // not a NVIDIA device
+ // not an NVIDIA device.
return nil, err
}
@@ -275,7 +294,7 @@ func NewParentDevice(devicePath string) (*ParentDevice, error) {
return &ParentDevice{nvdevice, mdevTypesMap}, err
}
-// CreateMDEVDevice creates a mediated device (vGPU) on the parent GPU
+// CreateMDEVDevice creates a mediated device (vGPU) on the parent GPU.
func (p *ParentDevice) CreateMDEVDevice(mdevType string, id string) error {
mdevPath, ok := p.mdevPaths[mdevType]
if !ok {
@@ -292,7 +311,7 @@ func (p *ParentDevice) CreateMDEVDevice(mdevType string, id string) error {
return nil
}
-// DeleteMDEVDevice deletes a mediated device (vGPU)
+// DeleteMDEVDevice deletes a mediated device (vGPU).
func (p *ParentDevice) DeleteMDEVDevice(id string) error {
removeFile, err := os.OpenFile(filepath.Join(p.Path, id, "remove"), os.O_WRONLY|os.O_SYNC, 0200)
if err != nil {
@@ -306,7 +325,7 @@ func (p *ParentDevice) DeleteMDEVDevice(id string) error {
return nil
}
-// Delete deletes a mediated device (vGPU)
+// Delete deletes a mediated device (vGPU).
func (m *Device) Delete() error {
removeFile, err := os.OpenFile(filepath.Join(m.Path, "remove"), os.O_WRONLY|os.O_SYNC, 0200)
if err != nil {
@@ -320,32 +339,27 @@ func (m *Device) Delete() error {
return nil
}
-// GetPhysicalFunction gets the physical PCI device backing a 'parent' device
-func (p *ParentDevice) GetPhysicalFunction() (*nvpci.NvidiaPCIDevice, error) {
- if !p.IsVF {
- return p.NvidiaPCIDevice, nil
+// GetPhysicalFunction gets the physical PCI device backing a 'parent' device.
+func (p *ParentDevice) GetPhysicalFunction() *nvpci.NvidiaPCIDevice {
+ if p.SriovInfo.IsVF() {
+ return p.SriovInfo.VirtualFunction.PhysicalFunction
}
-
- physfnPath, err := filepath.EvalSymlinks(path.Join(p.Path, "physfn"))
- if err != nil {
- return nil, fmt.Errorf("unable to resolve %s: %v", path.Join(p.Path, "physfn"), err)
- }
-
- return newNvidiaPCIDeviceFromPath(physfnPath)
+ // Either it is an SRIOV physical function or a non-SRIOV device, so return the device itself
+ return p.NvidiaPCIDevice
}
-// GetPhysicalFunction gets the physical PCI device that a vGPU is created on
-func (m *Device) GetPhysicalFunction() (*nvpci.NvidiaPCIDevice, error) {
+// GetPhysicalFunction gets the physical PCI device that a vGPU is created on.
+func (m *Device) GetPhysicalFunction() *nvpci.NvidiaPCIDevice {
return m.Parent.GetPhysicalFunction()
}
-// IsMDEVTypeSupported checks if the mdevType is supported by the GPU
+// IsMDEVTypeSupported checks if the mdevType is supported by the GPU.
func (p *ParentDevice) IsMDEVTypeSupported(mdevType string) bool {
_, found := p.mdevPaths[mdevType]
return found
}
-// IsMDEVTypeAvailable checks if a vGPU instance of mdevType can be created on the parent GPU
+// IsMDEVTypeAvailable checks if a vGPU instance of mdevType can be created on the parent GPU.
func (p *ParentDevice) IsMDEVTypeAvailable(mdevType string) (bool, error) {
availableInstances, err := p.GetAvailableMDEVInstances(mdevType)
if err != nil {
@@ -375,12 +389,3 @@ func (p *ParentDevice) GetAvailableMDEVInstances(mdevType string) (int, error) {
return availableInstances, nil
}
-
-// newNvidiaPCIDeviceFromPath constructs an NvidiaPCIDevice for the specified device path.
-func newNvidiaPCIDeviceFromPath(devicePath string) (*nvpci.NvidiaPCIDevice, error) {
- root := filepath.Dir(devicePath)
- address := filepath.Base(devicePath)
- return nvpci.New(
- nvpci.WithPCIDevicesRoot(root),
- ).GetGPUByPciBusID(address)
-}
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes/bytes.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes/bytes.go
index 7788a1fbe..04fb4aa93 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes/bytes.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes/bytes.go
@@ -21,12 +21,12 @@ import (
"unsafe"
)
-// Raw returns just the bytes without any assumptions about layout
+// Raw returns just the bytes without any assumptions about layout.
type Raw interface {
Raw() *[]byte
}
-// Reader used to read various data sizes in the byte array
+// Reader used to read various data sizes in the byte array.
type Reader interface {
Read8(pos int) uint8
Read16(pos int) uint16
@@ -35,7 +35,7 @@ type Reader interface {
Len() int
}
-// Writer used to write various sizes of data in the byte array
+// Writer used to write various sizes of data in the byte array.
type Writer interface {
Write8(pos int, value uint8)
Write16(pos int, value uint16)
@@ -44,7 +44,7 @@ type Writer interface {
Len() int
}
-// Bytes object for manipulating arbitrary byte arrays
+// Bytes object for manipulating arbitrary byte arrays.
type Bytes interface {
Raw
Reader
@@ -70,12 +70,12 @@ func init() {
}
}
-// New raw bytearray
+// New raw bytearray.
func New(data *[]byte) Bytes {
return (*native)(data)
}
-// NewLittleEndian little endian ordering of bytes
+// NewLittleEndian little endian ordering of bytes.
func NewLittleEndian(data *[]byte) Bytes {
if nativeByteOrder == binary.LittleEndian {
return (*native)(data)
@@ -84,7 +84,7 @@ func NewLittleEndian(data *[]byte) Bytes {
return (*swapbo)(data)
}
-// NewBigEndian big endian ordering of bytes
+// NewBigEndian big endian ordering of bytes.
func NewBigEndian(data *[]byte) Bytes {
if nativeByteOrder == binary.BigEndian {
return (*native)(data)
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/config.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/config.go
index e25e72f68..397c86508 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/config.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/config.go
@@ -24,24 +24,24 @@ import (
)
const (
- // PCICfgSpaceStandardSize represents the size in bytes of the standard config space
+ // PCICfgSpaceStandardSize represents the size in bytes of the standard config space.
PCICfgSpaceStandardSize = 256
- // PCICfgSpaceExtendedSize represents the size in bytes of the extended config space
+ // PCICfgSpaceExtendedSize represents the size in bytes of the extended config space.
PCICfgSpaceExtendedSize = 4096
- // PCICapabilityListPointer represents offset for the capability list pointer
+ // PCICapabilityListPointer represents offset for the capability list pointer.
PCICapabilityListPointer = 0x34
- // PCIStatusCapabilityList represents the status register bit which indicates capability list support
+ // PCIStatusCapabilityList represents the status register bit which indicates capability list support.
PCIStatusCapabilityList = 0x10
- // PCIStatusBytePosition represents the position of the status register
+ // PCIStatusBytePosition represents the position of the status register.
PCIStatusBytePosition = 0x06
)
-// ConfigSpace PCI configuration space (standard extended) file path
+// ConfigSpace PCI configuration space (standard extended) file path.
type ConfigSpace struct {
Path string
}
-// ConfigSpaceIO Interface for reading and writing raw and preconfigured values
+// ConfigSpaceIO Interface for reading and writing raw and preconfigured values.
type ConfigSpaceIO interface {
bytes.Bytes
GetVendorID() uint16
@@ -53,18 +53,18 @@ type configSpaceIO struct {
bytes.Bytes
}
-// PCIStandardCapability standard PCI config space
+// PCIStandardCapability standard PCI config space.
type PCIStandardCapability struct {
bytes.Bytes
}
-// PCIExtendedCapability extended PCI config space
+// PCIExtendedCapability extended PCI config space.
type PCIExtendedCapability struct {
bytes.Bytes
Version uint8
}
-// PCICapabilities combines the standard and extended config space
+// PCICapabilities combines the standard and extended config space.
type PCICapabilities struct {
Standard map[uint8]*PCIStandardCapability
Extended map[uint16]*PCIExtendedCapability
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mlxpci.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mlxpci.go
index 62937d7f9..ddf7d19f8 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mlxpci.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mlxpci.go
@@ -22,15 +22,15 @@ import (
)
const (
- // PCIMellanoxVendorID represents PCI vendor id for Mellanox
+ // PCIMellanoxVendorID represents PCI vendor id for Mellanox.
PCIMellanoxVendorID uint16 = 0x15b3
- // PCINetworkControllerClass represents the PCI class for network controllers
+ // PCINetworkControllerClass represents the PCI class for network controllers.
PCINetworkControllerClass uint32 = 0x020000
- // PCIBridgeClass represents the PCI class for network controllers
+	// PCIBridgeClass represents the PCI class for PCI bridges.
PCIBridgeClass uint32 = 0x060400
)
-// GetNetworkControllers returns all Mellanox Network Controller PCI devices on the system
+// GetNetworkControllers returns all Mellanox Network Controller PCI devices on the system.
func (p *nvpci) GetNetworkControllers() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
@@ -47,7 +47,7 @@ func (p *nvpci) GetNetworkControllers() ([]*NvidiaPCIDevice, error) {
return filtered, nil
}
-// GetPciBridges retrieves all Mellanox PCI(e) Bridges
+// GetPciBridges retrieves all Mellanox PCI(e) Bridges.
func (p *nvpci) GetPciBridges() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
@@ -64,17 +64,17 @@ func (p *nvpci) GetPciBridges() ([]*NvidiaPCIDevice, error) {
return filtered, nil
}
-// IsNetworkController if class == 0x300
+// IsNetworkController reports whether the device class equals PCINetworkControllerClass (0x020000).
func (d *NvidiaPCIDevice) IsNetworkController() bool {
return d.Class == PCINetworkControllerClass
}
-// IsPciBridge if class == 0x0604
+// IsPciBridge reports whether the device class equals PCIBridgeClass (0x060400).
func (d *NvidiaPCIDevice) IsPciBridge() bool {
return d.Class == PCIBridgeClass
}
-// IsDPU returns if a device is a DPU
+// IsDPU returns if a device is a DPU.
func (d *NvidiaPCIDevice) IsDPU() bool {
if !strings.Contains(d.DeviceName, "BlueField") {
return false
@@ -87,7 +87,7 @@ func (d *NvidiaPCIDevice) IsDPU() bool {
return false
}
-// GetDPUs returns all Mellanox DPU devices on the system
+// GetDPUs returns all Mellanox DPU devices on the system.
func (p *nvpci) GetDPUs() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetNetworkControllers()
if err != nil {
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mmio.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mmio.go
index 1535fa049..88dd7ddf5 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mmio.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mmio.go
@@ -25,7 +25,7 @@ import (
"github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes"
)
-// Mmio memory map a region
+// Mmio memory map a region.
type Mmio interface {
bytes.Raw
bytes.Reader
@@ -84,12 +84,12 @@ func open(path string, offset int, size int, flags int) (Mmio, error) {
return &mmio{bytes.New(&mmap)}, nil
}
-// OpenRO open region readonly
+// OpenRO open region readonly.
func OpenRO(path string, offset int, size int) (Mmio, error) {
return open(path, offset, size, os.O_RDONLY)
}
-// OpenRW open region read write
+// OpenRW open region read write.
func OpenRW(path string, offset int, size int) (Mmio, error) {
return open(path, offset, size, os.O_RDWR)
}
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mock.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mock.go
index 57151b3ff..da3074c17 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mock.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mmio/mock.go
@@ -48,18 +48,21 @@ func mockOpen(source *[]byte, offset int, size int, rw bool) (Mmio, error) {
return m, nil
}
-// MockOpenRO open read only
+// MockOpenRO open read only.
func MockOpenRO(source *[]byte, offset int, size int) (Mmio, error) {
return mockOpen(source, offset, size, false)
}
-// MockOpenRW open read write
+// MockOpenRW open read write.
func MockOpenRW(source *[]byte, offset int, size int) (Mmio, error) {
return mockOpen(source, offset, size, true)
}
func (m *mockMmio) Close() error {
- m = &mockMmio{}
+ m.Bytes = nil
+ m.source = nil
+ m.offset = 0
+ m.rw = false
return nil
}
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mock.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mock.go
index e42271212..9b3d6e2aa 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mock.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/mock.go
@@ -20,18 +20,20 @@ import (
"fmt"
"os"
"path/filepath"
+ "regexp"
+ "strconv"
"github.com/NVIDIA/go-nvlib/pkg/nvpci/bytes"
)
-// MockNvpci mock pci device
+// MockNvpci mock pci device.
type MockNvpci struct {
*nvpci
}
var _ Interface = (*MockNvpci)(nil)
-// NewMockNvpci create new mock PCI and remove old devices
+// NewMockNvpci create new mock PCI and remove old devices.
func NewMockNvpci() (mock *MockNvpci, rerr error) {
rootDir, err := os.MkdirTemp(os.TempDir(), "")
if err != nil {
@@ -50,69 +52,119 @@ func NewMockNvpci() (mock *MockNvpci, rerr error) {
return mock, nil
}
-// Cleanup remove the mocked PCI devices root folder
+// Cleanup remove the mocked PCI devices root folder.
func (m *MockNvpci) Cleanup() {
os.RemoveAll(m.pciDevicesRoot)
}
-// AddMockA100 Create an A100 like GPU mock device
-func (m *MockNvpci) AddMockA100(address string, numaNode int) error {
+func validatePCIAddress(addr string) error {
+ r := regexp.MustCompile(`0{4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-9]`)
+ if !r.Match([]byte(addr)) {
+ return fmt.Errorf(`invalid PCI address should match 0{4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-9]: %s`, addr)
+ }
+
+ return nil
+}
+
+// AddMockA100 Create an A100 like GPU mock device.
+func (m *MockNvpci) AddMockA100(address string, numaNode int, sriov *SriovInfo) error {
+ err := validatePCIAddress(address)
+ if err != nil {
+ return err
+ }
+
deviceDir := filepath.Join(m.pciDevicesRoot, address)
- err := os.MkdirAll(deviceDir, 0755)
+ err = os.MkdirAll(deviceDir, 0755)
if err != nil {
return err
}
- vendor, err := os.Create(filepath.Join(deviceDir, "vendor"))
+ err = createNVIDIAgpuFiles(deviceDir)
if err != nil {
return err
}
- _, err = vendor.WriteString(fmt.Sprintf("0x%x", PCINvidiaVendorID))
+
+ iommuGroup := 20
+ _, err = os.Create(filepath.Join(deviceDir, strconv.Itoa(iommuGroup)))
+ if err != nil {
+ return err
+ }
+ err = os.Symlink(filepath.Join(deviceDir, strconv.Itoa(iommuGroup)), filepath.Join(deviceDir, "iommu_group"))
if err != nil {
return err
}
- class, err := os.Create(filepath.Join(deviceDir, "class"))
+ numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
if err != nil {
return err
}
- _, err = class.WriteString(fmt.Sprintf("0x%x", PCI3dControllerClass))
+ _, err = numa.WriteString(fmt.Sprintf("%v", numaNode))
if err != nil {
return err
}
- device, err := os.Create(filepath.Join(deviceDir, "device"))
+ if sriov != nil && sriov.PhysicalFunction != nil {
+ totalVFs, err := os.Create(filepath.Join(deviceDir, "sriov_totalvfs"))
+ if err != nil {
+ return err
+ }
+ _, err = fmt.Fprintf(totalVFs, "%d", sriov.PhysicalFunction.TotalVFs)
+ if err != nil {
+ return err
+ }
+
+ numVFs, err := os.Create(filepath.Join(deviceDir, "sriov_numvfs"))
+ if err != nil {
+ return err
+ }
+ _, err = fmt.Fprintf(numVFs, "%d", sriov.PhysicalFunction.NumVFs)
+ if err != nil {
+ return err
+ }
+ for i := 1; i <= int(sriov.PhysicalFunction.NumVFs); i++ {
+ err = m.createVf(address, i, iommuGroup, numaNode)
+ if err != nil {
+ return err
+ }
+ }
+ }
+
+ return nil
+}
+
+func createNVIDIAgpuFiles(deviceDir string) error {
+ vendor, err := os.Create(filepath.Join(deviceDir, "vendor"))
if err != nil {
return err
}
- _, err = device.WriteString("0x20bf")
+ _, err = vendor.WriteString(fmt.Sprintf("0x%x", PCINvidiaVendorID))
if err != nil {
return err
}
- _, err = os.Create(filepath.Join(deviceDir, "nvidia"))
+ class, err := os.Create(filepath.Join(deviceDir, "class"))
if err != nil {
return err
}
- err = os.Symlink(filepath.Join(deviceDir, "nvidia"), filepath.Join(deviceDir, "driver"))
+ _, err = class.WriteString(fmt.Sprintf("0x%x", PCI3dControllerClass))
if err != nil {
return err
}
- _, err = os.Create(filepath.Join(deviceDir, "20"))
+ device, err := os.Create(filepath.Join(deviceDir, "device"))
if err != nil {
return err
}
- err = os.Symlink(filepath.Join(deviceDir, "20"), filepath.Join(deviceDir, "iommu_group"))
+ _, err = device.WriteString("0x20bf")
if err != nil {
return err
}
- numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
+ _, err = os.Create(filepath.Join(deviceDir, "nvidia"))
if err != nil {
return err
}
- _, err = numa.WriteString(fmt.Sprintf("%v", numaNode))
+ err = os.Symlink(filepath.Join(deviceDir, "nvidia"), filepath.Join(deviceDir, "driver"))
if err != nil {
return err
}
@@ -156,3 +208,53 @@ func (m *MockNvpci) AddMockA100(address string, numaNode int) error {
return nil
}
+
+func (m *MockNvpci) createVf(pfAddress string, id, iommu_group, numaNode int) error {
+ functionID := pfAddress[len(pfAddress)-1]
+	// Verify that the last character of pfAddress (the PCI function number) is a decimal digit.
+ functionNumber, err := strconv.Atoi(string(functionID))
+ if err != nil {
+ return fmt.Errorf("can't conver physical function pci address function number %s to integer: %v", string(functionID), err)
+ }
+
+ vfFunctionNumber := functionNumber + id
+ vfAddress := pfAddress[:len(pfAddress)-1] + strconv.Itoa(vfFunctionNumber)
+
+ deviceDir := filepath.Join(m.pciDevicesRoot, vfAddress)
+ err = os.MkdirAll(deviceDir, 0755)
+ if err != nil {
+ return err
+ }
+
+ err = createNVIDIAgpuFiles(deviceDir)
+ if err != nil {
+ return err
+ }
+
+ vfIommuGroup := strconv.Itoa(iommu_group + id)
+
+ _, err = os.Create(filepath.Join(deviceDir, vfIommuGroup))
+ if err != nil {
+ return err
+ }
+ err = os.Symlink(filepath.Join(deviceDir, vfIommuGroup), filepath.Join(deviceDir, "iommu_group"))
+ if err != nil {
+ return err
+ }
+
+ numa, err := os.Create(filepath.Join(deviceDir, "numa_node"))
+ if err != nil {
+ return err
+ }
+ _, err = numa.WriteString(fmt.Sprintf("%v", numaNode))
+ if err != nil {
+ return err
+ }
+
+ err = os.Symlink(filepath.Join(m.pciDevicesRoot, pfAddress), filepath.Join(deviceDir, "physfn"))
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/nvpci.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/nvpci.go
index 5210ff504..6ff197b15 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/nvpci.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/nvpci.go
@@ -29,23 +29,23 @@ import (
)
const (
- // PCIDevicesRoot represents base path for all pci devices under sysfs
+ // PCIDevicesRoot represents base path for all pci devices under sysfs.
PCIDevicesRoot = "/sys/bus/pci/devices"
- // PCINvidiaVendorID represents PCI vendor id for NVIDIA
+ // PCINvidiaVendorID represents PCI vendor id for NVIDIA.
PCINvidiaVendorID uint16 = 0x10de
- // PCIVgaControllerClass represents the PCI class for VGA Controllers
+ // PCIVgaControllerClass represents the PCI class for VGA Controllers.
PCIVgaControllerClass uint32 = 0x030000
- // PCI3dControllerClass represents the PCI class for 3D Graphics accellerators
+	// PCI3dControllerClass represents the PCI class for 3D Graphics accelerators.
PCI3dControllerClass uint32 = 0x030200
- // PCINvSwitchClass represents the PCI class for NVSwitches
+ // PCINvSwitchClass represents the PCI class for NVSwitches.
PCINvSwitchClass uint32 = 0x068000
- // UnknownDeviceString is the device name to set for devices not found in the PCI database
+ // UnknownDeviceString is the device name to set for devices not found in the PCI database.
UnknownDeviceString = "UNKNOWN_DEVICE"
- // UnknownClassString is the class name to set for devices not found in the PCI database
+ // UnknownClassString is the class name to set for devices not found in the PCI database.
UnknownClassString = "UNKNOWN_CLASS"
)
-// Interface allows us to get a list of all NVIDIA PCI devices
+// Interface allows us to get a list of all NVIDIA PCI devices.
type Interface interface {
GetAllDevices() ([]*NvidiaPCIDevice, error)
Get3DControllers() ([]*NvidiaPCIDevice, error)
@@ -59,10 +59,10 @@ type Interface interface {
GetDPUs() ([]*NvidiaPCIDevice, error)
}
-// MemoryResources a more human readable handle
+// MemoryResources a more human readable handle.
type MemoryResources map[int]*MemoryResource
-// ResourceInterface exposes some higher level functions of resources
+// ResourceInterface exposes some higher level functions of resources.
type ResourceInterface interface {
GetTotalAddressableMemory(bool) (uint64, uint64)
}
@@ -76,7 +76,33 @@ type nvpci struct {
var _ Interface = (*nvpci)(nil)
var _ ResourceInterface = (*MemoryResources)(nil)
-// NvidiaPCIDevice represents a PCI device for an NVIDIA product
+// SriovInfo indicates whether device is VF/PF for SRIOV capable devices.
+// Only one should be set at any given time.
+type SriovInfo struct {
+ PhysicalFunction *SriovPhysicalFunction
+ VirtualFunction *SriovVirtualFunction
+}
+
+// SriovPhysicalFunction stores info about SRIOV physical function.
+type SriovPhysicalFunction struct {
+ TotalVFs uint64
+ NumVFs uint64
+}
+
+// SriovVirtualFunction keeps data about SRIOV virtual function.
+type SriovVirtualFunction struct {
+ PhysicalFunction *NvidiaPCIDevice
+}
+
+func (s *SriovInfo) IsPF() bool {
+ return s != nil && s.PhysicalFunction != nil
+}
+
+func (s *SriovInfo) IsVF() bool {
+ return s != nil && s.VirtualFunction != nil
+}
+
+// NvidiaPCIDevice represents a PCI device for an NVIDIA product.
type NvidiaPCIDevice struct {
Path string
Address string
@@ -90,37 +116,37 @@ type NvidiaPCIDevice struct {
NumaNode int
Config *ConfigSpace
Resources MemoryResources
- IsVF bool
+ SriovInfo SriovInfo
}
-// IsVGAController if class == 0x300
+// IsVGAController reports whether the device class equals PCIVgaControllerClass (0x030000).
func (d *NvidiaPCIDevice) IsVGAController() bool {
return d.Class == PCIVgaControllerClass
}
-// Is3DController if class == 0x302
+// Is3DController reports whether the device class equals PCI3dControllerClass (0x030200).
func (d *NvidiaPCIDevice) Is3DController() bool {
return d.Class == PCI3dControllerClass
}
-// IsNVSwitch if class == 0x068
+// IsNVSwitch reports whether the device class equals PCINvSwitchClass (0x068000).
func (d *NvidiaPCIDevice) IsNVSwitch() bool {
return d.Class == PCINvSwitchClass
}
-// IsGPU either VGA for older cards or 3D for newer
+// IsGPU either VGA for older cards or 3D for newer.
func (d *NvidiaPCIDevice) IsGPU() bool {
return d.IsVGAController() || d.Is3DController()
}
// IsResetAvailable some devices can be reset without rebooting,
-// check if applicable
+// check if applicable.
func (d *NvidiaPCIDevice) IsResetAvailable() bool {
_, err := os.Stat(path.Join(d.Path, "reset"))
return err == nil
}
-// Reset perform a reset to apply a new configuration at HW level
+// Reset perform a reset to apply a new configuration at HW level.
func (d *NvidiaPCIDevice) Reset() error {
err := os.WriteFile(path.Join(d.Path, "reset"), []byte("1"), 0)
if err != nil {
@@ -129,7 +155,7 @@ func (d *NvidiaPCIDevice) Reset() error {
return nil
}
-// New interface that allows us to get a list of all NVIDIA PCI devices
+// New interface that allows us to get a list of all NVIDIA PCI devices.
func New(opts ...Option) Interface {
n := &nvpci{}
for _, opt := range opts {
@@ -144,10 +170,10 @@ func New(opts ...Option) Interface {
return n
}
-// Option defines a function for passing options to the New() call
+// Option defines a function for passing options to the New() call.
type Option func(*nvpci)
-// WithLogger provides an Option to set the logger for the library
+// WithLogger provides an Option to set the logger for the library.
func WithLogger(logger logger) Option {
return func(n *nvpci) {
n.logger = logger
@@ -170,7 +196,7 @@ func WithPCIDatabasePath(path string) Option {
}
}
-// GetAllDevices returns all Nvidia PCI devices on the system
+// GetAllDevices returns all Nvidia PCI devices on the system.
func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
deviceDirs, err := os.ReadDir(p.pciDevicesRoot)
if err != nil {
@@ -178,9 +204,11 @@ func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
}
var nvdevices []*NvidiaPCIDevice
+ // Cache devices for each GetAllDevices invocation to speed things up.
+ cache := make(map[string]*NvidiaPCIDevice)
for _, deviceDir := range deviceDirs {
deviceAddress := deviceDir.Name()
- nvdevice, err := p.GetGPUByPciBusID(deviceAddress)
+ nvdevice, err := p.getGPUByPciBusID(deviceAddress, cache)
if err != nil {
return nil, fmt.Errorf("error constructing NVIDIA PCI device %s: %v", deviceAddress, err)
}
@@ -204,8 +232,18 @@ func (p *nvpci) GetAllDevices() ([]*NvidiaPCIDevice, error) {
return nvdevices, nil
}
-// GetGPUByPciBusID constructs an NvidiaPCIDevice for the specified address (PCI Bus ID)
+// GetGPUByPciBusID constructs an NvidiaPCIDevice for the specified address (PCI Bus ID).
func (p *nvpci) GetGPUByPciBusID(address string) (*NvidiaPCIDevice, error) {
+	// Pass a nil cache to force reading the device information from sysfs.
+ return p.getGPUByPciBusID(address, nil)
+}
+
+func (p *nvpci) getGPUByPciBusID(address string, cache map[string]*NvidiaPCIDevice) (*NvidiaPCIDevice, error) {
+ if cache != nil {
+ if pciDevice, exists := cache[address]; exists {
+ return pciDevice, nil
+ }
+ }
devicePath := filepath.Join(p.pciDevicesRoot, address)
vendor, err := os.ReadFile(path.Join(devicePath, "vendor"))
@@ -265,16 +303,6 @@ func (p *nvpci) GetGPUByPciBusID(address string) (*NvidiaPCIDevice, error) {
return nil, fmt.Errorf("unable to detect iommu_group for %s: %v", address, err)
}
- // device is a virtual function (VF) if "physfn" symlink exists
- var isVF bool
- _, err = filepath.EvalSymlinks(path.Join(devicePath, "physfn"))
- if err == nil {
- isVF = true
- }
- if err != nil && !os.IsNotExist(err) {
- return nil, fmt.Errorf("unable to resolve %s: %v", path.Join(devicePath, "physfn"), err)
- }
-
numa, err := os.ReadFile(path.Join(devicePath, "numa_node"))
if err != nil {
return nil, fmt.Errorf("unable to read PCI NUMA node for %s: %v", address, err)
@@ -328,6 +356,28 @@ func (p *nvpci) GetGPUByPciBusID(address string) (*NvidiaPCIDevice, error) {
className = UnknownClassString
}
+ var sriovInfo SriovInfo
+ // Device is a virtual function (VF) if "physfn" symlink exists.
+ physFnAddress, err := filepath.EvalSymlinks(path.Join(devicePath, "physfn"))
+ if err == nil {
+ physFn, err := p.getGPUByPciBusID(filepath.Base(physFnAddress), cache)
+ if err != nil {
+ return nil, fmt.Errorf("unable to detect physfn for %s: %v", address, err)
+ }
+ sriovInfo = SriovInfo{
+ VirtualFunction: &SriovVirtualFunction{
+ PhysicalFunction: physFn,
+ },
+ }
+ } else if os.IsNotExist(err) {
+ sriovInfo, err = p.getSriovInfoForPhysicalFunction(devicePath)
+ if err != nil {
+ return nil, fmt.Errorf("unable to read SRIOV physical function details for %s: %v", devicePath, err)
+ }
+ } else {
+ return nil, fmt.Errorf("unable to read %s: %v", path.Join(devicePath, "physfn"), err)
+ }
+
nvdevice := &NvidiaPCIDevice{
Path: devicePath,
Address: address,
@@ -339,15 +389,20 @@ func (p *nvpci) GetGPUByPciBusID(address string) (*NvidiaPCIDevice, error) {
NumaNode: int(numaNode),
Config: config,
Resources: resources,
- IsVF: isVF,
DeviceName: deviceName,
ClassName: className,
+ SriovInfo: sriovInfo,
+ }
+
+ // Cache physical functions only as VF can't be a root device.
+ if cache != nil && sriovInfo.IsPF() {
+ cache[address] = nvdevice
}
return nvdevice, nil
}
-// Get3DControllers returns all NVIDIA 3D Controller PCI devices on the system
+// Get3DControllers returns all NVIDIA 3D Controller PCI devices on the system.
func (p *nvpci) Get3DControllers() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
@@ -364,7 +419,7 @@ func (p *nvpci) Get3DControllers() ([]*NvidiaPCIDevice, error) {
return filtered, nil
}
-// GetVGAControllers returns all NVIDIA VGA Controller PCI devices on the system
+// GetVGAControllers returns all NVIDIA VGA Controller PCI devices on the system.
func (p *nvpci) GetVGAControllers() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
@@ -381,7 +436,7 @@ func (p *nvpci) GetVGAControllers() ([]*NvidiaPCIDevice, error) {
return filtered, nil
}
-// GetNVSwitches returns all NVIDIA NVSwitch PCI devices on the system
+// GetNVSwitches returns all NVIDIA NVSwitch PCI devices on the system.
func (p *nvpci) GetNVSwitches() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
@@ -398,7 +453,7 @@ func (p *nvpci) GetNVSwitches() ([]*NvidiaPCIDevice, error) {
return filtered, nil
}
-// GetGPUs returns all NVIDIA GPU devices on the system
+// GetGPUs returns all NVIDIA GPU devices on the system.
func (p *nvpci) GetGPUs() ([]*NvidiaPCIDevice, error) {
devices, err := p.GetAllDevices()
if err != nil {
@@ -407,7 +462,7 @@ func (p *nvpci) GetGPUs() ([]*NvidiaPCIDevice, error) {
var filtered []*NvidiaPCIDevice
for _, d := range devices {
- if d.IsGPU() && !d.IsVF {
+ if d.IsGPU() && !d.SriovInfo.IsVF() {
filtered = append(filtered, d)
}
}
@@ -415,7 +470,7 @@ func (p *nvpci) GetGPUs() ([]*NvidiaPCIDevice, error) {
return filtered, nil
}
-// GetGPUByIndex returns an NVIDIA GPU device at a particular index
+// GetGPUByIndex returns an NVIDIA GPU device at a particular index.
func (p *nvpci) GetGPUByIndex(i int) (*NvidiaPCIDevice, error) {
gpus, err := p.GetGPUs()
if err != nil {
@@ -428,3 +483,41 @@ func (p *nvpci) GetGPUByIndex(i int) (*NvidiaPCIDevice, error) {
return gpus[i], nil
}
+
+func (p *nvpci) getSriovInfoForPhysicalFunction(devicePath string) (sriovInfo SriovInfo, err error) {
+ totalVfsPath := filepath.Join(devicePath, "sriov_totalvfs")
+ numVfsPath := filepath.Join(devicePath, "sriov_numvfs")
+
+	// If sriov_totalvfs does not exist this is not an SR-IOV device: return an empty SriovInfo and a nil error.
+ _, err = os.Stat(totalVfsPath)
+ if err != nil && os.IsNotExist(err) {
+ return sriovInfo, nil
+ }
+ sriovTotalVfs, err := os.ReadFile(totalVfsPath)
+ if err != nil {
+ return sriovInfo, fmt.Errorf("unable to read sriov_totalvfs: %v", err)
+ }
+ totalVfsStr := strings.TrimSpace(string(sriovTotalVfs))
+ totalVfsInt, err := strconv.ParseUint(totalVfsStr, 10, 16)
+ if err != nil {
+ return sriovInfo, fmt.Errorf("unable to convert sriov_totalvfs to uint64: %v", err)
+ }
+
+ sriovNumVfs, err := os.ReadFile(numVfsPath)
+ if err != nil {
+ return sriovInfo, fmt.Errorf("unable to read sriov_numvfs for: %v", err)
+ }
+ numVfsStr := strings.TrimSpace(string(sriovNumVfs))
+ numVfsInt, err := strconv.ParseUint(numVfsStr, 10, 16)
+ if err != nil {
+ return sriovInfo, fmt.Errorf("unable to convert sriov_numvfs to uint64: %v", err)
+ }
+
+ sriovInfo = SriovInfo{
+ PhysicalFunction: &SriovPhysicalFunction{
+ TotalVFs: totalVfsInt,
+ NumVFs: numVfsInt,
+ },
+ }
+ return sriovInfo, nil
+}
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/resources.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/resources.go
index 6c6e53eec..b3b7d3155 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/resources.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/nvpci/resources.go
@@ -29,7 +29,7 @@ const (
pmcBigEndian = 0x01000001
)
-// MemoryResource represents a mmio region
+// MemoryResource represents a mmio region.
type MemoryResource struct {
Start uintptr
End uintptr
@@ -37,7 +37,7 @@ type MemoryResource struct {
Path string
}
-// OpenRW read write mmio region
+// OpenRW read write mmio region.
func (mr *MemoryResource) OpenRW() (mmio.Mmio, error) {
rw, err := mmio.OpenRW(mr.Path, 0, int(mr.End-mr.Start+1))
if err != nil {
@@ -52,7 +52,7 @@ func (mr *MemoryResource) OpenRW() (mmio.Mmio, error) {
return nil, fmt.Errorf("unknown endianness for mmio: %v", err)
}
-// OpenRO read only mmio region
+// OpenRO read only mmio region.
func (mr *MemoryResource) OpenRO() (mmio.Mmio, error) {
ro, err := mmio.OpenRO(mr.Path, 0, int(mr.End-mr.Start+1))
if err != nil {
@@ -67,7 +67,7 @@ func (mr *MemoryResource) OpenRO() (mmio.Mmio, error) {
return nil, fmt.Errorf("unknown endianness for mmio: %v", err)
}
-// From Bit Twiddling Hacks, great resource for all low level bit manipulations
+// From Bit Twiddling Hacks, great resource for all low level bit manipulations.
func calcNextPowerOf2(n uint64) uint64 {
n--
n |= n >> 1
@@ -83,7 +83,7 @@ func calcNextPowerOf2(n uint64) uint64 {
// GetTotalAddressableMemory will accumulate the 32bit and 64bit memory windows
// of each BAR and round the value if needed to the next power of 2; first
-// return value is the accumulated 32bit addresable memory size the second one
+// return value is the accumulated 32bit addressable memory size the second one
// is the accumulated 64bit addressable memory size in bytes. These values are
// needed to configure virtualized environments.
func (mrs MemoryResources) GetTotalAddressableMemory(roundUp bool) (uint64, uint64) {
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/default_pci.ids b/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/default_pci.ids
index 50115d979..f16867890 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/default_pci.ids
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/default_pci.ids
@@ -1,8 +1,8 @@
#
# List of PCI ID's
#
-# Version: 2023.09.22
-# Date: 2023-09-22 03:15:02
+# Version: 2024.06.23
+# Date: 2024-06-23 03:15:02
#
# Maintained by Albert Pool, Martin Mares, and other volunteers from
# the PCI ID Project at https://pci-ids.ucw.cz/.
@@ -48,7 +48,11 @@
7a15 Vivante GPU (Graphics Processing Unit)
7a19 PCI-to-PCI Bridge
7a24 OHCI USB Controller
+# Found on 7A2000 PCH
+ 7a25 LG100 GPU
7a29 PCI-to-PCI Bridge
+# Found on 7A2000 PCH
+ 7a36 Display Controller
0018 Fn-Link Technology Limited
6252 6252CPUB 802.11ax PCIe Wireless Network Adapter
001c PEAK-System Technik GmbH
@@ -82,16 +86,37 @@
8139 HNE-300 (RealTek RTL8139c) [iPaq Networking]
025e Solidigm
0b60 NVMe DC SSD [Sentinel Rock Plus controller]
+ 025e 8008 NVMe DC SSD U.2 15mm [D7-P5510]
025e 8208 NVMe DC SSD U.2 15mm [D7-P5810]
+ 025e 8d1d NVMe DC SSD E1.L 9.5mm [D5-P5316]
+ 025e 9008 NVMe DC SSD U.2 15mm [D7-P5520]
+ 025e 900c NVMe DC SSD E1.S 9.5mm [D7-P5520]
+ 025e 900d NVMe DC SSD E1.S 15mm [D7-P5520]
+ 025e 901c NVMe DC SSD E1.L 9.5mm [D7-P5520]
+ 025e 9108 NVMe DC SSD U.2 15mm [D7-P5620]
+ 025e c008 NVMe DC SSD U.2 15mm [D5-P5530]
025e d408 NVMe DC SSD U.2 15mm [D5-P5430]
025e d40c NVMe DC SSD E1.S 9.5mm [D5-P5430]
025e d419 NVMe DC SSD E3.S 7.5mm [D5-P5430]
025e d808 NVMe DC SSD U.2 15mm [D5-P5336]
025e d819 NVMe DC SSD E3.S 7.5mm [D5-P5336]
+ 025e d81c NVMe DC SSD E1.L 18mm [D5-P5336]
025e d81d NVMe DC SSD E1.L 9.5mm [D5-P5336]
0b70 NVMe DC SSD [Yorktown controller]
- f1ab P41 Plus NVMe SSD (DRAM-less)
- f1ac P44 Pro NVMe SSD
+ 2b59 NVMe DC SSD [Atomos Prime]
+ 025e 0008 NVMe DC SSD U.2-SFF 15mm [D7-PS1010]
+ 025e 0019 NVMe DC SSD E3.S-1T 7.5mm [D7-PS1010]
+ 025e 0108 NVMe DC SSD U.2-SFF 15mm [D7-PS1030]
+ 025e 0119 NVMe DC SSD E3.S-1T 7.5mm [D7-PS1030]
+ 108e 48a0 NVMe DC SSD U.2-SFF 15mm 3.84TB [D7-PS1010 Custom]
+ 108e 48a1 NVMe DC SSD U.2-SFF 15mm 7.68TB [D7-PS1010 Custom]
+ 108e 48a2 NVMe DC SSD U.2-SFF 15mm 15.36TB [D7-PS1010 Custom]
+ 108e 48a3 NVMe DC SSD Add-In-Card [D7-PS1030 Custom]
+ 108e 48a4 NVMe DC SSD E3.S-1T 7.5mm 3.84TB [D7-PS1010 Custom]
+ 108e 48a5 NVMe DC SSD E3.S-1T 7.5mm 7.68TB [D7-PS1010 Custom]
+ 108e 48a6 NVMe DC SSD E3.S-1T 7.5mm 15.36TB [D7-PS1010 Custom]
+ f1ab P41 Plus NVMe SSD (DRAM-less) [Echo Harbor]
+ f1ac P44 Pro NVMe SSD [Hollywood Beach]
0270 Hauppauge computer works Inc. (Wrong ID)
0291 Davicom Semiconductor, Inc. (Wrong ID)
# SpeedStream is Efficient Networks, Inc, a Siemens Company
@@ -177,6 +202,12 @@
0a06 RCB672FXX 672-channel modular analog telephony card
0bae Bachmann electronic GmbH
0ccd Preferred Networks, Inc.
+ 0110 MN-Core
+ 0120 MN-Core 2
+ 0ccd 0000 MN-Core 2 16GB
+ 0ccd 0010 MN-Core 2 32GB
+ 0200 MN-Core Direct Connect
+ 0201 MN-Core 2 Middle-plane
0e11 Compaq Computer Corporation
0001 PCI to EISA Bridge
0002 PCI to ISA Bridge
@@ -434,6 +465,7 @@
1028 1f07 SAS 5/iR Integrated RAID Controller
1028 1f08 SAS 5/iR Integrated RAID Controller
1028 1f09 SAS 5/iR Adapter RAID Controller
+ 103c 3228 SAS3080X-HP 8-port PCI-X 133MHz Host Bus Adapter with 2xSFF-8484
15ad 1976 SAS Controller
0055 SAS1068 PCI-X Fusion-MPT SAS
1033 8336 SAS1068
@@ -595,6 +627,7 @@
1bd4 000e 6G SAS2008IR
1bd4 000f 6G SAS2008IT SA5248
1bd4 0010 6G SAS2008IR SA5248
+ 4c52 96c8 LRSA96C8 8-Port SATA3(6Gb/s)Exchange Adapter (with Raid)
8086 350f RMS2LL040 RAID Controller
8086 3700 SSD 910 Series
0073 MegaRAID SAS 2008 [Falcon]
@@ -744,7 +777,7 @@
1bd4 0026 12G SAS3008IT RACK
1bd4 0027 12G SAS3008IMR RACK
1bd4 0028 12G SAS3008IR RACK
- 00a5 Fusion-MPT 24GSAS/PCIe SAS40xx
+ 00a5 Fusion-MPT 24GSAS/PCIe SAS40xx/41xx
1000 4600 MegaRAID 9670W-16i Tri-Mode Storage Adapter
1000 4610 MegaRAID 9670-24i Tri-Mode Storage Adapter
1000 4620 MegaRAID 9660-16i Tri-Mode Storage Adapter
@@ -773,6 +806,9 @@
1028 2142 HBA465e Adapter
1028 2209 HBA465i Adapter
1028 220a HBA465i Front
+ 1028 22cb PERC H365i Front
+ 1028 22cc PERC H965i Front
+ 1028 22cd HBA465i Front
15d9 1d03 AOC-S4116L-H16IR (16DD/96DD) RAID Adapter
15d9 1d07 AOC-S4016L-L16IT Storage Adapter
15d9 1d08 AOC-S4016L-L16IR Storage Adapter
@@ -816,6 +852,24 @@
1000 5021 eHBA 9700W-16i 24G SAS/PCIe Storage Adapter
# 9700 16 external port Storage controller
1000 5030 eHBA 9700-16e 24G SAS/PCIe Storage Adapter
+ 1028 22d2 PERC H975i Front
+ 1028 22d3 PERC H975i Adapter
+ 1d49 020b ThinkSystem 460-16e SAS/SATA PCIe Gen5 24Gb HBA
+ 00b5 Fusion-MPT 24G SAS/PCIe SAS50xx/SAS51xx
+# 9760W 32 internal port RAID controller
+ 1000 5000 MegaRAID 9760W-32i 24G SAS/PCIe Storage Adapter
+# 9760W 16 internal port RAID controller
+ 1000 5001 MegaRAID 9760W-16i 24G SAS/PCIe Storage Adapter
+# 9760W 16 internal and 16 external port RAID controller
+ 1000 5010 MegaRAID 9760W-16i16e 24G SAS/PCIe Storage Adapter
+# 9700W 32 internal port Storage controller
+ 1000 5020 eHBA 9700W-32i 24G SAS/PCIe Storage Adapter
+# 9700W 16 internal port Storage controller
+ 1000 5021 eHBA 9700W-16i 24G SAS/PCIe Storage Adapter
+# 9700 16 external port Storage controller
+ 1000 5030 eHBA 9700-16e 24G SAS/PCIe Storage Adapter
+# Broadcom next-gen MPT PCIe switch
+ 00b8 Fusion-MPT Switch SAS50xx/SAS51xx
00be SAS3504 Fusion-MPT Tri-Mode RAID On Chip (ROC)
00bf SAS3404 Fusion-MPT Tri-Mode I/O Controller Chip (IOC)
00c0 SAS3324 PCI-Express Fusion-MPT SAS-3
@@ -1040,6 +1094,12 @@
10e4 MegaRAID 12GSAS/PCIe Unsupported SAS38xx
10e5 MegaRAID 12GSAS/PCIe SAS38xx
10e6 MegaRAID 12GSAS/PCIe Secure SAS38xx
+ 1000 04d9 3808N iMR ROMB
+ 1000 04da 3808N iMR ROMB
+ 1000 04db 3808N iMR ROMB
+ 1000 04dc 3808N iMR ROMB
+ 1000 04dd 3808N iMR ROMB
+ 1000 40d8 MegaRAID 9524-8i
1000 40e0 MegaRAID 9540-2M2
1028 2172 PERC H355 Adapter
1028 2173 PERC H355 Front
@@ -1052,9 +1112,12 @@
15d9 1c6e AOC-SLG4-2H8M2 Storage Adapter
1d49 0505 ThinkSystem RAID 540-8i PCIe Gen4 12Gb Adapter
1d49 0506 ThinkSystem RAID 540-16i PCIe Gen4 12Gb Adapter
+ 1d49 0507 ThinkSystem RAID 545-8i PCIe Gen4 12Gb Adapter
1d49 0700 ThinkSystem M.2 RAID B540i-2i SATA/NVMe Enablement Kit
1d49 0701 ThinkSystem 7mm RAID B540p-2HS SATA/NVMe Enablement Kit
1d49 0702 ThinkSystem M.2 RAID B540p-2HS SATA/NVMe Enablement Kit
+ 1d49 0703 ThinkSystem M.2 RAID B540d-2HS SATA/NVMe Enablement Kit
+ 1d49 0704 ThinkSystem M.2 RAID B545i-2i SATA/NVMe Enablement Kit
10e7 MegaRAID 12GSAS/PCIe Unsupported SAS38xx
1960 MegaRAID
1000 0518 MegaRAID 518 SCSI 320-2 Controller
@@ -1070,6 +1133,7 @@
8086 0520 MegaRAID RAID Controller SRCU41L
8086 0523 MegaRAID RAID Controller SRCS16
3050 SAS2008 PCI-Express Fusion-MPT SAS-2
+ 3150 1068e
6001 DX1 Multiformat Broadcast HD/SD Encoder/Decoder
c010 PEX880xx PCIe Gen 4 Switch
1000 100b PEX88000 PCIe Gen 4 Virtual Upstream/Downstream Port
@@ -1081,6 +1145,7 @@
1000 a064 PEX88064 64 lane/port PCIe Gen 4 Switch
1000 a080 PEX88080 80 lane/port PCIe Gen 4 Switch
1000 a096 PEX88096 98 lane/port PCIe Gen 4.0 Switch
+ 4c52 9f48 LRNV9F48 4-port Built-in 8654 NVMe Switching Adapter
c012 PEX880xx PCIe Gen 4 Switch
# Virtual endpoint used in Broadcom synthetic PCIe switches for resource reservation
1000 100b PEX88000 PCIe Gen 4 Virtual Upstream/Downstream Port
@@ -1166,6 +1231,8 @@
13e9 Ariel/Navi10Lite
13f9 Oberon/Navi12Lite
13fe Cyan Skillfish [BC-250]
+# Used in the Steam Deck OLED
+ 1435 Sephiroth [AMD Custom GPU 0405]
145a Dummy Function (absent graphics controller)
1478 Navi 10 XL Upstream Port of PCI Express Switch
1479 Navi 10 XL Downstream Port of PCI Express Switch
@@ -1201,11 +1268,11 @@
103c 8b17 ProBook 445 G9/455 G9 [Ryzen 7 Integrated Radeon GPU]
15ff Fenghuang [Zhongshan Subor Z+]
1607 Arden
- 1636 Renoir
+ 1636 Renoir [Radeon Vega Series / Radeon Vega Mobile Series]
1637 Renoir Radeon High Definition Audio Controller
1638 Cezanne [Radeon Vega Series / Radeon Vega Mobile Series]
1043 16c2 Radeon Vega 8
-# Used in the Steam Deck
+# Used in the Steam Deck LCD
163f VanGogh [AMD Custom GPU 0405]
1640 Rembrandt Radeon High Definition Audio Controller
164c Lucienne
@@ -1215,6 +1282,8 @@
1681 Rembrandt [Radeon 680M]
1714 BeaverCreek HDMI Audio [Radeon HD 6500D and 6400G-6600G series]
103c 168b ProBook 4535s
+ 1900 Phoenix3
+ 1901 Phoenix4
3150 RV380/M24 [Mobility Radeon X600]
103c 0934 nx8220
3151 RV380 GL [FireMV 2400]
@@ -2724,6 +2793,7 @@
1028 2120 Radeon HD 6450
103c 2128 Radeon HD 6450
103c 2aee Radeon HD 7450A
+ 1043 047b EAH6450 SILENT/DI/1GD3(LP)
1092 6450 Radeon HD 6450
1462 2125 Radeon HD 6450
1462 2346 Radeon HD 7450
@@ -3721,6 +3791,7 @@
6980 Polaris12
6981 Lexa XT [Radeon PRO WX 3200]
6985 Lexa XT [Radeon PRO WX 3100]
+ 103c 83b5 Radeon PRO WX 3100
6986 Polaris12
6987 Lexa [Radeon 540X/550X/630 / RX 640 / E9171 MCM]
698f Lexa XT [Radeon PRO WX 3100 / Barco MXRT 4700]
@@ -3728,6 +3799,7 @@
699f Lexa PRO [Radeon 540/540X/550/550X / RX 540X/550/550X]
1028 1720 Radeon RX 550X
148c 2380 Lexa XL [Radeon RX 550]
+ 17aa 5069 Thinkpad E480/E580
1da2 e367 Lexa PRO [Radeon RX 550]
69a0 Vega 12
69a1 Vega 12
@@ -3909,8 +3981,11 @@
73a3 Navi 21 GL-XL [Radeon PRO W6800]
73a4 Navi 21 USB
73a5 Navi 21 [Radeon RX 6950 XT]
+# Reference
+ 1002 0e3a Radeon RX 6950 XT
1849 5230 Navi 21 [ASRock OC Forumla Radeon RX 6950XT]
1da2 441d Navi 21 [Sapphire Nitro+ Radeon RX 6950 XT]
+ 1eae 6950 Navi 21 [XFX Speedster MERC319 Radeon RX 6950 XT]
73ab Navi 21 Pro-XLA [Radeon Pro W6800X/Radeon Pro W6800X Duo]
73ae Navi 21 [Radeon Pro V620 MxGPU]
73af Navi 21 [Radeon RX 6900 XT]
@@ -3930,11 +4005,17 @@
1043 16c2 Radeon RX 6800M
1458 2408 Radeon RX 6750 XT GAMING OC 12G
1462 3980 Radeon RX 6700 XT Mech 2X 12G [MSI]
+ 148c 2409 Red Devil RX 6700 XT
+# Dual fan version
+ 1849 5210 Radeon RX 6700 XT Challenger D
1849 5219 Radeon RX 6700 XT Challenger D
1849 5222 RX 6700 XT Challenger D OC
+# Gaming 1440/QHD Overclock edition with 12 Gb GDDR6 and PCIe 4.0 of Radeon RX 6700 XT by Sapphire PULSE manufactured on autumn 2022 / C1 reviseion
+ 1da2 445e Radeon RX 6700 XT GAMING OC 12G [Sapphire PULSE]
1da2 465e Radeon RX 6750 XT PULSE OC
1da2 e445 Sapphire Radeon RX 6700
1eae 6601 Speedster QICK 319 RX 6700 XT
+ 1eae 661a Radeon RX 6700 [SPEEDSTER SWFT 309]
73e0 Navi 23
73e1 Navi 23 WKS-XM [Radeon PRO W6600M]
73e3 Navi 23 WKS-XL [Radeon PRO W6600]
@@ -3944,6 +4025,7 @@
1849 5236 RX 6650 XT Challenger D OC
73f0 Navi 33 [Radeon RX 7600M XT]
73ff Navi 23 [Radeon RX 6600/6600 XT/6600M]
+ 1462 5021 MSI RX 6600XT MECH 2X
1462 5022 RX 6600 MECH 2X
148c 2412 PowerColor Red Devil RX 6600 XT
1849 5218 Radeon RX 6600 Challenger ITX 8GB
@@ -3959,16 +4041,24 @@
1da2 e457 PULSE AMD Radeon RX 6500 XT
7446 Navi 31 USB
7448 Navi 31 [Radeon Pro W7900]
- 744c Navi 31 [Radeon RX 7900 XT/7900 XTX]
+ 744c Navi 31 [Radeon RX 7900 XT/7900 XTX/7900M]
+ 1002 0e3b RX 7900 GRE [XFX]
+ 1043 0506 TUF Gaming Radeon RX 7900 XTX OC
+ 1849 5304 Radeon RX 7900 XTX
1da2 471e PULSE RX 7900 XTX
+ 1da2 475e PULSE RX 7900 GRE
1da2 e471 NITRO+ RX 7900 XTX Vapor-X
1eae 7901 RX-79XMERCB9 [SPEEDSTER MERC 310 RX 7900 XTX]
745e Navi 31 [Radeon Pro W7800]
+ 7460 7460 Navi32 GL-XL [AMD Radeon PRO V710]
+ 7470 Navi 32 [Radeon PRO W7700]
747e Navi 32 [Radeon RX 7700 XT / 7800 XT]
- 7480 Navi 33 [Radeon RX 7700S/7600/7600S/7600M XT/PRO W7600]
+ 7480 Navi 33 [Radeon RX 7600/7600 XT/7600M XT/7600S/7700S / PRO W7600]
1849 5313 RX 7600 Challenger OC
7483 Navi 33 [Radeon RX 7600M/7600M XT]
7489 Navi 33 [Radeon Pro W7500]
+ 74a0 Aqua Vanjaram [Instinct MI300A]
+ 74a1 Aqua Vanjaram [Instinct MI300X]
7833 RS350 Host Bridge
7834 RS350 [Radeon 9100 PRO/XT IGP]
7835 RS350M [Mobility Radeon 9000 IGP]
@@ -4352,6 +4442,7 @@
aa90 Turks HDMI Audio [Radeon HD 6500/6600 / 6700M Series]
1028 04a3 Precision M4600
aa98 Caicos HDMI Audio [Radeon HD 6450 / 7450/8450/8490 OEM / R5 230/235/235X OEM]
+ 1043 aa98 EAH6450 SILENT/DI/1GD3(LP)
174b aa98 Radeon HD 6450 1GB DDR3
aaa0 Tahiti HDMI Audio [Radeon HD 7870 XT / 7950/7970]
aab0 Oland/Hainan/Cape Verde/Pitcairn HDMI Audio [Radeon HD 7000 Series]
@@ -4815,6 +4906,7 @@
1014 04fb PCIe3 x16 20GB Cache 12Gb Quad SAS RAID+ Adapter(580B)
1014 04fc PCIe3 x8 12Gb Quad SAS RAID+ Adapter(580A)
04ed Internal Shared Memory (ISM) virtual PCI device
+ 0611 4769 Cryptographic Adapter
3022 QLA3022 Network Adapter
4022 QLA3022 Network Adapter
ffff MPIC-2 interrupt controller
@@ -4824,6 +4916,7 @@
5343 SPEA 3D Accelerator
1018 Unisys Systems
1019 Elitegroup Computer Systems
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
101a AT&T GIS (NCR)
0005 100VG ethernet
0007 BYNET BIC4G/2C/2G
@@ -5281,7 +5374,7 @@
1646 VanGogh IOMMU
1647 VanGogh PCIe GPP Bridge
1648 VanGogh Internal PCIe GPP Bridge to Bus
- 1649 VanGogh PSP/CCP
+ 1649 Family 19h PSP/CCP
164f Milan IOMMU
1650 Milan Data Fabric; Function 0
1651 Milan Data Fabric; Function 1
@@ -5334,7 +5427,7 @@
1716 Family 12h/14h Processor Function 5
1718 Family 12h/14h Processor Function 6
1719 Family 12h/14h Processor Function 7
- 2000 79c970 [PCnet32 LANCE]
+ 2000 79C97x [PCnet32 LANCE]
1014 2000 NetFinity 10/100 Fast Ethernet
1022 2000 PCnet - Fast 79C971
103c 104c Ethernet with LAN remote power Adapter
@@ -5348,7 +5441,7 @@
1259 2454 AT-2450v4 10Mb Ethernet Adapter
1259 2700 AT-2700TX 10/100 Fast Ethernet
1259 2701 AT-2700FX 100Mb Ethernet
- 1259 2702 AT-2700FTX 10/100 Mb Fiber/Copper Fast Ethernet
+ 1259 2702 AT-2700FTX (AM79C976KD [PCnet-PRO] chipset) 10/100 Mb Fiber/Copper Fast Ethernet
1259 2703 AT-2701FX
1259 2704 AT-2701FTX 10/100 Mb Fiber/Copper Fast Ethernet
4c53 1000 CC7/CR7/CP7/VC7/VP7/VR7 mainboard
@@ -5362,7 +5455,7 @@
1092 0a78 Multimedia Home Network Adapter
1668 0299 ActionLink Home Network Adapter
2003 Am 1771 MBW [Alchemy]
- 2020 53c974 [PCscsi]
+ 2020 AM53/79C974 [PC-SCSI]
1af4 1100 QEMU Virtual Machine
2040 79c974
2080 CS5536 [Geode companion] Host Bridge
@@ -5392,10 +5485,12 @@
1849 43c8 Fatal1ty X370 Professional Gaming
43b6 X399 Series Chipset SATA Controller
43b7 300 Series Chipset SATA Controller
+ 43b8 A320 Chipset SATA Controller [AHCI mode]
43b9 X370 Series Chipset USB 3.1 xHCI Controller
1849 43d0 Fatal1ty X370 Professional Gaming
43ba X399 Series Chipset USB 3.1 xHCI Controller
43bb 300 Series Chipset USB 3.1 xHCI Controller
+ 43bc A320 USB 3.1 XHCI Host Controller
43c6 400 Series Chipset PCIe Bridge
43c7 400 Series Chipset PCIe Port
43c8 400 Series Chipset SATA Controller
@@ -5407,6 +5502,10 @@
43ee 500 Series Chipset USB 3.1 XHCI Controller
# maybe
1b21 1142 ASM1042A USB 3.0 Host Controller
+ 43f4 600 Series Chipset PCIe Switch Upstream Port
+ 43f5 600 Series Chipset PCIe Switch Downstream Port
+ 43f6 600 Series Chipset SATA Controller
+ 43f7 600 Series Chipset USB 3.2 Controller
57a3 Matisse PCIe GPP Bridge
57a4 Matisse PCIe GPP Bridge
57ad Matisse Switch Upstream
@@ -5575,6 +5674,8 @@
9609 RS780/RS880 PCI to PCI bridge (PCIE port 5)
960a RS780 PCI to PCI bridge (NB-SB link)
960b RS780 PCI to PCI bridge (ext gfx port 1)
+# Takes over NVMe PCI ID when RAID is enabled
+ b000 RAID Bottom Device
1023 Trident Microsystems
0194 82C194
2000 4DWave DX
@@ -6393,6 +6494,7 @@
103c Hewlett-Packard Company
1005 A4977A Visualize EG
1008 Visualize FX
+ 1020 548XX Scope Interface
1028 Tach TL Fibre Channel Host Adapter
1029 Tach XL2 Fibre Channel Host Adapter
107e 000f Interphase 5560 Fibre Channel Adapter
@@ -6549,8 +6651,6 @@
3010 Samurai_1
3020 Samurai_IDE
1043 ASUSTeK Computer Inc.
- 0464 Radeon R9 270x GPU
- 0521 RX580 [RX 580 Dual O8G]
0675 ISDNLink P-IN100-ST-D
0675 1704 ISDN Adapter (PCI Bus, D, C)
0675 1707 ISDN Adapter (PCI Bus, DV, W)
@@ -7009,6 +7109,7 @@
90dc Baikal DMA Controller
90dd Baikal Memory (DDR3/SPM)
90de Baikal USB 3.0 xHCI Host Controller
+ 90eb CXD90062GG
9121 Nextorage NEM-PA NVMe SSD for PlayStation
104e Oak Technology, Inc
0017 OTI-64017
@@ -7240,6 +7341,7 @@
c350 80333 [SuperTrak EX12350]
e350 80333 [SuperTrak EX24350]
105b Foxconn International, Inc.
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
e0c3 T99W175 5G Modem [Snapdragon X55]
105c Wipro Infotech Limited
105d Number 9 Computer Company
@@ -7481,7 +7583,7 @@
1076 Chaintech Computer Co. Ltd
1077 QLogic Corp.
1016 ISP10160 Single Channel Ultra3 SCSI Processor
- 1020 ISP1020 Fast-wide SCSI
+ 1020 ISP1020/1040 Fast-wide SCSI
1022 ISP1022 Fast-wide SCSI
1080 ISP1080 SCSI Host Adapter
1216 ISP12160 Dual Channel Ultra3 SCSI Processor
@@ -8439,6 +8541,7 @@
764d PXI-2521
764e PXI-2522
764f PXI-2523
+ 7652 PXIe-4080
7654 PXI-2796
7655 PXI-2797
7656 PXI-2798
@@ -8453,7 +8556,14 @@
76a3 PXIe-6535B
76a4 PXIe-6536B
76a5 PXIe-6537B
+ 76d8 PXIe-4081
+ 76d9 PXIe-4082
+ 77a8 PXIe-6375
783e PXI-8368
+ 7882 PXIe-6376
+ 7883 PXIe-6378
+ 799e PXIe-6386
+ 799f PXIe-6396
9020 PXI-2501
9030 PXI-2503
9040 PXI-2527
@@ -8748,6 +8858,8 @@
13e9 0070 Win/TV (Video Section)
036e Bt878 Video Capture
0000 0001 Euresys Picolo PCIe
+ 0000 0002 Euresys PICOLO Pro 2
+ 0000 0004 Euresys PICOLO Pro 3E
0070 13eb WinTV Series
0070 ff01 Viewcast Osprey 200
0071 0101 DigiTV PCI
@@ -8768,6 +8880,23 @@
14f1 0002 Bt878 Mediastream Controller PAL BG
14f1 0003 Bt878a Mediastream Controller PAL BG
14f1 0048 Bt878/832 Mediastream Controller
+ 1805 0101 Euresys PICOLO Tetra
+ 1805 0102 Euresys PICOLO Tetra
+ 1805 0103 Euresys PICOLO Tetra
+ 1805 0104 Euresys PICOLO Tetra
+ 1805 0105 Euresys PICOLO Tetra
+ 1805 0106 Euresys PICOLO Tetra
+ 1805 0107 Euresys PICOLO Tetra
+ 1805 0108 Euresys PICOLO Tetra
+ 1805 0201 Euresys PICOLO Tetra-X
+ 1805 0202 Euresys PICOLO Tetra-X
+ 1805 0203 Euresys PICOLO Tetra-X
+ 1805 0204 Euresys PICOLO Tetra-X
+ 1805 0401 Euresys PICOLO Tymo
+ 1805 0402 Euresys PICOLO Tymo
+ 1805 0403 Euresys PICOLO Tymo
+ 1805 0404 Euresys PICOLO Tymo
+ 1805 1001 Euresys PICOLO Junior 4
1822 0001 VisionPlus DVB card
1851 1850 FlyVideo'98 - Video
1851 1851 FlyVideo II
@@ -8843,6 +8972,8 @@
1852 1852 FlyVideo'98 (with FM Tuner)
0878 Bt878 Audio Capture
0000 0001 Euresys Picolo PCIe
+ 0000 0002 Euresys PICOLO Pro 2 (Audio Section)
+ 0000 0004 Euresys PICOLO Pro 3E (Audio Section)
0070 13eb WinTV Series
0070 ff01 Viewcast Osprey 200
0071 0101 DigiTV PCI
@@ -8865,6 +8996,23 @@
14f1 0002 Bt878 Video Capture (Audio Section)
14f1 0003 Bt878 Video Capture (Audio Section)
14f1 0048 Bt878 Video Capture (Audio Section)
+ 1805 0101 Euresys PICOLO Tetra (Audio Section)
+ 1805 0102 Euresys PICOLO Tetra (Audio Section)
+ 1805 0103 Euresys PICOLO Tetra (Audio Section)
+ 1805 0104 Euresys PICOLO Tetra (Audio Section)
+ 1805 0105 Euresys PICOLO Tetra (Audio Section)
+ 1805 0106 Euresys PICOLO Tetra (Audio Section)
+ 1805 0107 Euresys PICOLO Tetra (Audio Section)
+ 1805 0108 Euresys PICOLO Tetra (Audio Section)
+ 1805 0201 Euresys PICOLO Tetra-X (Audio Section)
+ 1805 0202 Euresys PICOLO Tetra-X (Audio Section)
+ 1805 0203 Euresys PICOLO Tetra-X (Audio Section)
+ 1805 0204 Euresys PICOLO Tetra-X (Audio Section)
+ 1805 0401 Euresys PICOLO Tymo (Audio Section)
+ 1805 0402 Euresys PICOLO Tymo (Audio Section)
+ 1805 0403 Euresys PICOLO Tymo (Audio Section)
+ 1805 0404 Euresys PICOLO Tymo (Audio Section)
+ 1805 1001 Euresys PICOLO Junior 4 (Audio Section)
1822 0001 VisionPlus DVB Card
18ac d500 DViCO FusionHDTV5 Lite
270f fc00 Digitop DTT-1000
@@ -8991,6 +9139,10 @@
1147 VScom 020 2 port parallel adaptor
2000 PCI9030 32-bit 33MHz PCI <-> IOBus Bridge
10b5 9030 ATCOM AE400P Quad E1 PCI card
+ 2300 Euresys DOMINO Gamma
+ 2374 Euresys DOMINO Alpha
+ 2491 Euresys GRABLINK Value
+ 2493 Euresys GRABLINK Expert
2540 IXXAT CAN-Interface PC-I 04/PCI
2724 Thales PCSM Security Card
3376 Cosateq 4 Port CAN Card
@@ -9061,12 +9213,17 @@
8717 PEX 8717 16-lane, 8-Port PCI Express Gen 3 (8.0 GT/s) Switch with DMA
8718 PEX 8718 16-Lane, 5-Port PCI Express Gen 3 (8.0 GT/s) Switch
8724 PEX 8724 24-Lane, 6-Port PCI Express Gen 3 (8 GT/s) Switch, 19 x 19mm FCBGA
+ 4c52 9234 LRNV9324 2-port Built-in 8643 NVMe Exchange Adapter
+ 4c52 9524 LRNV9524 2-port M.2 NVMe SSD Exchange Adapter
8725 PEX 8725 24-Lane, 10-Port PCI Express Gen 3 (8.0 GT/s) Multi-Root Switch with DMA
8732 PEX 8732 32-lane, 8-Port PCI Express Gen 3 (8.0 GT/s) Switch
8734 PEX 8734 32-lane, 8-Port PCI Express Gen 3 (8.0GT/s) Switch
8747 PEX 8747 48-Lane, 5-Port PCI Express Gen 3 (8.0 GT/s) Switch
+ 4c52 9347 LRNV9347L 2-port Built-in 8643 NVMe Switching Adapter
+ 4c52 9547 LRNV9547 4-port M.2 NVMe SSD Exchange Adapter
8748 PEX 8748 48-Lane, 12-Port PCI Express Gen 3 (8 GT/s) Switch, 27 x 27mm FCBGA
8749 PEX 8749 48-Lane, 18-Port PCI Express Gen 3 (8.0 GT/s) Multi-Root Switch with DMA
+ 4c52 9349 LRNV9349 8-port SFF-8643 NVMe SSD Exchange Adapter
87a0 PEX PCI Express Switch NT0 Port Link Interface
87a1 PEX PCI Express Switch NT1 Port Link Interface
87b0 PEX PCI Express Switch NT0 Port Virtual Interface
@@ -9075,6 +9232,7 @@
87d0 PEX PCI Express Switch DMA interface
9016 PLX 9016 8-port serial controller
9030 PCI9030 32-bit 33MHz PCI <-> IOBus Bridge
+ 10b5 1205 Becker & Hickl MSA-1000
10b5 2695 Hilscher CIF50-PB/DPS Profibus
10b5 2862 Alpermann+Velte PCL PCI LV (3V/5V): Timecode Reader Board
10b5 2906 Alpermann+Velte PCI TS (3V/5V): Time Synchronisation Board
@@ -9106,10 +9264,17 @@
e1c5 0006 TA1-PCI4
9036 9036
9050 PCI <-> IOBus Bridge
+ 103c 10b0 82350 PCI GPIB
10b5 1067 IXXAT CAN i165
10b5 114e Wasco WITIO PCI168extended
10b5 1169 Wasco OPTOIO32standard 32 digital in, 32 digital out
+ 10b5 1171 Becker & Hickl PMS-400
10b5 1172 IK220 (Heidenhain)
+ 10b5 1201 Becker & Hickl SPC-6x0
+ 10b5 1202 Becker & Hickl SPC-7x0
+ 10b5 1203 Becker & Hickl MSA-300
+ 10b5 1206 Becker & Hickl DCC-100
+ 10b5 120a Becker & Hickl STP-340
10b5 2036 SatPak GPS
10b5 2221 Alpermann+Velte PCL PCI LV: Timecode Reader Board
10b5 2273 SH ARC-PCI SOHARD ARCNET card
@@ -9117,6 +9282,7 @@
10b5 2905 Alpermann+Velte PCI TS: Time Synchronisation Board
10b5 3196 Goramo PLX200SYN sync serial card
10b5 9050 PCI-I04 PCI Passive PC/CAN Interface
+ 11a9 5334 PDS4
12fe 0001 CAN-PCI/331 CAN bus controller
1369 8901 PCX11+ PCI
1369 8f01 VX222
@@ -9156,6 +9322,11 @@
d84d 4078 EX-4078 2S(16C552) RS-232+1P
9052 PCI9052 PCI <-> IOBus Bridge
9054 PCI9054 32-bit 33MHz PCI <-> IOBus Bridge
+ 10b5 1171 Becker & Hickl PMS-400A
+ 10b5 1208 Becker & Hickl SPC-830
+ 10b5 120e Becker & Hickl SPC-930
+ 10b5 120f Becker & Hickl SPC-150
+ 10b5 1210 Becker & Hickl DPC-230
10b5 2455 Wessex Techology PHIL-PCI
10b5 2696 Innes Corp AM Radcap card
10b5 2717 Innes Corp Auricon card
@@ -9542,6 +9713,7 @@
10be Tseng Labs International Co.
10bf Most Inc
10c0 Boca Research Inc.
+ 9135 iX3D Ultimate Rez
10c1 ICM Co., Ltd.
10c2 Auspex Systems Inc.
10c3 Samsung Semiconductors, Inc.
@@ -10256,6 +10428,7 @@
1043 402f AGP-V8200 DDR
1048 0c70 GLADIAC 920
0201 NV20 [GeForce3 Ti 200]
+ 1462 8503 G3Ti200 Pro VT128
0202 NV20 [GeForce3 Ti 500]
1043 405b V8200 T5
1545 002f Xtasy 6964
@@ -11555,6 +11728,7 @@
1025 0753 GeForce GT 620M
1025 0754 GeForce GT 620M
17aa 3977 GeForce GT 640M LE
+ 1b0a 20c6 GeForce GT 630M
1b0a 2210 GeForce GT 635M
0dea GF108M [GeForce 610M]
17aa 365a GeForce 615
@@ -11611,6 +11785,9 @@
0f02 GF108 [GeForce GT 730]
0f03 GF108 [GeForce GT 610]
0f06 GF108 [GeForce GT 730]
+ 0fa0 GK11x [GK11x_FPGA]
+ 0fa5 GK11x
+ 0fa7 GK11x [Tegra on x86 (PEATRANS)]
0fb0 GM200 High Definition Audio
0fb8 GP108 High Definition Audio Controller
0fb9 GP107GL High Definition Audio Controller
@@ -11620,13 +11797,18 @@
0fc0 GK107 [GeForce GT 640 OEM]
0fc1 GK107 [GeForce GT 640]
0fc2 GK107 [GeForce GT 630 OEM]
+ 0fc4 GK107 [D14P1-15]
0fc5 GK107 [GeForce GT 1030]
0fc6 GK107 [GeForce GTX 650]
1043 8428 GTX650-DC-1GD5
0fc8 GK107 [GeForce GT 740]
0fc9 GK107 [GeForce GT 730]
+ 0fcb GK107 [EXK107]
+ 0fcc GK107 [GeForce GT 720]
0fcd GK107M [GeForce GT 755M]
0fce GK107M [GeForce GT 640M LE]
+ 0fcf GK107 [GEN3 ESI]
+ 0fd0 GK107 [NB1G]
0fd1 GK107M [GeForce GT 650M]
1043 1597 GeForce GT 650M
1043 15a7 GeForce GT 650M
@@ -11641,10 +11823,15 @@
0fd3 GK107M [GeForce GT 640M LE]
0fd4 GK107M [GeForce GTX 660M]
0fd5 GK107M [GeForce GT 650M Mac Edition]
- 0fd6 GK107M
+ 0fd6 GK107M [N13P-GS-W]
+ 0fd7 GK107 [GK107-GTX]
0fd8 GK107M [GeForce GT 640M Mac Edition]
0fd9 GK107M [GeForce GT 645M]
- 0fdb GK107M
+ 0fda GK107 [GK107-ES-A1]
+ 0fdb GK107 [GK107-ESP-A1]
+ 0fdc GK107 [GK107-INT22-A1]
+ 0fdd GK107 [GK107-INT11-A1]
+ 0fde GK107 [GK107-ES-KA-E1]
0fdf GK107M [GeForce GT 740M]
0fe0 GK107M [GeForce GTX 660M Mac Edition]
0fe1 GK107M [GeForce GT 730M]
@@ -11665,6 +11852,7 @@
0fed GK107M [GeForce 820M]
0fee GK107M [GeForce 810M]
0fef GK107GL [GRID K340]
+ 0ff0 GK107 [NB1Q]
0ff1 GK107 [NVS 1000]
0ff2 GK107GL [GRID K1]
0ff3 GK107GL [Quadro K420]
@@ -12184,9 +12372,13 @@
11a2 GK104M [GeForce GTX 675MX Mac Edition]
11a3 GK104M [GeForce GTX 680MX]
106b 010d iMac 13,2
+ 11a4 GK104 [GK104-ESA]
+ 11a5 GK104 [GK104-ESA]
11a7 GK104M [GeForce GTX 675MX]
11a8 GK104GLM [Quadro K5100M]
11a9 GK104M [GeForce GTX 870M]
+ 11aa GK104 [GK104-INT]
+ 11ac GK104 [GK104-CS]
11af GK104GLM [GRID IceCube]
11b0 GK104GL [GRID K240Q / K260Q vGPU]
10de 101a GRID K240Q
@@ -12204,6 +12396,7 @@
11be GK104GLM [Quadro K3000M]
11bf GK104GL [GRID K2]
11c0 GK106 [GeForce GTX 660]
+ 11c1 GK106 [D14P2-30]
11c2 GK106 [GeForce GTX 650 Ti Boost]
1043 845b GeForce GTX 650 Ti Boost DirectCU II OC
1462 2874 GeForce GTX 650 Ti Boost TwinFrozr II OC
@@ -12219,6 +12412,10 @@
11c7 GK106 [GeForce GTX 750 Ti]
11c8 GK106 [GeForce GTX 650 OEM]
11cb GK106 [GeForce GT 740]
+ 11d0 GK106 [GK106-INT353]
+ 11d1 GK106 [GK106-INT343]
+ 11d2 GK106 [GK106-INT232]
+ 11d3 GK106 [GK106-ES]
11e0 GK106M [GeForce GTX 770M]
11e1 GK106M [GeForce GTX 765M]
11e2 GK106M [GeForce GTX 765M]
@@ -12227,6 +12424,7 @@
11e7 GK106M
11fa GK106GL [Quadro K4000]
11fc GK106GLM [Quadro K2100M]
+ 11ff GK106 [NB1Q]
1200 GF114 [GeForce GTX 560 Ti]
1201 GF114 [GeForce GTX 560]
1202 GF114 [GeForce GTX 560 Ti OEM]
@@ -12274,7 +12472,9 @@
1280 GK208 [GeForce GT 635]
1281 GK208 [GeForce GT 710]
1282 GK208 [GeForce GT 640 Rev. 2]
+ 1283 GK208 [D15M2-10]
1284 GK208 [GeForce GT 630 Rev. 2]
+ 1285 GK208 [GK208-100]
1286 GK208 [GeForce GT 720]
1287 GK208B [GeForce GT 730]
1288 GK208B [GeForce GT 720]
@@ -12314,8 +12514,14 @@
17aa 36af GeForce 920M
129a GK208BM [GeForce 910M]
12a0 GK208
+ 12ad GK208 [GK208-ES]
+ 12ae GK208 [GK208-CS1-C]
+ 12af GK208 [GK208-INT]
+ 12b0 GK208 [GK208-CS-Q]
+ 12b1 GK208 [GK208 INT]
12b9 GK208GLM [Quadro K610M]
12ba GK208GLM [Quadro K510M]
+ 130b GK110 [Q12U-1]
1340 GM108M [GeForce 830M]
103c 2b2b GeForce 830A
1341 GM108M [GeForce 840M]
@@ -12360,6 +12566,8 @@
103c 2b4c GeForce GTX 960A
139c GM107M [GeForce 940M]
139d GM107M [GeForce GTX 750 Ti]
+ 13ad GM204 [GM107 INT52]
+ 13ae GM204 [GM107 CS1]
13b0 GM107GLM [Quadro M2000M]
13b1 GM107GLM [Quadro M1000M]
13b2 GM107GLM [Quadro M600M]
@@ -12374,15 +12582,19 @@
10de 110a GRID M40
10de 1160 Tesla M10
10de 11d2 GRID M10-8Q
+ 13be GM204 [GM107 CS1]
+ 13bf GM204 [GM107 INT52]
13c0 GM204 [GeForce GTX 980]
1043 8504 GTX980-4GD5
13c1 GM204
13c2 GM204 [GeForce GTX 970]
13c3 GM204
+ 13c4 GM204 [D17U-20]
13d7 GM204M [GeForce GTX 980M]
13d8 GM204M [GeForce GTX 960 OEM / 970M]
13d9 GM204M [GeForce GTX 965M]
13da GM204M [GeForce GTX 980 Mobile]
+ 13e4 GM204 [Graphics Device ES-A]
13e7 GM204GL [GeForce GTX 980 Engineering Sample]
13f0 GM204GL [Quadro M5000]
13f1 GM204GL [Quadro M4000]
@@ -12408,11 +12620,16 @@
1430 GM206GL [Quadro M2000]
1431 GM206GL [Tesla M4]
1436 GM206GLM [Quadro M2200 Mobile]
+ 15c2 GP100 [CMP 100-100]
15f0 GP100GL [Quadro GP100]
15f1 GP100GL
15f7 GP100GL [Tesla P100 PCIe 12GB]
15f8 GP100GL [Tesla P100 PCIe 16GB]
15f9 GP100GL [Tesla P100 SXM2 16GB]
+ 15fa GP100GL [DGX Station / PH402 SKU 200]
+ 15fb GP100GL [GP100 SKU 200]
+ 15fc GP100GL [Tesla P100-DGXS-16GB]
+ 15ff GP100GL [GP100 SKU 15ff]
1617 GM204M [GeForce GTX 980M]
1618 GM204M [GeForce GTX 970M]
1619 GM204M [GeForce GTX 965M]
@@ -12571,6 +12788,7 @@
1d81 GV100 [TITAN V]
1d83 GV100 [CMP 100-200]
1d84 GV100 [CMP 100-210]
+ 1db0 GV100GL [Tesla GV100 SXM2-16GB SKU 890]
1db1 GV100GL [Tesla V100 SXM2 16GB]
1db2 GV100GL [Tesla V100 DGXS 16GB]
1db3 GV100GL [Tesla V100 FHHL 16GB]
@@ -12582,10 +12800,12 @@
10de 131d Tesla V100-SXM3-32GB-H
1dba GV100GL [Quadro GV100]
10de 12eb TITAN V CEO Edition
+ 1dbd GV100GL [Tesla GV100 DGX1-V]
1dbe GV100 Engineering Sample
1dc1 GV100 [CMP 100-200]
1df0 GV100GL [Tesla PG500-216]
1df2 GV100GL [Tesla PG503-216]
+ 1df4 GV100 [CMP 100-210]
1df5 GV100GL [Tesla V100 SXM2 16GB]
1df6 GV100GL [Tesla V100S PCIe 32GB]
1e02 TU102 [TITAN RTX]
@@ -12599,6 +12819,7 @@
1e30 TU102GL [Quadro RTX 6000/8000]
10de 129e Quadro RTX 8000
10de 12ba Quadro RTX 6000
+ 1e35 TU102GL [Tesla T10]
1e36 TU102GL [Quadro RTX 6000]
1e37 TU102GL [Tesla T10 16GB / GRID RTX T10-2/T10-4/T10-8]
10de 1304 Tesla T10 16GB
@@ -12696,7 +12917,7 @@
1fd9 TU117BM [GeForce GTX 1650 Mobile Refresh]
1fdd TU117BM [GeForce GTX 1650 Mobile Refresh]
1ff0 TU117GL [T1000 8GB]
- 1ff2 TU117GL [T400 4GB]
+ 1ff2 TU117GL [T400 4GB / T400E]
1ff9 TU117GLM [Quadro T1000 Mobile]
2080 GA100
2081 GA100
@@ -12737,6 +12958,7 @@
21ae TU116GL
21bf TU116GL
21c2 TU116
+ 21c3 TU116
21c4 TU116 [GeForce GTX 1660 SUPER]
21d1 TU116BM [GeForce GTX 1660 Ti Mobile]
2200 GA102
@@ -12769,25 +12991,34 @@
2296 Tegra PCIe Endpoint Virtual Network
22a3 GH100 [H100 NVSwitch]
22ba AD102 High Definition Audio Controller
+ 22bc AD104 High Definition Audio Controller
+ 22bd AD106M High Definition Audio Controller
2302 GH100
2313 GH100 [H100 CNX]
2321 GH100 [H100L 94GB]
2322 GH100 [H800 PCIe]
2324 GH100 [H800]
+ 2329 GH100 [H20]
2330 GH100 [H100 SXM5 80GB]
2331 GH100 [H100 PCIe]
+ 2335 GH100 [H200 SXM 141GB]
2336 GH100 [H100]
2337 GH100 [H100 SXM5 64GB]
+ 2338 GH100 [H100 SXM5 96GB]
2339 GH100 [H100 SXM5 94GB]
233a GH100 [H800L 94GB]
233d GH100 [H100 96GB]
- 2342 GH100 [GH200 120GB]
+ 2342 GH100 [GH200 120GB / 480GB]
2343 GH100
- 2345 GH100 [GH200 480GB]
+ 2345 GH100 [GH100-88K-A1]
+ 237f GH100 [Skinny Joe]
+ 23b0 GH100
+ 23f0 GH100
2414 GA103 [GeForce RTX 3060 Ti]
2420 GA103M [GeForce RTX 3080 Ti Mobile]
2438 GA103GLM [RTX A5500 Laptop GPU]
2460 GA103M [GeForce RTX 3080 Ti Laptop GPU]
+ 2480 GA104 [Reserved Dev ID A]
2482 GA104 [GeForce RTX 3070 Ti]
2483 GA104
2484 GA104 [GeForce RTX 3070]
@@ -12799,6 +13030,9 @@
2488 GA104 [GeForce RTX 3070 Lite Hash Rate]
2489 GA104 [GeForce RTX 3060 Ti Lite Hash Rate]
248a GA104 [CMP 70HX]
+ 248c GA104 [GeForce RTX 3070 Ti]
+ 248d GA104 [GeForce RTX 3070]
+ 248e GA104 [GeForce RTX 3060 Ti]
249c GA104M [GeForce RTX 3080 Mobile / Max-Q 8GB/16GB]
249d GA104M [GeForce RTX 3070 Mobile / Max-Q]
249f GA104M
@@ -12816,6 +13050,7 @@
24ba GA104GLM [RTX A4500 Laptop GPU]
24bb GA104GLM [RTX A3000 Laptop GPU]
24bf GA104 [GeForce RTX 3070 Engineering Sample]
+ 24c0 GA104 [Initial Dev ID B]
24c7 GA104 [GeForce RTX 3060 8GB]
24c8 GA104 [GeForce RTX 3070 GDDR6X]
24c9 GA104 [GeForce RTX 3060 Ti GDDR6X]
@@ -12843,6 +13078,7 @@
2571 GA106 [RTX A2000 12GB]
2582 GA107 [GeForce RTX 3050 8GB]
2583 GA107 [GeForce RTX 3050 4GB]
+ 2584 GA107 [GeForce RTX 3050 6GB]
25a0 GA107M [GeForce RTX 3050 Ti Mobile]
25a2 GA107M [GeForce RTX 3050 Mobile]
25a3 GA107
@@ -12856,6 +13092,8 @@
25ac GN20-P0-R-K2 [GeForce RTX 3050 6GB Laptop GPU]
25ad GA107 [GeForce RTX 2050]
25af GA107 [GeForce RTX 3050 Engineering Sample]
+ 25b0 GA107GL [RTX A1000]
+ 25b2 GA107GL [RTX A400]
25b5 GA107GLM [RTX A4 Mobile]
# A16 - 25B6 10DE 14A9 / A2 - 25B6 10DE 157E
25b6 GA107GL [A2 / A16]
@@ -12875,23 +13113,36 @@
25fb GA107 [RTX A500 Embedded GPU]
2681 AD102 [RTX TITAN Ada]
2684 AD102 [GeForce RTX 4090]
+ 2685 AD102 [GeForce RTX 4090 D]
+ 2689 AD102 [GeForce RTX 4070 Ti SUPER]
26b1 AD102GL [RTX 6000 Ada Generation]
26b2 AD102GL [RTX 5000 Ada Generation]
+ 26b3 AD102GL [RTX 5880 Ada Generation]
26b5 AD102GL [L40]
+ 26b7 AD102GL [L20]
26b8 AD102GL [L40G]
26b9 AD102GL [L40S]
+ 26ba AD102GL [L20]
26f5 AD102GL [L40 CNX]
+ 2702 AD103 [GeForce RTX 4080 SUPER]
+ 2703 AD103 [GeForce RTX 4080 SUPER]
2704 AD103 [GeForce RTX 4080]
+ 2705 AD103 [GeForce RTX 4070 Ti SUPER]
+ 2709 AD103 [GeForce RTX 4070]
2717 GN21-X11 [GeForce RTX 4090 Laptop GPU]
2730 AD103GLM [RTX 5000 Ada Generation Laptop GPU]
2757 GN21-X11
2770 AD103GLM [RTX 5000 Ada Generation Embedded GPU]
2782 AD104 [GeForce RTX 4070 Ti]
+ 2783 AD104 [GeForce RTX 4070 SUPER]
2785 AD104 [AC AD104 20GB]
2786 AD104 [GeForce RTX 4070]
+ 2788 AD104 [GeForce RTX 4060 Ti]
27a0 AD104M [GeForce RTX 4080 Max-Q / Mobile]
27b0 AD104GL [RTX 4000 SFF Ada Generation]
+ 27b1 AD104GL [RTX 4500 Ada Generation]
27b2 AD104GL [RTX 4000 Ada Generation]
+ 27b6 AD104GL [L2]
27b7 AD104GL [L16]
27b8 AD104GL [L4]
27ba AD104GLM [RTX 4000 Ada Generation Laptop GPU]
@@ -12901,6 +13152,7 @@
27fb AD104GLM [RTX 3500 Ada Generation Embedded GPU]
2803 AD106 [GeForce RTX 4060 Ti]
2805 AD106 [GeForce RTX 4060 Ti 16GB]
+ 2808 AD106 [GeForce RTX 4060]
2820 AD106M [GeForce RTX 4070 Max-Q / Mobile]
2838 AD106GLM [RTX 3000 Ada Generation Laptop GPU]
2860 AD106M [GeForce RTX 4070 Max-Q / Mobile]
@@ -12908,7 +13160,11 @@
2882 AD107 [GeForce RTX 4060]
28a0 AD107M [GeForce RTX 4060 Max-Q / Mobile]
28a1 AD107M [GeForce RTX 4050 Max-Q / Mobile]
+ 28b0 AD107GL [RTX 2000 / 2000E Ada Generation]
28b8 AD107GLM [RTX 2000 Ada Generation Laptop GPU]
+ 28b9 AD107GLM [RTX 1000 Ada Generation Laptop GPU]
+ 28ba AD107GLM [RTX 500 Ada Generation Laptop GPU]
+ 28bb AD107GLM [RTX 500 Ada Generation Laptop GPU]
28e0 AD107M [GeForce RTX 4060 Max-Q / Mobile]
28e1 AD107M [GeForce RTX 4050 Max-Q / Mobile]
28f8 AD107GLM [RTX 2000 Ada Generation Embedded GPU]
@@ -13074,9 +13330,11 @@
2011 Q-Motion Video Capture/Edit board
4750 S5930 [Matchmaker]
5920 S5920
+ 801d Roper Scientific PCI TAXI interface
8043 LANai4.x [Myrinet LANai interface chip]
8062 S5933_PARASTATION
807d S5933 [Matchmaker]
+ 8081 GPIB interface card [IOtech Inc. PCI488]
8088 Kongsberg Spacetec Format Synchronizer
8089 Kongsberg Spacetec Serial Output Board
809c S5933_HEPC3
@@ -13120,6 +13378,7 @@
8111 Twist3 Frame Grabber
10ec Realtek Semiconductor Co., Ltd.
0139 RTL-8139/8139C/8139C+ Ethernet Controller
+ 2600 Killer E2600 GbE Controller
3000 Killer E3000 2.5GbE Controller
4321 RTL8852BE 802.11ax PCIe Wireless Network Adapter
5208 RTS5208 PCI Express Card Reader
@@ -13147,6 +13406,7 @@
1028 06e6 Latitude 11 5175 2-in-1
1028 09be Latitude 7410
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
17aa 224f ThinkPad X1 Carbon 5th Gen
5260 RTS5260 PCI Express Card Reader
5261 RTS5261 PCI Express Card Reader
@@ -13160,6 +13420,8 @@
5762 RTS5762 NVMe SSD Controller
5763 RTS5763DL NVMe SSD Controller (DRAM-less)
5765 RTS5765DL NVMe SSD Controller (DRAM-less)
+ 5770 RTS5770DL NVMe SSD Controller (DRAM-less)
+ 5772 RTS5772DL NVMe SSD Controller (DRAM-less)
8029 RTL-8029(AS)
10b8 2011 EZ-Card (SMC1208)
10ec 8029 RTL-8029(AS)
@@ -13168,6 +13430,7 @@
1259 2400 AT-2400
1af4 1100 QEMU Virtual Machine
8125 RTL8125 2.5GbE Controller
+ 4c52 2022 LRES2022PT Single-port 2.5Gb Ethernet Network Adapter
8129 RTL-8129
10ec 8129 RT8129 Fast Ethernet Adapter
11ec 8129 RTL8111/8168 PCIe Gigabit Ethernet (misconfigured)
@@ -13239,7 +13502,7 @@
1458 e000 GA-MA69G-S3H Motherboard
1462 235c P965 Neo MS-7235 mainboard
1462 236c 945P Neo3-F motherboard
- 8168 RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller
+ 8168 RTL8111/8168/8211/8411 PCI Express Gigabit Ethernet Controller
1019 8168 RTL8111/8168 PCI Express Gigabit Ethernet controller
1025 1094 Acer Aspire E5-575G
1028 0283 Vostro 220
@@ -13267,7 +13530,7 @@
1043 8505 P8 series motherboard
1043 8554 H81M-C Motherboard
1043 859e AM1I-A Motherboard
- 1043 8677 PRIME B450M-A Motherboard
+ 1043 8677 Onboard RTL8111H Ethernet
105b 0d7c D270S/D250S Motherboard
10ec 8168 RTL8111/8168 PCI Express Gigabit Ethernet controller
144d c652 RTL8168 on a NP300E5C series laptop
@@ -13278,10 +13541,12 @@
1462 4180 Wind PC MS-7418
1462 7522 X58 Pro-E
1462 7c37 X570-A PRO motherboard
+ 1734 11c0 RTL8211DN on Esprimo P510 D3171 motherboard
1775 11cc CC11/CL11
17aa 3098 ThinkCentre E73
17aa 3814 Z50-75
17aa 3823 Lenovo V130-15IGM Laptop - Type 81HL
+ 17aa 5068 Thinkpad E480/E580
17aa 5124 ThinkPad E595
1849 8168 Motherboard (one of many)
7470 3468 TG-3468 Gigabit PCI Express Network Adapter
@@ -13342,16 +13607,21 @@
8813 RTL8813AE 802.11ac PCIe Wireless Network Adapter
8821 RTL8821AE 802.11ac PCIe Wireless Network Adapter
8852 RTL8852AE 802.11ax PCIe Wireless Network Adapter
+ a85a RTL8852AE WiFi 6 802.11ax PCIe Adapter
b723 RTL8723BE PCIe Wireless Network Adapter
10ec 8739 Dell Wireless 1801
17aa b736 Z50-75
+ b821 RTL8821CE PCIe 802.11ac Wireless Network Controller
b822 RTL8822BE 802.11a/b/g/n/ac WiFi adapter
103c 831b Realtek RTL8822BE 802.11ac 2x2 Wi-Fi + Bluetooth 4.2 Combo Adapter (MU-MIMO supported)
17aa 5124 ThinkPad E595
17aa b023 ThinkPad E595
+ b852 RTL8852BE PCIe 802.11ax Wireless Network Controller
+ b85b RTL8852BE PCIe 802.11ax Wireless Network Controller [1T1R]
c821 RTL8821CE 802.11ac PCIe Wireless Network Adapter
c822 RTL8822CE 802.11ac PCIe Wireless Network Adapter
c82f RTL8822CE 802.11ac PCIe Wireless Network Adapter
+ c852 RTL8852CE PCIe 802.11ax Wireless Network Controller
d723 RTL8723DE 802.11b/g/n PCIe Adapter
10ed Ascii Corporation
7310 V7310
@@ -13379,6 +13649,8 @@
500c Alveo U280 XDMA Platform
5020 Alveo U50 XMDA Platform
505c Alveo U55C
+ 5074 Alveo X3522, Quad Port, 10/25GbE Adaptable Accelerator Card
+ 5084 Alveo X3522, Quad Port, 10/25GbE Low Latency Network Adapter
6987 SmartSSD
6988 SmartSSD
7011 7-Series FPGA Hard PCIe block (AXI/debug)
@@ -13609,6 +13881,8 @@
0644 RocketRAID 644 4 Port SATA-III Controller (eSATA)
0645 RocketRAID 644L 4 Port SATA-III Controller (eSATA)
0646 RocketRAID 644LS SATA-III Controller (4 eSATA devices connected by 1 SAS cable)
+ 0750 Rocket 750 PCIe Gen2 SATA III Controller
+ 0840 RocketRAID 840 PCIe Gen3 SATA III Controller
1720 RocketRAID 1720 (2x SATA II RAID Controller)
1740 RocketRAID 1740
1742 RocketRAID 1742
@@ -13620,6 +13894,7 @@
2322 RocketRAID 2322 SATA-II Controller
2340 RocketRAID 2340 16 Port SATA-II Controller
2640 RocketRAID 2640 SAS/SATA Controller
+ 2720 RocketRAID 2720 PCIe Gen2 6Gb/s SAS/SATA Controller
2722 RocketRAID 2722
# SFF-8087 Mini-SAS 16 port internal
2740 RocketRAID 2740
@@ -13627,12 +13902,21 @@
2744 RocketRaid 2744
# SFF-8088 8 port external / SFF-8087 24 port internal
2782 RocketRAID 2782
+ 2840 RocketRAID 2840 PCIe Gen3 6Gb/s SAS/SATA Controller
3120 RocketRAID 3120
3220 RocketRAID 3220
3320 RocketRAID 3320
+ 3520 RocketRAID 3520 PCIe Gen1 8-Port SATA II Controller
+ 3530 RocketRAID 3530 PCIe Gen1 12-Port SATA II Controller
+ 3740 RocketRAID 3740 PCIe Gen3 12Gb/s SAS/SATA Controller
4310 RocketRaid 4310
+ 4320 RocketRAID 4320 SAS Controller
+ 7103 SSD7103 PCIe Gen3 x16 4-Port M.2 NVMe RAID Controller
+ 7105 SSD7105 PCIe Gen3 x16 4-Port M.2 NVMe RAID Controller
+ 7110 SSD7110 PCIe Gen3 x16 NVMe RAID Controller
7505 SSD7505 PCIe Gen4 x16 4-Port M.2 NVMe RAID Controller
7540 SSD7540 PCIe Gen4 x16 8-Port M.2 NVMe RAID Controller
+ 7580 SSD7580 PCIe Gen4 x16 8-Port M.2 NVMe RAID Controller
1104 RasterOps Corp.
1105 Sigma Designs, Inc.
1105 REALmagic Xcard MPEG 1/2/3/4 DVD Decoder
@@ -13818,6 +14102,7 @@
1043 808c VT62xx USB1.1 4 port controller
1043 80a1 A7V8X-X motherboard
1043 80ed A7V600/K8V-X/A8V Deluxe motherboard
+ 1106 3038 USB 1.1 UHCI controller
1179 0001 Magnia Z310
1234 0925 MVP3 USB Controller
1458 5004 GA-7VAX Mainboard
@@ -13850,6 +14135,7 @@
1462 590d KT6 Delta-FIS2R (MS-6590)
1462 702d K8T NEO 2 motherboard
1462 971d MS-6917
+ 153b 1146 Cameo DV Firewire controller
3050 VT82C596 Power Management
3051 VT82C596 Power Management
3053 VT6105M [Rhine-III]
@@ -13945,7 +14231,7 @@
1043 808c A7V8X motherboard
1043 80a1 A7V8X-X motherboard rev 1.01
1043 80ed A7V600/K8V-X/A8V Deluxe motherboard
- 1106 3104 USB 2.0 Controller
+ 1106 3104 USB 2.0 EHCI controller
1297 f641 FX41 motherboard
1458 5004 GA-7VAX Mainboard
1462 5901 KT6 Delta-FIS2R (MS-6590)
@@ -14263,6 +14549,8 @@
# Superfastcom-PCI (Commtech, Inc.) or DSCC4 WAN Adapter
2102 DSCC4 PEB/PEF 20534 DMA Supported Serial Communication Controller with 4 Channels
2104 Eicon Diva 2.02 compatible passive ISDN card
+# S30807-Q5474
+ 3101 HiPath 4000 PCI card
3141 SIMATIC NET CP 5611 / 5621
3142 SIMATIC NET CP 5613 / 5614
3143 SIMATIC NET CP 1613
@@ -15366,7 +15654,7 @@
1179 0021 KIOXIA CD5 series SSD
1d49 4039 Thinksystem U.2 CM5 NVMe SSD
1d49 403a Thinksystem AIC CM5 NVMe SSD
- 0113 BG3 NVMe SSD Controller
+ 0113 BG3 x2 NVMe SSD Controller (DRAM-less)
1179 0001 Toshiba KBG30ZMS128G 128GB NVMe SSD
0115 XG4 NVMe SSD Controller
0116 XG5 NVMe SSD Controller
@@ -15392,6 +15680,7 @@
0805 SD TypA Controller
0d01 FIR Port Type-DO
1179 0001 FIR Port Type-DO
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
117a A-Trend Technology
117b L G Electronics, Inc.
117c ATTO Technology, Inc.
@@ -15686,6 +15975,8 @@
000b ATP867-B
000d ATP8620
000e ATP8620
+ 0011 ATP865-B
+ 1191 0011 ACARD AEC-6280
8002 AEC6710 SCSI-2 Host Adapter
8010 AEC6712UW SCSI
8020 AEC6712U SCSI
@@ -15949,7 +16240,7 @@
6281 88F6281 [Kirkwood] ARM SoC
# This device ID was used for earlier chips.
6381 MV78xx0 [Discovery Innovation] ARM SoC
- 6440 88SE6440 SAS/SATA PCIe controller
+ 6440 88SE63x0 x1, 88SE6440 x4 PCIe SAS/SATA 3Gb/s RAID controller
6450 64560 System Controller
6460 MV64360/64361/64362 System Controller
6480 MV64460/64461/64462 System Controller
@@ -15960,7 +16251,7 @@
6820 88F6820 [Armada 385] ARM SoC
6828 88F6828 [Armada 388] ARM SoC
6920 88F6920 [Armada 390] ARM SoC
- 7042 88SX7042 PCI-e 4-port SATA-II
+ 7042 88SX7042 PCIe 4-port SATA-II controller
16b8 434b Tempo SATA E4P
7810 MV78100 [Discovery Innovation] ARM SoC
7820 MV78200 [Discovery Innovation] ARM SoC
@@ -15983,7 +16274,9 @@
11ae Aztech System Ltd
11af Avid Technology Inc.
0001 Cinema
+ ee21 Digidesign DSP Farm
ee40 Digidesign Audiomedia III
+ ee60 Digidesign SampleCell II / II Plus
11b0 V3 Semiconductor Inc.
0002 V300PSC
0292 V292PBC [Am29030/40 Bridge]
@@ -16225,6 +16518,38 @@
11e2 Samsung Information Systems America
11e3 Quicklogic Corporation
0001 COM-ON-AIR Dosch&Amand DECT
+ 0010 QL5032 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0011 QL5032 (PBGA256) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0012 QL5232 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 11e3 1204 Becker & Hickl SPC-130
+ 11e3 1207 Becker & Hickl DDG-200
+ 11e3 1209 Becker & Hickl SHM-180
+ 11e3 120c Becker & Hickl PMM-428
+ 0013 QL5232 (PBGA456) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0014 QL5030 (TQFP144) [QuickPCI] 33 MHz/32-bit PCI Target with Embedded Programmable Logic and Dual Port SRAM
+ 0015 QL5130 (TQFP144) [QuickPCI] 33 MHz/32-bit PCI Target with Embedded Programmable Logic and Dual Port SRAM
+ 0016 QL5130 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Target with Embedded Programmable Logic and Dual Port SRAM
+ 11e3 120b Becker & Hickl DEL-350
+ 0017 QL5130 (PBGA256) [QuickPCI] 33 MHz/32-bit PCI Target with Embedded Programmable Logic and Dual Port SRAM
+ 0019 QL5332 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 001a QL5332 (PBGA256) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 001b QL5432 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 11e3 120d Becker & Hickl SPC-140
+ 11e3 1211 Becker & Hickl GVD-120
+ 11e3 1212 Becker & Hickl DDG-210
+ 001c QL5432 (PBGA456) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 001d QL5632 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 001e QL5632 (PBGA280) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 001f QL5632 (PBGA484) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0020 QL5632 (PBGA516) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0021 QL5732 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0022 QL5732 (PBGA280) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0023 QL5732 (PBGA484) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 0024 QL5732 (PBGA516) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 002d QL5022 (TQFP144) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 002e QL5022 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Master/Target with Embedded Programmable Logic and Dual Port SRAM
+ 002f QL5020 (TQFP144) [QuickPCI] 33 MHz/32-bit PCI Target with Embedded Programmable Logic and Dual Port SRAM
+ 0030 QL5020 (PQFP208) [QuickPCI] 33 MHz/32-bit PCI Target with Embedded Programmable Logic and Dual Port SRAM
0560 QL5064 Companion Design Demo Board
5030 PC Watchdog
8417 QL5064 [QuickPCI] PCI v2.2 bridge for SMT417 Dual TMS320C6416T PMC Module
@@ -16426,9 +16751,9 @@
13f7 1394 OHCI Compliant Host Controller
6729 OZ6729
673a OZ6730
- 6832 OZ6832/6833 CardBus Controller
- 6836 OZ6836/6860 CardBus Controller
- 6872 OZ6812 CardBus Controller
+ 6832 OZ6832/6833 CardBus Controller [Saturn]
+ 6836 OZ6836/6860 CardBus Controller [Mercury]
+ 6872 OZ6812 CardBus Controller [Challenger]
6925 OZ6922 CardBus Controller
6933 OZ6933/711E1 CardBus/SmartCardBus Controller
1025 1016 Travelmate 612 TX
@@ -16473,6 +16798,7 @@
8331 O2 Flash Memory Card
8520 SD/MMC Card Reader Controller
8621 SD/MMC Card Reader Controller
+ 17aa 5068 Thinkpad E480/E580
8760 FORESEE E2M2 NVMe SSD
1218 Hybricon Corp.
1219 First Virtual Corporation
@@ -16602,6 +16928,7 @@
00a3 VisionLink F4
00a9 VisionLink CLS
00ab PCIe8g3 A5 10G
+ 00b5 PCIe8 RFx SDR
123e Simutech, Inc.
# nee C-Cube Microsystems / acquired by Magnum Semiconductor
123f LSI Logic
@@ -16832,8 +17159,11 @@
0820 SM820 Lynx3D
0910 SM910
2260 SM2260 NVMe SSD Controller
+ 2261 SM2261XT x2 NVMe SSD Controller (DRAM-less)
2262 SM2262/SM2262EN SSD Controller
2263 SM2263EN/SM2263XT (DRAM-less) NVMe SSD Controllers
+ 2269 SM2269XT (DRAM-less) NVMe SSD Controller
+ 8366 SM8366 NVMe SSD Controller [MonTitan]
1270 Olympus Optical Co., Ltd.
1271 GW Instruments
1272 Telematics International
@@ -16902,7 +17232,7 @@
1274 2000 Creative CT4810 [Sound Blaster AudioPCI 128]
1274 2003 Creative SoundBlaster AudioPCI 128
1274 5880 Creative CT4750 [Sound Blaster PCI 128]
- 1274 8001 Sound Blaster 16PCI 4.1ch
+ 1274 8001 Creative CT4750 [Sound Blaster 16 PCI/PCI 128/4.1 Digital]
1458 a000 5880 AudioPCI On Motherboard 6OXET
1462 6880 5880 AudioPCI On Motherboard MS-6188 1.00
270f 2001 5880 AudioPCI On Motherboard 6CTR
@@ -17106,6 +17436,7 @@
1000 PXD1000
1296 Kofax Image Products
1297 Holco Enterprise Co, Ltd/Shuttle Computer
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
1298 Spellcaster Telecommunications Inc.
1299 Knowledge Technology Lab.
129a VMetro, inc.
@@ -17152,7 +17483,8 @@
10a9 8002 Acenic Gigabit Ethernet
12ae 0002 Gigabit Ethernet-T (3C986-T)
00fa Farallon PN9100-T Gigabit Ethernet
-12af TDK USA Corp
+12af TDK Corporation
+ 5831 GBDriver GX1 x2 NVMe SSD Controller (DRAM-less)
12b0 Jorge Scientific Corp
12b1 GammaLink
12b2 General Signal Networks
@@ -17262,6 +17594,7 @@
0009 DAC64
0018 Riva128
1048 0c10 VICTORY Erazor
+ 1048 0c15 VICTORY Erazor LT-8
107b 8030 STB Velocity 128
1092 0350 Viper V330
1092 1092 Viper V330
@@ -17301,13 +17634,14 @@
# PI7C9X20508GP 5Port-8Lane PCI Express Switch GreenPacket Family
0508 PI7C9X20508GP PCI Express Switch 5Port-8Lane
2304 PI7C9X2G304 EL/SL PCIe2 3-Port/4-Lane Packet Switch
- 2308 PI7C9X2G308GP 8-lane PCI Express 2.0 Switch with 3 PCI Express ports
+ 2308 PI7C9X2G308GP 3-Ports/8-lane PCIe 2.0 Switch
2404 PI7C9X2G404 EL/SL PCIe2 4-Port/4-Lane Packet Switch
2608 PI7C9X2G608GP PCIe2 6-Port/8-Lane Packet Switch
ea50 cc10 RXi2-BP
- 400a PI7C9X442SL PCI Express Bridge Port
- 400e PI7C9X442SL USB OHCI Controller
- 400f PI7C9X442SL USB EHCI Controller
+ 400a PI7C9X442SL PCIe Bridge Port
+ 400c PI7C9X440SL PCIe Bridge Port
+ 400e PI7C9X440SL/PI7C9X442SL USB OHCI Controller
+ 400f PI7C9X440SL/PI7C9X442SL USB EHCI Controller
71e2 PI7C7300A/PI7C7300D PCI-to-PCI Bridge
71e3 PI7C7300A/PI7C7300D PCI-to-PCI Bridge (Secondary Bus 2)
8140 PI7C8140A PCI-to-PCI Bridge
@@ -17316,9 +17650,12 @@
8152 PI7C8152A/PI7C8152B/PI7C8152BI PCI-to-PCI Bridge
8154 PI7C8154A/PI7C8154B/PI7C8154BI PCI-to-PCI Bridge
8619 PI7C9X2G1616PR PCIe2 16-Port/16-Lane Packet Switch
- e110 PI7C9X110 PCI Express to PCI bridge
+ b404 PI7C9X2G404 EV/SV PCIe2 4-Port/4-Lane Packet Switch
+ e110 PI7C9X110 PCIe- to-PCI bridge
1775 11cc CC11/CL11 CompactPCI Bridge
e111 PI7C9X111SL PCIe-to-PCI Reversible Bridge
+ e112 PI7C9X112SL PCIe-to-PCI Bridge
+ e113 PI7C9X113SL/PI7C9X118SL PCIe-to-PCI Bridge
e130 PCI Express to PCI-XPI7C9X130 PCI-X Bridge
12d9 Aculab PLC
0002 PCI Prosody
@@ -17461,11 +17798,18 @@
0035 PCI-DAS64/M1/16
0036 PCI-DAS64/M2/16
0037 PCI-DAS64/M3/16
+ 004b PCI-MDB64
004c PCI-DAS1000
004d PCI-QUAD04
0052 PCI-DAS4020/12
0053 PCIM-DDA06/16
0054 PCI-DIO96
+ 0055 CPCI-DIO24H
+ 0056 PCIM-DAS1602/16
+ 0057 PCI-DAS3202/16
+ 0059 PCI-QUAD-AC5
+ 005a CPCI-DIO96H
+ 005b CPCI-DIO48H
005d PCI-DAS6023
005e PCI-DAS6025
005f PCI-DAS6030
@@ -17478,10 +17822,23 @@
0066 PCI-DAS6052
0067 PCI-DAS6070
0068 PCI-DAS6071
+ 006e PCI-CTR10
006f PCI-DAS6036
0070 PCI-DAC6702
+ 0071 PCI-DAC6703
+ 0074 PCI-CTR20HD
+ 0077 PCI-DIO24/LP
0078 PCI-DAS6013
0079 PCI-DAS6014
+ 007b PCIM-DAS16JR/16
+ 007e PCI-DIO24/S
+ 00a5 PCI-2511
+ 00a6 PCI-2513
+ 00a7 PCI-2515
+ 00a8 PCI-2517
+ 00be PCI-QUAD05
+ 00da PCIe-DIO96H
+ 00db PCIe-DIO24
0115 PCIe-DAS1602/16
1308 Jato Technologies Inc.
0001 NetCelerator Adapter
@@ -17643,6 +18000,26 @@
1344 4000 3.2TB U.2
1344 5000 6.4 TB U.2
1344 6000 12.8TB U.2
+ 51b7 7500 PRO NVMe SSD
+ 1028 22e7 DC NVMe 7500 U.2 SED RI 15.36TB
+ 1028 22e8 DC NVMe 7500 U.2 SED RI 7.68TB
+ 1028 22e9 DC NVMe 7500 U.2 SED RI 3.84TB
+ 1028 22ea DC NVMe 7500 U.2 SED RI 1.92TB
+ 1028 22eb DC NVMe 7500 U.2 SED RI 960GB
+ 1028 22ec DC NVMe 7500 U.2 ISE RI 15.36TB
+ 1028 22ed DC NVMe 7500 U.2 ISE RI 7.68TB
+ 1028 22ee DC NVMe 7500 U.2 ISE RI 3.84TB
+ 1028 22ef DC NVMe 7500 U.2 ISE RI 1.92TB
+ 1028 22f0 DC NVMe 7500 U.2 ISE RI 960GB
+ 51b8 7500 MAX NVMe SSD
+ 1028 22f1 DC NVMe 7500 U.2 ISE MU 12.8TB
+ 1028 22f2 DC NVMe 7500 U.2 ISE MU 6.4TB
+ 1028 22f3 DC NVMe 7500 U.2 ISE MU 3.2TB
+ 1028 22f4 DC NVMe 7500 U.2 ISE MU 1.6TB
+ 1028 22f5 DC NVMe 7500 U.2 ISE MU 800GB
+ 51b9 6500 ION NVMe SSD
+ 1028 22e6 Ent NVMe 6500 RI 30.72TB
+ 1028 22f6 Ent NVMe 6500 RI FIPS 30.72TB
51c0 7400 PRO NVMe SSD
1028 2162 EC NVMe OPAL 7400 RI M.2 480GB
1028 2163 EC NVMe OPAL 7400 RI M.2 960GB
@@ -17680,7 +18057,38 @@
1344 4000 U.3 3200GB
1344 5000 U.3 6400GB
51c3 7450 PRO NVMe SSD
+ 1028 226b EC NVMe FIPS 7450 RI M.2 110 960GB
+ 1028 226c EC NVMe ISE 7450 RI M.2 80 480GB
+ 1028 226d EC NVMe ISE 7450 RI M.2 80 960GB
+ 1028 226e EC NVMe SED 7450 RI M.2 80 480GB
+ 1028 226f EC NVMe SED 7450 RI M.2 80 960GB
+ 1028 2270 EC NVMe FIPS 7450 RI M.2 80 480GB
+ 1028 2271 EC NVMe FIPS 7450 RI M.2 80 960GB
+ 1028 2273 EC NVMe ISE 7450 RI M.2 110 960GB
+ 1028 2274 EC NVMe ISE 7450 RI M.2 110 1920GB
+ 1028 2275 EC NVMe ISE 7450 RI M.2 110 3840GB
+ 1028 2278 DC NVMe ISE 7450 RI U.2 960GB
+ 1028 2279 DC NVMe ISE 7450 RI U.2 1.92TB
+ 1028 227a DC NVMe ISE 7450 RI U.2 3.84TB
+ 1028 227b DC NVMe ISE 7450 RI U.2 7.68TB
+ 1028 227c DC NVMe ISE 7450 RI U.2 15.36TB
+ 1028 227d DC NVMe SED 7450 RI U.2 960GB
+ 1028 227e DC NVMe SED 7450 RI U.2 1.92TB
+ 1028 227f DC NVMe SED 7450 RI U.2 3.84TB
+ 1028 2280 DC NVMe SED 7450 RI U.2 7.68TB
+ 1028 2281 DC NVMe SED 7450 RI U.2 15.36TB
51c4 7450 MAX NVMe SSD
+ 1028 2272 EC NVMe ISE 7450 MU M.2 80 800GB
+ 1028 228b DC NVMe SED 7450 MU U.2 800GB
+ 1028 228c DC NVMe ISE 7450 MU U.2 800GB
+ 1028 228d DC NVMe SED 7450 MU U.2 1.6TB
+ 1028 228e DC NVMe ISE 7450 MU U.2 1.6TB
+ 1028 228f DC NVMe SED 7450 MU U.2 3.2TB
+ 1028 2290 DC NVMe ISE 7450 MU U.2 3.2TB
+ 1028 2291 DC NVMe SED 7450 MU U.2 6.4TB
+ 1028 2292 DC NVMe ISE 7450 MU U.2 6.4TB
+ 1028 2293 DC NVMe SED 7450 MU U.2 12.8TB
+ 1028 2294 DC NVMe ISE 7450 MU U.2 12.8TB
1344 3000 U.3 1600GB [MTFDKCB1T6TFS/MTFDKCC1T6TFS]
5404 2210 NVMe SSD [Cobain]
5405 2300 NVMe SSD [Santana]
@@ -17689,6 +18097,8 @@
5411 2450 NVMe SSD [HendrixV] (DRAM-less)
5413 2400 NVMe SSD (DRAM-less)
5414 3460 NVMe SSD
+ 5415 3500 NVMe SSD
+ 5416 2550 NVMe SSD (DRAM-less)
6001 2100AI NVMe SSD [Nitro]
1345 Arescom Inc
1347 Odetics
@@ -17727,8 +18137,97 @@
1355 Kratos Analytical Ltd
1356 The Logical Co
1359 Prisa Networks
-135a Brain Boxes
- 0a61 UC-324 [VELOCITY RS422/485]
+135a Brainboxes Ltd
+ 0841 UC-268 4 port RS-232 card
+ 0861 UC-257 2 port RS-232 + LPT card
+ 0862 UC-257 2 port RS-232 + LPT card
+ 0863 UC-257 2 port RS-232 + LPT card
+ 0881 UC-279 8 port RS-232 card
+ 08a1 UC-313 2 port RS-422/485 card
+ 08a2 UC-313 2 port RS-422/485 card
+ 08a3 UC-313 2 port RS-422/485 card
+ 08c1 UC-310 2 port RS-422/485 Opto Isolated card
+ 08e1 UC-302 2 port RS-232 card
+ 08e2 UC-302 2 port RS-232 card
+ 08e3 UC-302 2 port RS-232 card
+ 0901 UC-431 3 port RS-232 card
+ 0921 UC-420 3 + 1 port RS-232 card
+ 0981 UC-475 1 + 1 port RS-232 + LPT card
+ 0982 UC-475 1 + 1 port RS-232 + LPT card
+ 09a1 UC-607 2 port RS-232 card
+ 09a2 UC-607 2 port RS-232 card
+ 09a3 UC-607 2 port RS-232 card
+ 0a61 UC-324 1 port RS-422/485 card
+ 0a81 UC-357 1 port RS-232 + 1 port RS-422/485 card
+ 0a82 UC-357 1 port RS-232 + 1 port RS-422/485 card
+ 0a83 UC-357 1 port RS-232 + 1 port RS-422/485 card
+ 0aa1 UC-246 1 port RS-232 card
+ 0aa2 UC-246 1 port RS-232 card
+ 0ac1 UP-189 Powered 2 port RS-232 card
+ 0ac2 UP-189 Powered 2 port RS-232 card
+ 0ac3 UP-189 Powered 2 port RS-232 card
+ 0b01 UC-346 4 port RS-422/485 card
+ 0b02 UC-346 4 port RS-422/485 card
+ 0b21 UP-200 Powered 2 port RS-232 card
+ 0b22 UP-200 Powered 2 port RS-232 card
+ 0b23 UP-200 Powered 2 port RS-232 card
+ 0ba1 UC-101 1 + 1 port RS-232 card
+ 0bc1 UC-203 1 + 1 port RS-232 + LPT card
+ 0bc2 UC-203 1 + 1 port RS-232 + LPT card
+ 0be1 UC-146 LPT card
+ 0be2 UC-146 LPT card
+ 0c01 UP-869 Powered 2 port RS-232 card
+ 0c02 UP-869 Powered 2 port RS-232 card
+ 0c03 UP-869 Powered 2 port RS-232 card
+ 0c21 UP-880 Powered 2 port RS-232 card
+ 0c22 UP-880 Powered 2 port RS-232 card
+ 0c23 UP-880 Powered 2 port RS-232 card
+ 0c41 UC-368 4 port RS-422/485 Opto Isolated card
+ 0ca1 UC-253 2 port RS-232 card
+ 0d21 UC-260 4 port RS-232 card
+ 0d41 UC-836 4 port RS-232 card
+ 0d60 IS-100 1 port RS-232 card
+ 0d80 IS-200 2 port RS-232 card
+ 0da0 IS-300 1 port RS-232 + LPT card
+ 0dc0 IS-400 4 port RS-232 card
+ 0de0 IS-500 LPT card
+ 0e41 PX-279 8 port RS-232 card
+ 0e61 UC-414 3 + 1 port RS-232 + LPT card
+ 4000 PX-420 3 + 1 port RS-232 card
+ 4001 PX-431 3 port RS-232 card
+ 4002 PX-820 Powered 3 + 1 port RS-232 card
+ 4003 PX-831 Powered 3 port RS-232 card
+ 4004 PX-235 1 port RS-232 card
+ 4005 PX-101 1 + 1 port RS-232 card
+ 4006 PX-257 1 + 1 port RS-232 + LPT card (Serial port)
+ 4007 PX-257 1 + 1 port RS-232 + LPT card (LPT port)
+ 4008 PX-835 Powered 1 port RS-232 card
+ 4009 PX-857 Powered 2 port RS-232 card
+ 400a PX-260 4 port RS-232 card
+ 400b PX-320 1 port RS-422/485 card
+ 400c PX-313 2 port RS-422/485 card
+ 400e PX-310 2 port RS-422/485 Opto Isolated card
+ 400f PX-346 4 port RS-422/485 card
+ 4010 PX-368 4 port RS-422/485 Opto Isolated card
+ 4011 PX-420 3 + 1 port RS-232 card
+ 4012 PX-431 3 port RS-232 card
+ 4013 PX-820 Powered 3 + 1 port RS-232 card
+ 4014 PX-831 Powered 3 port RS-232 card
+ 4015 PX-257 2 port RS-232 card
+ 4016 PX-235 1 port RS-232 card
+ 4017 PX-835 Powered 1 port RS-232 card
+ 4018 PX-857 Powered 2 port RS-232 card
+ 4019 PX-101 1 + 1 port RS-232 card
+ 401c PX-146 LPT card
+ 401d PX-475 1 port RS-232 + LPT card (Serial port)
+ 401e PX-803 Powered 1 + 1 port RS-232 card
+ 401f PX-475 1 port RS-232 + LPT card (LPT port)
+ 4027 IX-100 1 port RS-232 card
+ 4028 IX-200 2 port RS-232 card
+ 4029 IX-400 4 port RS-232 card
+ 402a IX-500 LPT card
+ 402c PX-263 4 port RS-232 + LPT card
+ 4100 PX-272 4 + 1 port RS-232 + LPT card
135b Giganet Inc
135c Quatech Inc
0010 QSC-100
@@ -17738,12 +18237,19 @@
0050 ESC-100D
0060 ESC-100M
00f0 MPAC-100 Synchronous Serial Card (Zilog 85230)
+ 0120 QSCP-100
+ 0130 DSCP-100
+ 0140 QSCP-200/300
+ 0150 DSCP-200/300
0170 QSCLP-100
0180 DSCLP-100
+ 0181 DSC-100
0190 SSCLP-100
01a0 QSCLP-200/300
01b0 DSCLP-200/300
+ 01b1 DSC-200/300
01c0 SSCLP-200/300
+ 01e0 ESC(LP)-100
0258 DSPSX-200/300
135d ABB Network Partner AB
135e Sealevel Systems Inc
@@ -18233,6 +18739,7 @@
13fc Computer Peripherals International
13fd Micro Science Inc
13fe Advantech Co. Ltd
+ 0071 PCIE-1761H, 8-ch Relay and 8-ch Isolated Digital Input Card
1240 PCI-1240 4-channel stepper motor controller card
1600 PCI-16xx series PCI multiport serial board (function 0)
# This board has two PCI functions, appears as two PCI devices
@@ -18448,6 +18955,9 @@
580b Secure Flash Controller (Xenon)
580d System Management Controller (Xenon)
5811 Xenos GPU (Xenon)
+ 5821 Xenos GPU (Zephyr/Falcon)
+ 5831 Xenos GPU (Jasper)
+ 5841 Xenos GPU (Slim)
1415 Oxford Semiconductor Ltd
8401 OX9162 Mode 1 (8-bit bus)
8403 OX9162 Mode 0 (parallel port)
@@ -19187,14 +19697,14 @@
6002 T6225-SO-CR Unified Wire Ethernet Controller
6003 T6425-CR Unified Wire Ethernet Controller
6004 T6425-SO-CR Unified Wire Ethernet Controller
- 6005 T6225-OCP-SO Unified Wire Ethernet Controller
- 6006 T62100-OCP-SO Unified Wire Ethernet Controller
+ 6005 T6225-SO-OCP3 Unified Wire Ethernet Controller
+ 6006 T6225-OCP3 Unified Wire Ethernet Controller
6007 T62100-LP-CR Unified Wire Ethernet Controller
6008 T62100-SO-CR Unified Wire Ethernet Controller
6009 T6210-BT Unified Wire Ethernet Controller
600d T62100-CR Unified Wire Ethernet Controller
6011 T6225-LL-CR Unified Wire Ethernet Controller
- 6014 T61100-OCP-SO Unified Wire Ethernet Controller
+ 6014 T62100-SO-OCP3 Unified Wire Ethernet Controller
6015 T6201-BT Unified Wire Ethernet Controller
6080 T6225-6080 Unified Wire Ethernet Controller
6081 T62100-6081 Unified Wire Ethernet Controller
@@ -19213,14 +19723,14 @@
6402 T6225-SO-CR Unified Wire Ethernet Controller
6403 T6425-CR Unified Wire Ethernet Controller
6404 T6425-SO-CR Unified Wire Ethernet Controller
- 6405 T6225-OCP-SO Unified Wire Ethernet Controller
- 6406 T62100-OCP-SO Unified Wire Ethernet Controller
+ 6405 T6225-SO-OCP3 Unified Wire Ethernet Controller
+ 6406 T6225-OCP3 Unified Wire Ethernet Controller
6407 T62100-LP-CR Unified Wire Ethernet Controller
6408 T62100-SO-CR Unified Wire Ethernet Controller
6409 T6210-BT Unified Wire Ethernet Controller
640d T62100-CR Unified Wire Ethernet Controller
6411 T6225-LL-CR Unified Wire Ethernet Controller
- 6414 T61100-OCP-SO Unified Wire Ethernet Controller
+ 6414 T62100-SO-OCP3 Unified Wire Ethernet Controller
6415 T6201-BT Unified Wire Ethernet Controller
6480 T6225-6080 Unified Wire Ethernet Controller
6481 T62100-6081 Unified Wire Ethernet Controller
@@ -19239,14 +19749,14 @@
6502 T6225-SO-CR Unified Wire Storage Controller
6503 T6425-CR Unified Wire Storage Controller
6504 T6425-SO-CR Unified Wire Storage Controller
- 6505 T6225-OCP-SO Unified Wire Storage Controller
- 6506 T62100-OCP-SO Unified Wire Storage Controller
+ 6505 T6225-SO-OCP3 Unified Wire Storage Controller
+ 6506 T6225-OCP3 Unified Wire Storage Controller
6507 T62100-LP-CR Unified Wire Storage Controller
6508 T62100-SO-CR Unified Wire Storage Controller
6509 T6210-BT Unified Wire Storage Controller
650d T62100-CR Unified Wire Storage Controller
6511 T6225-LL-CR Unified Wire Storage Controller
- 6514 T61100-OCP-SO Unified Wire Storage Controller
+ 6514 T62100-SO-OCP3 Unified Wire Storage Controller
6515 T6201-BT Unified Wire Storage Controller
6580 T6225-6080 Unified Wire Storage Controller
6581 T62100-6081 Unified Wire Storage Controller
@@ -19264,14 +19774,14 @@
6602 T6225-SO-CR Unified Wire Storage Controller
6603 T6425-CR Unified Wire Storage Controller
6604 T6425-SO-CR Unified Wire Storage Controller
- 6605 T6225-OCP-SO Unified Wire Storage Controller
- 6606 T62100-OCP-SO Unified Wire Storage Controller
+ 6605 T6225-SO-OCP3 Unified Wire Storage Controller
+ 6606 T6225-OCP3 Unified Wire Storage Controller
6607 T62100-LP-CR Unified Wire Storage Controller
6608 T62100-SO-CR Unified Wire Storage Controller
6609 T6210-BT Unified Wire Storage Controller
660d T62100-CR Unified Wire Storage Controller
6611 T6225-LL-CR Unified Wire Storage Controller
- 6614 T61100-OCP-SO Unified Wire Storage Controller
+ 6614 T62100-SO-OCP3 Unified Wire Storage Controller
6615 T6201-BT Unified Wire Storage Controller
6680 T6225-6080 Unified Wire Storage Controller
6681 T62100-6081 Unified Wire Storage Controller
@@ -19289,14 +19799,14 @@
6802 T6225-SO-CR Unified Wire Ethernet Controller [VF]
6803 T6425-CR Unified Wire Ethernet Controller [VF]
6804 T6425-SO-CR Unified Wire Ethernet Controller [VF]
- 6805 T6225-OCP-SO Unified Wire Ethernet Controller [VF]
- 6806 T62100-OCP-SO Unified Wire Ethernet Controller [VF]
+ 6805 T6225-SO-OCP3 Unified Wire Ethernet Controller [VF]
+ 6806 T6225-OCP3 Unified Wire Ethernet Controller [VF]
6807 T62100-LP-CR Unified Wire Ethernet Controller [VF]
6808 T62100-SO-CR Unified Wire Ethernet Controller [VF]
6809 T6210-BT Unified Wire Ethernet Controller [VF]
680d T62100-CR Unified Wire Ethernet Controller [VF]
6811 T6225-LL-CR Unified Wire Ethernet Controller [VF]
- 6814 T61100-OCP-SO Unified Wire Ethernet Controller [VF]
+ 6814 T62100-SO-OCP3 Unified Wire Ethernet Controller [VF]
6815 T6201-BT Unified Wire Ethernet Controller [VF]
6880 T6225-6080 Unified Wire Ethernet Controller [VF]
6881 T62100-6081 Unified Wire Ethernet Controller [VF]
@@ -19391,9 +19901,10 @@
144d a801 SM963 2.5" NVMe PCIe SSD
a806 NVMe SSD SM0032L
a808 NVMe SSD Controller SM981/PM981/PM983
- 144d a801 SSD 970 EVO
+# Used by different variants of SSD 970 EVO and PRO
+ 144d a801 SSD 970 EVO/PRO
1d49 403b Thinksystem U.2 PM983 NVMe SSD
- a809 NVMe SSD Controller 980
+ a809 NVMe SSD Controller 980 (DRAM-less)
a80a NVMe SSD Controller PM9A1/PM9A3/980PRO
0128 215a DC NVMe PM9A3 RI U.2 960GB
0128 215b DC NVMe PM9A3 RI U.2 1.92TB
@@ -19413,9 +19924,12 @@
1028 2276 DC NVMe PM9A3 RI 110M.2 960GB
1028 2277 DC NVMe PM9A3 RI 110M.2 1.92TB
1028 512d DC NVMe PM9A3 RI U.2 7.68TB
+ 144d a801 SSD 980 PRO
144d a813 General DC NVMe PM9A3
- a80b NVMe SSD Controller PM9B1
+# Actually 88SS1322 according to techpowerup
+ a80b NVMe SSD Controller PM9B1 (DRAM-less)
a80c NVMe SSD Controller S4LV008[Pascal]
+ a80d NVMe SSD Controller PM9C1a (DRAM-less)
a820 NVMe SSD Controller 171X
1028 1f95 Express Flash NVMe XS1715 SSD 400GB
1028 1f96 Express Flash NVMe XS1715 SSD 800GB
@@ -19538,6 +20052,7 @@
1028 225d NVMe PM1745 MU U.2 6.4TB
1028 225e NVMe FIPS PM1745 MU U.2 12.8TB
1028 225f NVMe PM1745 MU U.2 12.8TB
+ a900 NVMe SSD Controller PM9DXa
ecec Exynos 8895 PCIe Root Complex
144e OLITEC
144f Askey Computer Corp.
@@ -19658,9 +20173,18 @@
14a2 Millennium Engineering Inc
14a3 Maverick Networks
14a4 Lite-On Technology Corporation
+ 2100 CA1-8D128 NVMe SSD
+ 2200 CX2-8B256, CX2-8B512 NVMe SSD
+ 22a0 EP2-KB960 NVMe SSD
22f1 M8Pe Series NVMe SSD
+ 2300 CA3-8D256, CA3-8D512 NVMe SSD
+ 23f1 M9PeG, M9PeGN, M9PeY NVMe SSD
+ 2f00 CAZ-82512 NVMe SSD
+ 3500 CA5-8D512 NVMe SSD
# Wrong vendor ID used
4318 Broadcom BCM4318 [AirForce One 54g] 802.11g WLAN Controller
+ 5100 CB1-SD256, CB1-SD512 NVMe SSD
+ 9100 CL1-3D256, CL1-8D512 NVMe SSD (DRAM-less)
14a5 XIONICS Document Technologies Inc
14a6 INOVA Computers GmBH & Co KG
14a7 MYTHOS Systems Inc
@@ -19726,16 +20250,20 @@
14c3 MEDIATEK Corp.
0608 MT7921K (RZ608) Wi-Fi 6E 80MHz
0616 MT7922 802.11ax PCI Express Wireless Network Adapter
+ 7603 MT7603E 802.11bgn PCI Express Wireless Network Adapter
7612 MT7612E 802.11acbgn PCI Express Wireless Network Adapter
7615 MT7615E 802.11ac PCI Express Wireless Network Adapter
7630 MT7630e 802.11bgn Wireless Network Adapter
+ 7650 MT7650 802.11ac
# MT7612E too?
7662 MT7662E 802.11ac PCI Express Wireless Network Adapter
7915 MT7915E 802.11ax PCI Express Wireless Network Adapter
7916 MT7905D/MT7975
# WiFi 6E capable
7922 MT7922 802.11ax PCI Express Wireless Network Adapter
+ 1a3b 5300 ASUS PCE-AXE59BT
7961 MT7921 802.11ax PCI Express Wireless Network Adapter
+ 8650 MT7650 Bluetooth
14c4 IWASAKI Information Systems Co Ltd
14c5 Automation Products AB
14c6 Data Race Inc
@@ -19980,9 +20508,11 @@
103c 3383 Ethernet 1Gb 4-port 331T Adapter
14e4 1904 4-port 1Gb Ethernet Adapter
14e4 1909 Broadcom NetXtreme 5719 Quad Port Gigabit NIC
- 14e4 d146 BCM95719-P41 4x1GBT Ethernet NIC
- 14e4 d346 BCM95719-N41 4x1GBT Ethernet NIC
+ 14e4 d166 BCM95719-P41 4x1GBT Ethernet NIC
+ 14e4 d366 BCM95719-N41 4x1GBT Ethernet NIC
193d 1025 NIC-ETH330T-LP-4P
+# NIC-ETH330T-3S-4P 4xGE 1000Base-T for OCP3.0
+ 193d 1086 NIC-ETH330T-3S-4P
1659 NetXtreme BCM5721 Gigabit Ethernet PCI Express
1014 02c6 eServer xSeries server mainboard
1028 01e6 PowerEdge 860
@@ -20110,6 +20640,7 @@
193d 1003 530F-B
193d 1006 530F-L
193d 100f NIC-ETH522i-Mb-2x10G
+ 4c52 9812 LREC9812AF Dual-port 10Gb Ethernet Server Adapter
1690 NetXtreme BCM57760 Gigabit Ethernet PCIe
1691 NetLink BCM57788 Gigabit Ethernet PCIe
1028 04aa XPS 8300
@@ -20137,6 +20668,7 @@
16a1 BCM57840 NetXtreme II 10 Gigabit Ethernet
1043 866e PEB-10G/57840-2T 10GBase-T Network Adapter
193d 100b NIC-ETH521i-Mb-4x10G
+ 4c52 9814 LREC9814AF Quad-port 10Gb Ethernet Server Adapter
16a2 BCM57840 NetXtreme II 10/20-Gigabit Ethernet
103c 1916 FlexFabric 20Gb 2-port 630FLB Adapter
103c 1917 FlexFabric 20Gb 2-port 630M Adapter
@@ -20264,6 +20796,8 @@
152d 8b22 BCM57412 NetXtreme-E 25Gb RDMA Ethernet Controller
# NIC-ETH531F-LP-2P BCM57412 2 x 10G SFP+ Ethernet PCIe Card
193d 1024 NIC-ETH531F-LP-2P
+# NIC-ETH531F-3S-2P 2x10GbE SFP+ Adapter for OCP3.0
+ 193d 1087 NIC-ETH531F-3S-2P
16d7 BCM57414 NetXtreme-E 10Gb/25Gb RDMA Ethernet Controller
117c 00cc FastFrame N422 Dual-port 25Gb Ethernet Adapter
14e4 1402 BCM957414A4142CC 10Gb/25Gb Ethernet PCIe
@@ -20353,7 +20887,10 @@
14e4 5250 NetXtreme-E BCM57504 4x25G KR Mezz
14e4 5425 NetXtreme-E Quad-port 25G SFP28 Ethernet OCP 3.0 Adapter (BCM957504-N425G)
14e4 d142 NetXtreme-E P425D BCM57504 4x25G SFP28 PCIE
+ 1590 0420 HPE Ethernet 25/50Gb 2-port 6310C Adapter
1752 BCM57502 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet
+ 1760 BCM57608 10Gb/25Gb/50Gb/100Gb/200Gb/400Gb Ethernet
+ 14e4 d125 BCM57608 2x200G PCIe Ethernet NIC
1800 BCM57502 NetXtreme-E Ethernet Partition
1801 BCM57504 NetXtreme-E Ethernet Partition
1802 BCM57508 NetXtreme-E Ethernet Partition
@@ -20371,6 +20908,7 @@
1809 BCM5750X NetXtreme-E RDMA Virtual Function
14e4 df24 BCM57508 NetXtreme-E NGM2100D 2x100G KR Mezz RDMA Virtual Function
2711 BCM2711 PCIe Bridge
+ 2712 BCM2712 PCIe Bridge
3352 BCM3352
3360 BCM3360
4210 BCM4210 iLine10 HomePNA 2.0
@@ -20542,16 +21080,16 @@
4360 BCM4360 802.11ac Wireless Network Adapter
4365 BCM43142 802.11b/g/n
1028 0016 Wireless 1704 802.11n + BT 4.0
- 43a0 BCM4360 802.11ac Wireless Network Adapter
- 43a1 BCM4360 802.11ac Wireless Network Adapter
- 43a2 BCM4360 802.11ac Wireless Network Adapter
+ 43a0 BCM4360 802.11ac Dual Band Wireless Network Adapter
+ 43a1 BCM4360 802.11ac 2,4G Wireless Network Adapter
+ 43a2 BCM4360 802.11ac 5G Wireless Network Adapter
43a3 BCM4350 802.11ac Wireless Network Adapter
# Manufactured by Foxconn for Lenovo
17aa 075a 00JT494
43a9 BCM43217 802.11b/g/n
43aa BCM43131 802.11b/g/n
43ae BCM43162 802.11ac Wireless Network Adapter
- 43b1 BCM4352 802.11ac Wireless Network Adapter
+ 43b1 BCM4352 802.11ac Dual Band Wireless Network Adapter
1043 85ba PCE-AC56 Dual-Band Wireless PCI-E Adapter
43ba BCM43602 802.11ac Wireless LAN SoC
43bb BCM43602 802.11ac Wireless LAN SoC
@@ -20577,12 +21115,14 @@
441f BCM4361 802.11ac Dual-Band Wireless Network Controller
4420 BCM4361 802.11ac 2.4 GHz Wireless Network Controller
4421 BCM4361 802.11ac 5 GHz Wireless Network Controller
- 4425 BRCM4378 Wireless Network Adapter
+ 4425 BCM4378 802.11ax Dual Band Wireless Network Adapter
4430 BCM44xx CardBus iLine32 HomePNA 2.0
4432 BCM4432 CardBus 10/100BaseT
+ 4433 BCM4387 802.11ax Dual Band Wireless LAN Controller
4464 BCM4364 802.11ac Wireless Network Adapter
# brcmfmac reports it as BCM4377/4 but macOS drivers call it BCM4377b
4488 BCM4377b Wireless Network Adapter
+ 449d BCM43752 802.11ax Dual Band Wireless LAN Controller
4610 BCM4610 Sentry5 PCI to SB Bridge
4611 BCM4610 Sentry5 iLine32 HomePNA 1.0
4612 BCM4610 Sentry5 V.90 56k Modem
@@ -20900,6 +21440,10 @@
17de 08a6 KWorld/VStream XPert DVB-T
17de 08b2 KWorld DVB-S 100
17de a8a6 digitalnow DNTV Live! DVB-T
+ 1805 0111 PICOLO Jet-X Video
+ 1805 0112 PICOLO Jet-X Video
+ 1805 0113 PICOLO Jet-X Video
+ 1805 0114 PICOLO Jet-X Video
1822 0025 digitalnow DNTV Live! DVB-T Pro
185b e000 VideoMate X500
18ac d500 FusionHDTV 5 Gold
@@ -20928,6 +21472,10 @@
14f1 0187 Conexant DVB-T reference design
17de 08a1 XPert DVB-T PCI BDA DVBT 23880 Transport Stream Capture
17de 08a6 KWorld/VStream XPert DVB-T
+ 1805 0111 PICOLO Jet-X Jpeg
+ 1805 0112 PICOLO Jet-X Jpeg
+ 1805 0113 PICOLO Jet-X Jpeg
+ 1805 0114 PICOLO Jet-X Jpeg
18ac d500 DViCO FusionHDTV5 Gold
18ac d810 DViCO FusionHDTV3 Gold-Q
18ac d820 DViCO FusionHDTV3 Gold-T
@@ -20940,6 +21488,10 @@
0070 6902 WinTV HVR-4000-HD
0070 9002 Nova-T DVB-T Model 909
0070 9402 WinTV-HVR1100 DVB-T/Hybrid
+ 1805 0111 PICOLO Jet-X Control
+ 1805 0112 PICOLO Jet-X Control
+ 1805 0113 PICOLO Jet-X Control
+ 1805 0114 PICOLO Jet-X Control
7063 5500 pcHDTV HD-5500
8811 CX23880/1/2/3 PCI Video and Audio Decoder [Audio Port]
0070 3400 WinTV 34604
@@ -21165,6 +21717,7 @@
9290 FPGA Card
9300 Universal Exhaust Gas Oxygen Sensor Simulator
9310 Digital Programmable Resistor
+ 9320 Arria 10 FPGA Card
9350 Analog Input Card
1543 SILICON Laboratories
3052 Intel 537 [Winmodem]
@@ -21200,6 +21753,7 @@
be00 PCI Express Bridge
1557 MEDIASTAR Co Ltd
1558 CLEVO/KAPOK Computer
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
1559 SI LOGIC Ltd
155a INNOMEDIA Inc
155b PROTAC INTERNATIONAL Corp
@@ -21283,6 +21837,8 @@
0001 Eagle Cluster Manager
0002 Osprey Cluster Manager
0003 Harrier Cluster Manager
+ 0371 Cassini 2 [Slingshot 400Gb]
+ 0372 Cassini 2 [Slingshot 400Gb] SR-IOV VF
a01d FC044X Fibre Channel HBA
1591 ARN
1592 Syba Tech Ltd
@@ -21373,6 +21929,10 @@
021f CX8 Family [ConnectX-8 Secure Flash Recovery]
0220 BF4 Family Flash Recovery [BlueField-4 SoC Flash Recovery]
0221 BF4 Family Secure Flash Recovery [BlueField-4 Secure Flash Recovery]
+ 0222 CX8 PCIe Switch Family [ConnectX-8 PCIe Switch Flash Recovery]
+ 0223 CX8 PCIe Switch Family [ConnectX-8 PCIe Switch Secure Flash Recovery-RMA]
+ 0224 CX9 Family [ConnectX-9 Flash Recovery]
+ 0225 CX9 Family [ConnectX-9 Secure Flash Recovery-RMA]
024e MT53100 [Spectrum-2, Flash recovery mode]
024f MT53100 [Spectrum-2, Secure Flash recovery mode]
0250 Spectrum-3, Flash recovery mode
@@ -21392,16 +21952,23 @@
0262 MT27710 [ConnectX-4 Lx Programmable] EN
0263 MT27710 [ConnectX-4 Lx Programmable Virtual Function] EN
0264 Innova-2 Flex Burn image
- 0270 Spectrum-4L, Flash recovery mode
+ 0270 Spectrum-5 in Flash Recovery Mode
0271 Spectrum-4L, RMA
- 0274 Spectrum-4C, Flash recovery mode
+ 0274 Spectrum-6 in Flash Recovery Mode
0275 Spectrum-4C RMA
0277 Spectrum-4TOR RMA
+ 0278 Quantum-4 in Flash Recovery Mode
+ 0279 Quantum-4 RMA
0281 NPS-600 Flash Recovery
0282 ArcusE Flash recovery
0283 ArcusE RMA
0284 Sagitta
0285 Sagitta RMA
+ 0286 LibraE Flash Recovery
+ 0287 LibraE RMA
+# Flash recovery
+ 0288 Arcus2
+ 0289 Arcus2 RMA
1002 MT25400 Family [ConnectX-2 Virtual Function]
1003 MT27500 Family [ConnectX-3]
1014 04b5 PCIe3 40GbE RoCE Converged Host Bus Adapter for Power
@@ -21502,6 +22069,8 @@
1020 MT28860
1021 MT2910 Family [ConnectX-7]
1023 CX8 Family [ConnectX-8]
+ 1024 CX8 PCIe Switch Family [ConnectX-8 PCIe Switch]
+ 1025 CX9 Family [ConnectX-9]
1974 MT28800 Family [ConnectX-5 PCIe Bridge]
1975 MT416842 Family [BlueField SoC PCIe Bridge]
1976 MT28908 Family [ConnectX-6 PCIe Bridge]
@@ -21511,11 +22080,14 @@
197a MT43162 Family [BlueField-3 Lx SoC PCIe Bridge]
197b MT43244 Family [BlueField-3 SoC PCIe Bridge]
197c ConnectX/BlueField Family mlx5Gen PCIe Bridge [PCIe Bridge]
+ 197d CX8 Family [ConnectX-8 PCIe Bridge]
+ 197e CX9 Family [ConnectX-9 PCIe Bridge]
2020 MT2892 Family [ConnectX-6 Dx Emulated PCIe Bridge]
2021 MT42822 Family [BlueField-2 SoC Emulated PCIe Bridge]
2023 MT2910 Family [ConnectX-7 Emulated PCIe Bridge]
2024 MT43244 Family [BlueField-3 SoC Emulated PCIe Bridge]
2025 ConnectX/BlueField Family mlx5Gen Emulated PCIe Bridge [Emulated PCIe Bridge]
+ 2100 CX8 Family [CX8 Data Direct Interface]
4117 MT27712A0-FDCF-AE
1bd4 0039 SN10XMP2P25
1bd4 003a 25G SFP28 SP EO251FM9 Adapter
@@ -21580,6 +22152,8 @@
a2de BF4 Family Crypto disabled [BlueField-4 SoC Crypto disabled]
a2df BF4 Family integrated network controller [BlueField-4 integrated network controller]
b200 ArcusE
+ b201 LibraE
+ b202 Arcus2
c2d1 BlueField DPU Family Auxiliary Communication Channel [BlueField Family]
c2d2 MT416842 BlueField SoC management interfac
c2d3 MT42822 BlueField-2 SoC Management Interface
@@ -21604,6 +22178,7 @@
d2f2 Quantum-2 NDR (400Gbps) switch
d2f4 Quantum-3
d2f6 Quantum-3CPO
+ d2f8 Quantum-4
15b4 CCI/TRIAD
15b5 Cimetrics Inc
15b6 Texas Memory Systems Inc
@@ -21627,10 +22202,10 @@
2001 Skyhawk Series NVME SSD
5001 WD Black NVMe SSD
5002 SanDisk Extreme Pro / WD Black 2018/SN750/PC SN720 NVMe SSD
- 5003 WD Blue SN500 / PC SN520 NVMe SSD
- 5004 PC SN520 NVMe SSD
- 5005 PC SN520 NVMe SSD
- 5006 WD Black SN750 / PC SN730 / Red SN700 NVMe SSD
+ 5003 WD Blue SN500 / PC SN520 x2 M.2 2280 NVMe SSD
+ 5004 PC SN520 x2 M.2 2230 NVMe SSD
+ 5005 PC SN520 x2 M.2 2242 NVMe SSD
+ 5006 SanDisk Extreme Pro / WD Black SN750 / PC SN730 / Red SN700 NVMe SSD
5007 IX SN530 NVMe SSD (DRAM-less)
5008 PC SN530 NVMe SSD (DRAM-less)
5009 SanDisk Ultra 3D / WD Blue SN550 NVMe SSD
@@ -21639,19 +22214,23 @@
1414 500b Xbox Series X
500d WD Ultrastar DC SN340 NVMe SSD
5011 WD PC SN810 / Black SN850 NVMe SSD
- 5014 WD Green SN350 NVMe SSD 1 TB (DRAM-less)
+ 5014 WD PC SN540 / Green SN350 NVMe SSD 1 TB (DRAM-less)
5015 PC SN740 NVMe SSD (DRAM-less)
5016 WD PC SN740 NVMe SSD 512GB (DRAM-less)
5017 WD Black SN770 / PC SN740 256GB / PC SN560 (DRAM-less) NVMe SSD
- 5019 WD Green SN350 NVMe SSD 240GB (DRAM-less)
- 501a WD Blue SN570 NVMe SSD
+ 5019 WD Green SN350 240GB (DRAM-less) / SN560E NVMe SSD
+ 501a SanDisk Ultra 3D / WD Blue SN570 NVMe SSD (DRAM-less)
501d WD Blue SN550 NVMe SSD 2TB (DRAM-less)
- 501e PC SN735 NVMe SSD (DRAM-less)
+ 501e PC SN735 / WD_BLACK SN750 SE NVMe SSD (DRAM-less)
501f WD PC SN735 NVMe SSD 512GB (DRAM-less)
5025 WD Blue SN570 NVMe SSD 2TB
5026 WD PC SN735 NVMe SSD 1TB (DRAM-less)
+ 5028 WD CH SN560 NVMe SSD
5030 WD Black SN850X NVMe SSD
+ 5034 WD PC SN5000S M.2 2230 NVMe SSD (DRAM-less)
+ 5036 WD PC SN5000S M.2 2280 NVMe SSD (DRAM-less)
5041 WD Blue SN580 NVMe SSD (DRAM-less)
+ 5042 WD Black SN770M NVMe SSD (DRAM-less)
15b8 ADDI-DATA GmbH
1001 APCI1516 SP controller (16 digi outputs)
1003 APCI1032 SP controller (32 digi inputs w/ opto coupler)
@@ -21673,7 +22252,15 @@
117c 0022 Celerity FC-42XS Fibre Channel Adapter
117c 0025 Celerity FC-44ES Fibre Channel Adapter
117c 0026 Celerity FC-42ES Fibre Channel Adapter
+ 0500 Infiniium Memory Controller Interface
+ 0501 Infiniium Acquisition System Interface
+ 0507 Infiniium Acquisition System (80000 series)
+ 0508 Infiniium Acquisition Support
+ 0b01 82350B PCI GPIB
1100 E8001-66442 PCI Express CIC
+ 1218 82351A PCI Express GPIB
+ 12d6 82350C PCI GPIB
+ 12d7 82351B PCI Express GPIB
2922 64 Bit, 133MHz PCI-X Exerciser & Protocol Checker
2928 64 Bit, 66MHz PCI Exerciser & Analyzer
2929 64 Bit, 133MHz PCI-X Analyzer & Exerciser
@@ -21709,7 +22296,6 @@
15d8 Cybernetics Technology Co Ltd
15d9 Super Micro Computer Inc
1b64 SCC-B8SB80-B1
- 1b67 AOC-S3916L-H16iR-32DD
1b9d Supermicro AOC-S3816L-L16IR
1c6e Supermicro AOC-SLG4-2H8M2
15da Cyberfirm Inc
@@ -21927,6 +22513,11 @@
165f Linux Media Labs, LLC
1020 LMLM4 MPEG-4 encoder
1661 Worldspace Corp.
+1665 EDAX Inc
+# P/N 4035.006.19720
+ 1973 DPP-II FR2 Board
+# P/N 4035.065.20000
+ 2000 SG-IIP Board
1668 Actiontec Electronics Inc
0100 Mini-PCI bridge
# Formerly SiByte, Inc.
@@ -22363,6 +22954,9 @@
7054 APA7-504 Reconfigurable Artix-7 52,160 Cell FPGA module 24 LVDS channels
7072 AP731 Multi-function I/O Module with 12-bit DAC
7073 AP730 Multi-function I/O Module 16 Digital I/O 8 Differential Analog In 4 Analog Out
+ 7731 APZU-301 Zynq Ultrascale+ Module 28 TTL channels
+ 7733 APZU-303 Zynq Ultrascale+ Module 20 TTL & 3 RS485/422 channels
+ 7734 APZU-304 Zynq Ultrascale+ Module 14 LVDS channels
16da Advantech Co., Ltd.
0011 INES GPIB-PCI
16df PIKA Technologies Inc.
@@ -22463,6 +23057,7 @@
13c8 AEP SureWare Runner 1000V3
# nee Fujitsu Siemens Computers GmbH
1734 Fujitsu Technology Solutions
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
1735 Aten International Co. Ltd.
1737 Linksys
0029 WPG54G ver. 4 PCI Card
@@ -22485,6 +23080,7 @@
1745 ViXS Systems, Inc.
2020 XCode II Series
2100 XCode 2100 Series
+ 1043 48c9 My Cinema PE6200 Analoog
1749 RLX Technologies
174b PC Partner Limited / Sapphire Technology
174d WellX Telecom SA
@@ -22741,6 +23337,7 @@
0006 LENSE30512GMSP34MEAT3TA
3181 ThinkCentre M75n IoT
402b Intel 82599ES 10Gb 2-port Server Adapter X520-2
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
17ab Phillips Components
17af Hightech Information System Ltd.
17b3 Hawking Technologies
@@ -22762,23 +23359,34 @@
0002 AGN300 802.11 a/b/g True MIMO Wireless Card
1385 6d00 WPNT511 RangeMax 240 Mbps Wireless CardBus Adapter
1737 0054 WPC54GX4 v1 802.11g Wireless-G Notebook Adapter with SRX400
+ 0104 APQ8096 PCIe Root Complex [Snapdragon 820]
0105 MSM8998 PCIe Root Complex
+ 0106 SDM850 PCIe Root Complex [Snapdragon 850]
+ 0107 SDM850 PCIe Root Port [Snapdragon 850]
0108 SM8150 PCIe Root Complex
0109 SA8195P PCIe Root Complex
+ 010b SM8250 PCIe Root Complex [Snapdragon 865/870 5G]
+ 010c SM8350 PCIe Root Complex [Snapdragon 888]
010e SC8280XP PCI Express Root Port
+ 0110 SM8475 PCIe Root Complex [Snapdragon 8+ Gen 1]
0300 MDM9x35 LTE Modem [Snapdragon X7]
0301 MDM9x45 LTE Modem [Snapdragon X12]
0302 MDM9x55 LTE Modem [Snapdragon X16]
+ 0304 SDX24 [Snapdragon X24 4G]
+ 0306 SDX55 [Snapdragon X55 5G]
0400 Datacenter Technologies QDF2432 PCI Express Root Port
0401 Datacenter Technologies QDF2400 PCI Express Root Port
1000 QCS405 PCIe Root Complex
1101 QCA6390 Wireless Network Adapter
1103 QCNFA765 Wireless Network Adapter
1104 QCN6024/9024/9074 Wireless Network Adapter
- 1108 IPQ95xx/97xx PCI Express Root Port
+ 1107 WCN785x Wi-Fi 7(802.11be) 320MHz 2x2 [FastConnect 7800]
+ 105b e0f7 High Band Simultaneous Wireless Network Adapter
+ 1108 IPQ95xx/97xx PCIe Root Port
1109 QCN62xx/92xx Wireless Network Adapter
17cc NetChip Technology, Inc
- 2280 USB 2.0
+ 2280 NET2280 PCI to USB 2.0 Hi-Speed Peripheral Controller
+ 2282 NET2282 PCI to USB 2.0 Hi-Speed Peripheral Controller
17cd Cadence Design Systems, Inc.
17cf Z-Com, Inc.
17d3 Areca Technology Corp.
@@ -22861,6 +23469,7 @@
17d5 7831 X3120 Dual Port 10GBase-CR
17db Cray Inc
0101 XT Series [Seastar] 3D Toroidal Router
+ 0501 Cassini 1 [Slingshot 200Gb]
17de KWorld Computer Co. Ltd.
17df Dini Group
1864 Virtex4 PCI Board w/ QL5064 Bridge [DN7000K10PCI/DN8000K10PCI/DN8000K10PSX/NOTUS]
@@ -22978,6 +23587,35 @@
1804 Ralink corp. (wrong ID)
3060 RT3060 Wireless 802.11n 1T/1R
1805 Euresys S.A.
+ 0201 PICOLO Alert PCI
+ 0202 PICOLO Diligent
+ 0204 PICOLO Alert-RC
+ 0205 PICOLO Alert PCIe
+ 0206 PICOLO Diligent Plus PCIe
+ 0207 PICOLO Alert-RC PCIe
+ 0300 GRABLINK Expert 2
+ 0301 GRABLINK Quickpack ColorScan
+ 0302 GRABLINK Value cPCI
+ 0303 GRABLINK Expert 2 cPCI
+ 0305 GRABLINK Avenue
+ 0306 GRABLINK Quickpack CFA
+ 0307 GRABLINK Express
+ 0308 GRABLINK Quickpack CFA PCIe
+ 0309 GRABLINK Quickpack CFA PCIe (Recovery)
+ 030a GRABLINK Full
+ 030b GRABLINK Full (Recovery)
+ 030c GRABLINK DualBase
+ 030d GRABLINK DualBase (Recovery)
+ 030e GRABLINK Base
+ 030f GRABLINK Base (Recovery)
+ 0310 GRABLINK Full XR
+ 0311 GRABLINK Full XR (Recovery)
+ 0401 DOMINO Iota
+ 0402 DOMINO Alpha 2
+ 0403 DOMINO Harmony
+ 0404 DOMINO Melody
+ 0407 DOMINO Symphony
+ 0408 DOMINO Symphony PCIe
1809 Lumanate, Inc.
180c IEI Integration Corp
1813 Ambient Technologies Inc
@@ -23042,6 +23680,7 @@
0701 RT2760 Wireless 802.11n 1T/2R
1737 0074 WMP110 v2 802.11n RangePlus Wireless PCI Adapter
0781 RT2790 Wireless 802.11n 1T/2R PCIe
+ 11ad 7600 HP WN7600R
1814 2790 RT2790 Wireless 802.11n 1T/2R PCIe
3060 RT3060 Wireless 802.11n 1T/1R
1186 3c04 DWA-525 Wireless N 150 Desktop Adapter (rev.A1)
@@ -23094,6 +23733,7 @@
08b0 MVC200-DC
1846 Alcatel-Lucent
1849 ASRock Incorporation
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
184a Thales Computers
1100 MAX II cPLD
1850 Advantest Corporation
@@ -23333,6 +23973,7 @@
0013 SH7757 PCIe Switch [PS]
0014 uPD720201 USB 3.0 Host Controller
0015 uPD720202 USB 3.0 Host Controller
+ 4c52 9a72 LRSU9A72 2-Port USB 3.0 Exchange Adapter
001a SH7758 PCIe-PCI Bridge [PPB]
001b SH7758 PCIe End-Point [PBI]
001d SH7758 PCIe Switch [PS]
@@ -23342,7 +23983,7 @@
0100 A104d QUAD T1/E1 AFT card
0300 A101 single-port T1/E1
0400 A104u Quad T1/E1 AFT
-1924 Solarflare Communications
+1924 AMD Solarflare
0703 SFC4000 rev A net [Solarstorm]
10b8 0102 SMC10GPCIe-10BT (A2) [TigerCard]
10b8 0103 SMC10GPCIe-10BT (A3) [TigerCard]
@@ -23495,6 +24136,8 @@
1942 ClearSpeed Technology plc
e511 Advance X620 accelerator card
e521 Advance e620 accelerator card
+1945 MERA
+ 6200 PXI/PXIe measurement module
1947 C-guys, Inc.
4743 CG200 Dual SD/SDIO Host controller device
1948 Alpha Networks Inc.
@@ -23592,6 +24235,7 @@
7010 MPC8641 PCI Host Bridge
7011 MPC8641D PCI Host Bridge
7018 MPC8610
+ 81c0 LS1046A PCI Express Bridge
c006 MPC8308
1a56 1201 Bigfoot Killer E2100 Gigabit Ethernet Controller
# PCIe interface for emulator
@@ -23642,10 +24286,12 @@
1043 1477 N56VZ
10a0 QCA8172 Fast Ethernet
10a1 QCA8171 Gigabit Ethernet
+ 2010 QCA8175 card reader controller
2048 Attansic L2 Fast Ethernet
2060 AR8152 v1.1 Fast Ethernet
2062 AR8152 v2.0 Fast Ethernet
1043 8468 Eee PC 1015PX
+ 3010 QCA8175 SD controller
# E2200, E2201, E2205
e091 Killer E220x Gigabit Ethernet Controller
e0a1 Killer E2400 Gigabit Ethernet Controller
@@ -23657,8 +24303,11 @@
196d Club-3D BV
196e PNY
1971 AGEIA Technologies, Inc.
- 1011 Physics Processing Unit [PhysX]
+ 0000 Physics Processing Unit [PhysX] 100 Series PCI Express Card
+# The PCI and PCIe versions have a different PID
+ 1011 Physics Processing Unit [PhysX] 100 Series PCI Card
1043 0001 PhysX P1
+ 1021 Physics Processing Unit [PhysX] 200 Series PCI Express Card
# nee Eberspaecher Electronics
1974 Star Electronics GmbH & Co. KG
0009 FlexCard PMC-II
@@ -23708,13 +24357,16 @@
16ff OX16C954 HOST-B
1987 Phison Electronics Corporation
5007 E7 NVMe Controller
- 5008 E8 PCIe3 NVMe Controller
+ 5008 E8 PCIe3 x2 NVMe Controller
5012 E12 NVMe Controller
- 5013 PS5013 E13 NVMe Controller
+ 5013 PS5013-E13 PCIe3 NVMe Controller (DRAM-less)
+ 5015 PS5015-E15 PCIe3 NVMe Controller (DRAM-less)
5016 E16 PCIe4 NVMe Controller
5018 E18 PCIe4 NVMe Controller
5019 PS5019-E19 PCIe4 NVMe Controller (DRAM-less)
5021 PS5021-E21 PCIe4 NVMe Controller (DRAM-less)
+ 5026 PS5026-E26 PCIe5 NVMe Controller
+ 5027 PS5027-E27T PCIe4 NVMe Controller (DRAM-less)
1989 Montilio Inc.
0001 RapidFile Bridge
8001 RapidFile
@@ -24173,47 +24825,66 @@
1050 Virtio 1.0 GPU
1052 Virtio 1.0 input
1053 Virtio 1.0 socket
+ 1058 virtio-mem
105a Virtio file system
1110 Inter-VM shared memory
1af4 1100 QEMU Virtual Machine
1af5 Netezza Corp.
1afa J & W Electronics Co., Ltd.
1b00 Montage Technology Co., Ltd.
+ c001 CXL Memory Expander Controller M88MX5891
+ 1ff9 00a2 CXL Memory Expander
+ 1ff9 00a4 CXL Memory Expander
1b03 Magnum Semiconductor, Inc,
6100 DXT/DXTPro Multiformat Broadcast HD/SD Encoder/Decoder/Transcoder
7000 D7 Multiformat Broadcast HD/SD Encoder/Decoder/Transcoder
1b08 MSC Technologies GmbH
1b0a Pegatron
+ 9602 RS780/RS880 PCI to PCI bridge (int gfx)
1b13 Jaton Corp
1b1a K&F Computing Research Co.
0e70 GRAPE
1b1c Corsair
1b21 ASMedia Technology Inc.
- 0611 ASM1061 SATA IDE Controller
- 0612 ASM1062 Serial ATA Controller
+ 0611 ASM1061 Serial ATA Controller
+ 0612 ASM1061/ASM1062 Serial ATA Controller
1849 0612 Motherboard
+ 0622 ASM106x Serial ATA AHCI Controller
+ 4c52 9661 LRST9661 2-port M.2 SATA3(6Gb/s) Raid Adapter
+ 0624 ASM106x SATA/RAID Controller
0625 106x SATA/RAID Controller
- 1040 ASM1040 XHCI Controller
+ 1040 ASM1040 SuperSpeed USB Host Controller
+ 1041 ASM1041 SuperSpeed USB Host Controller
1042 ASM1042 SuperSpeed USB Host Controller
1043 1059 K53SM motherboard
1043 8488 P8B WS Motherboard
1849 1042 Motherboard
+ 1064 ASM1064 Serial ATA Controller
1080 ASM1083/1085 PCIe to PCI Bridge
1849 1080 Motherboard
1142 ASM1042A USB 3.0 Host Controller
+ 1164 ASM1164 Serial ATA AHCI Controller
1166 ASM1166 Serial ATA Controller
1182 ASM1182e 2-Port PCIe x1 Gen2 Packet Switch
1b21 118f ASM1182e 2-Port PCIe x1 Gen2 Packet Switch
1184 ASM1184e 4-Port PCIe x1 Gen2 Packet Switch
1849 1184 ASM1184e 4-Port PCIe x1 Gen2 Packet Switch
1187 ASM1187e 7-Port PCIe x1 Gen2 Packet Switch
+ 118f ASM1187e 7-Port PCIe x1 Gen2 Packet Switch
1242 ASM1142 USB 3.1 Host Controller
+ 4c52 9a42 LRSU9A42 2-Port Type-A Exchange Adapter
1343 ASM1143 USB 3.1 Host Controller
+ 1806 ASM1806 4-Port PCIe x2 Gen2 Packet Switch
1812 ASM1812 6-Port PCIe x4 Gen2 Packet Switch
+ 1824 ASM1824 12-Port PCIe x8 Gen2 Packet Switch
2142 ASM2142/ASM3142 USB 3.1 Host Controller
1462 7a72 H270 PC MATE
+ 2806 ASM2806 4-Port PCIe x2 Gen3 Packet Switch
+ 2812 ASM2812 6-Port PCIe x4 Gen3 Packet Switch
2824 ASM2824 PCIe Gen3 Packet Switch
3042 ASM3042 USB 3.2 Gen 1 xHCI Controller
+ 3142 ASM3142 USB 3.2 Gen 2x1 xHCI Controller
+ 3241 ASM3241 USB 3.2 Gen 2 Host Controller
3242 ASM3242 USB 3.2 Host Controller
1b26 Netcope Technologies, a.s.
c132 COMBO-LXT155
@@ -24246,6 +24917,7 @@
000c QEMU PCIe Root port
000d QEMU XHCI Host Controller
0010 QEMU NVM Express Controller
+ 0011 QEMU PVPanic device
0013 QEMU UFS Host Controller
0100 QXL paravirtual graphic card
1af4 1100 QEMU Virtual Machine
@@ -24290,9 +24962,14 @@
1028 2113 BOSS-N1 Modular
1028 2151 BOSS-N1 Modular ET
1028 2196 ROR-N1
+ 1028 2286 BOSS-N1 DC-MHS
+ 1028 2287 BOSS-N1 Modular
1b4b 2241 Santa Cruz NVMe Host Adapter
+ 1b96 4000 WD_BLACK AN1500 NVMe SSD
1d49 0306 ThinkSystem M.2 NVMe 2-Bay RAID Enablement Kit
1d49 0307 ThinkSystem 7mm NVMe 2-Bay Rear RAID Enablement Kit
+ 4c52 9541 LRNV9541 2-port M.2 NVMe Raid Adapter
+ 2b42 88W8997 2.4/5 GHz Dual-Band 2x2 Wi-Fi® 5 (802.11ac) + Bluetooth® 5.3 Solution
2b43 NXP 88W9098 Wi-Fi 6 (ax) MAC #1
2b44 NXP 88W9098 Wi-Fi 6 (ax) MAC #2
2b45 NXP 88W9098 Bluetooth 5.3
@@ -24300,6 +24977,7 @@
9123 88SE9123 PCIe SATA 6.0 Gb/s controller
dc93 600e DC-6xxe series SATA 6G controller
9125 88SE9125 PCIe SATA 6.0 Gb/s controller
+ 4c52 9615 LRST9615 4-port SATA3(6Gb/s) Exchange Adapter
9128 88SE9128 PCIe SATA 6 Gb/s RAID controller
9130 88SE9128 PCIe SATA 6 Gb/s RAID controller with HyperDuo
1043 8438 P8P67 Deluxe Motherboard
@@ -24328,6 +25006,7 @@
1d49 0303 ThinkSystem SE350 M.2 SATA 4-Bay Data RAID Mirroring Enablement Kit
1d49 0304 ThinkSystem M.2 SATA 2-Bay RAID Enablement Kit
1d49 0305 ThinkSystem 7mm SATA 2-Bay Rear RAID Enablement Kit
+ 4c52 9630 LRST9630 4-port SATA3(6Gb/s) Raid Adapter
9235 88SE9235 PCIe 2.0 x2 4-port SATA 6 Gb/s Controller
9445 88SE9445 PCIe 2.0 x4 4-Port SAS/SATA 6 Gbps RAID Controller
9480 88SE9480 SAS/SATA 6Gb/s RAID controller
@@ -24341,6 +25020,22 @@
# 2xHDMI and 2xHD-SDI inputs
e5f4 MPEG2 and H264 Encoder-Transcoder
f1c4 Dual ASI-RX/TX-CI card
+1b5e STAR-Dundee Ltd.
+ 0001 SpaceWire PCI Mk2
+ 0002 SpaceWire PCIe Mk1
+ 0003 SpaceWire cPCI Mk2
+ 0004 SpaceWire PXI Recorder Mk1
+ 0005 SpaceWire PXI Interface Mk1
+ 0006 SpaceWire PXI Interface Mk1 with RMAP Target
+ 0008 SpaceWire PXI Router Mk1
+ 000b SpaceWire PXI Interface Mk2
+ 000c SpaceWire PXI Interface Mk2 with RMAP Target
+ 000d SpaceWire PXI Router Mk2
+ 000e SpaceWire PXI Recorder Mk2
+ 0100 STAR-Ultra PCIe
+ 0102 STAR-Ultra Single-Lane Router
+ 0200 SpaceWire PCIe Mk2
+1b61 Byd Precision Manufacture Co.,Ltd
1b66 DELTACAST
0007 DELTA-3G-elp-d
1b6f Etron Technology, Inc.
@@ -24380,6 +25075,12 @@
2404 Ultrastar DC SN640 NVMe SSD
2500 Ultrastar DC SN840 NVMe SSD
2600 Ultrastar DC ZN540 ZNS NVMe SSD
+ 2700 Ultrastar DC SN650 NVMe SSD
+ 2701 Ultrastar DC SN650 NVMe SSD
+ 2702 Ultrastar DC SN650 NVMe SSD
+ 2720 Ultrastar DC SN650 NVMe SSD
+ 2721 Ultrastar DC SN650 NVMe SSD
+ 2722 Ultrastar DC SN655 NVMe SSD
3001 RapidFlex C2000 NVMe Initiator
3714 PC SN730 NVMe SSD
3734 PC SN730 NVMe SSD
@@ -24495,11 +25196,18 @@
1bb1 0179 Nytro 5360S - E3.S
# Nytro 5360S (Rocinante Single Port) TCG - E3.S
1bb1 0180 Nytro 5360S TCG - E3.S
+# Nytro 5060H (Rocinante High Performance) non-SED
+ 1bb1 0181 Nytro 5060H
1bb1 01a1 Nytro XP7102
5012 FireCuda/IronWolf 510 SSD
5013 BarraCuda Q5 NVMe SSD (DRAM-less)
5016 FireCuda 520/IronWolf 525 SSD
5018 FireCuda 530 SSD
+ 5019 BarraCuda PCIe SSD (DRAM-less)
+# 2TB
+ 5021 FireCuda 520 SSD
+# 1TB
+ 5026 FireCuda 540 SSD
1bb3 Bluecherry
4304 BC-04120A MPEG4 4 port video encoder / decoder
4309 BC-08240A MPEG4 4 port video encoder / decoder
@@ -24559,17 +25267,40 @@
100c NS8510G1Uxxx, NS8610G1Uxxx NVME SSD
100e NS8500G2Uxxxx, NS8600G2Uxxxx NVME SSD
1bee IXXAT Automation GmbH
+ 0002 CAN-IB100/PCIe
0003 CAN-IB200/PCIe
+ 0004 CAN-IB120/PCIe Mini
+ 0005 CAN-IB130/PCIe 104
+ 0006 CAN-IB230/PCIe 104
+ 000e CAN-IB500/PCIe
+ 000f CAN-IB600/PCIe
+ 0010 CAN-IB300/PCI
+ 0011 CAN-IB400/PCI
+ 0012 CAN-IB520/PCIe Mini
+ 0016 CAN-IB410/PMC
+ 001c CAN-IB810/PMC
+ 001e INpact PCIe
+ 001f INpact PCIe Mini
+ 0029 INpact M.2
+ 002d CAN-IB630/PCIe 104
+ 002e CAN-IB640/PCIe
1bef Lantiq
0011 MIPS SoC PCI Express Port
1bf4 VTI Instruments Corporation
0001 SentinelEX
7011 RX0xxx
1bf5 Greenliant
+ 1000 G7200 series U.2 NVMe SSD
1bfc Duagon AG
1bfd EeeTOP
1c00 Nanjing Qinheng Microelectronics Co., Ltd.
+ 2170 CH351 PCIe Parallel Port Adapter
+ 2273 CH351 PCIe Dual Port Serial Adapter
+ 3050 CH382L PCIe Parallel Port Adapter
+ 3250 CH384 Dual Port Serial and Parallel Port Adapter
3252 CH382 PCIe Dual Port Serial Adapter
+# Device ID reused: CH352 is for PCI bus, CH382 for PCIe.
+ 3253 CH352/CH382 PCI/PCIe Dual Port Serial Adapter
1c09 CSP, Inc.
4254 10G-PCIE3-8D-2S
4255 10G-PCIE3-8D-Q
@@ -24596,6 +25327,7 @@
0020 FD2110
0021 FD722
0022 FD788
+ 0023 FD722-M2
1c28 Lite-On IT Corp. / Plextor
0122 M6e PCI Express SSD [Marvell 88SS9183]
# previously Fiberblaze
@@ -24624,6 +25356,7 @@
a000 FBC2CGG3 Capture 2x40Gb [Mango_02]
a001 FBC2CGG3 Capture 2x100Gb [Mango_02]
a003 FBC2CGG3 Capture 16x10Gb [Mango]
+ a006 FBC2CGG3 Capture 2x100Gb [Mango]
a007 FBC2CGG3 Capture 2x40Gb [Mango]
a008 FBC2CGG3 Capture 2x25Gb [Mango]
a009 FBC2CGG3 Capture 16x10Gb [Mango]
@@ -24636,6 +25369,8 @@
a013 FB2CGHH Capture 2x25Gb [Tivoli]
a014 FB2CGHH Capture 8x10Gb [Tivoli]
a015 FB2CGHH Capture 2x100Gb [Tivoli]
+ a016 FB2CG Capture 8x25Gb [Savona]
+ a017 FB2CGHH Capture 8x25Gb [Tivoli] a017
# Used on V120 VME Crate Controller
1c32 Highland Technology, Inc.
1c33 Daktronics, Inc
@@ -24658,6 +25393,7 @@
0023 Ultrastar SN200 Series NVMe SSD
1c58 8823 Ultrastar Memory (ME200)
1c5c SK hynix
+ 1069 PCB01 NVMe Solid State Drive
1282 PC300 NVMe Solid State Drive 128GB
1283 PC300 NVMe Solid State Drive 256GB
1284 PC300 NVMe Solid State Drive 512GB
@@ -24677,6 +25413,7 @@
2429 PE6011 NVMe Solid State Drive
243b PE6110 NVMe Solid State Drive
1c5c 0100 PE6110 NVMe Solid State Drive
+ 2527 PE4010 NVMe Solid State Drive
2839 PE8000 Series NVMe Solid State Drive
1028 2143 DC NVMe SED PE8010 RI U.2 960GB
1028 2144 DC NVMe PE8010 RI U.2 960GB
@@ -24699,6 +25436,8 @@
1c5c 0101 PE81x0 U.2/3 NVMe Solid State Drive
284a PE8110 Series NVMe Solid State Drive
2a49 PE9110 Series NVMe Solid State Drive
+ 2a59 PE9010 Series NVMe Solid State Drives
+ 2b59 PS10x0 Series NVMe Solid State Drives
1c5f Beijing Memblaze Technology Co. Ltd.
000d PBlaze5 520/526
1c5f 0220 NVMe SSD PBlaze5 520 1920G AIC
@@ -24721,11 +25460,18 @@
1c5f 0b40 NVMe SSD PBlaze6 6530 7680G AIC
1c5f 0b41 NVMe SSD PBlaze6 6530 7680G 2.5" U.2
1c5f 0b47 NVMe SSD PBlaze6 6630 7680G 2.5" U.2
+ 1c5f 1320 NVMe SSD PBlaze6 6531 1920G AIC
1c5f 1321 NVMe SSD PBlaze6 6531 1920G 2.5" U.2
+ 1c5f 1330 NVMe SSD PBlaze6 6531 3840G AIC
1c5f 1331 NVMe SSD PBlaze6 6531 3840G 2.5" U.2
+ 1c5f 1340 NVMe SSD PBlaze6 6531 7680G AIC
1c5f 1341 NVMe SSD PBlaze6 6531 7680G 2.5" U.2
+ 1c5f 1421 NVMe SSD PBlaze6 6541 1920G 2.5" U.2
+ 1c5f 1427 NVMe SSD PBlaze6 6641 1920G 2.5" U.2(dual port)
1c5f 1431 NVMe SSD PBlaze6 6541 3840G 2.5" U.2
+ 1c5f 1437 NVMe SSD PBlaze6 6641 3840G 2.5" U.2(dual port)
1c5f 1441 NVMe SSD PBlaze6 6541 7680G 2.5" U.2
+ 1c5f 1447 NVMe SSD PBlaze6 6641 7680G 2.5" U.2(dual port)
1c5f 4b20 NVMe SSD PBlaze6 6536 1600G AIC
1c5f 4b21 NVMe SSD PBlaze6 6536 1600G 2.5" U.2
1c5f 4b25 NVMe SSD PBlaze6 6536 1600G E1.S
@@ -24737,11 +25483,18 @@
1c5f 4b40 NVMe SSD PBlaze6 6536 6400G AIC
1c5f 4b41 NVMe SSD PBlaze6 6536 6400G 2.5" U.2
1c5f 4b47 NVMe SSD PBlaze6 6636 6400G 2.5" U.2
+ 1c5f 5320 NVMe SSD PBlaze6 6537 1600G AIC
1c5f 5321 NVMe SSD PBlaze6 6537 1600G 2.5" U.2
+ 1c5f 5330 NVMe SSD PBlaze6 6537 3200G AIC
1c5f 5331 NVMe SSD PBlaze6 6537 3200G 2.5" U.2
+ 1c5f 5340 NVMe SSD PBlaze6 6537 6400G AIC
1c5f 5341 NVMe SSD PBlaze6 6537 6400G 2.5" U.2
+ 1c5f 5421 NVMe SSD PBlaze6 6547 1600G 2.5" U.2
+ 1c5f 5427 NVMe SSD PBlaze6 6647 1600G 2.5" U.2(dual port)
1c5f 5431 NVMe SSD PBlaze6 6547 3200G 2.5" U.2
+ 1c5f 5437 NVMe SSD PBlaze6 6647 3200G 2.5" U.2(dual port)
1c5f 5441 NVMe SSD PBlaze6 6547 6400G 2.5" U.2
+ 1c5f 5447 NVMe SSD PBlaze6 6647 6400G 2.5" U.2(dual port)
003d PBlaze5 920/926
1c5f 0a30 NVMe SSD PBlaze5 920 3840G AIC
1c5f 0a31 NVMe SSD PBlaze5 920 3840G 2.5" U.2
@@ -24764,12 +25517,37 @@
1c5f 4b51 NVMe SSD PBlaze6 6936 12800GB 2.5" U.3
1c5f 4b61 NVMe SSD PBlaze6 6936 25600GB 2.5" U.3
003f PBlaze7 7940/7946 NVMe SSD
+ 1c5f 0431 NVMe SSD PBlaze7 7940 3840G 2.5" U.2
+ 1c5f 0c31 NVMe SSD PBlaze7 7940 3840G 2.5" U.2
+ 1c5f 0c41 NVMe SSD PBlaze7 7940 7680G 2.5" U.2
+ 1c5f 0c51 NVMe SSD PBlaze7 7940 15360G 2.5" U.2
+ 1c5f 1430 NVMe SSD PBlaze7 7940 3840G AIC
1c5f 1431 NVMe SSD PBlaze7 7940 3840G 2.5" U.2
+ 1c5f 1435 NVMe SSD PBlaze7 7940 3840G E1.S
+ 1c5f 1440 NVMe SSD PBlaze7 7940 7680G AIC
1c5f 1441 NVMe SSD PBlaze7 7940 7680G 2.5" U.2
+ 1c5f 1445 NVMe SSD PBlaze7 7940 7680G E1.S
+ 1c5f 1450 NVMe SSD PBlaze7 7940 15360G AIC
1c5f 1451 NVMe SSD PBlaze7 7940 15360G 2.5" U.2
+ 1c5f 4c31 NVMe SSD PBlaze7 7946 3200G 2.5" U.2
+ 1c5f 4c41 NVMe SSD PBlaze7 7946 6400G 2.5" U.2
+ 1c5f 4c51 NVMe SSD PBlaze7 7946 12800G 2.5" U.2
+ 1c5f 5430 NVMe SSD PBlaze7 7946 3200G AIC
1c5f 5431 NVMe SSD PBlaze7 7946 3200G 2.5" U.2
+ 1c5f 5435 NVMe SSD PBlaze7 7946 3200G E1.S
+ 1c5f 5440 NVMe SSD PBlaze7 7946 6400G AIC
1c5f 5441 NVMe SSD PBlaze7 7946 6400G 2.5" U.2
+ 1c5f 5445 NVMe SSD PBlaze7 7946 6400G E1.S
+ 1c5f 5450 NVMe SSD PBlaze7 7946 12800G AIC
1c5f 5451 NVMe SSD PBlaze7 7946 12800G 2.5" U.2
+ 1ea0 2100 TP2510 Series U.2 NVMe Datacenter SSD(3.84TB)
+ 1ea0 2101 TP2511 Series U.2 NVMe Datacenter SSD(3.84TB)
+ 1ea0 2110 TP2510 Series E3.S NVMe Datacenter SSD(3.84TB)
+ 1ea0 2111 TP2511 Series E3.S NVMe Datacenter SSD(3.84TB)
+ 1ea0 2200 TP2510 Series U.2 NVMe Datacenter SSD(7.68TB)
+ 1ea0 2201 TP2511 Series U.2 NVMe Datacenter SSD(7.68TB)
+ 1ea0 2210 TP2510 Series E3.S NVMe Datacenter SSD(7.68TB)
+ 1ea0 2211 TP2511 Series E3.S NVMe Datacenter SSD(7.68TB)
0540 PBlaze4 NVMe SSD
0550 PBlaze5 700/900
0555 PBlaze5 510/516
@@ -24810,19 +25588,32 @@
0002 Clarett
1cb8 Dawning Information Industry Co., Ltd.
1cc1 ADATA Technology Co., Ltd.
+ 1202 IM2P32A8 NVMe SSD (DRAM-less)
# SX6000LNP
2263 XPG SX6000 Lite NVMe SSD (DRAM-less)
+ 32a8 SM2P32A8 NVMe SSD (DRAM-less)
33f3 IM2P33F3 NVMe SSD (DRAM-less)
- 33f8 IM2P33F8ABR1 NVMe SSD
- 5350 XPG GAMMIX S50 NVMe SSD
- 5762 FALCON NVMe SSD
- 5766 ADATA XPG GAMMIXS1 1L Media (256 GB SSD)
+ 33f4 IM2P33F4 NVMe SSD (DRAM-less)
+ 33f8 IM2P33F8 series NVMe SSD (DRAM-less)
+ 41c3 SM2P41C3 NVMe SSD (DRAM-less)
+ 41c8 SM2P41C8 NVMe SSD (DRAM-less)
+ 5236 XPG GAMMIX S70 BLADE NVMe SSD
+ 5350 XPG GAMMIX S50, S50 Lite NVMe SSD
+# PREMIUM NVMe SSD for PlayStation 5
+ 5370 GAMMIX S70 BLADE, PS5 PREMIUM NVMe SSD
+ 5762 FALCON, GAMMIX S41, SPECTRIX S40G NVMe SSD (DRAM-less)
+ 5763 XPG GAMMIX S5 NVMe SSD (DRAM-less)
+ 5766 XPG GAMMIXS1 1L, XPG GAMMIX S5, LEGEND 710 / 740, SWORDFISH NVMe SSD (DRAM-less)
+ 5772 LEGEND 850 LITE NVMe SSD (DRAM-less)
612a LEGEND 750 NVMe SSD (DRAM-less)
- 613a LEGEND 840 NVMe SSD (DRAM-less)
+ 613a ATOM 50, LEGEND 840 NVMe SSD (DRAM-less)
621a LEGEND 850 NVMe SSD (DRAM-less)
622a LEGEND 960 NVMe SSD
- 624a LEGEND 700 NVMe SSD (DRAM-less)
- 627a LEGEND 800 NVMe SSD
+ 624a LEGEND 700, XPG GAMMIX S20 NVMe SSD (DRAM-less)
+# 1TB
+ 627a LEGEND 800 NVMe SSD (DRAM-less)
+# 500GB
+ 628a LEGEND 800 NVMe SSD (DRAM-less)
8201 XPG SX8200 Pro PCIe Gen3x4 M.2 2280 Solid State Drive
1cc4 Shenzhen Unionmemory Information System Ltd.
1203 NVMe SSD Controller UHXXXa series
@@ -24841,9 +25632,10 @@
17a9 RPITJ1TBVME2HWD NVMe SSD 1024GB
17aa AH631 PCIe 3.0 NVMe SSD 512GB
17ab AH631 PCIe 3.0 NVMe SSD 256GB
- 2263 AM611 PCIe 3.0 NVMe SSD 256GB
- 5008 AM610 PCIe 3.0 NVMe SSD 128GB
+ 2263 AM611 PCIe 3.0 x2 NVMe SSD 256GB
+ 5008 AM610 PCIe 3.0 x2 NVMe SSD 128GB, 256GB
5012 RPITJ512PED2OWX NVMe SSD 512GB
+ 5212 AM521 PCIe 3.0 NVMe SSD 256GB
6201 AM620 PCIe 3.0 NVMe SSD 128GB
6202 AM620 PCIe 3.0 NVMe SSD 256GB
6203 AM620 PCIe 3.0 NVMe SSD 512GB
@@ -24851,14 +25643,37 @@
6302 AM630 PCIe 4.0 NVMe SSD 256GB
6303 AM630 PCIe 4.0 x4 NVMe SSD Controller
6304 AM630 PCIe 4.0 NVMe SSD 1024GB
+ 6a02 AM6A0 PCIe 4.0 NVMe SSD 256GB
6a03 RPETJ512MKP1QDQ PCIe 4.0 NVMe SSD 512GB (DRAM-less)
+ 6a13 RPJYJ512MKN1QWQ PCIe 4.0 NVMe SSD 512GB (DRAM-less)
6a14 RPEYJ1T24MKN2QWY PCIe 4.0 NVMe SSD 1024GB (DRAM-less)
+ 8030 NVMe SSD Controller UH8X2X/UH7X2X series
+ 1cc4 1122 NVMe SSD UH812a U.2 1.92TB
+ 1cc4 1123 NVMe SSD UH812a U.2 3.84TB
+ 1cc4 1124 NVMe SSD UH812a U.2 7.68TB
+ 1cc4 1125 NVMe SSD UH812a U.2 15.36TB
+ 1cc4 1222 NVMe SSD UH812a E3.S 1.92TB
+ 1cc4 1223 NVMe SSD UH812a E3.S 3.84TB
+ 1cc4 1224 NVMe SSD UH812a E3.S 7.68TB
+ 1cc4 1225 NVMe SSD UH812a E3.S 15.36TB
+ 1cc4 2112 NVMe SSD UH832a U.2 1.6TB
+ 1cc4 2113 NVMe SSD UH832a U.2 3.2TB
+ 1cc4 2114 NVMe SSD UH832a U.2 6.4TB
+ 1cc4 2115 NVMe SSD UH832a U.2 12.8TB
+ 1cc4 2212 NVMe SSD UH832a E3.S 1.6TB
+ 1cc4 2213 NVMe SSD UH832a E3.S 3.2TB
+ 1cc4 2214 NVMe SSD UH832a E3.S 6.4TB
+ 1cc4 2215 NVMe SSD UH832a E3.S 12.8TB
+ 1cc4 3122 NVMe SSD UH712a U.2 1.92TB
+ 1cc4 3123 NVMe SSD UH712a U.2 3.84TB
+ 1cc4 3124 NVMe SSD UH712a U.2 7.68TB
+ 1cc4 3125 NVMe SSD UH712a U.2 15.36TB
1cc5 Embedded Intelligence, Inc.
0100 PCIe-CAN-02 Dual CAN bus (9-pin male). PCI Express x1.
0101 PCIe-CAN-01 Single CAN bus (9-pin male). PCI Express x1.
1cc7 Radian Memory Systems Inc.
- 0200 RMS-200
- 0250 RMS-250
+ 0200 RMS-200 PCIe NVMe SSD
+ 0250 RMS-250 U.2 NVMe SSD
1ccf Zoom Corporation
0001 TAC-2 Thunderbolt Audio Converter
1cd2 SesKion GmbH
@@ -24871,6 +25686,7 @@
0306 Simulyzer-RT CompactPCI Serial CAN-2 card (CAN-FD)
0307 Simulyzer-RT CompactPCI Serial DIO-2 card [Xilinx Zynq UltraScale+]
1cd7 Nanjing Magewell Electronics Co., Ltd.
+ 0002 Pro Capture AIO
0010 Pro Capture Endpoint
0014 PRO CAPTURE AIO 4K PLUS
0017 PRO CAPTURE AIO 4K
@@ -24905,6 +25721,7 @@
7164 NeuronDevice (Trainium)
1d0f 0000 Trainium
7264 NeuronDevice (Inferentia2)
+ 7364 NeuronDevice (Trainium2)
8061 NVMe EBS Controller
cd01 NVMe SSD Controller
ec20 Elastic Network Adapter (ENA)
@@ -24920,55 +25737,67 @@
0714 ZX-100/ZX-200 PCI Express Root Port
0715 ZX-100/ZX-200 PCI Express Root Port
0716 ZX-D PCI Express Root Port
- 0717 KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
- 0718 KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
- 0719 KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
- 071a KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
- 071b KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
- 071c KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
- 071d KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
- 071e KX-5000/KX-6000/KX-6000G/KH-40000 Express Root Port
+ 0717 KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port
+ 0718 KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port
+ 0719 KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port
+ 071a KX-5000/KX-6000/KX-6000G/KH-40000 PCI Express Root Port
+ 071b KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port
+ 071c KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port
+ 071d KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port
+ 071e KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Express Root Port
071f ZX-200 Upstream Port of PCI Express Switch
0720 ZX-200 PCIE RC6 controller
0721 ZX-200 Downstream Port of PCI Express Switch
0722 ZX-200 PCIE P2C bridge
- 0731 KH-40000 Express Root Port
- 0732 KH-40000 Express Root Port
+ 0731 KH-40000 PCI Express Root Port
+ 0732 KH-40000/KX-7000 PCI Express Root Port
+ 0733 KX-7000 PCIE Express Root Port
+ 0734 KX-7000 PCIE Express Root Port
+ 0735 KX-7000 PCIE Express Root Port
+ 0736 KX-7000 PCIE Express Root Port
+ 0737 KX-7000 PCIE Express Root Port
+ 0738 KX-7000 PCIE Express Root Port
+ 0739 KX-7000 PCIE Express Root Port
+ 073a KX-7000 PCIE Express Root Port
+ 073b KX-7000 PCIE Express Root Port
1000 ZX-D Standard Host Bridge
- 1001 ZX-D/ZX-E/KH-40000 Miscellaneous Bus
+ 1001 ZX-D/ZX-E/KH-40000/KX-7000 Miscellaneous Bus
1003 ZX-E Standard Host Bridge
1005 KH-40000 Standard Host Bridge
1006 KX-6000G Standard Host Bridge
+ 1007 KX-7000 Standard Host Bridge
3001 ZX-100 Standard Host Bridge
300a ZX-100 Miscellaneous Bus
- 3038 ZX-100/ZX-200/KX-6000/KX-6000G/KH-40000 Standard Universal PCI to USB Host Controller
- 3104 ZX-100/ZX-200/KX-6000/KX-6000G/KH-40000 Standard Enhanced PCI to USB Host Controller
- 31b0 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 Standard Host Bridge
- 31b1 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 Standard Host Bridge
- 31b2 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 DRAM Controller
- 31b3 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 Power Management Controller
- 31b4 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 I/O APIC
- 31b5 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 Scratch Device
- 31b7 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 Standard Host Bridge
+ 3038 ZX-100/ZX-200/KX-6000/KX-6000G/KH-40000/KX-7000 Standard Universal PCI to USB Host Controller
+ 3104 ZX-100/ZX-200/KX-6000/KX-6000G/KH-40000/KX-7000 Standard Enhanced PCI to USB Host Controller
+ 31b0 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 Standard Host Bridge
+ 31b1 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 Standard Host Bridge
+ 31b2 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 DRAM Controller
+ 31b3 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 Power Management Controller
+ 31b4 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 I/O APIC
+ 31b5 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 Scratch Device
+ 31b7 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 Standard Host Bridge
31b8 ZX-100/ZX-D PCI to PCI Bridge
- 3288 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 High Definition Audio Controller
+ 3200 KX-7000 Host Bridge
+ 3288 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 High Definition Audio Controller
345b ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 Miscellaneous Bus
3a02 ZX-100 C-320 GPU
3a03 ZX-D C-860 GPU
- 3a04 ZX-E C-960 GPU
+ 3a04 KX-6000 C-960 GPU
+ 3a05 KX-7000 C-1190 GPU
3b01 KH-40000 OPI Controller
3b02 KH-40000 OPI Controller
3b03 KH-40000 OPI Controller
3b04 KH-40000 OPI Controller
- 3c00 KH-40000 DRAM Controller
+ 3c00 KH-40000/KX-7000 DRAM Controller
3c02 KX-6000G DRAM Controller
3d01 KX-6000G C-1080 GPU
- 9002 ZX-100/ZX-200/KH-40000 EIDE Controller
+ 9002 ZX-100/ZX-200/KH-40000/KX-7000 EIDE Controller
9003 ZX-100/KX-6000/KX-6000G EIDE Controller
- 9043 KX-6000G/KH-40000 RAID Controller
+ 9043 KX-6000G/KH-40000/KX-7000 RAID Controller
9045 ZX-100/ZX-D/ZX-E RAID Accelerator 0
9046 ZX-D/ZX-E RAID Accelerator 1
- 9083 ZX-100/ZX-200/KX-6000/KX-6000G/KH-40000 StorX AHCI Controller
+ 9083 ZX-100/ZX-200/KX-6000/KX-6000G/KH-40000/KX-7000 StorX AHCI Controller
9084 ZX-100 StorX AHCI Controller
9100 ZX-200 Cross bus
9101 ZX-200 Traffic Controller
@@ -24976,17 +25805,21 @@
9142 ZX-D High Definition Audio Controller
9144 ZX-E High Definition Audio Controller
9145 KX-6000G High Definition Audio Controller
+ 9146 KX-7000 High Definition Audio Controller
9180 ZX-200 Networking Gigabit Ethernet Adapter
91c1 KH-40000 ZPI Controller
91c2 KH-40000 ZPI Controller
9202 ZX-100 USB eXtensible Host Controller
9203 ZX-200 USB eXtensible Host Controller
- 9204 KX-6000/KX-6000G USB eXtensible Host Controller
+ 9204 KX-6000/KX-6000G/KX-7000 USB3 xHCI Host Controller
9205 KH-40000 USB eXtensible Host Controller
+ 9206 KX-7000 USB4 Contoller
9286 ZX-D eMMC Host Controller
- 9300 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000 eSPI Host Controller
+ 9300 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 eSPI Host Controller
+ 9500 KX-7000 I2S Controller
+ 9501 KX-7000 I2S Controller
95d0 ZX-100 Universal SD Host Controller
- f410 ZX-100/KX-5000/KX-6000/KX-6000G PCI Com Port
+ f410 ZX-100/KX-5000/KX-6000/KX-6000G/KH-40000/KX-7000 PCI Com Port
1d18 RME
0001 Fireface UFX+
# acquired by Intel
@@ -25022,7 +25855,6 @@
1d44 DPT
a400 PM2x24/PM3224
1d49 Lenovo
- 0522 ThinkSystem RAID 5350-8i PCIe 12Gb Internal Adapter
1d4c Diamanti, Inc.
1d5c Fantasia Trading LLC
1d61 Technobox, Inc.
@@ -25034,20 +25866,29 @@
2440 CL2440
1d6a Aquantia Corp.
0001 AQC107 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
- 00b1 AQC100 10G Ethernet MAC controller [AQtion]
- 07b1 AQC107 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
+ 4c52 6880 LREC6880BT Single-port 10Gb Ethernet Network Adapter
+ 00b1 AQtion AQC100 NBase-T/IEEE 802.3an Ethernet Controller [Atlantic 10G]
+ 00c0 Antigua NBase-T/IEEE 802.3an Ethernet Controller - Engineering Sample
+ 04c0 AQtion AQC113 NBase-T/IEEE 802.3an Ethernet Controller [Antigua 10G]
+ 4c52 1053 LRES1053PT Quad-port 10Gb Ethernet Network Adapter
+ 4c52 2051 LRES2051PT Single-port 10Gb Ethernet Network Adapter
+ 07b1 AQtion AQC107 NBase-T/IEEE 802.3an Ethernet Controller [Atlantic 10G]
# Older revision of QNAP QM2 M.2 2280 PCIe SSD & 10GbE Expansion Card
1baa 07b1 QM2-2P10G1TA [QXG 10GbE Network Adapter]
# Newer revision of QNAP QM2 M.2 2280 PCIe SSD & 10GbE Expansion Card
1baa 07b2 QM2-2P10G1TA [QM2 Expansion Adapter]
+ 4c52 6880 LREC6880BT Single-port 10Gb Ethernet Network Adapter
08b1 AQC108 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
11b1 AQC111 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
12b1 AQC112 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
- 14c0 AQC113C NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
- 87b1 AQC107 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
- 94c0 AQC113CS NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
+ 12c0 AQtion AQC115C NBase-T/IEEE 802.3bz Ethernet Controller [Antigua 2.5G]
+ 14c0 AQC113C NBase-T/IEEE 802.3an Ethernet Controller [Marvell Scalable mGig]
+ 80b1 AQtion AQC100S NBase-T/IEEE 802.3an Ethernet Controller [Atlantic 10G]
+ 87b1 AQtion AQC107S NBase-T/IEEE 802.3an Ethernet Controller [Atlantic 10G]
+ 93c0 AQtion AQC114CS NBase-T/IEEE 802.3bz Ethernet Controller [Antigua 5G]
+ 94c0 AQtion AQC113CS NBase-T/IEEE 802.3an Ethernet Controller [Antigua 10G]
1043 87f5 ProArt X570-CREATOR WIFI
- d107 AQC107 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
+ d107 AQtion AQC107 NBase-T/IEEE 802.3an Ethernet Controller [Atlantic 10G]
1043 8741 XG-C100C
d108 AQC108 NBase-T/IEEE 802.3bz Ethernet Controller [AQtion]
1d6c Atomic Rules LLC
@@ -25088,6 +25929,12 @@
1021 AR-MGMT-PF [Board-Generic Management Physical Function]
1022 AR-ARKA-FX2 [Arkville 128B DPDK Data Mover for Agilex]
1023 AR-BD-FX1 [BufferDirect Zero-Bounce Data Motion Endpoint]
+ 1024 AR-TK242 [2x10GbE Packet Capture Device]
+ 1025 AR-TK242-FX2 [2x100GbE Gen5 Packet Capture Device]
+ 1026 AR-TK242-FX2 [1x200GbE Gen5 Packet Capture Device]
+ 1027 AR-P2P-DBG [P2P Debug Function]
+ 1028 AR-P2P-ATR [P2P Actor Function]
+ 1029 AR-P2P-UTL [P2P Utility Function]
4200 A5PL-E1-10GETI [10 GbE Ethernet Traffic Instrument]
1d72 Xiaomi
1d78 DERA Storage
@@ -25153,10 +26000,10 @@
1d78 7204 Aliflash V2 U.2 15mm 3.84TB NVMe SSD
1d78 7208 Aliflash V2 U.2 15mm 7.68TB NVMe SSD
1d79 Transcend Information, Inc.
- 2262 NVMe PCIe SSD 220S
- 2263 NVMe PCIe SSD 110S/112S/120S/MTE652T2 (DRAM-less)
+ 2262 NVMe PCIe SSD 220S/MTE662T2
+ 2263 NVMe PCIe SSD 110S/112S/120S/MTE300S/MTE400S/MTE652T2 (DRAM-less)
2264 NVMe PCIe SSD 250H
- 2267 NVMe PCIe SSD 240S/MTE710T
+ 2267 NVMe PCIe SSD 220S/240S/MTE710T
5766 NVMe PCIe SSD 110Q (DRAM-less)
1d7c Aerotech, Inc.
# Fiber-optic HyperWire motion control bus from Aerotech.
@@ -25214,10 +26061,15 @@
0002 Colossus GC1 [S1]
1d97 Shenzhen Longsys Electronics Co., Ltd.
1062 Lexar NM710 NVME SSD
+ 1160 FORESEE P900 BGA NVMe SSD (DRAM-less)
+ 1202 Lexar NM610 PRO NVME SSD (DRAM-less)
+ 12e4 ORCA 4836 Series eSSD
+ 1602 Lexar NM790 NVME SSD (DRAM-less)
1d97 Lexar NM620 NVME SSD (DRAM-less)
- 2263 SM2263EN/SM2263XT-based OEM SSD
- 2269 Lexar NM760 NVME SSD (DRAM-less)
- 5216 Lexar NM620 NVME SSD (DRAM-less)
+ 2263 SM2263EN/SM2263XT-based OEM NVME SSD (DRAM-less)
+ 2269 FORESEE XP2000, Lexar NM760 NVME SSD (DRAM-less)
+ 5216 FORESEE XP1000 / Lexar Professional CFexpress Type B Gold series, NM620 PCIe NVME SSD (DRAM-less)
+ 5220 FORESEE XP2100 NVMe SSD (DRAM-less)
5236 Lexar NM800 PRO NVME SSD
# nee Facebook, Inc.
1d9b Meta Platforms, Inc.
@@ -25241,6 +26093,7 @@
3800 Network Flow Processor 3800
3803 Network Flow Processor 3800 Virtual Function
1dad Fungible
+ 0108 FC50, FC100, FC200 DPU NVMeoF Adapters
1db2 ATP ELECTRONICS INC
1db7 Phytium Technology Co., Ltd.
dc20 [X100 Series]
@@ -25270,21 +26123,42 @@
5216 NVMe SSD Controller IG5216 (DRAM-less)
5220 NVMe SSD Controller IG5220 (DRAM-less)
5236 NVMe SSD Controller IG5236
+ 1dbe 4001 Dongting-B1 DC SSD M.2 480GB
+ 1dbe 4002 Dongting-B1 DC SSD M.2 960GB
5636 NVMe DC SSD IG5636
- 1dbe 0001 DONGTING-N1 DC SSD U.2
- 1dbe 1001 DONGHU-Z1 DC ZNS SSD U.2
+ 1dbe 0001 Dongting-N1 DC SSD U.2 1600GB
+ 1dbe 0002 Dongting-N1 DC SSD U.2 1920GB
+ 1dbe 0003 Dongting-N1 DC SSD U.2 3200GB
+ 1dbe 0004 Dongting-N1 DC SSD U.2 3840GB
+ 1dbe 0005 Dongting-N1 DC SSD U.2 6400GB
+ 1dbe 0006 Dongting-N1 DC SSD U.2 7680GB
+ 1dbe 1001 Donghu-Z1 DC ZNS SSD U.2 4000GB
+ 1dbe 1002 Donghu-Z1 DC ZNS SSD U.2 8000GB
5638 NVMe DC SSD IG5638
- 1dbe 2001 DONGTING-N1 DC SSD U.2
- 1dbe 3001 DONGHU-Z1 DC ZNS SSD U.2
+ 1dbe 2001 Dongting-N2 DC SSD U.2 1600GB
+ 1dbe 2002 Dongting-N2 DC SSD U.2 1920GB
+ 1dbe 2003 Dongting-N2 DC SSD U.2 3200GB
+ 1dbe 2004 Dongting-N2 DC SSD U.2 3840GB
+ 1dbe 2005 Dongting-N2 DC SSD U.2 6400GB
+ 1dbe 2006 Dongting-N2 DC SSD U.2 7680GB
+ 1dbe 3001 Donghu-Z2 DC ZNS SSD U.2 4000GB
+ 1dbe 3002 Donghu-Z2 DC ZNS SSD U.2 8000GB
+ 5666 NVMe SSD Controller IG5666
+ 5668 NVMe SSD Controller IG5668
+ 5669 NVMe SSD Controller IG5669 [Tacoma]
1dbf Guizhou Huaxintong Semiconductor Technology Co., Ltd
0401 StarDragon4800 PCI Express Root Port
1dc2 Alco Digital Devices Limited
1dc5 FADU Inc.
+ 4081 FC4121 PCIe 4.0 NVMe controller [DELTA]
+ 6150 FC3081 PCIe 3.0 NVMe controller [BRAVO]
1dcd Liqid Inc.
1dcf Beijing Sinead Technology Co., Ltd.
1dd3 Sage Microelectronics Corp.
1dd4 Swissbit AG
0010 N-10m2 NVMe SSD
+ 0016 N-16
+ 0020 EN-20 BGA NVMe SSD (DRAM-less)
1dd8 AMD Pensando Systems
0002 DSC2 Elba Upstream Port
1dd8 100e Distributed Services Card
@@ -25515,6 +26389,8 @@
2020 DC-390 Series SCSI Adapter [AMD Am53C974]
690c 690c
dc29 DC290
+1de4 Raspberry Pi Ltd
+ 0001 RP1 PCIe 2.0 South Bridge
1de5 Eideticom, Inc
1000 IO Memory Controller
2000 NoLoad Hardware Development Kit
@@ -25531,6 +26407,7 @@
1dee Biwin Storage Technology Co., Ltd.
2262 HP EX950 NVMe SSD
2263 HP EX900 NVMe SSD (DRAM-less)
+ 5216 KingSpec NX series NVMe SSD (DRAM-less)
1def Ampere Computing, LLC
e005 eMAG PCI Express Root Port 0
e006 eMAG PCI Express Root Port 1
@@ -25607,6 +26484,7 @@
1df3 0001 ENA2100RN
1df5 Shenzhen TIGO Semiconductor
1202 kimtigo NVMe SSD (DRAM-less)
+ 2263 kimtigo MG931K NVMe SSD (DRAM-less)
1df7 opencpi.org
0001 ml605
0002 alst4
@@ -25620,8 +26498,10 @@
1df8 d100 M.2 NVMe SSD
1df8 d201 M.2 NVMe SSD
1df8 d600 M.2 NVMe SSD
+1dfa Astera Labs, Inc.
1dfc JSC NT-COM
1181 TDM 8 Port E1/T1/J1 Adapter
+1e0d SambaNova Systems, Inc
1e0f KIOXIA Corporation
0001 NVMe SSD Controller BG4 (DRAM-less)
0007 NVMe SSD Controller Cx6
@@ -25683,6 +26563,7 @@
1028 22b9 Ent NVMe CM7 FIPS E3.S MU 6.4TB
1028 22ba Ent NVMe CM7 FIPS E3.S MU 12.8TB
0018 Exceria Pro NVMe SSD
+ 001a NVMe SSD Controller BG6 (DRAM-less)
001f NVMe SSD Controller CD8
1028 2223 DC NVMe CD8 U.2 SED 15.36TB
1028 2224 DC NVMe CD8 U.2 SED 7.68TB
@@ -25713,6 +26594,16 @@
1028 223c Ent NVMe CM7 U.2 MU 6.4TB
1028 223d Ent NVMe CM7 U.2 MU 3.2TB
1028 223e Ent NVMe CM7 U.2 MU 1.6TB
+ 002a Exceria Plus G3 NVMe SSD (DRAM-less)
+ 002c NVMe SSD Controller CD8P EDSFF
+ 1028 22bf DC NVMe CD8P E3.S 15.36TB
+ 1028 22c0 DC NVMe CD8P E3.S 7.68TB
+ 1028 22c1 DC NVMe CD8P E3.S 3.84TB
+ 1028 22c2 DC NVMe CD8P E3.S 1.92TB
+ 1028 22c7 DC NVMe CD8P E3.S MU 12.8TB
+ 1028 22c8 DC NVMe CD8P E3.S MU 6.4TB
+ 1028 22c9 DC NVMe CD8P E3.S MU 3.2TB
+ 1028 22ca DC NVMe CD8P E3.S MU 1.6TB
1e17 Arnold & Richter Cine Technik GmbH & Co. Betriebs KG
1e18 Beijing GuangRunTong Technology Development Co.,Ltd
1e24 Squirrels Research Labs
@@ -25744,16 +26635,21 @@
8032 S6 [Enflame]
# FHFL PCIe card, single slot, 3rd generation from Enflame
c031 S30 [Enflame]
-# FHFL PCIe card, single slot, 3rd generation from Enflame, 48GB device memory
+# FHFL PCIe card, dual slot, 3rd generation from Enflame, 48GB device memory
c032 S90 [Enflame]
-# FHFL PCIe card, single slot, 3rd generation from Enflame, 48GB device memory
- c033 S60 [Enflame]
+# FHFL PCIe card, dual slot, 3rd generation from Enflame, 48GB device memory
+ c033 S60G [Enflame]
+# FHFL PCIe card, dual slot, 3rd generation from Enflame, 48GB device memory
+ c035 S60 [Enflame]
# nee Thinci, Inc
1e38 Blaize, Inc
0102 Xplorer X1600
1e39 MEDION AG
+1e3a Cactus Technologies Limited
+ 2263 270PM6, 270PM7 series NVMe SSD
1e3b DapuStor Corporation
0600 NVMe SSD Controller DP600
+ 1e3b 0006 Enterprise NVMe SSD U.2 ODP 7.68TB (J5001)
1e3b 0010 Enterprise NVMe SSD U.2 3.84TB (R5102)
1e3b 0013 Enterprise NVMe SSD U.2 3.20TB (R5302)
1e3b 0030 Enterprise NVMe SSD U.2 3.84TB (J5100)
@@ -25788,12 +26684,49 @@
1e3b 0069 Enterprise NVMe SSD U.2 3.20TB (R5301D)
1e3b 006c Enterprise NVMe SSD U.2 1.92TB (R5101)
1e3b 006d Enterprise NVMe SSD U.2 1.60TB (J5301)
+ 1e3b 00b9 Enterprise NVMe SSD U.2 ODP 25.60TB (R5301)/(J5301)
+ 1e3b 00be Enterprise NVMe SSD U.2 ODP 30.72TB (R5101)/(J5101)
+ 1e3b 00c1 Enterprise NVMe SSD U.2 ODP 25.60TB (R5301D)/(J5301D)
+ 1e3b 00c4 Enterprise NVMe SSD U.2 ODP 30.72TB (R5101D)/(J5101D)
+ 1e3b 00c7 Enterprise NVMe SSD U.2 ODP 25.60TB (J5300)
+ 1e3b 00c8 Enterprise NVMe SSD U.2 ODP 30.72TB (J5100)
+ 1e3b 00c9 Enterprise NVMe SSD U.2 ODP 15.36TB (J5001)
+ 1e3b 00ca Enterprise NVMe SSD U.2 ODP 3.84TB (J5102)
+ 1e3b 00cb Enterprise NVMe SSD U.2 ODP 7.68TB (J5102)
+ 1e3b 00cc Enterprise NVMe SSD U.2 QDP 3.84TB (J5101)
+ 1e3b 00cd Enterprise NVMe SSD U.2 ODP 7.68TB (J5101)
+ 1e3b 00ce Enterprise NVMe SSD U.2 QDP 3.84TB (J5101D)
+ 1e3b 00cf Enterprise NVMe SSD U.2 ODP 7.68TB (J5101D)
+ 1e3b 00dc Enterprise NVMe SSD U.2 ODP 30.72TB with SAMSUNG 32GB DRAM (J5001)
+ 1e3b 00dd Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(J5001)
+ 1e3b 00de Enterprise NVMe SSD U.2 ODP 15.36TB with SK 16GB DRAM(J5001D)
+ 1e3b 00df Enterprise NVMe SSD U.2 ODP 30.72TB with SAMSUNG 32GB DRAM(J5001)
+ 1e3b 00e7 Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(J5001D)
+ 1e3b 00e8 Enterprise NVMe SSD U.2 QDP 3.20TB (J5301)
+ 1e3b 00e9 Enterprise NVMe SSD U.2 ODP 6.40TB (J5301)
+ 1e3b 00ea Enterprise NVMe SSD U.2 QDP 3.20TB (J5301D)
+ 1e3b 00eb Enterprise NVMe SSD U.2 ODP 6.40TB (J5301D)
+ 1e3b 00ec Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(J5101)
+ 1e3b 00ed Enterprise NVMe SSD U.2 ODP 30.72TB with MT 32GB DRAM(R5101)
+ 1e3b 00ee Enterprise NVMe SSD U.2 ODP 15.36B with SK 16GB DRAM(J5101)
+ 1e3b 00ef Enterprise NVMe SSD U.2 ODP 12.80TB with SK 16GB DRAM(J5301)
1e3b 00f0 Enterprise NVMe SSD U.2 0.40TB (X2900)
1e3b 00f1 Enterprise NVMe SSD U.2 0.80TB (X2900)
1e3b 00f2 Enterprise NVMe SSD U.2 1.60TB (X2900)
1e3b 00f3 Enterprise NVMe SSD U.2 3.20TB (X2900)
1e3b 00f5 Enterprise NVMe SSD U.2 0.40TB (X2900P)
1e3b 00f6 Enterprise NVMe SSD U.2 0.80TB (X2900P)
+ 0800 DP800
+ 1e3b 0001 Enterprise NVMe SSD U.2 QDP 3.84TB(R6100)
+ 1e3b 0007 Enterprise NVMe SSD U.2 ODP 15.36TB (R6100)
+ 1e3b 000a Enterprise NVMe SSD U.2 3.20TB (R6300)
+ 1e3b 000d Enterprise NVMe SSD U.2 6.40TB (R6300)
+ 1e3b 0010 Enterprise NVMe SSD U.2 12.80TB (R6300)
+ 1e3b 0018 Enterprise NVMe SSD U.2 QDP 3.84TB (R6100C)
+ 1e3b 0019 Enterprise NVMe SSD U.2 ODP 7.68TB (R6100C)
+ 1e3b 001a Enterprise NVMe SSD U.2 3.20TB (R6300C)
+ 1e3b 001b Enterprise NVMe SSD U.2 6.40TB (R6300C)
+ 1e3b 001c Enterprise NVMe SSD U.2 ODP 7.68TB (R6100)
1098 Haishen3 NVMe SSD
1e3b 0001 Enterprise NVMe SSD U.2 0.8TB (H2100)
1e3b 0002 Enterprise NVMe SSD U.2 0.96TB (H2200)
@@ -25844,24 +26777,32 @@
# YMTC
1001 PC005 NVMe SSD
1011 PC210 NVMe SSD
-# YMTC PCIe/NVMe SSD
- 1013 PC210
+ 1013 PC210 NVMe SSD
+ 1031 PC300 NVMe SSD (DRAM-less)
+ 1033 PC300 NVMe SSD (DRAM-less)
+ 1071 PC411 NVMe SSD (DRAM-less)
1e4b MAXIO Technology (Hangzhou) Ltd.
1001 NVMe SSD Controller MAP1001
- 1002 NVMe SSD Controller MAP1002
+ 1002 NVMe SSD Controller MAP1002 (DRAM-less)
1003 NVMe SSD Controller MAP1003
1201 NVMe SSD Controller MAP1201
- 1202 NVMe SSD Controller MAP1202
+ 1202 NVMe SSD Controller MAP1202 (DRAM-less)
1601 NVMe SSD Controller MAP1601
- 1602 NVMe SSD Controller MAP1602
+ 1602 NVMe SSD Controller MAP1602 (DRAM-less)
+ 1608 NVMe SSD Controller MAP1608 (DRAM-less)
1e4c GSI Technology
0010 Associative Processing Unit [Leda]
1e4c 0120 SE120
1e50 IP3 Tech (HK) Limited
1e52 Tenstorrent Inc
401e Wormhole
+ 1e52 0014 n300
+ 1e52 0018 n150
b140 Blackhole
faca Grayskull
+ 1e52 0003 e150
+ 1e52 0007 e75
+ 1e52 000a e300
1e57 Beijing Panyi Technology Co., Ltd
0100 The device has already been deleted.
0000 0100 PY8800 64GB Accelerator
@@ -25875,6 +26816,7 @@
1e67 Untether AI
0002 runAI200 AI Inference Accelerator
1e68 Jiangsu Xinsheng Intelligent Technology Co., Ltd
+ 8111 EP2000Pro PCIe 3 NVMe SSD (DRAM-less)
1e6b Axiado Corp.
1e7b Dataland
1e7c Brainchip Inc
@@ -25882,6 +26824,8 @@
1e7e Pliops
9034 Pliops Extreme Data Processor [XDP1.0]
1e7f Jiangsu Huacun Elec. Tech. Co., Ltd.
+ 6002 MMY MMSP350 PCIe 3 NVMe SSD (DRAM-less)
+ 6003 MMY HC512GP3KH2T PCIe 3 NVMe SSD (DRAM-less)
1e81 Ramaxel Technology(Shenzhen) Limited
1203 NVMe SSD Controller UHXXXa series
1e81 a121 NVMe SSD UHXXXa series U.2 960GB
@@ -25904,11 +26848,17 @@
# aka SED Systems
1e94 Calian SED
1e95 Solid State Storage Technology Corporation
+ 1000 XA1-311024 NVMe SSD M.2
+ 1001 CA6-8D512 NVMe SSD M.2
1002 NVMe SSD [3DNAND] 2.5" U.2 (LJ1)
1e95 1101 NVMe SSD [3DNAND] 2.5" U.2 (LJ1)
1ea0 5636 TP1500 Series U.2 NVMe Datacenter SSD
1003 CLR-8W512 NVMe SSD M.2 (DRAM-less)
+ 1005 PLEXTOR M10P(GN) NVMe SSD M.2
1007 CL4-8D512 NVMe SSD M.2 (DRAM-less)
+ 1008 CL5-8D512 NVMe SSD M.2 (DRAM-less)
+ 3500 CA5-8D256 NVMe SSD M.2
+ 35f1 PLEXTOR M9PGN Plus NVMe SSD M.2
9100 CL1-3D256-Q11 NVMe SSD M.2
1e96 Drut Technologies Inc.
1e9f Lynxi Technologies Co., Ltd.
@@ -25916,6 +26866,7 @@
2a16 Cloud Intelligent Inference Controller
2a18 Video Transcode Controller
2a20 Cloud Intelligent Inference and Training Controller
+ 2a22 Cloud Intelligent Inference Controller
1ea7 Intelliprop, Inc
223a Typhon+ PCIe to Gen-Z Bridge
224a IPA-PE224A CXL to Gen-Z Bridge [Sphinx]
@@ -25930,6 +26881,9 @@
1001 Video Accelerator
1eb4 Quantum Nebula Microelectronics Technology Co.,Ltd.
3401 SSD Contoller
+1eb6 Wuxi Stars Microsystem Technology Co., Ltd
+1eb9 Senscomm Semiconductor, Inc
+ 2020 SCM2625 Wi-Fi6 Network Adapter
1ebd EMERGETECH Company Ltd.
0101 Seirios 2063 Video Codec
1ec6 Vastai Technologies
@@ -25970,8 +26924,15 @@
1ec9 Wingtech Group(HongKong)Limited
1eca Lightmatter
0000 Envise-B
+1ed0 Hosin Global Electronics
+ 2283 Patriot P300 NVMe SSD (DRAM-less)
1ed2 FuriosaAI, Inc.
0000 Warboy
+ 1111 RNGD
+ 0000 1111 RNGD-S
+ 0000 2222 RNGD VF
+ 0000 3333 RNGD-S VF
+ 2222 RNGD-S
1ed3 Yeston
1ed5 Moore Threads Technology Co.,Ltd
0100 MTT S10
@@ -25988,18 +26949,21 @@
0201 MTT S80
0202 MTT S70
0203 MTT S60
- 0211 MTT X200
+ 0211 MTT X300
0221 G2S80
0222 MTT S3000
+ 1ed5 0001 C3150
0223 G2S4
0251 G2N10
02ff MTT HDMI/DP Audio
0300 MTT S90 Engineering Sample
0301 MTT S90
0323 MTT S4000
+ 0327 MTT S4000
03ff MTT HDMI/DP Audio
1ed8 Digiteq Automotive
- 0101 FG4 PCIe Frame Grabber
+ 0101 FG4 PCIe Frame Grabber (T100)
+ 0201 FG4 PCIe Frame Grabber (T200)
1ed9 Myrtle.ai
1ee1 Suzhou Kuhan Information Technologies
0050 Aurora NVMe SSD Controller
@@ -26022,14 +26986,21 @@
1142 XDX120M
1144 XDX E1200
1150 XDX120S
+ 1160 XDX121
+ 1170 XDX121S
+ 11e0 XDX130
11e4 XDX E1300
1320 XDX150
+ 1323 XDX R1500
1324 XDX X1500
1330 XDX150S
- 1340 XDX150T
- 1350 XDX150U
- 13c0 XDX160
+ 1333 XDX R1510
+ 1340 XDX151
+ 1350 XDX151S
+ 1360 XDX151T
+ 13c0 XDX160T
13d0 XDX160S
+ 13d3 XDX R1610
1500 XDX180
1503 XDX R1800
1504 XDX X1800
@@ -26037,7 +27008,7 @@
15a0 XDX190
15a3 XDX R1900
15a4 XDX X1900
- 15a5 XDX X1900M2
+ 15a5 XDX X1900D
15b0 XDX190S
1810 XDX TJ01 Audio
1820 XDX TJ02 Audio
@@ -26045,12 +27016,13 @@
1ef6 GrAI Matter Labs
1ef7 Shenzhen Gunnir Technology Development Co., Ltd
1efb Flexxon Pte Ltd
+1eff Rebellions Inc.
1f02 Beijing Dayu Technology
1f03 Shenzhen Shichuangyi Electronics Co., Ltd
- 1202 MAP1202-Based NVMe SSD
+ 1202 MAP1202-Based NVMe SSD (DRAM-less)
2262 SM2262EN-based OEM SSD
2263 SM2263XT-Base NVMe SSD
- 5216 IG5216-based NVMe SSD
+ 5216 IG5216-based NVMe SSD (DRAM-less)
5220 IG5220-Based NVMe SSD
5236 IG5236-Based NVMe SSD
5636 IG5636-Based NVMe SSD
@@ -26074,7 +27046,28 @@
1a01 M16104 Family Virtual Function
1f0f 0001 M16104 Family Virtual Function
2022 D1055AS PCI Express Switch Upstream Port
+ 3403 M18110 Family
+ 3404 M18110 Lx Family
+ 3405 M18110 Family BASE-T
+ 3406 M18110 Lx Family BASE-T
+ 3407 M18110 Family OCP
+ 3408 M18110 Lx Family OCP
+ 3409 M18110 Family BASE-T OCP
+ 340a M18110 Lx Family BASE-T OCP
+ 340b M18120 Family
+ 340c M18120 Lx Family
+ 340d M18120 Family BASE-T
+ 340e M18120 Lx Family BASE-T
+ 340f M18120 Family OCP
+ 3410 M18120 Lx Family OCP
+ 3411 M18120 Family BASE-T OCP
+ 3412 M18120 Lx Family BASE-T OCP
+ 3413 M18100 Family Virtual Function
9088 D1055AS PCI Express Switch Downstream Port
+1f16 XConn Technologies
+# XConn XC50256 CXL2.0/PCIe5.0 switch
+ c500 XC50256
+1f17 Zettastone Technology
1f24 xFusion Digital Technologies Co., Ltd.
1058 EP500/EP600 NVMe SSD
1f24 1114 EP500 NVMe SSD(RI)
@@ -26127,25 +27120,49 @@
9032 SSSNIC SDI5.1
1f3f 00a1 Dual Port 100GE SDI5.1
1f40 Netac Technology Co.,Ltd
- 2263 NVMe SSD (DRAM-less)
+ 0001 PCIe 4 NVMe SSD (DRAM-less)
+ 1202 PCIe 3 NVMe SSD (DRAM-less)
+ 1602 PCIe 4 NVMe SSD (DRAM-less)
+ 1f40 PCIe 4 NVMe SSD (DRAM-less)
+ 2263 PCIe 3 SM based NVMe SSD (DRAM-less)
+ 5216 PCIe 3 NVMe SSD (DRAM-less)
+ 5236 PCIe 4 INNOGRIT based NVMe SSD
+ 5765 PCIe 3 NVMe SSD (DRAM-less)
1f44 VVDN Technologies Private Limited
+# YUSUR Technology Co., Ltd.
+1f47 YUSUR Tech
+# Network Accelerating Card
+ 2018 DPU Card
+# Network Accelerating Card
+ 2020 DPU
1f4b Axera Semiconductor Co., Ltd
1f52 MangoBoost Inc.
1f56 SAPEON Inc.
1f60 Accelecom
0001 XELE-NIC 25K5
0054 XELE-NIC 25K5
-1f67 Yunsilicon Technology Co,. Ltd.
+1f67 Yunsilicon Technology
+ 1011 metaConnect SmartNIC Physical Function
+ 1012 metaConnect SmartNIC Virtual Function
+ 1051 metaFusion DPU Physical Function
+ 1052 metaFusion DPU Virtual Function
+ 1059 metaFusion DPU SoC Network Interface
+ 1111 metaScale SmartNIC Physical Function
+ 1112 metaScale SmartNIC Virtual Function
+ 1151 metaVisor DPU Physical Function
+ 1152 metaVisor DPU Virtual Function
1faa Hexaflake (Shanghai) Information Technology Co., Ltd.
0c10 Compass C10 PF
0c11 Compass C10 VF
1fab Unifabrix Ltd.
0000 Nexus Alpha IVPU
0100 NoX Gamma
+ 01fd Smart Memory Node Generic CXL Port (T1)
# UnifabriX Smart Memory Node Generic CXL Port
01fe Smart Memory Node Generic CXL Port (T2)
# UnifabriX Smart Memory Node Generic CXL Port
01ff Smart Memory Node Generic CXL Port (T3)
+ 1b00 MAX Host Device
1fb0 ICube Corporation Limited
1000 NF1000 Series GPU
1fb0 1001 NF1001
@@ -26249,15 +27266,46 @@
1fe4 0076 Enterprise NVMe SSD U.2 7.68TB(HP610)
1fe4 0077 Enterprise NVMe SSD U.2 6.40TB(HP630)
1fe4 0078 Enterprise NVMe SSD U.2 3.20TB(HP630)
+1fe9 MemryX
+# LinkData Technology (Tianjin) Co., LTD
+1ff2 Linkdata
+ 10a1 NIC1160 Ethernet Controller Family
+ 1ff2 0c11 10GE Ethernet Adapter 1160-2X
+ 10a2 NIC1160 Ethernet Controller Virtual Function Family
+ 20a1 IOC2110 Storage Controller
+ 1ff2 0a11 2120-16i SATA3/SAS3 HBA Adapter
+ 1ff2 0a12 2120-8i SATA3/SAS3 HBA Adapter
+ 20a2 IOC2250 Storage Controller
+ 1ff2 0a21 2230-18i Tri-mode HBA Adapter
+ 1ff2 0a22 2230-10i Tri-mode HBA Adapter
+ 1ff2 0a23 2230-16i Tri-mode HBA Adapter
+ 1ff2 0a24 2230-8i Tri-mode HBA Adapter
+ 1ff2 0a28 2233-16i Tri-mode HBA Adapter
+ 30a2 ROC3250 Storage Controller
+ 1ff2 0b21 3260-18i Tri-mode RAID Adapter
+ 1ff2 0b22 3260-10i Tri-mode RAID Adapter
+ 1ff2 0b23 3260-16i Tri-mode RAID Adapter
+ 1ff2 0b24 3260-8i Tri-mode RAID Adapter
1ff4 DEEPX Co., Ltd.
0000 DX_M1
0001 DX_M1A
+ 1000 DX_H1
+1ff8 Beijing Gengtu Technology Co.Ltd
+ 2000 GT6910
+ 2010 GT6908
+1ff9 Inagile Electronic Technology Co., LTD
2000 Smart Link Ltd.
2800 SmartPCI2800 V.92 PCI Soft DFT
2001 Temporal Research Ltd
2003 Smart Link Ltd.
8800 LM-I56N
2004 Smart Link Ltd.
+202c CAEN S.p.A.
+ 5818 A5818
+2036 Netforward Microelectronics Co., Ltd.
+ 1618 NF1618 PCI Express Ethernet Controller
+ 1619 NF1618 Family Virtual Function
+2046 GXMICRO Technology (Shanghai) Co., Ltd.
2048 Beijing SpaceControl Technology Co.Ltd
20f4 TRENDnet
2116 ZyDAS Technology Corp.
@@ -26266,30 +27314,41 @@
22b8 Flex-Logix Technologies
22a0 Flex Logix InferX X1 Inference Accelerator
22db Missing Link Electronics, Inc.
+ 1200 NVMe Streamer EP ERD
2304 Colorgraphic Communications Corp.
+2321 Bruker AXS Inc.
2348 Racore
2010 8142 100VG/AnyLAN
+256c Graphics Technology (HK) Co., Ltd.
+ 006d HS610
2646 Kingston Technology Company, Inc.
0010 HyperX Predator PCIe AHCI SSD
-# KC2000 and KC2500 share the same DID
- 2262 KC2000/KC2500 NVMe SSD
- 2263 A2000 NVMe SSD
- 5008 U-SNS8154P3 NVMe SSD
- 500a DC1000B NVMe SSD
- 500b DC1000M NVMe SSD
+ 2262 KC2000/KC2500 NVMe SSD SM2262EN
+ 2263 A2000 NVMe SSD SM2263EN
+ 5008 A1000/U-SNS8154P3 x2 NVMe SSD
+ 500a DC1000B NVMe SSD E12DC
+ 500b DC1000M NVMe SSD SM2270
500c OM8PCP Design-In PCIe 3 NVMe SSD (DRAM-less)
500d OM3PDP3 NVMe SSD
- 500e NV1 NVMe SSD
+ 500e NV1 NVMe SSD E13T (DRAM-less)
+ 500f NV1 NVMe SSD SM2263XT (DRAM-less)
5010 OM8SBP NVMe PCIe SSD (DRAM-less)
- 5012 DC1500M NVMe SSD
-# KC3000 and Renegade share the same DID
- 5013 KC3000/Renegade NVMe SSD
- 5014 Design-In PCIe 4 NVMe SSD (TLC)
+ 5012 DC1500M NVMe SSD SM2270
+ 5013 KC3000/FURY Renegade NVMe SSD E18
+ 5014 OM8SEP4 Design-In PCIe 4 NVMe SSD (TLC) (DRAM-less)
5016 OM3PGP4 NVMe SSD
+ 5017 NV2 NVMe SSD SM2267XT (DRAM-less)
+ 5019 NV2 NVMe SSD E21T (DRAM-less)
+# 128GB
+ 501a OM8PGP4 Design-In PCIe 4 NVMe SSD (TLC) (DRAM-less)
501b OM8PGP4 NVMe PCIe SSD (DRAM-less)
- 501d NV2 NVMe PCIe 4 SSD 500GB (DRAM-less)
+ 501c NV2 NVMe SSD E19T (DRAM-less)
+ 501d NV2 NVMe SSD TC2200 (DRAM-less)
501f FURY Renegade NVMe SSD with heatsink
- 5021 Design-In PCIe 4 NVMe SSD (QLC)
+ 5021 OM8SEP4 Design-In PCIe 4 NVMe SSD (QLC) (DRAM-less)
+ 5022 OM8PGP4 Design-In PCIe 4 NVMe SSD (QLC) (DRAM-less)
+ 5023 NV2 NVMe SSD SM2269XT (DRAM-less)
+ 5024 DC2000B NVMe SSD E18DC
270b Xantel Corporation
270f Chaintech Computer Co. Ltd
2711 AVID Technology Inc.
@@ -26300,11 +27359,15 @@
2a18 Video Transcode Controller
2bd8 ROPEX Industrie-Elektronik GmbH
3000 Hansol Electronics Inc.
+3100 Dynabook Inc.
3112 Satelco Ingenieria S.A.
3130 AUDIOTRAK
3142 Post Impression Systems.
31ab Zonet
1faa ZEW1602 802.11b/g Wireless Adapter
+328f Shenzhen EMEET Technology Co., Ltd.
+ 004c OfficeCore M1A
+ 2019 REC 600 HD Webcam
3388 Hint Corp
0013 HiNT HC4 PCI to ISDN bridge, Multimedia audio controller
0014 HiNT HC4 PCI to ISDN bridge, Network controller
@@ -26340,6 +27403,7 @@
3475 Arista Networks, Inc.
34ba Ice Lake-LP PCI Express Root Port #3
3513 ARCOM Control Systems Ltd
+369a HighSecLabs, Ltd.
37d9 ITD Firm ltd.
1138 SCHD-PH-8 Phase detector
1140 VR-12-PCI 12-ch Relay Actuator Card
@@ -26347,6 +27411,8 @@
1142 PCI-CAN2
3842 eVga.com. Corp.
38ef 4Links
+# Wrong ID in board programmed sub-did in place of sub-vid
+393e Lenovo (wrong ID)
3d3d 3DLabs
0001 GLINT 300SX
0002 GLINT 500TX
@@ -26662,9 +27728,22 @@
5000 NV5000SC
4a14 5000 RT8029-Based Ethernet Adapter
4b10 Buslogic Inc.
+4b43 KonteX Inc.
4c48 LUNG HWA Electronics
4c4d Liquid-Markets GmbH
-4c52 LR-Link
+# Dev versions of TaSR, not for production.
+ 9998 TaSR
+# First versions of UberNIC, not for production.
+ 9999 UberNIC PoC/testing/dev
+4c52 LR-LINK
+ 1001 Smart Network Adapter
+ 4c52 a008 LREG1008PT Single-port 1Gb Smart Ethernet Network Adapter
+ 1002 Smart Network Adapter
+ 4c52 a006 LREG1006PT Single-port 1.2Gb Network Security Isolation Adapter
+ 1003 Smart Network Adapter
+ 1004 Smart Network Adapter
+ 4c52 b010 LREG1010PF Single-port 10Gb FPGA Network Security Isolation Adapter
+ 4c52 b011 LREG1011PF Dual-port 10Gb FPGA Network Security Isolation Adapter
4c53 SBS Technologies
0000 PLUSTEST device
4c53 3000 PLUSTEST card (PC104+)
@@ -26675,7 +27754,7 @@
4d51 MediaQ Inc.
0200 MQ-200
4d54 Microtechnica Co Ltd
-4d56 MATRIX VISION GmbH
+4d56 Balluff MV GmbH
0000 [mvHYPERION-CLe/CLb] CameraLink PCI Express x1 Frame Grabber
0001 [mvHYPERION-CLf/CLm] CameraLink PCI Express x4 Frame Grabber
0010 [mvHYPERION-16R16/-32R16] 16 Video Channel PCI Express x4 Frame Grabber
@@ -26712,6 +27791,7 @@
0d10 SB-365x Motion Feedback Device
2f00 SB-3642 Motion Feedback Device
3000 SB-3644 Motion Feedback Device
+4e4c NieL TechSolution
4e58 Nutanix, Inc.
0001 Virtual NVMe Controller
5045 University of Toronto
@@ -26723,6 +27803,9 @@
50b2 TerraTec Electronic GmbH
50ce System-on-Chip Engineering S.L.
0001 RELY-MIL-XMC-TSN-SWITCH
+ 0100 XMC_AV-Dual-ETH
+ 0101 XMC_AV-ETSN
+ 0102 XMC_AV-AFDX
5136 S S Technologies
5143 Qualcomm Inc
5145 Ensoniq (Old)
@@ -26867,6 +27950,8 @@
9043 Chrome 430 GT
9045 Chrome 430 ULP / 435 ULP / 440 GTX
9060 Chrome 530 GT
+# Found in VIA Embedded uH4 graphics card
+ 9070 Chrome 5400EW
9102 86C410 [Savage 2000]
1092 5932 Viper II Z200
1092 5934 Viper II Z200
@@ -26907,6 +27992,7 @@
0001 I-30xx Scanner Interface
5555 Genroco, Inc
0003 TURBOstor HFP-832 [HiPPI NIC]
+ 0004 Torrent QN16e [16-128 Channel MPEG QAM Modulator for DVB-C]
3b00 Epiphan DVI2PCIe video capture card
5646 Vector Fabrics BV
5654 VoiceTronix Pty Ltd
@@ -26974,6 +28060,7 @@
6903 TBS Technologies (wrong ID)
6909 TBS Technologies (wrong ID)
6910 TBS Technologies (wrong ID)
+6f67 NOVAIUM Technology
7063 pcHDTV
2000 HD-2000
3000 HD-3000
@@ -26986,7 +28073,7 @@
e100 PTP3100 PCIe PTP Slave Clock
7470 TP-LINK Technologies Co., Ltd.
7526 HongQin (Beijing) Technology Co., Ltd.
- 0082 HQ SSD 1TB
+ 0082 HQ SSD M.2
0083 HQ SSD 2TB M.2 NVMe
7604 O.N. Electronic Co Ltd.
78c0 Herrick Technology Laboratories, Inc. [HTL]
@@ -27083,6 +28170,8 @@
0100 2nd Generation Core Processor Family DRAM Controller
1028 04aa XPS 8300
1043 844d P8P67/P8H67 Series Motherboard
+ 1734 11b9 Esprimo P510 D3171 motherboard
+ 17aa 3070 ThinkCentre M91p
8086 200d DH61CR motherboard
0101 Xeon E3-1200/2nd Generation Core Processor Family PCI Express Root Port
1028 04b2 Vostro 3350
@@ -27091,6 +28180,8 @@
0102 2nd Generation Core Processor Family Integrated Graphics Controller
1028 04aa XPS 8300
1043 0102 P8H67 Series Motherboard
+ 1734 11b9 G640 [Sandy Bridge, HD Graphics] on Esprimo P510 D3171 motherboard
+ 17aa 3070 ThinkCentre M91p
0104 2nd Generation Core Processor Family DRAM Controller
1028 04a3 Precision M4600
1028 04b2 Vostro 3350
@@ -27175,6 +28266,7 @@
02b1 Comet Lake PCI Express Root Port #10
02b3 Comet Lake PCI Express Root Port #12
02b4 Comet Lake PCI Express Root Port #13
+ 02b5 Comet Lake PCI Express Root Port #14
02b8 Comet Lake PCI Express Root Port #1
02bc Comet Lake PCI Express Root Port #5
02bf Comet Lake PCI Express Root Port #8
@@ -27197,10 +28289,13 @@
02ef Comet Lake PCH-LP Shared SRAM
1028 09be Latitude 7410
02f0 Comet Lake PCH-LP CNVi WiFi
- 8086 0034 Wireless-AC 9560 160MHz
- 8086 0070 Wi-Fi 6 AX201 160MHz
- 8086 0074 Wi-Fi 6 AX201 160MHz
- 8086 4070 Wi-Fi 6 AX201 160MHz
+ 8086 0034 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9560 160MHz 2x2 [Jefferson Peak]
+ 8086 0070 Dual Band Wi-Fi 6(802.11ax) AX201 160MHz 2x2 [Harrison Peak]
+ 8086 0074 Dual Band Wi-Fi 6(802.11ax) AX201 160MHz 2x2 [Harrison Peak]
+ 8086 0234 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9560 80MHz 2x2 [Jefferson Peak]
+ 8086 0264 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9461 80MHz 1x1 [Jefferson Peak]
+ 8086 02a4 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9462 80MHz 1x1 [Jefferson Peak]
+ 8086 4070 Dual Band Wi-Fi 6(802.11ax) AX201 160MHz 2x2 [Harrison Peak]
02f5 Comet Lake PCH-LP SCS3
02f9 Comet Lake Thermal Subsytem
1028 09be Latitude 7410
@@ -27318,10 +28413,12 @@
06ed Comet Lake USB 3.1 xHCI Host Controller
06ef Comet Lake PCH Shared SRAM
06f0 Comet Lake PCH CNVi WiFi
- 1a56 1651 Wi-Fi 6 AX1650s 160MHz (201D2W) [Killer]
- 8086 0034 Wireless-AC 9560
- 8086 0074 Wi-Fi 6 AX201 160MHz
- 8086 02a4 Wireless-AC 9462
+ 1a56 1651 Dual Band Wi-Fi 6(802.11ax) Killer AX1650s 160MHz 2x2 [Cyclone Peak]
+ 1a56 1652 Dual Band Wi-Fi 6(802.11ax) Killer AX1650i 160MHz 2x2 [Cyclone Peak]
+ 8086 0034 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9560 160MHz 2x2 [Jefferson Peak]
+ 8086 0074 Dual Band Wi-Fi 6(802.11ax) AX201 160MHz 2x2 [Harrison Peak]
+ 8086 02a4 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9462 80MHz 1x1 [Jefferson Peak]
+ 8086 42a4 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9462 80MHz 1x1 [Jefferson Peak]
06f9 Comet Lake PCH Thermal Controller
06fb Comet Lake PCH Serial IO SPI Controller #2
0700 CE Media Processor A/V Bridge
@@ -27699,7 +28796,10 @@
8086 8d08 NVMe Datacenter SSD [3DNAND] VE 2.5" U.2 (P5316)
8086 8d1d NVMe Datacenter SSD [3DNAND] VE E1.L 9.5/18mm (P5316)
8086 c008 NVMe Datacenter SSD [3DNAND] SE U.2 15mm (P5530)
+ 0b69 Ponte Vecchio XT (2 Tile) [Data Center GPU Max 1450]
+ 0b6e Ponte Vecchio XT (1 Tile) [Data Center GPU Max 1100C]
0bd0 Ponte Vecchio XL (2 Tile)
+ 0bd4 Ponte Vecchio XT (2 Tile) [Data Center GPU Max 1550VG]
0bd5 Ponte Vecchio XT (2 Tile) [Data Center GPU Max 1550]
0bd6 Ponte Vecchio XT (2 Tile) [Data Center GPU Max 1550]
0bd7 Ponte Vecchio XT (2 Tile) [Data Center GPU Max 1350]
@@ -27809,6 +28909,9 @@
8086 0000 Ethernet Controller XXV710 Intel(R) FPGA Programmable Acceleration Card N3000 for Networking
8086 0001 Ethernet Controller XXV710 Intel(R) FPGA Programmable Acceleration Card N3000 for Networking
0d9f Ethernet Controller I225-IT
+ 0dc5 Ethernet Connection (23) I219-LM
+ 1028 0c06 Precision 3580
+ 0dc6 Ethernet Connection (23) I219-V
0dcd Ethernet Connection C825-X
0dd2 Ethernet Network Adapter I710
1137 0000 I710T4LG 4x1 GbE RJ45 PCIe NIC
@@ -28408,6 +29511,7 @@
10a9 8028 UV-BaseIO dual-port GbE
13a3 0037 DS4100 Secure Multi-Gigabit Server Adapter with Compression
15d9 a811 H8DGU
+ 4c52 9212 LREC9212PT Dual-port 10Gb Ethernet Network Adapter
8086 a01c Gigabit ET Dual Port Server Adapter
8086 a03c Gigabit ET Dual Port Server Adapter
8086 a04c Gigabit ET Dual Port Server Adapter
@@ -28425,6 +29529,7 @@
15d9 0605 X8SIL
15d9 060a X7SPA-H/X7SPA-HF Motherboard
15d9 060d C7SIM-Q Motherboard
+ 4c52 2201 LRES2201PT Single-port 1Gb Ethernet Network Adapter
8086 0001 Gigabit CT2 Desktop Adapter
8086 3578 Server Board S1200BTLR
8086 357a Server Board S1200BTS
@@ -28456,6 +29561,7 @@
8086 a02f Gigabit EF Dual Port Server Adapter
10e7 82576 Gigabit Network Connection
103c 31ff NC362i Integrated Dual Port BL-c Gigabit Server Adapter
+ 4c52 9701 LREC9701EF Single-port 1Gb Ethernet Network Adapter
10e8 82576 Gigabit Network Connection
8086 a02b Gigabit ET Quad Port Server Adapter
8086 a02c Gigabit ET Quad Port Server Adapter
@@ -28515,6 +29621,11 @@
1bd4 002f 10G SFP+ DP EP102Fi4A Adapter
1bd4 0032 10G SFP+ DP EP102Fi4 Adapter
1bd4 0067 F102I82599
+ 4c52 1024 LR-LINK LRES9804BF Quad-port 10Gb Ethernet Server Adapter
+ 4c52 3002 LRES3002PF Dual-port 10Gb Ethernet Server Adapter for OCP
+ 4c52 3012 LRES3012PF Dual-port 10Gb Ethernet Server Adapter for OCP
+ 4c52 9801 LREC9801BF Single-port 10Gb Ethernet Server Adapter
+ 4c52 9802 LREC9802BF Dual-port 10Gb Ethernet Server Adapter
8086 0002 Ethernet Server Adapter X520-DA2
8086 0003 Ethernet Server Adapter X520-2
8086 0006 Ethernet Server Adapter X520-1
@@ -28764,10 +29875,27 @@
125b Ethernet Controller I226-LM
125c Ethernet Controller I226-V
125d Ethernet Controller I226-IT
+ 12d1 Ethernet Controller E830-CC for backplane
+ 12d2 Ethernet Controller E830-CC for QSFP
+ 8086 0002 Ethernet Network Adapter E830-C-Q2 for OCP 3.0
+ 8086 0004 Ethernet Network Adapter E830-CC-Q1 for OCP 3.0
+ 12d3 Ethernet Controller E830-CC for SFP
+ 8086 0001 Ethernet Network Adapter E830-XXV-2 for OCP 3.0
+ 8086 0003 Ethernet Network Adapter E830-XXV-2
+ 8086 0004 Ethernet Network Adapter E830-XXV-4 for OCP 3.0
+ 12d4 Ethernet Controller E830-CC for SFP-DD
+ 12d5 Ethernet Controller E830-C for backplane
+ 12d8 Ethernet Controller E830-C for QSFP
+ 12da Ethernet Controller E830-C for SFP
+ 12dc Ethernet Controller E830-XXV for backplane
+ 12dd Ethernet Controller E830-XXV for QSFP
+ 12de Ethernet Controller E830-XXV for SFP
1360 82806AA PCI64 Hub PCI Bridge
1361 82806AA PCI64 Hub Controller (HRes)
8086 1361 82806AA PCI64 Hub Controller (HRes)
8086 8000 82806AA PCI64 Hub Controller (HRes)
+ 1452 Infrastructure Data Path Function
+ 145c Infrastructure Data Path Function
1460 82870P2 P64H2 Hub PCI Bridge
1461 82870P2 P64H2 I/OxAPIC
15d9 3480 P4DP6
@@ -28777,6 +29905,7 @@
1502 82579LM Gigabit Network Connection (Lewisville)
1028 04a3 Precision M4600
17aa 21ce ThinkPad T520
+ 17aa 3070 ThinkCentre M91p
8086 3578 Server Board S1200BTLR
8086 357a Server Board S1200BTS
1503 82579V Gigabit Network Connection
@@ -28858,6 +29987,20 @@
1bd4 0066 F014I350
1bd4 008a F012I350
1bd4 008d ENFI1100-T4
+ 4c52 0350 I350 1Gb 2-port Ethernet Network Adapter
+ 4c52 1350 LREC9222HT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 2003 LRES2003PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 2005 LRES2005PT Quad-port 1Gb Ethernet Network Adapter
+ 4c52 2006 LRES2006PT Six-port 1Gb Ethernet Network Adapter
+ 4c52 2008 LRES2008PT Eight-port 1Gb Ethernet Network Adapter
+ 4c52 2018 LRES2018PT Twelve-port 1Gb Ethernet Network Adapter
+ 4c52 2202 LRES2202PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 2217 LRES2217PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 3010 LRES3010PF Dual-port 1Gb Ethernet Server Adapter for OCP
+ 4c52 3023 LRES3023PT Quad-port 1Gb Ethernet Server Adapter for OCP
+ 4c52 3041 LRES3041PT Dual-port 1Gb Ethernet Server Adapter for OCP
+ 4c52 4006 LRES4006MT Quad-port 1Gb Ethernet Netwaork Adapter
+ 4c52 9712 LREC9712HT Dual-port 10Gb Ethernet Network Adapter
8086 0001 Ethernet Server Adapter I350-T4
8086 0002 Ethernet Server Adapter I350-T2
8086 0003 Ethernet Network Adapter I350-T4 for OCP NIC 3.0
@@ -28872,6 +30015,11 @@
1522 I350 Gigabit Fiber Network Connection
108e 7b17 Quad Port GbE PCIe 2.0 ExpressModule, MMF
108e 7b19 Dual Port GbE PCIe 2.0 Low Profile Adapter, MMF
+ 4c52 1006 LRES1006PF Six-port 1Gb Ethernet Network Adapter
+ 4c52 2203 LRES2203PF Dual-port 1Gb Ethernet Network Adapter
+ 4c52 9710 LREC9710HF Single-port 1Gb Ethernet Network Adapter
+ 4c52 9712 LREC9712HF Dual-port 1Gb Ethernet Network Adapter
+ 4c52 9714 LREC9714HF Quad-port 1Gb Ethernet Network Adapter
8086 0002 Ethernet Server Adapter I350-T2
8086 0003 Ethernet Server Adapter I350-F4
8086 0004 Ethernet Server Adapter I350-F2
@@ -28886,6 +30034,7 @@
103c 18d1 Ethernet 1Gb 2-port 361FLB Adapter
103c 1989 Ethernet 1Gb 2-port 363i Adapter
103c 339f Ethernet 1Gb 4-port 366M Adapter
+ 4c52 9714 LREC9714HT Quad-port 10Gb Ethernet Network Adapter
8086 1f52 1GbE 4P I350 Mezz
1524 I350 Gigabit Connection
1525 82567V-4 Gigabit Network Connection
@@ -28911,6 +30060,7 @@
1bd4 001a 10G base-T DP ER102Ti3 Rack Adapter
1bd4 0033 10G base-T DP EP102Ti3 Adapter
1bd4 0034 10G base-T DP EP102Ti3A Adapter
+ 4c52 9802 LREC9802BT Dual-port 10Gb Ethernet Network Adapter
8086 0001 Ethernet Converged Network Adapter X540-T2
8086 0002 Ethernet Converged Network Adapter X540-T1
8086 001a Ethernet Converged Network Adapter X540-T2
@@ -28935,9 +30085,23 @@
17aa 1100 ThinkServer Ethernet Server Adapter
17aa 1509 I210 Gigabit Network Connection
17aa 404d I210 PCIe 1Gb 1-Port RJ45 LOM
+ 17aa 407a I210 PCIe 1Gb 1-Port RJ45 LOM
+ 4c52 1051 LRES1051PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 1210 LREC9204CT Single-port 1Gb Ethernet Network Adapter
+ 4c52 2057 LRES2057PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 2206 LRES2206PT Single-port 1Gb Ethernet Network Adapter
+ 4c52 2210 LRES2210PT Single-port 1Gb Ethernet Network Adapter
+ 4c52 2211 LRES2211PT Single-port 1Gb Ethernet Network Adapter
+ 4c52 2214 LRES2214PT Single-port 1Gb Ethernet Network Adapter
+ 4c52 3002 LRES3002PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 3004 LRES3004PT Quad-port 1Gb Ethernet Network Adapter
8086 0001 Ethernet Server Adapter I210-T1
8086 0002 Ethernet Server Adapter I210-T1
1536 I210 Gigabit Fiber Network Connection
+ 4c52 2204 LRES2204PT Single-port 1Gb Ethernet Network Adapter
+ 4c52 2212 LRES2212PF Single-port 1Gb Ethernet Network Adapter
+ 4c52 2213 LRES2213PF Single-port 1Gb Ethernet Network Adapter
+ 4c52 6230 LREC6230PF Single-port 1Gb Ethernet Network Adapter
1537 I210 Gigabit Backplane Connection
1059 0110 T4005 1GbE interface
1059 0111 T4007 1GbE interface
@@ -28981,6 +30145,7 @@
8086 0001 Ethernet Server Bypass Adapter X520-SR2
8086 0002 Ethernet Server Bypass Adapter X520-LR2
1560 Ethernet Controller X540
+ 4c52 9801 LREC9801BT Single-port 10Gb Ethernet Network Adapter
1563 Ethernet Controller X550
1028 1fa8 Ethernet 10G 4P X550/I350 rNDC
1028 1fa9 Ethernet 10G 4P X550 rNDC
@@ -28995,6 +30160,8 @@
193d 1008 560T-B
193d 1009 560T-L
193d 1011 UN-NIC-ETH563T-sL-2P
+ 4c52 1025 LRES1025PT Dual-port 10Gb Ethernet Network Adapter
+ 4c52 9812 LREC9812BT Dual-port 10Gb Ethernet Network Adapter
8086 0001 Ethernet Converged Network Adapter X550-T2
8086 001a Ethernet Converged Network Adapter X550-T2
8086 001b Ethernet Server Adapter X550-T2 for OCP
@@ -29042,12 +30209,17 @@
1bd4 0065 F102IX710
1bd4 0074 Ethernet Network Adapter X710-BM2 for lldp
1bd4 008b F102IX710
+ 4c52 3003 LRES3003PF Quad-port 10Gb Ethernet Server Adapter for OCP
+ 4c52 3007 LRES3007PF Quad-port 10Gb Ethernet Server Adapter for OCP
+ 4c52 3039 LRES3039PF Dual-port 10Gb Ethernet Server Adapter for OCP
+ 4c52 9804 LREC9804BF Quad-port 10Gb Ethernet Server Adapter
+ 4c52 9812 LREC9812BF Dual-port 10Gb Ethernet Server Adapter
8086 0000 Ethernet Converged Network Adapter X710
8086 0001 Ethernet Converged Network Adapter X710-4
8086 0002 Ethernet Converged Network Adapter X710-4
8086 0004 Ethernet Converged Network Adapter X710-4
- 8086 0005 Ethernet 10G 4P X710 Adapter
- 8086 0006 Ethernet 10G 2P X710 Adapter
+ 8086 0005 Ethernet Converged Network Adapter X710
+ 8086 0006 Ethernet Converged Network Adapter X710
8086 0007 Ethernet Converged Network Adapter X710-2
8086 0008 Ethernet Converged Network Adapter X710-2
8086 0009 Ethernet Controller X710 for 10GbE SFP+
@@ -29096,6 +30268,8 @@
108e 7b1d 10Gb/40Gb Ethernet Adapter
1137 0000 Ethernet Converged NIC XL710-QDA2
1137 013c Ethernet Converged NIC XL710-QDA2
+ 4c52 3042 LRES3042PF Dual-port 40Gb Ethernet Server Adapter for OCP
+ 4c52 9902 LREC9902BF Dual-port 40Gb Ethernet Server Adapter
8086 0000 Ethernet Converged Network Adapter XL710-Q2
8086 0001 Ethernet Converged Network Adapter XL710-Q2
8086 0002 Ethernet Converged Network Adapter XL710-Q2
@@ -29103,6 +30277,7 @@
8086 0004 Ethernet Server Adapter XL710-Q2OCP
8086 0006 Ethernet Converged Network Adapter XL710-Q2
1584 Ethernet Controller XL710 for 40GbE QSFP+
+ 4c52 9901 LREC9901BF Single-port 40Gb Ethernet Server Adapter
8086 0000 Ethernet Converged Network Adapter XL710-Q1
8086 0001 Ethernet Converged Network Adapter XL710-Q1
8086 0002 Ethernet Converged Network Adapter XL710-Q1
@@ -29123,6 +30298,7 @@
1589 Ethernet Controller X710/X557-AT 10GBASE-T
108e 0000 Quad Port 10GBase-T Adapter
108e 7b1c Quad Port 10GBase-T Adapter
+ 4c52 9804 LREC9804BT Quad-port 10Gb Ethernet Network Adapter
8086 0000 Ethernet Converged Network Adapter X710-T
8086 0001 Ethernet Converged Network Adapter X710-T4
8086 0002 Ethernet Converged Network Adapter X710-T4
@@ -29156,6 +30332,7 @@
1374 023b Quad Port 25 Gigabit Ethernet PCI Express Server Adapter (PE31625G4I71LEU)
1590 0000 Ethernet Network Adapter XXV710-2
1590 0253 Ethernet 10/25/Gb 2-port 661SFP28 Adapter
+ 4c52 3017 LRES3017PF Dual-port 25Gb Ethernet Server Adapter for OCP
8086 0000 Ethernet Network Adapter XXV710
8086 0001 Ethernet Network Adapter XXV710-2
8086 0002 Ethernet Network Adapter XXV710-2
@@ -29175,6 +30352,8 @@
1592 Ethernet Controller E810-C for QSFP
1137 02bf E810CQDA2 2x100 GbE QSFP28 PCIe NIC
193d 1050 NIC-ETH1060F-LP-2P 2x100GbE Ethernet PCIe Card
+ 4c52 1014 LRES1014PF Dual-port 100Gb Ethernet Server Adapter
+ 4c52 3026 LRES3026PF Dual-port 100Gb Ethernet Server Adapter for OCP
8086 0001 Ethernet Network Adapter E810-C-Q1
8086 0002 Ethernet Network Adapter E810-C-Q2
8086 0004 Ethernet Network Adapter E810-C-Q2
@@ -29191,10 +30370,13 @@
8086 0011 Ethernet Network Adapter E810-C-Q1 for OCP3.0
8086 0012 Ethernet 100G 2P E810-C-st Adapter
8086 0013 Ethernet Network Adapter E810-C-Q1 for OCP 3.0
+ 8086 0014 Ethernet 100G 2P E810-2C Adapter
1593 Ethernet Controller E810-C for SFP
1137 02c3 E810XXVDA4 4x25/10 GbE SFP28 PCIe NIC
1137 02e9 E810XXVDA4TG 4x25/10 GbE SFP28 PCIe NIC
1137 02ea E810XXVDA4T 4x25/10 GbE SFP28 PCIe NIC
+ 4c52 1023 LRES1023PF Quad-port 25Gb Ethernet Server Adapter
+ 4c52 3027 LRES3027PF Quad-port 25Gb Ethernet Server Adapter for OCP
8086 0002 Ethernet Network Adapter E810-L-2
8086 0005 Ethernet Network Adapter E810-XXV-4
8086 0006 Ethernet Network Adapter E810-XXV-4
@@ -29209,6 +30391,7 @@
8086 0010 Ethernet 25G 4P E810-XXV-st Adapter
8086 4010 Ethernet Network Adapter E810-XXV-4
8086 4013 Ethernet Network Adapter E810-XXV-4 for OCP 3.0
+ 8086 401c Ethernet Network Adapter E810-XXV-4 for OCP 3.0
1599 Ethernet Controller E810-XXV for backplane
8086 0001 Ethernet 25G 2P E810-XXV-k Mezz
159a Ethernet Controller E810-XXV for QSFP
@@ -29222,6 +30405,8 @@
1bd4 0083 Ethernet Network Adapter E810-XXVAM2 for lldp
1bd4 00a0 S252IE810
1eec 0102 VSE-225-41E Dual-port 10Gb/25Gb Etherent PCIe
+ 4c52 0003 LRES1021PF Dual-port 25Gb Ethernet Server Adapter
+ 4c52 3029 LRES3029PF Dual-port 25Gb Ethernet Server Adapter for OCP
8086 0001 Ethernet 25G 2P E810-XXV OCP
8086 0002 Ethernet 25G 2P E810-XXV Adapter
8086 0003 Ethernet Network Adapter E810-XXV-2
@@ -29253,6 +30438,7 @@
15b6 DSL6540 USB 3.1 Controller [Alpine Ridge]
15b7 Ethernet Connection (2) I219-LM
15b8 Ethernet Connection (2) I219-V
+ 1462 7994 H110M ECO/GAMING
1462 7a72 H270 PC MATE
15b9 Ethernet Connection (3) I219-LM
15bb Ethernet Connection (7) I219-LM
@@ -29274,6 +30460,7 @@
8086 0001 Ethernet SDI Adapter FM10420-100GbE-QDA2
8086 0002 Ethernet SDI Adapter FM10840-MTP2
15d1 Ethernet Controller 10G X550T
+ 4c52 9811 LREC9811BT Single-port 10Gb Ethernet Network Adapter
8086 0002 Ethernet Converged Network Adapter X550-T1
8086 001b Ethernet Server Adapter X550-T1 for OCP
8086 0021 Ethernet Converged Network Adapter X550-T1
@@ -29310,9 +30497,11 @@
15ef JHL7540 Thunderbolt 3 Bridge [Titan Ridge DD 2018]
15f0 JHL7540 Thunderbolt 3 USB Controller [Titan Ridge DD 2018]
15f2 Ethernet Controller I225-LM
+ 4c52 2031 LRES2031PT Single-port 2.5Gb Ethernet Network Adapter
8086 0001 Ethernet Network Adapter I225-T1
8086 0002 Ethernet Network Adapter I225-T1
15f3 Ethernet Controller I225-V
+ 4c52 2031 LRES2031PT Single-port 2.5Gb Ethernet Network Adapter
8086 0003 Intel(R) Ethernet Controller (3) I225-V
15f4 Ethernet Connection (15) I219-LM
15f5 Ethernet Connection (15) I219-V
@@ -29323,6 +30512,7 @@
15fc Ethernet Connection (13) I219-V
15ff Ethernet Controller X710 for 10GBASE-T
1014 0000 PCIe3 4-port 10GbE Base-T Adapter
+ 108e 7b1f Quad Port 10GBase-T Adapter - CP
1137 0000 X710TLG GbE RJ45 PCIe NIC
1137 02c1 X710T2LG 2x10 GbE RJ45 PCIe NIC
1137 02c2 X710T4LG 4x10 GbE RJ45 PCIe NIC
@@ -29330,6 +30520,8 @@
1137 02da Ethernet Network Adapter X710-T4L OCP 3.0
# NIC-ETH565T-3S-2P OCP3.0 2x10G Base-T Card
193d 1082 NIC-ETH565T-3S-2P
+ 4c52 1012 LRES1012PT Dual-port 10Gb Ethernet Network Adapter
+ 4c52 3021 LRES3021PT Dual-port 10Gb Ethernet Server Adapter for OCP
8086 0000 Ethernet Network Adapter X710-TL
8086 0001 Ethernet Network Adapter X710-T4L
8086 0002 Ethernet Network Adapter X710-T4L
@@ -29405,10 +30597,18 @@
1898 Ethernet Connection E822-L for SFP
1899 Ethernet Connection E822-L/X557-AT 10GBASE-T
189a Ethernet Connection E822-L 1GbE
- 18a0 C4xxx Series QAT
+ 18a0 Atom Processor P5xxx Series QAT
18a1 C4XXX Series QAT Virtual Function
+ 18b3 Atom Processor P5xxx Series SATA Controller
+ 18d0 Atom Processor P5xxx Series USB xHCI Controller
+ 18d3 Atom Processor P5xxx Series MEI Controller
+ 18d6 Atom Processor P5xxx Series MEI Controller
+ 18df Atom Processor P5xxx Series SMBus Controller
+ 18e0 Atom Processor P5xxx Series SPI Controller
+ 18e1 Atom Processor P5xxx Series Trace Hub
18ee 200xx Series QAT
18ef 200xx Series QAT Virtual Function
+ 18f3 Atom Processor P5xxx Series SATA Controller
1900 Xeon E3-1200 v5/E3-1500 v5/6th Gen Core Processor Host Bridge/DRAM Registers
1901 6th-10th Gen Core Processor PCIe Controller (x16)
1902 HD Graphics 510
@@ -29442,6 +30642,7 @@
1911 Xeon E3-1200 v5/v6 / E3-1500 v5 / 6th/7th/8th Gen Core Processor Gaussian Mixture Model
1028 0869 Vostro 3470
1028 09be Latitude 7410
+ 1462 7994 H110M ECO/GAMING
1462 7a72 H270 PC MATE
17aa 2247 ThinkPad T570
17aa 224f ThinkPad X1 Carbon 5th Gen
@@ -29572,6 +30773,9 @@
1b48 82597EX 10GbE Ethernet Controller
8086 a01f PRO/10GbE LR Server Adapter
8086 a11f PRO/10GbE LR Server Adapter
+# Also rebranded as Montage IOH M88IO3020
+ 1bcd Emmitsburg (C740 Family) USB 3.2 Gen 1 xHCI Controller
+ 1bd4 00a5 RS0800I5H16i
1bd2 Sapphire Rapids SATA AHCI Controller
1bf2 Sapphire Rapids SATA AHCI Controller
1c00 6 Series/C200 Series Chipset Family Desktop SATA Controller (IDE mode, ports 0-3)
@@ -29579,6 +30783,7 @@
1c02 6 Series/C200 Series Chipset Family 6 port Desktop SATA AHCI Controller
1028 04aa XPS 8300
1043 844d P8 series motherboard
+ 17aa 3070 ThinkCentre M91p
8086 200d DH61CR motherboard
8086 7270 Server Board S1200BT Family
1c03 6 Series/C200 Series Chipset Family 6 port Mobile SATA AHCI Controller
@@ -29630,6 +30835,7 @@
1043 8418 P8P67 Deluxe Motherboard
1043 841b P8H67 Series Motherboard
17aa 21cf ThinkPad T520
+ 17aa 3070 ThinkCentre M91p
# Realtek ALC888 audio codec
8086 2008 DQ67SW board
8086 200d DH61CR motherboard
@@ -29641,6 +30847,7 @@
1028 04da Vostro 3750
1043 844d P8 series motherboard
17aa 21cf ThinkPad T520
+ 17aa 3070 ThinkCentre M91p
8086 200d DH61CR motherboard
8086 7270 Server Board S1200BT Family / Apple MacBook Pro 8,1/8,2
1c24 6 Series/C200 Series Chipset Family Thermal Management Controller
@@ -29652,6 +30859,7 @@
1028 04da Vostro 3750
1043 844d P8 series motherboard
17aa 21cf ThinkPad T520
+ 17aa 3070 ThinkCentre M91p
8086 200d DH61CR motherboard
8086 7270 Server Board S1200BT Family / Apple MacBook Pro 8,1/8,2
1c27 6 Series/C200 Series Chipset Family USB Universal Host Controller #1
@@ -29665,22 +30873,26 @@
1028 04da Vostro 3750
1043 844d P8 series motherboard
17aa 21cf ThinkPad T520
+ 17aa 3070 ThinkCentre M91p
8086 200d DH61CR motherboard
8086 7270 Server Board S1200BT Family / Apple MacBook Pro 8,1/8,2
1c33 6 Series/C200 Series Chipset Family LAN Controller
1c35 6 Series/C200 Series Chipset Family VECI Controller
1c3a 6 Series/C200 Series Chipset Family MEI Controller #1
+ 1028 0493 Latitude E6420
1028 04a3 Precision M4600
1028 04aa XPS 8300
1028 04b2 Vostro 3350
1028 04da Vostro 3750
1043 844d P8 series motherboard
17aa 21cf ThinkPad T520
+ 17aa 3070 ThinkCentre M91p
8086 200d DH61CR motherboard
8086 7270 Apple MacBookPro8,2 [Core i7, 15", 2011]
1c3b 6 Series/C200 Series Chipset Family MEI Controller #2
1c3c 6 Series/C200 Series Chipset Family IDE-r Controller
1c3d 6 Series/C200 Series Chipset Family KT Controller
+ 17aa 3070 ThinkCentre M91p
1c40 6 Series/C200 Series Chipset Family LPC Controller
1c41 Mobile SFF 6 Series Chipset Family LPC Controller
1c42 6 Series/C200 Series Chipset Family LPC Controller
@@ -29702,6 +30914,7 @@
1c4c Q65 Express Chipset LPC Controller
1c4d QS67 Express Chipset LPC Controller
1c4e Q67 Express Chipset LPC Controller
+ 17aa 3070 ThinkCentre M91p
1c4f QM67 Express Chipset LPC Controller
1028 04a3 Precision M4600
17aa 21cf ThinkPad T520
@@ -29808,10 +31021,12 @@
1d74 C608/C606/X79 series chipset PCI Express Upstream Port
1d76 C600/X79 series chipset Multi-Function Glue
1e00 7 Series/C210 Series Chipset Family 4-port SATA Controller [IDE mode]
+ 1734 11d6 B75 [Ivy Bridge] chipset on Esprimo P510 D3171 motherboard
1e01 7 Series Chipset Family 4-port SATA Controller [IDE mode]
144d c652 NP300E5C series laptop
1e02 7 Series/C210 Series Chipset Family 6-port SATA Controller [AHCI mode]
1043 84ca P8 series motherboard
+ 1734 11d6 B75 [Ivy Bridge] chipset on Esprimo P510 D3171 motherboard
1849 1e02 Motherboard
1e03 7 Series Chipset Family 6-port SATA Controller [AHCI mode]
1043 108d VivoBook X202EV
@@ -29834,6 +31049,7 @@
1043 84ca P8H77-I Motherboard
10cf 16e9 LIFEBOOK E752
144d c652 NP300E5C series laptop
+ 1734 11d6 B75 [Ivy Bridge] chipset on Esprimo P510 D3171 motherboard
1849 1e10 Motherboard
1e12 7 Series/C210 Series Chipset Family PCI Express Root Port 2
1043 108d VivoBook X202EV
@@ -29850,6 +31066,7 @@
1043 84ca P8H77-I Motherboard
1849 1e18 Motherboard
1e1a 7 Series/C210 Series Chipset Family PCI Express Root Port 6
+ 1734 11d6 B75 [Ivy Bridge] chipset on Esprimo P510 D3171 motherboard
1849 1e1a Motherboard
1e1c 7 Series/C210 Series Chipset Family PCI Express Root Port 7
1e1e 7 Series/C210 Series Chipset Family PCI Express Root Port 8
@@ -29864,6 +31081,7 @@
1043 8445 P8Z77-V LX Motherboard
10cf 1757 LIFEBOOK E752
144d c652 NP300E5C series laptop
+ 1734 11d8 B75 [Ivy Bridge] chipset CX20642 audio controller on Esprimo P510 D3171 motherboard
1849 1898 Z77 Extreme4 motherboard
1e22 7 Series/C216 Chipset Family SMBus Controller
1043 108d VivoBook X202EV
@@ -29872,6 +31090,7 @@
1043 84ca P8 series motherboard
10cf 16e6 LIFEBOOK E752
144d c652 NP300E5C series laptop
+ 1734 11d6 B75 [Ivy Bridge] chipset on Esprimo P510 D3171 motherboard
1849 1e22 Motherboard
1e24 7 Series/C210 Series Chipset Family Thermal Management Controller
1043 1517 Zenbook Prime UX31A
@@ -29883,6 +31102,7 @@
1043 84ca P8 series motherboard
10cf 16e8 LIFEBOOK E752
144d c652 NP300E5C series laptop
+ 1734 11d6 B75 [Ivy Bridge] chipset USB 2.0 controller on Esprimo P510 D3171 motherboard
1849 1e26 Motherboard
1e2d 7 Series/C216 Chipset Family USB Enhanced Host Controller #2
1043 108d VivoBook X202EV
@@ -29891,6 +31111,7 @@
1043 84ca P8 series motherboard
10cf 16e8 LIFEBOOK E752
144d c652 NP300E5C series laptop
+ 1734 11d6 B75 [Ivy Bridge] chipset USB 2.0 controller on Esprimo P510 D3171 motherboard
1849 1e2d Motherboard
1e31 7 Series/C210 Series Chipset Family USB xHCI Host Controller
103c 179b Elitebook 8470p
@@ -29900,6 +31121,7 @@
1043 1517 Zenbook Prime UX31A
1043 84ca P8 series motherboard
10cf 16ee LIFEBOOK E752
+ 1734 11d6 B75 [Ivy Bridge] chipset USB 3.0 controller on Esprimo P510 D3171 motherboard
17aa 21f3 ThinkPad T430
1849 1e31 Motherboard
1e33 7 Series/C210 Series Chipset Family LAN Controller
@@ -29910,6 +31132,7 @@
1043 84ca P8 series motherboard
10cf 16ea LIFEBOOK E752
144d c652 NP300E5C series laptop
+ 1734 11d6 B75 [Ivy Bridge] chipset on Esprimo P510 D3171 motherboard
1849 1e3a Motherboard
1e3b 7 Series/C210 Series Chipset Family MEI Controller #2
1e3c 7 Series/C210 Series Chipset Family IDE-r Controller
@@ -29925,6 +31148,7 @@
1e47 Q77 Express Chipset LPC Controller
1e48 Q75 Express Chipset LPC Controller
1e49 B75 Express Chipset LPC Controller
+ 1734 11d6 Esprimo P510 D3171 motherboard
1e4a H77 Express Chipset LPC Controller
1043 84ca P8H77-I Motherboard
1e4b 7 Series Chipset Family LPC Controller
@@ -30314,10 +31538,12 @@
1028 0211 Optiplex 755
1028 02da OptiPlex 980
1028 04f7 PowerEdge R320 server
+ 103c 130a Z600 Workstation
103c 2a3b Pavilion A1512X
103c 2a6f Asus IPIBL-LB Motherboard
103c 31fe ProLiant DL140 G3
103c 330b ProLiant ML150 G6 Server
+ 1043 81ec P5B Motherboard
# same ID possibly also on other ASUS boards
1043 8277 P5K PRO Motherboard
1043 844d P8 series motherboard
@@ -30327,7 +31553,9 @@
1462 7418 Wind PC MS-7418
15d9 060d C7SIM-Q Motherboard
15d9 9680 X7DBN Motherboard
+ 1734 11d6 B75 [Ivy Bridge] chipset on Esprimo P510 D3171 motherboard
1775 11cc CC11/CL11
+ 17aa 3070 ThinkCentre M91p
8086 7270 Server Board S1200BTS
2450 82801E ISA Bridge (LPC)
2452 82801E USB Controller
@@ -30839,7 +32067,18 @@
8086 3806 Optane Memory 16GB
8086 3810 Optane Memory M10 16GB
2525 Optane NVME SSD P1600X Series
- 2526 Wireless-AC 9260
+ 2526 Wi-Fi 5(802.11ac) Wireless-AC 9x6x [Thunder Peak]
+ 8086 0014 Dual Band Wi-Fi 5 Wireless-AC 9260 160MHz 2x2
+ 8086 0210 Dual Band Wi-Fi 5 Wireless-AC 9260 80MHz 2x2
+ 8086 0214 Dual Band Wi-Fi 5 Wireless-AC 9260 80MHz 2x2
+ 8086 0230 Dual Band Wi-Fi 5 Wireless-AC 9560 80MHz 2x2
+ 8086 0234 Dual Band Wi-Fi 5 Wireless-AC 9560 80MHz 2x2
+ 8086 0238 Dual Band Wi-Fi 5 Wireless-AC 9560 80MHz 2x2
+ 8086 023c Dual Band Wi-Fi 5 Wireless-AC 9560 80MHz 2x2
+ 8086 0260 Dual Band Wi-Fi 5 Wireless-AC 9461 80MHz 1x1
+ 8086 0264 Dual Band Wi-Fi 5 Wireless-AC 9461 80MHz 1x1
+ 8086 02a0 Dual Band Wi-Fi 5 Wireless-AC 9462 80MHz 1x1
+ 8086 02a4 Dual Band Wi-Fi 5 Wireless-AC 9462 80MHz 1x1
2530 82850 850 (Tehama) Chipset Host Bridge (MCH)
1028 00c7 Dimension 8100
147b 0507 TH7II-RAID
@@ -31349,11 +32588,11 @@
2723 Wi-Fi 6 AX200
1a56 1654 Killer Wi-Fi 6 AX1650x (AX200NGW)
8086 0084 Wi-Fi 6 AX200NGW
- 2725 Wi-Fi 6 AX210/AX211/AX411 160MHz
+ 2725 Wi-Fi 6E(802.11ax) AX210/AX1675* 2x2 [Typhoon Peak]
+ 1a56 1673 Killer AX1675w 160MHz
+ 1a56 1674 Killer Wi-Fi 6E AX1675x 160MHz
8086 0020 Wi-Fi 6 AX210 160MHz
8086 0024 Wi-Fi 6 AX210 160MHz
- 8086 0090 Wi-Fi 6 AX211 160MHz
- 8086 00b0 Wi-Fi 6 AX411 160MHz
8086 0310 Wi-Fi 6 AX210 160MHz
8086 0510 Wi-Fi 6 AX210 160MHz
8086 0a10 Wi-Fi 6 AX210 160MHz
@@ -31363,6 +32602,13 @@
8086 6024 Wi-Fi 6 AX210 160MHz
8086 e020 Wi-Fi 6 AX210 160MHz
8086 e024 Wi-Fi 6 AX210 160MHz
+ 272b Wi-Fi 7(802.11be) AX1775*/AX1790*/BE20*/BE401/BE1750* 2x2
+ 8086 00f0 BE200 320MHz [Gale Peak]
+ 8086 00f4 BE200 320MHz [Gale Peak]
+ 8086 02f4 BE202 160MHz [Misty Peak]
+ 8086 40f0 BE200 320MHz [Gale Peak]
+ 8086 42f4 BE202 160MHz [Misty Peak]
+ 8086 e0f4 BE200 320MHz [Gale Peak]
2770 82945G/GZ/P/PL Memory Controller Hub
1028 01ad OptiPlex GX620
103c 2a3b Pavilion A1512X
@@ -31768,20 +33014,20 @@
103c 2a6f Asus IPIBL-LB Motherboard
1043 8277 P5K PRO Motherboard: 82801IR [ICH9R]
1462 7345 MS-7345 Motherboard: Intel 82801I/IR [ICH9/ICH9R]
- 2823 C610/X99 series chipset sSATA Controller [RAID mode]
+ 2823 sSATA Controller [RAID Mode]
2824 82801HB (ICH8) 4 port SATA Controller [AHCI mode]
1043 81ec P5B
2825 82801HR/HO/HH (ICH8R/DO/DH) 2 port SATA Controller [IDE mode]
1028 01da OptiPlex 745
1462 7235 P965 Neo MS-7235 mainboard
- 2826 C600/X79 series chipset SATA RAID Controller
+ 2826 SATA Controller [RAID Mode]
1d49 0100 Intel RSTe SATA Software RAID
1d49 0101 Intel RSTe SATA Software RAID
1d49 0102 Intel RSTe SATA Software RAID
1d49 0103 Intel RSTe SATA Software RAID
1d49 0104 Intel RSTe SATA Software RAID
1d49 0105 Intel RSTe SATA Software RAID
- 2827 C610/X99 series chipset sSATA Controller [RAID mode]
+ 2827 sSATA Controller [RAID Mode]
2828 82801HM/HEM (ICH8M/ICH8M-E) SATA Controller [IDE mode]
1028 01f3 Inspiron 1420
103c 30c0 Compaq 6710b
@@ -31803,6 +33049,7 @@
282a 82801 Mobile SATA Controller [RAID mode]
1028 040b Latitude E6510
e4bf 50c1 PC1-GROOVE
+ 282f tSATA Controller [RAID Mode]
2830 82801H (ICH8 Family) USB UHCI Controller #1
1025 0121 Aspire 5920G
1028 01da OptiPlex 745
@@ -31937,27 +33184,32 @@
1028 01da OptiPlex 745
103c 30c1 Compaq 6910p
1043 1017 X58LE
+ 1043 81ec P5B
104d 902d VAIO VGN-NR120E
17aa 20ad ThinkPad T61/R61
17c0 4083 Medion WIM 2210 Notebook PC [MD96850]
2841 82801H (ICH8 Family) PCI Express Port 2
103c 30c1 Compaq 6910p
1043 1017 X58LE
+ 1043 81ec P5B
104d 902d VAIO VGN-NR120E
17aa 20ad ThinkPad T61/R61
17c0 4083 Medion WIM 2210 Notebook PC [MD96850]
2843 82801H (ICH8 Family) PCI Express Port 3
1043 1017 X58LE
+ 1043 81ec P5B
104d 902d VAIO VGN-NR120E
17aa 20ad ThinkPad T61/R61
17c0 4083 Medion WIM 2210 Notebook PC [MD96850]
2845 82801H (ICH8 Family) PCI Express Port 4
1043 1017 X58LE
+ 1043 81ec P5B
17aa 20ad ThinkPad T61/R61
17c0 4083 Medion WIM 2210 Notebook PC [MD96850]
2847 82801H (ICH8 Family) PCI Express Port 5
1028 01da OptiPlex 745
103c 30c1 Compaq 6910p
+ 1043 81ec P5B
17aa 20ad ThinkPad T61/R61
17c0 4083 Medion WIM 2210 Notebook PC [MD96850]
2849 82801H (ICH8 Family) PCI Express Port 6
@@ -32823,6 +34075,7 @@
2ffc Xeon E7 v3/Xeon E5 v3/Core i7 System Address Decoder & Broadcast Registers
2ffd Xeon E7 v3/Xeon E5 v3/Core i7 System Address Decoder & Broadcast Registers
2ffe Xeon E7 v3/Xeon E5 v3/Core i7 System Address Decoder & Broadcast Registers
+ 3101 Killer E3100X 2.5 Gigabit Ethernet Controller
3140 Easel/Monette Hill Image Processor [Pixel Visual Core]
3165 Wireless 3165
8086 4010 Dual Band Wireless AC 3165 [Stone Peak 1x1]
@@ -33226,6 +34479,7 @@
37d9 X722 Hyper-V Virtual Function
3882 Ice Lake LPC Controller
38a4 Ice Lake SPI Controller
+ 38c8 Ice Lake-LP Smart Sound Technology Audio Controller
38e0 Ice Lake Management Engine Interface
3a00 82801JD/DO (ICH10 Family) 4-port SATA IDE Controller
3a02 82801JD/DO (ICH10 Family) SATA AHCI Controller
@@ -33800,6 +35054,8 @@
4538 Elkhart Lake PCI-e Root Complex
4555 Elkhart Lake [UHD Graphics Gen11 16EU]
4571 Elkhart Lake [UHD Graphics Gen11 32EU]
+# Seems to be different than ID 4602
+ 4601 Alder Lake-U15 Host and DRAM Controller
4602 Alder Lake Host and DRAM Controller
460d 12th Gen Core Processor PCI Express x16 Controller #1
461d Alder Lake Innovation Platform Framework Processor Participant
@@ -33822,8 +35078,10 @@
4641 12th Gen Core Processor Host Bridge/DRAM Registers
1028 0b10 Precision 3571
464d 12th Gen Core Processor PCI Express x4 Controller #0
+ 464e Alder Lake-N Thunderbolt 4 USB Controller
464f 12th Gen Core Processor Gaussian & Neural Accelerator
1028 0b10 Precision 3571
+ 4650 12th Gen Core Processor Host Bridge
465d Alder Lake Imaging Signal Processor
4660 12th Gen Core Processor Host Bridge/DRAM Registers
4668 12th Gen Core Processor Host Bridge/DRAM Registers
@@ -33848,6 +35106,7 @@
46b0 AlderLake-P [Iris Xe Graphics]
46b1 AlderLake-P [Iris Xe Graphics]
46b3 Alder Lake-UP3 GT1 [UHD Graphics]
+ 1025 161d N22C6 [Extensa 15 EX215-55]
46b6 AlderLake-P [Iris Xe Graphics]
46b8 AlderLake-P [Iris Xe Graphics]
46ba AlderLake-P [Iris Xe Graphics]
@@ -33857,6 +35116,8 @@
46d0 Alder Lake-N [UHD Graphics]
46d1 Alder Lake-N [UHD Graphics]
46d2 Alder Lake-N [UHD Graphics]
+ 46d3 Alder Lake-N [Intel Graphics]
+ 46d4 Alder Lake-N [Intel Graphics]
4905 DG1 [Iris Xe MAX Graphics]
4906 DG1 [Iris Xe Pod]
4907 SG1 [Server GPU SG-18M]
@@ -33864,8 +35125,13 @@
4908 DG1 [Iris Xe Graphics]
4909 DG1 [Iris Xe MAX 100]
4940 4xxx Series QAT
- 4942 4xxx Series QAT
- 4944 4xxx Series QAT
+ 4941 4xxx Series QAT Virtual Function
+ 4942 401xx Series QAT
+ 4943 401xx Series QAT Virtual Function
+ 4944 402xx Series QAT
+ 4945 402xx Series QAT Virtual Function
+ 4946 420xx Series QAT
+ 4947 420xx Series QAT Virtual Function
4b00 Elkhart Lake eSPI Controller
4b23 Elkhart Lake SMBus Controller
4b24 Elkhart Lake SPI (Flash) Controller
@@ -33885,21 +35151,45 @@
4c8b RocketLake-S GT1 [UHD Graphics 730]
4c90 RocketLake-S GT1 [UHD Graphics P750]
4c9a RocketLake-S [UHD Graphics]
+ 4d87 Jasper Lake eSPI Controller
4da3 Jasper Lake SMBus
4da4 Jasper Lake SPI Controller
+ 4da8 Jasper Lake Serial IO UART Controller #0
+ 4dab Jasper Lake Serial IO SPI Controller #1
+ 4db8 Jasper Lake PCIe Root Port #1
+ 4db9 Jasper Lake PCIe Root Port #2
+ 4dbc Jasper Lake PCIe Root Port #5
+ 4dbe Jasper Lake PCIe Root Port #7
+ 4dc4 Jasper Lake eMMC Controller
+ 4dc5 Jasper Lake Serial IO I2C Host Controller #4
+ 4dc6 Jasper Lake Serial IO I2C Host Controller #5
4dc8 Jasper Lake HD Audio
+ 4dd3 Jasper Lake SATA AHCI Controller
4de0 Management Engine Interface
- 4de8 Serial IO I2C Host Controller
- 4de9 Serial IO I2C Host Controller
+ 4de8 Jasper Lake Serial IO I2C Host Controller #0
+ 4de9 Jasper Lake Serial IO I2C Host Controller #1
+ 4dea Jasper Lake Serial IO I2C Host Controller #2
+ 4deb Jasper Lake Serial IO I2C Host Controller #3
+ 4ded Jasper Lake USB 3.1 xHCI Host Controller
+ 4def Jasper Lake Shared SRAM
4df0 Wi-Fi 6 AX201 160MHz
+ 4df8 Jasper Lake SD Controller
4e03 Dynamic Tuning service
4e19 JasperLake IPU
4e55 JasperLake [UHD Graphics]
4e61 JasperLake [UHD Graphics]
4e71 JasperLake [UHD Graphics]
- 4f80 DG2
- 4f81 DG2
- 4f82 DG2
+ 4f80 DG2 [Intel Xe Graphics]
+ 4f81 DG2 [Intel Xe Graphics]
+ 4f82 DG2 [Intel Xe Graphics]
+ 4f83 DG2 [Intel Xe Graphics]
+ 4f84 DG2 [Intel Xe Graphics]
+ 4f85 DG2 [Intel Xe Graphics]
+ 4f86 DG2 [Intel Xe Graphics]
+ 4f87 DG2 [Intel Xe Graphics]
+ 4f88 DG2 [Intel Xe Graphics]
+ 4f89 ACMP [Xe Graphics]
+ 4f8c ACMP [Xe Graphics]
4f90 DG2 Audio Controller
4f91 DG2 Audio Controller
4f92 DG2 Audio Controller
@@ -33955,10 +35245,13 @@
1028 0b10 Precision 3571
5187 Alder Lake LPC Controller
519d Raptor Lake LPC/eSPI Controller
+ 1028 0c06 Precision 3580
51a3 Alder Lake PCH-P SMBus Host Controller
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
51a4 Alder Lake-P PCH SPI Controller
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
51a8 Alder Lake PCH UART #0
51a9 Alder Lake PCH UART #1
51aa Alder Lake SPI Controller
@@ -33966,12 +35259,14 @@
51b0 Alder Lake PCI Express Root Port #9
51b1 Alder Lake PCI Express x1 Root Port #10
51bb Alder Lake-P PCH PCIe Root Port #4
+ 51bd Alder Lake-P PCH PCIe Root Port #6
51bf Alder Lake PCH-P PCI Express Root Port #9
51c5 Alder Lake-P Serial IO I2C Controller #0
51c6 Alder Lake-P Serial IO I2C Controller #1
51c8 Alder Lake PCH-P High Definition Audio Controller
1028 0b10 Precision 3571
51ca Raptor Lake-P/U/H cAVS
+ 1028 0c06 Precision 3580
51cc Alder Lake Smart Sound Technology Audio Controller
51d3 Alder Lake-P SATA AHCI Controller
1028 0b10 Precision 3571
@@ -33979,33 +35274,65 @@
51d9 Alder Lake-P Serial IO I2C Controller #3
51e0 Alder Lake PCH HECI Controller
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
51e3 Alder Lake AMT SOL Redirection
1028 0b10 Precision 3571
51e8 Alder Lake PCH Serial IO I2C Controller #0
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
51e9 Alder Lake PCH Serial IO I2C Controller #1
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
51ea Alder Lake PCH Serial IO I2C Controller #2
51eb Alder Lake PCH Serial IO I2C Controller #3
51ed Alder Lake PCH USB 3.2 xHCI Host Controller
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
51ef Alder Lake PCH Shared SRAM
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
51f0 Alder Lake-P PCH CNVi WiFi
- 8086 0034 Wireless-AC 9560 160MHz
- 8086 0070 Wi-Fi 6 AX201 160MHz
- 8086 0074 Wi-Fi 6 AX201 160MHz
- 8086 0094 Wi-Fi 6E AX211 160MHz
- 8086 4070 Wi-Fi 6 AX201 160MHz
- 8086 4090 Wi-Fi 6E AX211 160MHz
+ 1a56 1652 Dual Band Wi-Fi 6(802.11ax) Killer AX1650i 160MHz 2x2 [Cyclone Peak]
+ 1a56 1671 Dual Band Wi-Fi 6E(802.11ax) AX1675s 160MHz 2x2 [Garfield Peak]
+ 1a56 1672 Dual Band Wi-Fi 6E(802.11ax) AX1675i 160MHz 2x2 [Garfield Peak]
+ 1a56 1692 Simultaneous Dual Band(Double Connect) Wi-Fi 6E AX1690i 160MHz 2x2 [Garfield Peak]
+ 8086 0034 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9560 160MHz 2x2 [Jefferson Peak]
+ 8086 0070 Dual Band Wi-Fi 6(802.11ax) AX201 160MHz 2x2 [Harrison Peak]
+ 8086 0074 Dual Band Wi-Fi 6(802.11ax) AX201 160MHz 2x2 [Harrison Peak]
+ 8086 0094 Dual Band Wi-Fi 6E(802.11ax) AX211 160MHz 2x2 [Garfield Peak]
+ 8086 00b4 Simultaneous Dual Band(Double Connect) Wi-Fi 6E AX411 160MHz 2x2 [Garfield Peak]
+ 8086 0234 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9560 80MHz 2x2 [Jefferson Peak]
+ 8086 0244 Single Band Wi-Fi 6(802.11ax) AX101 80MHz 1x1 [Harrison Peak]
+ 8086 0264 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9461 80MHz 1x1 [Jefferson Peak]
+ 8086 0274 Dual Band Wi-Fi E(802.11ax) AX203 80MHz 2x2 [Johnson Peak]
+ 8086 02a4 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9462 80MHz 1x1 [Jefferson Peak]
+ 8086 4070 Dual Band Wi-Fi 6(802.11ax) AX201 160MHz 2x2 [Harrison Peak]
+ 8086 4090 Dual Band Wi-Fi 6E(802.11ax) AX211 160MHz 2x2 [Garfield Peak]
+ 8086 42a4 Dual Band Wi-Fi 5(802.11ac) Wireless-AC 9462 80MHz 1x1 [Jefferson Peak]
51f1 Raptor Lake PCH CNVi WiFi
+ 8086 4090 Wi-Fi 6E AX211 160MHz
51fc Alder Lake-P Integrated Sensor Hub
1028 0b10 Precision 3571
+ 1028 0c06 Precision 3580
5200 EtherExpress PRO/100 Intelligent Server PCI Bridge
5201 EtherExpress PRO/100 Intelligent Server Fast Ethernet Controller
8086 0001 EtherExpress PRO/100 Server Ethernet Adapter
530d 80310 (IOP) IO Processor
+ 5481 Alder Lake-N PCH eSPI Controller
+ 54a3 Alder Lake-N SMBus
+ 54a4 Alder Lake-N SPI (flash) Controller
+ 54a8 Alder Lake-N Serial IO UART Host Controller
+ 54b0 Alder Lake-N PCI Express Root Port #9
+ 54b1 Alder Lake-N PCI Express Root Port #10
+ 54b2 Alder Lake-N PCI Express Root Port #11
+ 54b3 Alder Lake-N PCI Express Root Port #12
+ 54c8 Alder Lake-N PCH High Definition Audio Controller
+ 54d3 Alder Lake-N SATA AHCI Controller
+ 54e0 Alder Lake-N PCH HECI Controller
+ 54ed Alder Lake-N PCH USB 3.2 xHCI Host Controller
+ 54ef Alder Lake-N PCH Shared SRAM
54f0 CNVi: Wi-Fi
+ 8086 0244 Wi-Fi 6 AX101NGW
5502 Ethernet Controller (2) I225-LMvP
1ab6 0225 TS4 On-Board 2.5GbE Ethernet Adaptor
5690 DG2 [Arc A770M]
@@ -34031,8 +35358,15 @@
56b1 DG2 [Arc Pro A40/A50]
56b2 DG2 [Arc Pro A60M]
56b3 DG2 [Arc Pro A60]
+ 56ba DG2 [Intel Graphics]
+ 56bb DG2 [Intel Graphics]
+ 56bc DG2 [Intel Graphics]
+ 56bd DG2 [Intel Graphics]
+ 56be DG2 [Arc Graphics A750E]
+ 56bf DG2 [Arc Graphics A580E]
56c0 ATS-M [Data Center GPU Flex 170]
56c1 ATS-M [Data Center GPU Flex 140]
+ 56c2 ATS-M [Data Center GPU Flex 170V]
5780 Thunderbolt 80/120G Bridge [Barlow Ridge Host 80G 2023]
5781 Thunderbolt 80/120G NHI [Barlow Ridge Host 80G 2023]
5782 Thunderbolt 80/120G USB Controller [Barlow Ridge Host 80G 2023]
@@ -34046,8 +35380,12 @@
579e Ethernet Connection E825-C for SFP
57a4 Thunderbolt Bridge [Barlow Ridge Hub 40G 2023]
57a5 Thunderbolt USB Controller [Barlow Ridge Hub 40G 2023]
- 57b1 Ethernet Controller E610 1GBASE T
+ 57ae Ethernet Controller E610 Backplane
+ 57af Ethernet Controller E610 SFP
+ 57b0 Ethernet Controller E610 10GBASE T
+ 57b1 Ethernet Controller E610 2.5GBASE T
8086 0000 Ethernet Converged Network Adapter E610
+ 57b2 Ethernet Controller E610 SGMII
5845 QEMU NVM Express Controller
1af4 1100 QEMU Virtual Machine
5900 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
@@ -34063,11 +35401,15 @@
590b HD Graphics 610
590c Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
590f Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
1462 7a68 B250 KRAIT GAMING (MS-7A68)
1462 7a72 H270 PC MATE
5910 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
5911 Xeon E3-1200 v6/7th Gen Core Processor Gaussian Mixture Model
5912 HD Graphics 630
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
1462 7a72 H270 PC MATE
5914 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
17aa 225d ThinkPad T480
@@ -34123,6 +35465,10 @@
5ae8 Celeron N3350/Pentium N4200/Atom E3900 Series Low Pin Count Interface
5aee Celeron N3350/Pentium N4200/Atom E3900 Series HSUART Controller #4
5af0 Celeron N3350/Pentium N4200/Atom E3900 Series Host Bridge
+ 6420 Lunar Lake [Intel Graphics]
+ 643e Lunar Lake NPU
+ 64a0 Lunar Lake [Intel Graphics]
+ 64b0 Lunar Lake [Intel Graphics]
65c0 5100 Chipset Memory Controller Hub
65e2 5100 Chipset PCI Express x4 Port 2
65e3 5100 Chipset PCI Express x4 Port 3
@@ -34412,8 +35758,30 @@
10b4 202f Lightspeed 740
8086 0000 Terminator 2x/i
8086 0100 Intel740 Graphics Accelerator
+ 7a04 Raptor Lake LPC/eSPI Controller
+ 7a23 Raptor Lake-S PCH SMBus Controller
+ 7a24 Raptor Lake SPI (flash) Controller
+ 7a27 Raptor Lake-S PCH Shared SRAM
+ 7a30 Raptor Lake PCI Express Root Port #9
+ 7a38 Raptor Lake PCI Express Root Port #1
+ 7a3a Raptor Point-S PCH - PCI Express Root Port 3
+ 7a3b Raptor Lake PCI Express Root Port #4
+ 7a40 Raptor Lake PCI Express Root Port #17
+ 7a44 Raptor Lake PCI Express Root Port #21
+ 7a48 Raptor Lake PCI Express Root Port #25
+ 7a4c Raptor Lake Serial IO I2C Host Controller #0
+ 7a4d Raptor Lake Serial IO I2C Host Controller #1
+ 7a4e Raptor Lake Serial IO I2C Host Controller #2
+ 7a50 Raptor Lake High Definition Audio Controller
+ 7a60 Raptor Lake USB 3.2 Gen 2x2 (20 Gb/s) XHCI Host Controller
+ 7a62 Raptor Lake SATA AHCI Controller
+ 7a68 Raptor Lake CSME HECI #1
+ 7a69 Raptor Lake CSME HECI #2
+ 7a70 Raptor Lake-S PCH CNVi WiFi
+ 8086 0090 WiFi 6E AX211 160MHz
# Unlike other PCH components. The eSPI controller is specific to each chipset model
7a84 Z690 Chipset LPC/eSPI Controller
+ 7a85 Alder Lake-S PCH PCI Express Root Port #?????
7aa3 Alder Lake-S PCH SMBus Controller
7aa4 Alder Lake-S PCH SPI Controller
7aa7 Alder Lake-S PCH Shared SRAM
@@ -34423,9 +35791,11 @@
7ab4 Alder Lake-S PCH PCI Express Root Port #13
7ab8 Alder Lake-S PCH PCI Express Root Port #1
7ab9 Alder Lake-S PCH PCI Express Root Port #2
+ 7aba Alder Lake-S PCH PCI Express Root Port #3
7abc Alder Lake-S PCH PCI Express Root Port #5
7abd Alder Lake-S PCH PCI Express Root Port #6
7abf Alder Lake-S PCH PCI Express Root Port #8
+ 7ac4 Alder Lake-S PCH PCI Express Root Port #21
7ac8 Alder Lake-S PCH PCI Express Root Port #25
7acc Alder Lake-S PCH Serial IO I2C Controller #0
7acd Alder Lake-S PCH Serial IO I2C Controller #1
@@ -34441,11 +35811,19 @@
8086 0094 Wi-Fi 6 AX201 160MHz
7afc Alder Lake-S PCH Serial IO I2C Controller #4
7afd Alder Lake-S PCH Serial IO I2C Controller #5
+ 7d03 Meteor Lake-P Dynamic Tuning Technology
7d0b Volume Management Device NVMe RAID Controller Intel Corporation
+ 7d0d Meteor Lake-P Platform Monitoring Technology
+ 7d19 Meteor Lake IPU
+ 7d1d Meteor Lake NPU
7d40 Meteor Lake-M [Intel Graphics]
+ 7d41 Arrow Lake-U [Intel Graphics]
7d45 Meteor Lake-P [Intel Graphics]
+ 7d51 Arrow Lake-P [Intel Graphics]
7d55 Meteor Lake-P [Intel Arc Graphics]
7d60 Meteor Lake-M [Intel Graphics]
+ 7d67 Arrow Lake-U [Intel Graphics]
+ 7dd1 Arrow Lake-P [Intel Graphics]
7dd5 Meteor Lake-P [Intel Graphics]
7e01 Meteor Lake-P LPC/eSPI Controller
7e22 Meteor Lake-P SMBus Controller
@@ -34456,10 +35834,17 @@
7e27 Meteor Lake-P Serial IO SPI Controller #0
7e28 Meteor Lake-P HD Audio Controller
7e30 Meteor Lake-P Serial IO SPI Controller #1
+ 7e40 Meteor Lake PCH CNVi WiFi
+ 8086 0094 Wi-Fi 6E AX211 160MHz
+# Refer from Intel Meteor Lake EDS (doc#640228) under its "Device IDs" section.
+ 7e45 Meteor Lake-P Integrated Sensor Hub
7e46 Meteor Lake-P Serial IO SPI Controller #2
+ 7e4c Meteor Lake-P Gaussian & Neural-Network Accelerator
7e50 Meteor Lake-P Serial IO I2C Controller #4
7e51 Meteor Lake-P Serial IO I2C Controller #5
7e52 Meteor Lake-P Serial IO UART Controller #2
+ 7e70 Meteor Lake-P CSME HECI #1
+ 7e73 Meteor Lake-P Keyboard and Text (KT) Redirection
7e78 Meteor Lake-P Serial IO I2C Controller #0
7e79 Meteor Lake-P Serial IO I2C Controller #1
7e7a Meteor Lake-P Serial IO I2C Controller #2
@@ -34848,6 +36233,7 @@
1028 09be Latitude 7410
9b43 10th Gen Core Processor Host Bridge/DRAM Registers
9b44 10th Gen Core Processor Host Bridge/DRAM Registers
+ 9b51 10th Gen Core Processor Host Bridge/DRAM Registers
9b53 Comet Lake-S 6c Host Bridge/DRAM Controller
9b54 10th Gen Core Processor Host Bridge/DRAM Registers
9b61 Comet Lake-U v1 4c Host Bridge/DRAM Controller
@@ -35196,6 +36582,8 @@
8086 0244 Wi-Fi 6 AX101NGW
a0fc Tiger Lake-LP Integrated Sensor Hub
a102 Q170/Q150/B150/H170/H110/Z170/CM236 Chipset SATA Controller [AHCI Mode]
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
a103 HM170/QM170 Chipset SATA Controller [AHCI Mode]
1028 06e4 XPS 15 9550
103c 825b OMEN-17-w001nv
@@ -35208,11 +36596,14 @@
a112 100 Series/C230 Series Chipset Family PCI Express Root Port #3
a113 100 Series/C230 Series Chipset Family PCI Express Root Port #4
a114 100 Series/C230 Series Chipset Family PCI Express Root Port #5
+ 1043 8694 H110I-PLUS Motherboard
a115 100 Series/C230 Series Chipset Family PCI Express Root Port #6
a116 100 Series/C230 Series Chipset Family PCI Express Root Port #7
a117 100 Series/C230 Series Chipset Family PCI Express Root Port #8
a118 100 Series/C230 Series Chipset Family PCI Express Root Port #9
+ 1043 8694 H110I-PLUS Motherboard
a119 100 Series/C230 Series Chipset Family PCI Express Root Port #10
+ 1043 8694 H110I-PLUS Motherboard
a11a 100 Series/C230 Series Chipset Family PCI Express Root Port #11
a11b 100 Series/C230 Series Chipset Family PCI Express Root Port #12
a11c 100 Series/C230 Series Chipset Family PCI Express Root Port #13
@@ -35223,10 +36614,14 @@
a121 100 Series/C230 Series Chipset Family Power Management Controller
1028 06e4 XPS 15 9550
103c 825b OMEN-17-w001nv
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
a122 Sunrise Point-H cAVS
a123 100 Series/C230 Series Chipset Family SMBus
1028 06e4 XPS 15 9550
103c 825b OMEN-17-w001nv
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
a124 100 Series/C230 Series Chipset Family SPI Controller
a125 100 Series/C230 Series Chipset Family Gigabit Ethernet Controller
a126 100 Series/C230 Series Chipset Family Trace Hub
@@ -35237,15 +36632,20 @@
a12f 100 Series/C230 Series Chipset Family USB 3.0 xHCI Controller
1028 06e4 XPS 15 9550
103c 825b OMEN-17-w001nv
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
a130 100 Series/C230 Series Chipset Family USB Device Controller (OTG)
a131 100 Series/C230 Series Chipset Family Thermal Subsystem
1028 06e4 XPS 15 9550
103c 825b OMEN-17-w001nv
+ 1462 7994 H110M ECO/GAMING
a133 Sunrise Point-H Northpeak ACPI Function
a135 100 Series/C230 Series Chipset Family Integrated Sensor Hub
a13a 100 Series/C230 Series Chipset Family MEI Controller #1
1028 06e4 XPS 15 9550
103c 825b OMEN-17-w001nv
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
a13b 100 Series/C230 Series Chipset Family MEI Controller #2
a13c 100 Series/C230 Series Chipset Family IDE Redirection
a13d 100 Series/C230 Series Chipset Family KT Redirection
@@ -35254,6 +36654,8 @@
a141 Sunrise Point-H LPC Controller
a142 Sunrise Point-H LPC Controller
a143 H110 Chipset LPC/eSPI Controller
+ 1043 8694 H110I-PLUS Motherboard
+ 1462 7994 H110M ECO/GAMING
a144 H170 Chipset LPC/eSPI Controller
a145 Z170 Chipset LPC/eSPI Controller
a146 Q170 Chipset LPC/eSPI Controller
@@ -35299,6 +36701,8 @@
a170 100 Series/C230 Series Chipset Family HD Audio Controller
1028 06e4 XPS 15 9550
103c 825b OMEN-17-w001nv
+ 1043 86c7 H110I-PLUS Motherboard
+ 1462 f994 H110M ECO/GAMING
a171 CM238 HD Audio Controller
a182 C620 Series Chipset Family SATA Controller [AHCI mode]
a186 C620 Series Chipset Family SATA Controller [RAID mode]
@@ -35409,6 +36813,8 @@
a2ba 200 Series PCH CSME HECI #1
1462 7a72 H270 PC MATE
a2bb 200 Series PCH CSME HECI #2
+# AMT serial over LAN
+ a2bd 200 Series Chipset Family KT Redirection
a2c4 200 Series PCH LPC Controller (H270)
1462 7a72 H270 PC MATE
a2c5 200 Series PCH LPC Controller (Z270)
@@ -35439,6 +36845,7 @@
1028 0869 Vostro 3470
a305 Z390 Chipset LPC/eSPI Controller
a306 Q370 Chipset LPC/eSPI Controller
+ a308 300 Series Chipset Family LPC Controller
a309 Cannon Point-LP LPC Controller
a30c QM370 Chipset LPC/eSPI Controller
a30d HM470 Chipset LPC/eSPI Controller
@@ -35509,19 +36916,29 @@
a3eb Comet Lake PCI Express Root Port #21
a3f0 Comet Lake PCH-V cAVS
a620 6400/6402 Advanced Memory Buffer (AMB)
+ a703 Raptor Lake-S Host Bridge/DRAM Controller
+ a706 Raptor Lake-P 6p+8e cores Host Bridge/DRAM Controller
+ 1028 0c06 Precision 3580
a707 Raptor Lake-P/U 4p+8e cores Host Bridge/DRAM Controller
a708 Raptor Lake-P/U 2p+8e cores Host Bridge/DRAM Controller
+ a70d Raptor Lake PCI Express 5.0 Graphics Port (PEG010)
a71d Raptor Lake Dynamic Platform and Thermal Framework Processor Participant
+ 1028 0c06 Precision 3580
a71e Raptor Lake-P Thunderbolt 4 USB Controller
+ 1028 0c06 Precision 3580
a720 Raptor Lake-P [UHD Graphics]
a721 Raptor Lake-P [UHD Graphics]
a72f Raptor Lake-P Thunderbolt 4 PCI Express Root Port #2
a73e Raptor Lake-P Thunderbolt 4 NHI #0
+ 1028 0c06 Precision 3580
+ a740 Raptor Lake-S 8+12 - Host Bridge/DRAM Controller
a74d Raptor Lake PCIe 4.0 Graphics Port
a74f GNA Scoring Accelerator module
+ 1028 0c06 Precision 3580
a76d Raptor Lake-P Thunderbolt 4 NHI #1
a76e Raptor Lake-P Thunderbolt 4 PCI Express Root Port #0
a77d Raptor Lake Crashlog and Telemetry
+ 1028 0c06 Precision 3580
a77f Volume Management Device NVMe RAID Controller Intel Corporation
a780 Raptor Lake-S GT1 [UHD Graphics 770]
a781 Raptor Lake-S UHD Graphics
@@ -35532,6 +36949,7 @@
a78a Raptor Lake-S UHD Graphics
a78b Raptor Lake-S UHD Graphics
a7a0 Raptor Lake-P [Iris Xe Graphics]
+ 1028 0c06 Precision 3580
a7a1 Raptor Lake-P [Iris Xe Graphics]
a7a8 Raptor Lake-P [UHD Graphics]
a7a9 Raptor Lake-P [UHD Graphics]
@@ -35539,8 +36957,38 @@
a7ab Raptor Lake-P [Intel Graphics]
a7ac Raptor Lake-U [Intel Graphics]
a7ad Raptor Lake-U [Intel Graphics]
+ a806 Lunar Lake-M LPC/eSPI Controller
+ a822 Lunar Lake-M SMbus Controller
+ a823 Lunar Lake-M SPI Controller
+ a824 Lunar Lake-M Trace Hub
+ a825 Lunar Lake-M Serial IO UART Controller #0
+ a826 Lunar Lake-M Serial IO UART Controller #1
+ a827 Lunar Lake-M Serial IO SPI Controller #0
+ a828 Lunar Lake-M HD Audio Controller
+ a830 Lunar Lake-M Serial IO SPI Controller #1
+ a831 Lunar Lake-M Thunderbolt 4 USB Controller
+ a833 Lunar Lake-M Thunderbolt 4 NHI #0
+ a834 Lunar Lake-M Thunderbolt 4 NHI #1
+ a838 Lunar Lake-M PCI Express Root Port #1
+ a839 Lunar Lake-M PCI Express Root Port #2
+ a83a Lunar Lake-M PCI Express Root Port #3
+ a83b Lunar Lake-M PCI Express Root Port #4
+ a83c Lunar Lake-M PCI Express Root Port #5
+ a83d Lunar Lake-M PCI Express Root Port #6
+ a845 Lunar Lake-M Integrated Sensor Hub
+ a847 Lunar Lake-M UFS Controller
+ a84e Lunar Lake-M Thunderbolt 4 PCI Express Root Port #0
+ a84f Lunar Lake-M Thunderbolt 4 PCI Express Root Port #1
+ a860 Lunar Lake-M Thunderbolt 4 PCI Express Root Port #2
+ a878 Lunar Lake-M Serial IO I2C Controller #0
+ a879 Lunar Lake-M Serial IO I2C Controller #1
+ a87a Lunar Lake-M Serial IO I2C Controller #2
+ a87b Lunar Lake-M Serial IO I2C Controller #3
+ a87d Lunar Lake-M USB 3.2 Gen 2x1 xHCI Host Controller
abc0 Omni-Path Fabric Switch Silicon 100 Series
ad0b Volume Management Device NVMe RAID Controller Intel Corporation
+ ad1d Arrow Lake NPU
+ b03e Panther Lake NPU
b152 21152 PCI-to-PCI Bridge
8086 b152 21152 PCI-to-PCI Bridge
# observed, and documented in Intel revision note; new mask of 1011:0026
@@ -35577,6 +37025,11 @@
d156 Core Processor Semaphore and Scratchpad Registers
d157 Core Processor System Control and Status Registers
d158 Core Processor Miscellaneous Registers
+ e202 Battlemage G21 [Intel Graphics]
+ e20b Battlemage G21 [Intel Graphics]
+ e20c Battlemage G21 [Intel Graphics]
+ e20d Battlemage G21 [Intel Graphics]
+ e212 Battlemage G21 [Intel Graphics]
f1a5 SSD 600P Series
8086 390a SSDPEKKW256G7 256GB
f1a6 SSD DC P4101/Pro 7600p/760p/E 6100p Series
@@ -35588,7 +37041,14 @@
8088 Beijing Wangxun Technology Co., Ltd.
0100 WX1860AL-W Gigabit Ethernet Controller
0101 WX1860A2 Gigabit Ethernet Controller
+ 4c52 2024 LRES2024PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 2025 LRES2025PT Quad-port 1Gb Ethernet Network Adapter
+ 4c52 2027 LRES2027PF Dual-port 1Gb Ethernet Server Adapter
+ 4c52 3018 LRES3018PT Dual-port 1Gb Ethernet Server Adapter for OCP
8088 0201 Dual-Port Ethernet Network Adaptor SF200T
+ 8088 0501 Dual-Port Ethernet Network Adapter SF200T-C101
+ 8088 0901 Dual-Port Ethernet Network Adapter SF200T-B401
+ 8088 0b01 Dual-Port Ethernet Network Adapter SF200T-B402
8088 4201 Dual-Port Ethernet Network Adaptor SF200T (WOL)
8088 8201 Dual-Port Ethernet Network Adaptor SF200T (NCSI)
8088 c201 Dual-Port Ethernet Network Adaptor SF200T (WOL, NCSI)
@@ -35596,8 +37056,12 @@
8088 0210 Dual-Port Ethernet Network Adaptor SF200T-S
0103 WX1860A4 Gigabit Ethernet Controller
1bd4 009e ENPW2100-T4
+ 4c52 2028 LRES2028PF Quad-port 1Gb Ethernet Server Adapter
+ 4c52 3019 LRES3019PT Quad-port 1Gb Ethernet Server Adapter for OCP
8088 0401 Qual-Port Ethernet Network Adaptor SF400T
8088 0440 Qual-Port Ethernet Network Adaptor SF400-OCP
+ 8088 0a01 Quad-Port Ethernet Network Adapter SF400T-B401
+ 8088 0c01 Quad-Port Ethernet Network Adapter SF400T-B402
8088 4401 Quad-Port Ethernet Network Adapter SF400T (WOL)
8088 8103 Quad-Port Ethernet Network Adaptor SF400T (NCSI)
8088 8401 Quad-Port Ethernet Network Adapter SF400T (NCSI)
@@ -35621,7 +37085,10 @@
8088 0420 Qual-Port Ethernet Network Adaptor SF400HT-S
0109 WX1860-LC Gigabit Ethernet Controller
010a WX1860A1 Gigabit Ethernet Controller
+ 4c52 2026 LRES2026PF Single-port 1Gb Ethernet Network Adapter
+ 4c52 2034 LRES2034PT Single-port 1Gb Ethernet Network Adapter
010b WX1860AL1 Gigabit Ethernet Controller
+ 4c52 2215 LRES2215PT Single-port 1Gb Ethernet Network Adapter
8088 0102 Single-Port Ethernet Network Adaptor SF100HT
8088 4102 Single-Port Ethernet Network Adaptor SF100HT (WOL)
8088 8102 Single-Port Ethernet Network Adaptor SF100HT (NCSI)
@@ -35633,15 +37100,18 @@
0119 WX1860-LC Gigabit Ethernet Controller Virtual Function
011a WX1860A1 Gigabit Ethernet Controller Virtual Function
011b WX1860AL1 Gigabit Ethernet Controller Virtual Function
- 1000 Ethernet Controller RP1000 Virtual Function for 10GbE SFP+
- 1001 Ethernet Controller RP1000 for 10GbE SFP+
+ 1000 Ethernet Controller SP1000A Virtual Function for 10GbE SFP+
+ 1001 Ethernet Controller SP1000A for 10GbE SFP+
1bd4 0084 Ethernet Controller SP1000A for 10GbE SFP+(lldp)
1bd4 0085 Ethernet Controller SP1000A for 10GBASE-T
+ 4c52 1002 LRES1002PF Dual-port 10Gb Ethernet Server Adapter
+ 4c52 1003 LRES1003PF Single-port 10Gb Ethernet Server Adapter
+ 4c52 3001 LRES3001PF Dual-port 10Gb Ethernet Server Adapter for OCP
8088 0000 Ethernet Network Adaptor RP1000 for 10GbE SFP+
8088 0300 Ethernet Network Adaptor RP1000-A03 for 10GbE SFP+
8088 0400 Ethernet Network Adaptor RP1000-A04 for 10GbE SFP+
- 2000 Ethernet Controller RP2000 Virtual Function for 10GbE SFP+
- 2001 Ethernet Controller RP2000 for 10GbE SFP+
+ 2000 Ethernet Controller WX1820AL Virtual Function for 10GbE SFP+
+ 2001 Ethernet Controller WX1820AL for 10GbE SFP+
8088 2000 Ethernet Network Adaptor RP2000 for 10GbE SFP+
8088 2300 Ethernet Network Adaptor RP2000-A03 for 10GbE SFP+
8088 2400 Ethernet Network Adaptor RP2000-A04 for 10GbE SFP+
@@ -35652,7 +37122,17 @@
8384 SigmaTel
8401 TRENDware International Inc.
8510 Sietium Semiconductor Co., Ltd.
- 0201 GenBu02 [GB2062-PCIe-C0]
+ 0201 GenBu02 Series GPU
+ 8510 0001 GB2062-PUB-LPDDR
+ 8510 0002 GB2062-PCIe-C0
+ 8510 0003 GB2062-PCIe-C41
+ 8510 0004 GB2062-PCIe-HIEILP4
+ 8510 0005 CQ2040-PCIe-C21
+ 8510 0007 GB2062-PCIe-C40
+ 8510 0008 CQ2040-MXM-M60
+ 8510 0009 GB2062-PCIe-C20
+ 8510 000c CQ2040-PUB
+ 8510 0201 GB2062-PUB-DDR
# nee ScaleMP
8686 SAP
1010 vSMP Foundation controller [vSMP CTL]
@@ -35664,21 +37144,36 @@
# Wuxi Micro Innovation Integrated Circuit Design Co.,Ltd.
8848 MUCSE
1000 Ethernet Controller N10 Series for 10GbE or 40GbE (Dual-port)
+ 4c52 3032 LRES3032PF Dual-port 10Gb Ethernet Server Adapter for OCP
8848 8410 Ethernet Network Adapter N10G-X2-DC for 10GbE SFP+ 2-port
1001 Ethernet Controller N400 Series for 1GbE (Dual-port)
1003 Ethernet Controller N400 Series for 10GbE (Single-port)
+ 4c52 1050 LRES1050PF Single-port 10Gb Ethernet Network Adapter
1020 Ethernet Controller N10 Series for 10GbE (Quad-port)
+ 4c52 1030 LRES1030PF Quad-port 10Gb Ethernet Server Adapter
+ 4c52 1031 LRES1031PF Dual-port 10Gb Ethernet Server Adapter
+ 4c52 3031 LRES3031PF Quad-port 10Gb Ethernet Server Adapter for OCP
8848 8451 Ethernet Network Adapter N10G-X4-QC for 10GbE SFP+ 4-port
1021 Ethernet Controller N400 Series for 1GbE (Quad-port)
+ 4c52 1032 LRES1032PF Quad-port 1Gb Ethernet Network Adapter
+ 4c52 1039 LRES1039PT Quad-port 1Gb Ethernet Network Adapter
1060 Ethernet Controller N10 Series for 1GbE or 10GbE (8-port)
1080 Ethernet Controller N10 Series Virtual Function
1081 Ethernet Controller N400 Series Virtual Function
1083 Ethernet Controller N400 Series Virtual Function
8308 Ethernet Controller N500 Series for 1GbE (Quad-port, Copper RJ45)
+# NIC-ETH3M0T-3S-4P Quad-Port RJ45 Adapter for OCP 3.0
+ 193d 1088 NIC-ETH3M0T-3S-4P
+ 4c52 1048 LRES1048PT Quad-port 1Gb Ethernet Network Adapter
+ 4c52 3044 LRES3044PT Quad-port 1Gb Ethernet Server Adapter for OCP
8309 Ethernet Controller N500 Series Virtual Function
8318 Ethernet Controller N500 Series for 1GbE (Dual-port, Copper RJ45)
+ 4c52 1049 LRES1049PT Dual-port 1Gb Ethernet Network Adapter
+ 4c52 3043 LRES3043PT Dual-port 1Gb Ethernet Server Adapter for OCP
8866 T-Square Design Inc.
8888 Silicon Magic
+# 4 port HDMI capture card
+ 8504 AVMatrix VC42
8912 TRX
# 8c4a is not Winbond but there is a board misprogrammed
8c4a Winbond
@@ -36010,6 +37505,9 @@
103c 1101 Smart Array P416ie-m SR G10
105b 1211 HBA 8238-16i
105b 1321 HBA 8242-24i
+ 1137 02f8 24G TriMode M1 RAID 4GB FBWC 32D
+ 1137 02f9 24G TriMode M1 RAID 4GB FBWC 16D
+ 1137 02fa 24G TriMode M1 HBA 16D
13fe 8312 SKY-9200 MIC-8312BridgeB
152d 8a22 QS-8204-8i
152d 8a23 QS-8238-16i
@@ -36062,6 +37560,7 @@
1d49 0220 ThinkSystem 4350-8i SAS/SATA 12Gb HBA
1d49 0221 ThinkSystem 4350-16i SAS/SATA 12Gb HBA
1d49 0520 ThinkSystem RAID 5350-8i PCIe 12Gb Adapter
+ 1d49 0522 ThinkSystem RAID 5350-8i PCIe 12Gb Internal Adapter
1d49 0620 ThinkSystem RAID 9350-8i 2GB Flash PCIe 12Gb Adapter
1d49 0621 ThinkSystem RAID 9350-8i 2GB Flash PCIe 12Gb Internal Adapter
1d49 0622 ThinkSystem RAID 9350-16i 4GB Flash PCIe 12Gb Adapter
@@ -36168,7 +37667,13 @@
1734 1011 PRIMERGY RX300 onboard SCSI
8080 ASC-29320A U320 w/HostRAID
8081 PMC-Sierra PM8001 SAS HBA [Series 6H]
+ 9005 0400 Adaptec SAS HBA 6405H
+ 9005 0800 Adaptec SAS HBA 6805H
8088 PMC-Sierra PM8018 SAS HBA [Series 7H]
+ 9005 0008 Adaptec SAS HBA 7085H
+ 9005 0016 Adaptec SAS HBA 70165H
+ 9005 0800 Adaptec SAS HBA 7805H
+ 9005 1600 Adaptec SAS HBA 71605H
8089 PMC-Sierra PM8019 SAS encryption HBA [Series 7He]
808f AIC-7901 U320 w/HostRAID
1028 0168 Precision Workstation 670 Mainboard
@@ -36249,6 +37754,9 @@
2001 STAR2000E NVMe SSD
2002 STAR2000C NVMe SSD
2003 STAR2000L NVMe SSD
+ 2004 EAST 2000K SSD
+ 2008 STAR2008 PCIE NVMe SSD Controller
+ 2010 STAR2010 PCIE NVMe Secure SSD Controller
bb5b Asgard AN3+ NVMe SSD
fc22 Asgard AN3+ NVMe SSD
a000 Asix Electronics Corporation (Wrong ID)
@@ -36344,6 +37852,9 @@ c0a9 Micron/Crucial Technology
5407 P5 Plus NVMe PCIe SSD
540a P2 [Nick P2] / P3 / P3 Plus NVMe PCIe SSD (DRAM-less)
5412 P5 NVMe PCIe SSD[SlashP5]
+ 5415 T500 NVMe PCIe SSD
+ 5419 T700 NVMe PCIe SSD
+ 5421 P3 Plus NVMe PCIe SSD (DRAM-less)
c0de Motorola
c0fe Motion Engineering, Inc.
ca01 I-TEK OptoElectronics Co., LTD.
@@ -36608,6 +38119,11 @@ edd8 ARK Logic Inc
f043 ASUSTeK Computer Inc. (Wrong ID)
f05b Foxconn International, Inc. (Wrong ID)
f111 Framework Computer Inc.
+f117 Cerio
+ 1000 Emulated PCIe Switch
+ 1010 Placeholder Device
+ 1020 Pseudo-Device
+ 1030 Test Device
f15e SiFive, Inc.
0000 FU740-C000 RISC-V SoC PCI Express x8 to AXI4 Bridge
f1d0 AJA Video
diff --git a/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/pciids.go b/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/pciids.go
index 5f25c0048..343df08d4 100644
--- a/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/pciids.go
+++ b/vendor/github.com/NVIDIA/go-nvlib/pkg/pciids/pciids.go
@@ -11,42 +11,42 @@ import (
"strings"
)
-// token what the Lexer retruns
+// token is what the Lexer returns.
type token int
const (
- // ILLEGAL a token which the Lexer does not understand
+ // ILLEGAL a token which the Lexer does not understand.
ILLEGAL token = iota
- // EOF end of file
+ // EOF end of file.
EOF
- // WS whitespace
+ // WS whitespace.
WS
- // NEWLINE '\n'
+ // NEWLINE '\n'.
NEWLINE
- // COMMENT '# something'
+ // COMMENT '# something'.
COMMENT
- // VENDOR PCI vendor
+ // VENDOR PCI vendor.
VENDOR
- // SUBVENDOR PCI subvendor
+ // SUBVENDOR PCI subvendor.
SUBVENDOR
- // DEVICE PCI device
+ // DEVICE PCI device.
DEVICE
- // CLASS PCI class
+ // CLASS PCI class.
CLASS
- // SUBCLASS PCI subclass
+ // SUBCLASS PCI subclass.
SUBCLASS
- // PROGIF PCI programming interface
+ // PROGIF PCI programming interface.
PROGIF
)
-// literal values from the Lexer
+// literal values from the Lexer.
type literal struct {
ID string
name string
SubName string
}
-// scanner a lexical scanner
+// scanner a lexical scanner.
type scanner struct {
r *bufio.Reader
isVendor bool
@@ -58,7 +58,7 @@ func newScanner(r io.Reader) *scanner {
}
// Since the pci.ids is line base we're consuming a whole line rather then only
-// a single rune/char
+// a single rune/char.
func (s *scanner) readline() []byte {
ln, err := s.r.ReadBytes('\n')
if err == io.EOF {
@@ -107,7 +107,7 @@ func isSubVendor(ln []byte) bool { return isLeadingTwoTabs(ln) }
func isDevice(ln []byte) bool { return isLeadingOneTab(ln) }
func isNewline(ln []byte) bool { return (ln[0] == '\n') }
-// List of known device classes, subclasses and programming interfaces
+// List of known device classes, subclasses and programming interfaces.
func isClass(ln []byte) bool { return (ln[0] == 'C') }
func isProgIf(ln []byte) bool { return isLeadingTwoTabs(ln) }
func isSubClass(ln []byte) bool { return isLeadingOneTab(ln) }
@@ -162,7 +162,7 @@ func (s *scanner) scan() (tok token, lit literal) {
return ILLEGAL, literal{ID: string(line)}
}
-// parser reads the tokens returned by the Lexer and constructs the AST
+// parser reads the tokens returned by the Lexer and constructs the AST.
type parser struct {
s *scanner
buf struct {
@@ -173,7 +173,7 @@ type parser struct {
}
// Various locations of pci.ids for different distributions. These may be more
-// up to date then the embedded pci.ids db
+// up to date than the embedded pci.ids db.
var defaultPCIdbPaths = []string{
"/usr/share/misc/pci.ids", // Ubuntu
"/usr/local/share/pci.ids", // RHEL like with manual update
@@ -202,7 +202,7 @@ func NewDB(opts ...Option) Interface {
return newParser(pcidbs).parse()
}
-// Option defines a function for passing options to the NewDB() call
+// Option defines a function for passing options to the NewDB() call.
type Option func(*pcidb)
// WithFilePath provides an Option to set the file path
@@ -216,7 +216,7 @@ func WithFilePath(path string) Option {
}
// newParser will attempt to read the db pci.ids from well known places or fall
-// back to an internal db
+// back to an internal db.
func newParser(pcidbs []string) *parser {
for _, db := range pcidbs {
@@ -229,7 +229,7 @@ func newParser(pcidbs []string) *parser {
}
// We're using go embed above to have the byte array
// correctly initialized with the internal shipped db
- // if we cannot find an up to date in the filesystem
+ // if we cannot find an up-to-date one in the filesystem.
return newParserFromReader(bufio.NewReader(bytes.NewReader(defaultPCIdb)))
}
@@ -252,13 +252,13 @@ func (p *parser) unscan() { p.buf.n = 1 }
var _ Interface = (*pcidb)(nil)
-// Interface returns textual description of specific attributes of PCI devices
+// Interface returns textual description of specific attributes of PCI devices.
type Interface interface {
GetDeviceName(uint16, uint16) (string, error)
GetClassName(uint32) (string, error)
}
-// GetDeviceName return the textual description of the PCI device
+// GetDeviceName returns the textual description of the PCI device.
func (d *pcidb) GetDeviceName(vendorID uint16, deviceID uint16) (string, error) {
vendor, ok := d.vendors[vendorID]
if !ok {
@@ -273,7 +273,7 @@ func (d *pcidb) GetDeviceName(vendorID uint16, deviceID uint16) (string, error)
return device.name, nil
}
-// GetClassName resturn the textual description of the PCI device class
+// GetClassName returns the textual description of the PCI device class.
func (d *pcidb) GetClassName(classID uint32) (string, error) {
class, ok := d.classes[classID]
if !ok {
@@ -282,53 +282,53 @@ func (d *pcidb) GetClassName(classID uint32) (string, error) {
return class.name, nil
}
-// pcidb The complete set of PCI vendors and PCI classes
+// pcidb The complete set of PCI vendors and PCI classes.
type pcidb struct {
vendors map[uint16]vendor
classes map[uint32]class
path string
}
-// vendor PCI vendors/devices/subVendors/SubDevices
+// vendor PCI vendors/devices/subVendors/SubDevices.
type vendor struct {
name string
devices map[uint16]device
}
-// subVendor PCI subVendor
+// subVendor PCI subVendor.
type subVendor struct {
SubDevices map[uint16]SubDevice
}
-// SubDevice PCI SubDevice
+// SubDevice PCI SubDevice.
type SubDevice struct {
name string
}
-// device PCI device
+// device PCI device.
type device struct {
name string
subVendors map[uint16]subVendor
}
-// class PCI classes/subClasses/Programming Interfaces
+// class PCI classes/subClasses/Programming Interfaces.
type class struct {
name string
subClasses map[uint32]subClass
}
-// subClass PCI subClass
+// subClass PCI subClass.
type subClass struct {
name string
progIfs map[uint8]progIf
}
-// progIf PCI Programming Interface
+// progIf PCI Programming Interface.
type progIf struct {
name string
}
-// parse parses a PCI IDS entry
+// parse parses a PCI IDS entry.
func (p *parser) parse() Interface {
db := &pcidb{
@@ -336,7 +336,7 @@ func (p *parser) parse() Interface {
classes: map[uint32]class{},
}
- // Used for housekeeping, breadcrumb for aggregated types
+ // Used for housekeeping, breadcrumb for aggregated types.
var hkVendor vendor
var hkDevice device
@@ -349,8 +349,8 @@ func (p *parser) parse() Interface {
for {
tok, lit := p.scan()
- // We're ignoring COMMENT, NEWLINE
- // An EOF will break the loop
+ // We're ignoring COMMENT, NEWLINE.
+ // An EOF will break the loop.
if tok == EOF {
break
}
@@ -408,10 +408,10 @@ func (p *parser) parse() Interface {
}
hkSubClass = hkClass.subClasses[uint32(id)]
- // Clear the last detected sub class
+ // Clear the last detected sub class.
hkFullID = hkFullID & 0xFFFF0000
hkFullID = hkFullID | uint32(id)<<8
- // Clear the last detected prog iface
+ // Clear the last detected prog iface.
hkFullID = hkFullID & 0xFFFFFF00
hkFullName[1] = fmt.Sprintf("%s (%02x)", lit.name, id)
diff --git a/vendor/github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config/consts.go b/vendor/github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config/consts.go
index 3a7681aaf..ff7121ce1 100644
--- a/vendor/github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config/consts.go
+++ b/vendor/github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config/consts.go
@@ -16,6 +16,25 @@
package config
+// Runtime names a container runtime; the CRIO and Containerd constants are its defined values.
+type Runtime string
+
const (
DefaultKataArtifactsDir = "/opt/nvidia-gpu-operator/artifacts/runtimeclasses"
+ DefaultCrioRuntime = "crun"
+ // CRIO runtime
+ CRIO Runtime = "crio"
+ // Containerd runtime
+ Containerd Runtime = "containerd"
)
+
+// String returns the runtime name; values other than CRIO and Containerd yield "".
+func (r Runtime) String() string {
+	if r == CRIO {
+		return "crio"
+	}
+	if r == Containerd {
+		return "containerd"
+	}
+	return ""
+}
diff --git a/vendor/github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1/zz_generated.deepcopy.go b/vendor/github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1/zz_generated.deepcopy.go
index 38f524f73..9c2adde64 100644
--- a/vendor/github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1/zz_generated.deepcopy.go
+++ b/vendor/github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1/zz_generated.deepcopy.go
@@ -1,5 +1,4 @@
//go:build !ignore_autogenerated
-// +build !ignore_autogenerated
/*
Copyright 2022 NVIDIA
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/all.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/all.go
index 7dcabf0e5..cafb8f9c7 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/all.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/all.go
@@ -20,10 +20,11 @@ import (
"fmt"
"path/filepath"
+ "github.com/NVIDIA/go-nvlib/pkg/nvpci"
+
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
- "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci"
)
type allPossible struct {
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/create-dev-char-symlinks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/create-dev-char-symlinks.go
index ed6455bdd..7d269b92f 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/create-dev-char-symlinks.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/create-dev-char-symlinks.go
@@ -24,11 +24,12 @@ import (
"strings"
"syscall"
+ "github.com/fsnotify/fsnotify"
+ "github.com/urfave/cli/v2"
+
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices"
"github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvmodules"
- "github.com/fsnotify/fsnotify"
- "github.com/urfave/cli/v2"
)
const (
@@ -86,7 +87,7 @@ func (m command) build() *cli.Command {
Usage: "The path to the driver root. `DRIVER_ROOT`/dev is searched for NVIDIA device nodes.",
Value: "/",
Destination: &cfg.driverRoot,
- EnvVars: []string{"DRIVER_ROOT"},
+ EnvVars: []string{"NVIDIA_DRIVER_ROOT", "DRIVER_ROOT"},
},
&cli.BoolFlag{
Name: "watch",
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing.go
index a1af8b204..d022a98fe 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing.go
@@ -20,9 +20,10 @@ import (
"path/filepath"
"strings"
+ "golang.org/x/sys/unix"
+
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
- "golang.org/x/sys/unix"
)
type nodeLister interface {
@@ -63,20 +64,13 @@ func (m existing) DeviceNodes() ([]deviceNode, error) {
if m.nodeIsBlocked(d) {
continue
}
-
var stat unix.Stat_t
err := unix.Stat(d, &stat)
if err != nil {
m.logger.Warningf("Could not stat device: %v", err)
continue
}
- deviceNode := deviceNode{
- path: d,
- major: unix.Major(uint64(stat.Rdev)),
- minor: unix.Minor(uint64(stat.Rdev)),
- }
-
- deviceNodes = append(deviceNodes, deviceNode)
+ deviceNodes = append(deviceNodes, newDeviceNode(d, stat))
}
return deviceNodes, nil
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing_linux.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing_linux.go
new file mode 100644
index 000000000..4aab942af
--- /dev/null
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing_linux.go
@@ -0,0 +1,28 @@
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package devchar
+
+import "golang.org/x/sys/unix"
+
+// newDeviceNode builds the deviceNode for path d from the major/minor numbers in stat.Rdev.
+func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
+	return deviceNode{
+		path:  d,
+		major: unix.Major(stat.Rdev),
+		minor: unix.Minor(stat.Rdev),
+	}
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing_other.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing_other.go
new file mode 100644
index 000000000..9be96294b
--- /dev/null
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/system/create-dev-char-symlinks/existing_other.go
@@ -0,0 +1,30 @@
+//go:build !linux
+
+/**
+# Copyright (c) NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package devchar
+
+import "golang.org/x/sys/unix"
+
+// newDeviceNode builds the deviceNode for path d, converting stat.Rdev to uint64 before decoding it.
+func newDeviceNode(d string, stat unix.Stat_t) deviceNode {
+	return deviceNode{
+		path:  d,
+		major: unix.Major(uint64(stat.Rdev)),
+		minor: unix.Minor(uint64(stat.Rdev)),
+	}
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices/builder.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices/builder.go
index ed93939a5..6da9a90de 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices/builder.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc/devices/builder.go
@@ -42,7 +42,6 @@ func New(opts ...Option) Devices {
return devices
}
-// Option defines a functional option.
type Option func(*builder)
// WithDeviceToMajor specifies an explicit device name to major number map.
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache/ldcache.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache/ldcache.go
index 7673a49a4..4daf95bce 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache/ldcache.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache/ldcache.go
@@ -22,15 +22,12 @@ import (
"bytes"
"encoding/binary"
"errors"
- "fmt"
"os"
"path/filepath"
- "strings"
"syscall"
"unsafe"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
- "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks"
)
const ldcachePath = "/etc/ld.so.cache"
@@ -81,9 +78,10 @@ type entry2 struct {
}
// LDCache represents the interface for performing lookups into the LDCache
+//
+//go:generate moq -rm -out ldcache_mock.go . LDCache
type LDCache interface {
List() ([]string, []string)
- Lookup(...string) ([]string, []string)
}
type ldcache struct {
@@ -187,7 +185,7 @@ type entry struct {
}
// getEntries returns the entires of the ldcache in a go-friendly struct.
-func (c *ldcache) getEntries(selected func(string) bool) []entry {
+func (c *ldcache) getEntries() []entry {
var entries []entry
for _, e := range c.entries {
bits := 0
@@ -214,9 +212,6 @@ func (c *ldcache) getEntries(selected func(string) bool) []entry {
c.logger.Debugf("Skipping invalid lib")
continue
}
- if !selected(lib) {
- continue
- }
value := bytesToString(c.libs[e.Value:])
if value == "" {
c.logger.Debugf("Skipping invalid value for lib %v", lib)
@@ -227,51 +222,19 @@ func (c *ldcache) getEntries(selected func(string) bool) []entry {
bits: bits,
value: value,
}
-
entries = append(entries, e)
}
-
return entries
}
-// List creates a list of libraires in the ldcache.
+// List creates a list of libraries in the ldcache.
// The 32-bit and 64-bit libraries are returned separately.
func (c *ldcache) List() ([]string, []string) {
- all := func(s string) bool { return true }
-
- return c.resolveSelected(all)
-}
-
-// Lookup searches the ldcache for the specified prefixes.
-// The 32-bit and 64-bit libraries matching the prefixes are returned.
-func (c *ldcache) Lookup(libPrefixes ...string) ([]string, []string) {
- c.logger.Debugf("Looking up %v in cache", libPrefixes)
-
- // We define a functor to check whether a given library name matches any of the prefixes
- matchesAnyPrefix := func(s string) bool {
- for _, p := range libPrefixes {
- if strings.HasPrefix(s, p) {
- return true
- }
- }
- return false
- }
-
- return c.resolveSelected(matchesAnyPrefix)
-}
-
-// resolveSelected process the entries in the LDCach based on the supplied filter and returns the resolved paths.
-// The paths are separated by bittage.
-func (c *ldcache) resolveSelected(selected func(string) bool) ([]string, []string) {
paths := make(map[int][]string)
processed := make(map[string]bool)
- for _, e := range c.getEntries(selected) {
- path, err := c.resolve(e.value)
- if err != nil {
- c.logger.Debugf("Could not resolve entry: %v", err)
- continue
- }
+ for _, e := range c.getEntries() {
+ path := filepath.Join(c.root, e.value)
if processed[path] {
continue
}
@@ -282,29 +245,6 @@ func (c *ldcache) resolveSelected(selected func(string) bool) ([]string, []strin
return paths[32], paths[64]
}
-// resolve resolves the specified ldcache entry based on the value being processed.
-// The input is the name of the entry in the cache.
-func (c *ldcache) resolve(target string) (string, error) {
- name := filepath.Join(c.root, target)
-
- c.logger.Debugf("checking %v", string(name))
-
- link, err := symlinks.Resolve(name)
- if err != nil {
- return "", fmt.Errorf("failed to resolve symlink: %v", err)
- }
- if link == name {
- return name, nil
- }
-
- // We return absolute paths for all targets
- if !filepath.IsAbs(link) || strings.HasPrefix(link, ".") {
- link = filepath.Join(filepath.Dir(target), link)
- }
-
- return c.resolve(link)
-}
-
// bytesToString converts a byte slice to a string.
// This assumes that the byte slice is null-terminated
func bytesToString(value []byte) string {
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache/ldcache_mock.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache/ldcache_mock.go
new file mode 100644
index 000000000..5aa532351
--- /dev/null
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache/ldcache_mock.go
@@ -0,0 +1,67 @@
+// Code generated by moq; DO NOT EDIT.
+// github.com/matryer/moq
+
+package ldcache
+
+import (
+ "sync"
+)
+
+// Ensure, that LDCacheMock does implement LDCache.
+// If this is not the case, regenerate this file with moq.
+var _ LDCache = &LDCacheMock{}
+
+// LDCacheMock is a mock implementation of LDCache.
+//
+// func TestSomethingThatUsesLDCache(t *testing.T) {
+//
+// // make and configure a mocked LDCache
+// mockedLDCache := &LDCacheMock{
+// ListFunc: func() ([]string, []string) {
+// panic("mock out the List method")
+// },
+// }
+//
+// // use mockedLDCache in code that requires LDCache
+// // and then make assertions.
+//
+// }
+type LDCacheMock struct {
+ // ListFunc mocks the List method.
+ ListFunc func() ([]string, []string)
+
+ // calls tracks calls to the methods.
+ calls struct {
+ // List holds details about calls to the List method.
+ List []struct {
+ }
+ }
+ lockList sync.RWMutex
+}
+
+// List calls ListFunc.
+func (mock *LDCacheMock) List() ([]string, []string) {
+ if mock.ListFunc == nil {
+ panic("LDCacheMock.ListFunc: method is nil but LDCache.List was just called")
+ }
+ callInfo := struct {
+ }{}
+ mock.lockList.Lock()
+ mock.calls.List = append(mock.calls.List, callInfo)
+ mock.lockList.Unlock()
+ return mock.ListFunc()
+}
+
+// ListCalls gets all the calls that were made to List.
+// Check the length with:
+//
+// len(mockedLDCache.ListCalls())
+func (mock *LDCacheMock) ListCalls() []struct {
+} {
+ var calls []struct {
+ }
+ mock.lockList.RLock()
+ calls = mock.calls.List
+ mock.lockList.RUnlock()
+ return calls
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/api.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/api.go
index b8db97667..750c64c66 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/api.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/api.go
@@ -24,4 +24,5 @@ type Interface interface {
Infof(string, ...interface{})
Warning(...interface{})
Warningf(string, ...interface{})
+ Tracef(string, ...interface{})
}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/lib.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/lib.go
index 300e925f0..ddb227bfd 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/lib.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/logger/lib.go
@@ -45,3 +45,6 @@ func (l *NullLogger) Warning(...interface{}) {}
// Warningf is a no-op for the null logger
func (l *NullLogger) Warningf(string, ...interface{}) {}
+
+// Tracef is a no-op for the null logger
+func (l *NullLogger) Tracef(string, ...interface{}) {}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/file.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/file.go
index d6fb58259..8f3302731 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/file.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/file.go
@@ -27,49 +27,55 @@ import (
// file can be used to locate file (or file-like elements) at a specified set of
// prefixes. The validity of a file is determined by a filter function.
type file struct {
- logger logger.Interface
- root string
- prefixes []string
- filter func(string) error
- count int
- isOptional bool
+ builder
+ prefixes []string
}
-// Option defines a function for passing options to the NewFileLocator() call
-type Option func(*file)
+// builder defines the builder for a file locator.
+type builder struct {
+ logger logger.Interface
+ root string
+ searchPaths []string
+ filter func(string) error
+ count int
+ isOptional bool
+}
+
+// Option defines a function for passing options to the NewFileLocator() call
+type Option func(*builder)
// WithRoot sets the root for the file locator
func WithRoot(root string) Option {
- return func(f *file) {
+ return func(f *builder) {
f.root = root
}
}
// WithLogger sets the logger for the file locator
func WithLogger(logger logger.Interface) Option {
- return func(f *file) {
+ return func(f *builder) {
f.logger = logger
}
}
// WithSearchPaths sets the search paths for the file locator.
func WithSearchPaths(paths ...string) Option {
- return func(f *file) {
- f.prefixes = paths
+ return func(f *builder) {
+ f.searchPaths = paths
}
}
// WithFilter sets the filter for the file locator
// The filter is called for each candidate file and candidates that return nil are considered.
func WithFilter(assert func(string) error) Option {
- return func(f *file) {
+ return func(f *builder) {
f.filter = assert
}
}
// WithCount sets the maximum number of candidates to discover
func WithCount(count int) Option {
- return func(f *file) {
+ return func(f *builder) {
f.count = count
}
}
@@ -77,32 +83,42 @@ func WithCount(count int) Option {
// WithOptional sets the optional flag for the file locator
// If the optional flag is set, the locator will not return an error if the file is not found.
func WithOptional(optional bool) Option {
- return func(f *file) {
+ return func(f *builder) {
f.isOptional = optional
}
}
-// NewFileLocator creates a Locator that can be used to find files with the specified options.
-func NewFileLocator(opts ...Option) Locator {
- return newFileLocator(opts...)
-}
-
-func newFileLocator(opts ...Option) *file {
- f := &file{}
+func newBuilder(opts ...Option) *builder {
+ o := &builder{}
for _, opt := range opts {
- opt(f)
+ opt(o)
+ }
+ if o.logger == nil {
+ o.logger = logger.New()
}
- if f.logger == nil {
- f.logger = logger.New()
+ if o.filter == nil {
+ o.filter = assertFile
}
- if f.filter == nil {
- f.filter = assertFile
+ return o
+}
+
+func (o builder) build() *file {
+ f := file{
+ builder: o,
+ // Since the `Locate` implementations rely on the root already being specified we update
+ // the prefixes to include the root.
+ prefixes: getSearchPrefixes(o.root, o.searchPaths...),
}
- // Since the `Locate` implementations rely on the root already being specified we update
- // the prefixes to include the root.
- f.prefixes = getSearchPrefixes(f.root, f.prefixes...)
+ return &f
+}
+
+// NewFileLocator creates a Locator that can be used to find files with the specified options.
+func NewFileLocator(opts ...Option) Locator {
+ return newFileLocator(opts...)
+}
- return f
+func newFileLocator(opts ...Option) *file {
+ return newBuilder(opts...).build()
}
// getSearchPrefixes generates a list of unique paths to be searched by a file locator.
@@ -144,6 +160,7 @@ var _ Locator = (*file)(nil)
func (p file) Locate(pattern string) ([]string, error) {
var filenames []string
+ p.logger.Debugf("Locating %q in %v", pattern, p.prefixes)
visit:
for _, prefix := range p.prefixes {
pathPattern := filepath.Join(prefix, pattern)
@@ -168,7 +185,7 @@ visit:
}
if !p.isOptional && len(filenames) == 0 {
- return nil, fmt.Errorf("pattern %v not found", pattern)
+ return nil, fmt.Errorf("pattern %v %w", pattern, ErrNotFound)
}
return filenames, nil
}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/ldcache.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/ldcache.go
new file mode 100644
index 000000000..677dafaa6
--- /dev/null
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/ldcache.go
@@ -0,0 +1,118 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lookup
+
+import (
+ "fmt"
+ "path/filepath"
+ "slices"
+
+ "github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
+)
+
+type ldcacheLocator struct {
+ *builder // shared locator options (logger, root, isOptional, ...)
+ resolvesTo map[string]string // maps each ldcache library path and symlink-chain member to its final target
+}
+
+var _ Locator = (*ldcacheLocator)(nil)
+
+// NewLdcacheLocator creates a Locator that resolves libraries through the 64-bit entries of the ldcache.
+// If the ldcache cannot be loaded, lookups yield nothing (optional) or always fail with ErrNotFound.
+func NewLdcacheLocator(opts ...Option) Locator {
+	b := newBuilder(opts...)
+
+	cache, err := ldcache.New(b.logger, b.root)
+	if err != nil {
+		b.logger.Warningf("Failed to load ldcache: %v", err)
+		if b.isOptional {
+			return &null{}
+		}
+		return &notFound{}
+	}
+
+	chain := NewSymlinkChainLocator(WithOptional(true))
+	resolvesTo := make(map[string]string)
+	_, libs64 := cache.List()
+	for _, library := range libs64 {
+		if _, processed := resolvesTo[library]; processed {
+			continue
+		}
+		candidates, err := chain.Locate(library)
+		if err != nil {
+			b.logger.Errorf("error processing library %s from ldcache: %v", library, err)
+			continue
+		}
+		if len(candidates) == 0 {
+			resolvesTo[library] = library
+			continue
+		}
+
+		// candidates represents a symlink chain.
+		// The first element represents the start of the chain and the last
+		// element the final target.
+		target := candidates[len(candidates)-1]
+		for _, candidate := range candidates {
+			resolvesTo[candidate] = target
+		}
+	}
+
+	return &ldcacheLocator{
+		builder:    b,
+		resolvesTo: resolvesTo,
+	}
+}
+
+// Locate returns the sorted, de-duplicated targets of the cached entries that match libname.
+// An absolute libname is joined to the locator root and matched as a pattern against full entry paths;
+// any other libname is matched as a pattern against the base name of each entry.
+func (l ldcacheLocator) Locate(libname string) ([]string, error) {
+	// The pattern does not depend on the entry, so it is built once.
+	absolute := filepath.IsAbs(libname)
+	pattern := libname
+	if absolute {
+		pattern = filepath.Join(l.root, libname)
+	}
+
+	// Record every target reached from a matching entry exactly once.
+	unique := make(map[string]struct{})
+	for entry, resolved := range l.resolvesTo {
+		subject := entry
+		if !absolute {
+			subject = filepath.Base(entry)
+		}
+		// A malformed pattern is treated as a non-match.
+		if ok, _ := filepath.Match(pattern, subject); !ok {
+			continue
+		}
+		unique[resolved] = struct{}{}
+	}
+
+	// Map iteration order is unspecified; sorting keeps the result stable.
+	var matches []string
+	for resolved := range unique {
+		matches = append(matches, resolved)
+	}
+	slices.Sort(matches)
+
+	// An empty result is only an error when the locator is not optional.
+	if len(matches) == 0 && !l.isOptional {
+		return nil, fmt.Errorf("%s: %w", libname, ErrNotFound)
+	}
+
+	return matches, nil
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/library.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/library.go
index 0b5b7937b..6c403d084 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/library.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/library.go
@@ -16,54 +16,40 @@
package lookup
-import (
- "fmt"
- "strings"
-
- "github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
- "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
-)
-
-type library struct {
- logger logger.Interface
- symlink Locator
- cache ldcache.LDCache
-}
-
-var _ Locator = (*library)(nil)
-
-// NewLibraryLocator creates a library locator using the specified logger.
-func NewLibraryLocator(logger logger.Interface, root string) (Locator, error) {
- cache, err := ldcache.New(logger, root)
- if err != nil {
- return nil, fmt.Errorf("error loading ldcache: %v", err)
- }
-
- l := library{
- logger: logger,
- symlink: NewSymlinkLocator(WithLogger(logger), WithRoot(root)),
- cache: cache,
- }
-
- return &l, nil
-}
-
-// Locate finds the specified libraryname.
-// If the input is a library name, the ldcache is searched otherwise the
-// provided path is resolved as a symlink.
-func (l library) Locate(libname string) ([]string, error) {
- if strings.Contains(libname, "/") {
- return l.symlink.Locate(libname)
- }
-
- paths32, paths64 := l.cache.Lookup(libname)
- if len(paths32) > 0 {
- l.logger.Warningf("Ignoring 32-bit libraries for %v: %v", libname, paths32)
- }
-
- if len(paths64) == 0 {
- return nil, fmt.Errorf("64-bit library %v not found", libname)
+// NewLibraryLocator creates a library locator using the specified options.
+func NewLibraryLocator(opts ...Option) Locator {
+ b := newBuilder(opts...)
+
+ // If search paths are already specified, we return a locator for the specified search paths.
+ if len(b.searchPaths) > 0 {
+ return NewSymlinkLocator(
+ WithLogger(b.logger),
+ WithSearchPaths(b.searchPaths...),
+ WithRoot("/"),
+ )
}
- return paths64, nil
+ opts = append(opts,
+ WithSearchPaths([]string{
+ "/",
+ "/usr/lib64",
+ "/usr/lib/x86_64-linux-gnu",
+ "/usr/lib/aarch64-linux-gnu",
+ "/usr/lib/x86_64-linux-gnu/nvidia/current",
+ "/usr/lib/aarch64-linux-gnu/nvidia/current",
+ "/lib64",
+ "/lib/x86_64-linux-gnu",
+ "/lib/aarch64-linux-gnu",
+ "/lib/x86_64-linux-gnu/nvidia/current",
+ "/lib/aarch64-linux-gnu/nvidia/current",
+ }...),
+ )
+ // We construct a symlink locator for expected library locations.
+ symlinkLocator := NewSymlinkLocator(opts...)
+
+ l := First(
+ symlinkLocator,
+ NewLdcacheLocator(opts...),
+ )
+ return l
}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/locator.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/locator.go
index 871e1b025..73ade2322 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/locator.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/locator.go
@@ -16,9 +16,14 @@
package lookup
+import "errors"
+
//go:generate moq -stub -out locator_mock.go . Locator
// Locator defines the interface for locating files on a system.
type Locator interface {
Locate(string) ([]string, error)
}
+
+// ErrNotFound indicates that a specified pattern or file could not be found.
+var ErrNotFound = errors.New("not found")
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/merge.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/merge.go
new file mode 100644
index 000000000..fa20b5125
--- /dev/null
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/merge.go
@@ -0,0 +1,53 @@
+/**
+# Copyright 2023 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lookup
+
+import "errors"
+
+type first []Locator // locators consulted in order by Locate
+
+// First combines the non-nil locators into one that yields the first non-empty match.
+func First(locators ...Locator) Locator {
+	var chain first
+	for i := range locators {
+		// nil entries are dropped up front.
+		if candidate := locators[i]; candidate != nil {
+			chain = append(chain, candidate)
+		}
+	}
+	return chain
+}
+
+// Locate queries each locator in order and returns the first non-empty, error-free result.
+// If none produces a match, the errors collected along the way (if any) are joined and returned.
+func (f first) Locate(pattern string) ([]string, error) {
+	var failures []error
+	for _, locator := range f {
+		if locator == nil {
+			continue
+		}
+		found, err := locator.Locate(pattern)
+		switch {
+		case err != nil:
+			failures = append(failures, err)
+		case len(found) > 0:
+			return found, nil
+		}
+	}
+
+	return nil, errors.Join(failures...)
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/null.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/null.go
new file mode 100644
index 000000000..938e481b4
--- /dev/null
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/null.go
@@ -0,0 +1,36 @@
+/**
+# Copyright 2024 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+**/
+
+package lookup
+
+import "fmt"
+
+// null is a locator that never finds anything and never fails.
+type null struct{}
+
+// Locate ignores its argument and reports no matches and no error,
+// regardless of what was requested.
+func (*null) Locate(string) ([]string, error) {
+	return nil, nil
+}
+
+// notFound is a locator whose every lookup fails with ErrNotFound.
+type notFound struct{}
+
+// Locate returns an error wrapping ErrNotFound, prefixed with the requested name.
+func (*notFound) Locate(name string) ([]string, error) {
+	return nil, fmt.Errorf("%s: %w", name, ErrNotFound)
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks.go
index 002783cbe..c9bab069d 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks.go
@@ -62,6 +62,7 @@ func (p symlinkChain) Locate(pattern string) ([]string, error) {
return candidates, nil
}
+ var filenames []string
found := make(map[string]bool)
for len(candidates) > 0 {
candidate := candidates[0]
@@ -70,6 +71,7 @@ func (p symlinkChain) Locate(pattern string) ([]string, error) {
continue
}
found[candidate] = true
+ filenames = append(filenames, candidate)
target, err := symlinks.Resolve(candidate)
if err != nil {
@@ -88,11 +90,6 @@ func (p symlinkChain) Locate(pattern string) ([]string, error) {
candidates = append(candidates, target)
}
}
-
- var filenames []string
- for f := range found {
- filenames = append(filenames, f)
- }
return filenames, nil
}
@@ -103,14 +100,19 @@ func (p symlink) Locate(pattern string) ([]string, error) {
if err != nil {
return nil, err
}
- if len(candidates) != 1 {
- return nil, fmt.Errorf("failed to uniquely resolve symlink %v: %v", pattern, candidates)
- }
- target, err := filepath.EvalSymlinks(candidates[0])
- if err != nil {
- return nil, fmt.Errorf("failed to resolve link: %v", err)
+ var targets []string
+ seen := make(map[string]bool)
+ for _, candidate := range candidates {
+ target, err := filepath.EvalSymlinks(candidate)
+ if err != nil {
+ return nil, fmt.Errorf("failed to resolve link: %w", err)
+ }
+ if seen[target] {
+ continue
+ }
+ seen[target] = true
+ targets = append(targets, target)
}
-
- return []string{target}, err
+ return targets, err
}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks/symlink.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks/symlink.go
index 991d47cb6..f9151a2f2 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks/symlink.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/symlinks/symlink.go
@@ -25,7 +25,7 @@ import (
func Resolve(filename string) (string, error) {
info, err := os.Lstat(filename)
if err != nil {
- return filename, fmt.Errorf("failed to get file info: %v", info)
+ return filename, fmt.Errorf("failed to get file info: %w", err)
}
if info.Mode()&os.ModeSymlink == 0 {
return filename, nil
@@ -33,3 +33,18 @@ func Resolve(filename string) (string, error) {
return os.Readlink(filename)
}
+
+// ForceCreate creates a specified symlink.
+// If a file (or empty directory) exists at the path it is removed.
+func ForceCreate(target string, link string) error {
+ _, err := os.Lstat(link)
+ if err != nil && !os.IsNotExist(err) {
+ return fmt.Errorf("failed to get file info: %w", err)
+ }
+ if !os.IsNotExist(err) {
+ if err := os.Remove(link); err != nil {
+ return fmt.Errorf("failed to remove existing file: %w", err)
+ }
+ }
+ return os.Symlink(target, link)
+}
diff --git a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices/mknod.go b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices/mknod.go
index e5990ea01..88a7aa441 100644
--- a/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices/mknod.go
+++ b/vendor/github.com/NVIDIA/nvidia-container-toolkit/internal/system/nvdevices/mknod.go
@@ -17,8 +17,9 @@
package nvdevices
import (
- "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"golang.org/x/sys/unix"
+
+ "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)
//go:generate moq -stub -out mknod_mock.go . mknoder
diff --git a/vendor/github.com/cespare/xxhash/v2/README.md b/vendor/github.com/cespare/xxhash/v2/README.md
index 8bf0e5b78..33c88305c 100644
--- a/vendor/github.com/cespare/xxhash/v2/README.md
+++ b/vendor/github.com/cespare/xxhash/v2/README.md
@@ -70,3 +70,5 @@ benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
- [FreeCache](https://github.com/coocood/freecache)
- [FastCache](https://github.com/VictoriaMetrics/fastcache)
+- [Ristretto](https://github.com/dgraph-io/ristretto)
+- [Badger](https://github.com/dgraph-io/badger)
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash.go b/vendor/github.com/cespare/xxhash/v2/xxhash.go
index a9e0d45c9..78bddf1ce 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash.go
@@ -19,10 +19,13 @@ const (
// Store the primes in an array as well.
//
// The consts are used when possible in Go code to avoid MOVs but we need a
-// contiguous array of the assembly code.
+// contiguous array for the assembly code.
var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
// Digest implements hash.Hash64.
+//
+// Note that a zero-valued Digest is not ready to receive writes.
+// Call Reset or create a Digest using New before calling other methods.
type Digest struct {
v1 uint64
v2 uint64
@@ -33,19 +36,31 @@ type Digest struct {
n int // how much of mem is used
}
-// New creates a new Digest that computes the 64-bit xxHash algorithm.
+// New creates a new Digest with a zero seed.
func New() *Digest {
+ return NewWithSeed(0)
+}
+
+// NewWithSeed creates a new Digest with the given seed.
+func NewWithSeed(seed uint64) *Digest {
var d Digest
- d.Reset()
+ d.ResetWithSeed(seed)
return &d
}
// Reset clears the Digest's state so that it can be reused.
+// It uses a seed value of zero.
func (d *Digest) Reset() {
- d.v1 = primes[0] + prime2
- d.v2 = prime2
- d.v3 = 0
- d.v4 = -primes[0]
+ d.ResetWithSeed(0)
+}
+
+// ResetWithSeed clears the Digest's state so that it can be reused.
+// It uses the given seed to initialize the state.
+func (d *Digest) ResetWithSeed(seed uint64) {
+ d.v1 = seed + prime1 + prime2
+ d.v2 = seed + prime2
+ d.v3 = seed
+ d.v4 = seed - prime1
d.total = 0
d.n = 0
}
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go
index 9216e0a40..78f95f256 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go
@@ -6,7 +6,7 @@
package xxhash
-// Sum64 computes the 64-bit xxHash digest of b.
+// Sum64 computes the 64-bit xxHash digest of b with a zero seed.
//
//go:noescape
func Sum64(b []byte) uint64
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
index 26df13bba..118e49e81 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
@@ -3,7 +3,7 @@
package xxhash
-// Sum64 computes the 64-bit xxHash digest of b.
+// Sum64 computes the 64-bit xxHash digest of b with a zero seed.
func Sum64(b []byte) uint64 {
// A simpler version would be
// d := New()
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
index e86f1b5fd..05f5e7dfe 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
@@ -5,7 +5,7 @@
package xxhash
-// Sum64String computes the 64-bit xxHash digest of s.
+// Sum64String computes the 64-bit xxHash digest of s with a zero seed.
func Sum64String(s string) uint64 {
return Sum64([]byte(s))
}
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
index 1c1638fd8..cf9d42aed 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
@@ -33,7 +33,7 @@ import (
//
// See https://github.com/golang/go/issues/42739 for discussion.
-// Sum64String computes the 64-bit xxHash digest of s.
+// Sum64String computes the 64-bit xxHash digest of s with a zero seed.
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
func Sum64String(s string) uint64 {
b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))
diff --git a/vendor/github.com/containerd/containerd/archive/compression/compression.go b/vendor/github.com/containerd/containerd/archive/compression/compression.go
index ceceb21f5..31bbe4124 100644
--- a/vendor/github.com/containerd/containerd/archive/compression/compression.go
+++ b/vendor/github.com/containerd/containerd/archive/compression/compression.go
@@ -25,12 +25,12 @@ import (
"fmt"
"io"
"os"
+ "os/exec"
"strconv"
"sync"
"github.com/containerd/containerd/log"
"github.com/klauspost/compress/zstd"
- exec "golang.org/x/sys/execabs"
)
type (
diff --git a/vendor/github.com/containerd/containerd/content/helpers.go b/vendor/github.com/containerd/containerd/content/helpers.go
index 5404109a6..147005413 100644
--- a/vendor/github.com/containerd/containerd/content/helpers.go
+++ b/vendor/github.com/containerd/containerd/content/helpers.go
@@ -332,3 +332,14 @@ func copyWithBuffer(dst io.Writer, src io.Reader) (written int64, err error) {
}
return
}
+
+// Exists returns whether an attempt to access the content would not error out
+// with an ErrNotFound error. It will return an encountered error if it was
+// different than ErrNotFound.
+func Exists(ctx context.Context, provider InfoProvider, desc ocispec.Descriptor) (bool, error) {
+ _, err := provider.Info(ctx, desc.Digest)
+ if errdefs.IsNotFound(err) {
+ return false, nil
+ }
+ return err == nil, err
+}
diff --git a/vendor/github.com/containerd/containerd/version/version.go b/vendor/github.com/containerd/containerd/version/version.go
index 45767163c..c01bc57e8 100644
--- a/vendor/github.com/containerd/containerd/version/version.go
+++ b/vendor/github.com/containerd/containerd/version/version.go
@@ -23,7 +23,7 @@ var (
Package = "github.com/containerd/containerd"
// Version holds the complete version number. Filled in at linking time.
- Version = "1.7.11+unknown"
+ Version = "1.7.12+unknown"
// Revision is filled with the VCS (e.g. git) revision being used to build
// the program at linking time.
diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go
new file mode 100644
index 000000000..0ec4b12c7
--- /dev/null
+++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/debug.go
@@ -0,0 +1,62 @@
+package md2man
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "strings"
+
+ "github.com/russross/blackfriday/v2"
+)
+
+func fmtListFlags(flags blackfriday.ListType) string {
+ knownFlags := []struct {
+ name string
+ flag blackfriday.ListType
+ }{
+ {"ListTypeOrdered", blackfriday.ListTypeOrdered},
+ {"ListTypeDefinition", blackfriday.ListTypeDefinition},
+ {"ListTypeTerm", blackfriday.ListTypeTerm},
+ {"ListItemContainsBlock", blackfriday.ListItemContainsBlock},
+ {"ListItemBeginningOfList", blackfriday.ListItemBeginningOfList},
+ {"ListItemEndOfList", blackfriday.ListItemEndOfList},
+ }
+
+ var f []string
+ for _, kf := range knownFlags {
+ if flags&kf.flag != 0 {
+ f = append(f, kf.name)
+ flags &^= kf.flag
+ }
+ }
+ if flags != 0 {
+ f = append(f, fmt.Sprintf("Unknown(%#x)", flags))
+ }
+ return strings.Join(f, "|")
+}
+
+type debugDecorator struct {
+ blackfriday.Renderer
+}
+
+func depth(node *blackfriday.Node) int {
+ d := 0
+ for n := node.Parent; n != nil; n = n.Parent {
+ d++
+ }
+ return d
+}
+
+func (d *debugDecorator) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
+ fmt.Fprintf(os.Stderr, "%s%s %v %v\n",
+ strings.Repeat(" ", depth(node)),
+ map[bool]string{true: "+", false: "-"}[entering],
+ node,
+ fmtListFlags(node.ListFlags))
+ var b strings.Builder
+ status := d.Renderer.RenderNode(io.MultiWriter(&b, w), node, entering)
+ if b.Len() > 0 {
+ fmt.Fprintf(os.Stderr, ">> %q\n", b.String())
+ }
+ return status
+}
diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
index 42bf32aab..62d91b77d 100644
--- a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
+++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/md2man.go
@@ -1,16 +1,23 @@
package md2man
import (
+ "os"
+ "strconv"
+
"github.com/russross/blackfriday/v2"
)
// Render converts a markdown document into a roff formatted document.
func Render(doc []byte) []byte {
renderer := NewRoffRenderer()
+ var r blackfriday.Renderer = renderer
+ if v, _ := strconv.ParseBool(os.Getenv("MD2MAN_DEBUG")); v {
+ r = &debugDecorator{Renderer: r}
+ }
return blackfriday.Run(doc,
[]blackfriday.Option{
- blackfriday.WithRenderer(renderer),
+ blackfriday.WithRenderer(r),
blackfriday.WithExtensions(renderer.GetExtensions()),
}...)
}
diff --git a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
index 4b19188d9..9d6c473fd 100644
--- a/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
+++ b/vendor/github.com/cpuguy83/go-md2man/v2/md2man/roff.go
@@ -1,6 +1,7 @@
package md2man
import (
+ "bufio"
"bytes"
"fmt"
"io"
@@ -13,68 +14,72 @@ import (
// roffRenderer implements the blackfriday.Renderer interface for creating
// roff format (manpages) from markdown text
type roffRenderer struct {
- extensions blackfriday.Extensions
listCounters []int
firstHeader bool
- firstDD bool
listDepth int
}
const (
- titleHeader = ".TH "
- topLevelHeader = "\n\n.SH "
- secondLevelHdr = "\n.SH "
- otherHeader = "\n.SS "
- crTag = "\n"
- emphTag = "\\fI"
- emphCloseTag = "\\fP"
- strongTag = "\\fB"
- strongCloseTag = "\\fP"
- breakTag = "\n.br\n"
- paraTag = "\n.PP\n"
- hruleTag = "\n.ti 0\n\\l'\\n(.lu'\n"
- linkTag = "\n\\[la]"
- linkCloseTag = "\\[ra]"
- codespanTag = "\\fB"
- codespanCloseTag = "\\fR"
- codeTag = "\n.EX\n"
- codeCloseTag = "\n.EE\n"
- quoteTag = "\n.PP\n.RS\n"
- quoteCloseTag = "\n.RE\n"
- listTag = "\n.RS\n"
- listCloseTag = "\n.RE\n"
- dtTag = "\n.TP\n"
- dd2Tag = "\n"
- tableStart = "\n.TS\nallbox;\n"
- tableEnd = ".TE\n"
- tableCellStart = "T{\n"
- tableCellEnd = "\nT}\n"
+ titleHeader = ".TH "
+ topLevelHeader = "\n\n.SH "
+ secondLevelHdr = "\n.SH "
+ otherHeader = "\n.SS "
+ crTag = "\n"
+ emphTag = "\\fI"
+ emphCloseTag = "\\fP"
+ strongTag = "\\fB"
+ strongCloseTag = "\\fP"
+ breakTag = "\n.br\n"
+ paraTag = "\n.PP\n"
+ hruleTag = "\n.ti 0\n\\l'\\n(.lu'\n"
+ linkTag = "\n\\[la]"
+ linkCloseTag = "\\[ra]"
+ codespanTag = "\\fB"
+ codespanCloseTag = "\\fR"
+ codeTag = "\n.EX\n"
+ codeCloseTag = ".EE\n" // Do not prepend a newline character since code blocks, by definition, already end with a newline (at least in the form blackfriday gives them to us).
+ quoteTag = "\n.PP\n.RS\n"
+ quoteCloseTag = "\n.RE\n"
+ listTag = "\n.RS\n"
+ listCloseTag = ".RE\n"
+ dtTag = "\n.TP\n"
+ dd2Tag = "\n"
+ tableStart = "\n.TS\nallbox;\n"
+ tableEnd = ".TE\n"
+ tableCellStart = "T{\n"
+ tableCellEnd = "\nT}\n"
+ tablePreprocessor = `'\" t`
)
// NewRoffRenderer creates a new blackfriday Renderer for generating roff documents
// from markdown
func NewRoffRenderer() *roffRenderer { // nolint: golint
- var extensions blackfriday.Extensions
-
- extensions |= blackfriday.NoIntraEmphasis
- extensions |= blackfriday.Tables
- extensions |= blackfriday.FencedCode
- extensions |= blackfriday.SpaceHeadings
- extensions |= blackfriday.Footnotes
- extensions |= blackfriday.Titleblock
- extensions |= blackfriday.DefinitionLists
- return &roffRenderer{
- extensions: extensions,
- }
+ return &roffRenderer{}
}
// GetExtensions returns the list of extensions used by this renderer implementation
-func (r *roffRenderer) GetExtensions() blackfriday.Extensions {
- return r.extensions
+func (*roffRenderer) GetExtensions() blackfriday.Extensions {
+ return blackfriday.NoIntraEmphasis |
+ blackfriday.Tables |
+ blackfriday.FencedCode |
+ blackfriday.SpaceHeadings |
+ blackfriday.Footnotes |
+ blackfriday.Titleblock |
+ blackfriday.DefinitionLists
}
// RenderHeader handles outputting the header at document start
func (r *roffRenderer) RenderHeader(w io.Writer, ast *blackfriday.Node) {
+ // We need to walk the tree to check if there are any tables.
+ // If there are, we need to enable the roff table preprocessor.
+ ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
+ if node.Type == blackfriday.Table {
+ out(w, tablePreprocessor+"\n")
+ return blackfriday.Terminate
+ }
+ return blackfriday.GoToNext
+ })
+
// disable hyphenation
out(w, ".nh\n")
}
@@ -91,7 +96,23 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering
switch node.Type {
case blackfriday.Text:
- escapeSpecialChars(w, node.Literal)
+ // Special case: format the NAME section as required for proper whatis parsing.
+ // Refer to the lexgrog(1) and groff_man(7) manual pages for details.
+ if node.Parent != nil &&
+ node.Parent.Type == blackfriday.Paragraph &&
+ node.Parent.Prev != nil &&
+ node.Parent.Prev.Type == blackfriday.Heading &&
+ node.Parent.Prev.FirstChild != nil &&
+ bytes.EqualFold(node.Parent.Prev.FirstChild.Literal, []byte("NAME")) {
+ before, after, found := bytes.Cut(node.Literal, []byte(" - "))
+ escapeSpecialChars(w, before)
+ if found {
+ out(w, ` \- `)
+ escapeSpecialChars(w, after)
+ }
+ } else {
+ escapeSpecialChars(w, node.Literal)
+ }
case blackfriday.Softbreak:
out(w, crTag)
case blackfriday.Hardbreak:
@@ -129,14 +150,25 @@ func (r *roffRenderer) RenderNode(w io.Writer, node *blackfriday.Node, entering
case blackfriday.Document:
break
case blackfriday.Paragraph:
- // roff .PP markers break lists
- if r.listDepth > 0 {
- return blackfriday.GoToNext
- }
if entering {
- out(w, paraTag)
+ if r.listDepth > 0 {
+ // roff .PP markers break lists
+ if node.Prev != nil { // continued paragraph
+ if node.Prev.Type == blackfriday.List && node.Prev.ListFlags&blackfriday.ListTypeDefinition == 0 {
+ out(w, ".IP\n")
+ } else {
+ out(w, crTag)
+ }
+ }
+ } else if node.Prev != nil && node.Prev.Type == blackfriday.Heading {
+ out(w, crTag)
+ } else {
+ out(w, paraTag)
+ }
} else {
- out(w, crTag)
+ if node.Next == nil || node.Next.Type != blackfriday.List {
+ out(w, crTag)
+ }
}
case blackfriday.BlockQuote:
if entering {
@@ -199,6 +231,10 @@ func (r *roffRenderer) handleHeading(w io.Writer, node *blackfriday.Node, enteri
func (r *roffRenderer) handleList(w io.Writer, node *blackfriday.Node, entering bool) {
openTag := listTag
closeTag := listCloseTag
+ if (entering && r.listDepth == 0) || (!entering && r.listDepth == 1) {
+ openTag = crTag
+ closeTag = ""
+ }
if node.ListFlags&blackfriday.ListTypeDefinition != 0 {
// tags for definition lists handled within Item node
openTag = ""
@@ -227,23 +263,25 @@ func (r *roffRenderer) handleItem(w io.Writer, node *blackfriday.Node, entering
} else if node.ListFlags&blackfriday.ListTypeTerm != 0 {
// DT (definition term): line just before DD (see below).
out(w, dtTag)
- r.firstDD = true
} else if node.ListFlags&blackfriday.ListTypeDefinition != 0 {
// DD (definition description): line that starts with ": ".
//
// We have to distinguish between the first DD and the
// subsequent ones, as there should be no vertical
// whitespace between the DT and the first DD.
- if r.firstDD {
- r.firstDD = false
- } else {
- out(w, dd2Tag)
+ if node.Prev != nil && node.Prev.ListFlags&(blackfriday.ListTypeTerm|blackfriday.ListTypeDefinition) == blackfriday.ListTypeDefinition {
+ if node.Prev.Type == blackfriday.Item &&
+ node.Prev.LastChild != nil &&
+ node.Prev.LastChild.Type == blackfriday.List &&
+ node.Prev.LastChild.ListFlags&blackfriday.ListTypeDefinition == 0 {
+ out(w, ".IP\n")
+ } else {
+ out(w, dd2Tag)
+ }
}
} else {
out(w, ".IP \\(bu 2\n")
}
- } else {
- out(w, "\n")
}
}
@@ -322,6 +360,28 @@ func out(w io.Writer, output string) {
}
func escapeSpecialChars(w io.Writer, text []byte) {
+ scanner := bufio.NewScanner(bytes.NewReader(text))
+
+ // count the number of lines in the text
+ // we need to know this to avoid adding a newline after the last line
+ n := bytes.Count(text, []byte{'\n'})
+ idx := 0
+
+ for scanner.Scan() {
+ dt := scanner.Bytes()
+ if idx < n {
+ idx++
+ dt = append(dt, '\n')
+ }
+ escapeSpecialCharsLine(w, dt)
+ }
+
+ if err := scanner.Err(); err != nil {
+ panic(err)
+ }
+}
+
+func escapeSpecialCharsLine(w io.Writer, text []byte) {
for i := 0; i < len(text); i++ {
// escape initial apostrophe or period
if len(text) >= 1 && (text[0] == '\'' || text[0] == '.') {
diff --git a/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
new file mode 100644
index 000000000..7436896e1
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/CHANGELOG.md
@@ -0,0 +1,138 @@
+# Changelog #
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/)
+and this project adheres to [Semantic Versioning](http://semver.org/).
+
+## [Unreleased] ##
+
+## [0.3.1] - 2024-07-23 ##
+
+### Changed ###
+- By allowing `Open(at)InRoot` to opt-out of the extra work done by `MkdirAll`
+ to do the necessary "partial lookups", `Open(at)InRoot` now does less work
+ for both implementations (resulting in a many-fold decrease in the number of
+ operations for `openat2`, and a modest improvement for non-`openat2`) and is
+ far more guaranteed to match the correct `openat2(RESOLVE_IN_ROOT)`
+ behaviour.
+- We now use `readlinkat(fd, "")` where possible. For `Open(at)InRoot` this
+ effectively just means that we no longer risk getting spurious errors during
+ rename races. However, for our hardened procfs handler, this in theory should
+ prevent mount attacks from tricking us when doing magic-link readlinks (even
+ when using the unsafe host `/proc` handle). Unfortunately `Reopen` is still
+ potentially vulnerable to those kinds of somewhat-esoteric attacks.
+
+ Technically this [will only work on post-2.6.39 kernels][linux-readlinkat-emptypath]
+ but it seems incredibly unlikely anyone is using `filepath-securejoin` on a
+ pre-2011 kernel.
+
+### Fixed ###
+- Several improvements were made to the errors returned by `Open(at)InRoot` and
+ `MkdirAll` when dealing with invalid paths under the emulated (ie.
+ non-`openat2`) implementation. Previously, some paths would return the wrong
+ error (`ENOENT` when the last component was a non-directory), and other paths
+ would be returned as though they were acceptable (trailing-slash components
+ after a non-directory would be ignored by `Open(at)InRoot`).
+
+ These changes were made to match `openat2`'s behaviour and are purely a
+ consistency fix (most users are going to be using `openat2` anyway).
+
+[linux-readlinkat-emptypath]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=65cfc6722361570bfe255698d9cd4dccaf47570d
+
+## [0.3.0] - 2024-07-11 ##
+
+### Added ###
+- A new set of `*os.File`-based APIs have been added. These are adapted from
+ [libpathrs][] and we strongly suggest using them if possible (as they provide
+ far more protection against attacks than `SecureJoin`):
+
+ - `Open(at)InRoot` resolves a path inside a rootfs and returns an `*os.File`
+ handle to the path. Note that the handle returned is an `O_PATH` handle,
+ which cannot be used for reading or writing (as well as some other
+ operations -- [see open(2) for more details][open.2])
+
+ - `Reopen` takes an `O_PATH` file handle and safely re-opens it to upgrade
+ it to a regular handle. This can also be used with non-`O_PATH` handles,
+ but `O_PATH` is the most obvious application.
+
+ - `MkdirAll` is an implementation of `os.MkdirAll` that is safe to use to
+ create a directory tree within a rootfs.
+
+ As these are new APIs, they may change in the future. However, they should be
+ safe to start migrating to as we have extensive tests ensuring they behave
+ correctly and are safe against various races and other attacks.
+
+[libpathrs]: https://github.com/openSUSE/libpathrs
+[open.2]: https://www.man7.org/linux/man-pages/man2/open.2.html
+
+## [0.2.5] - 2024-05-03 ##
+
+### Changed ###
+- Some minor changes were made to how lexical components (like `..` and `.`)
+ are handled during path generation in `SecureJoin`. There is no behaviour
+ change as a result of this fix (the resulting paths are the same).
+
+### Fixed ###
+- The error returned when we hit a symlink loop now references the correct
+ path. (#10)
+
+## [0.2.4] - 2023-09-06 ##
+
+### Security ###
+- This release fixes a potential security issue in filepath-securejoin when
+ used on Windows ([GHSA-6xv5-86q9-7xr8][], which could be used to generate
+ paths outside of the provided rootfs in certain cases), as well as improving
+ the overall behaviour of filepath-securejoin when dealing with Windows paths
+ that contain volume names. Thanks to Paulo Gomes for discovering and fixing
+ these issues.
+
+### Fixed ###
+- Switch to GitHub Actions for CI so we can test on Windows as well as Linux
+ and MacOS.
+
+[GHSA-6xv5-86q9-7xr8]: https://github.com/advisories/GHSA-6xv5-86q9-7xr8
+
+## [0.2.3] - 2021-06-04 ##
+
+### Changed ###
+- Switch to Go 1.13-style `%w` error wrapping, letting us drop the dependency
+ on `github.com/pkg/errors`.
+
+## [0.2.2] - 2018-09-05 ##
+
+### Changed ###
+- Use `syscall.ELOOP` as the base error for symlink loops, rather than our own
+ (internal) error. This allows callers to more easily use `errors.Is` to check
+ for this case.
+
+## [0.2.1] - 2018-09-05 ##
+
+### Fixed ###
+- Use our own `IsNotExist` implementation, which lets us handle `ENOTDIR`
+ properly within `SecureJoin`.
+
+## [0.2.0] - 2017-07-19 ##
+
+We now have 100% test coverage!
+
+### Added ###
+- Add a `SecureJoinVFS` API that can be used for mocking (as we do in our new
+ tests) or for implementing custom handling of lookup operations (such as for
+ rootless containers, where work is necessary to access directories with weird
+ modes because we don't have `CAP_DAC_READ_SEARCH` or `CAP_DAC_OVERRIDE`).
+
+## 0.1.0 - 2017-07-19
+
+This is our first release of `github.com/cyphar/filepath-securejoin`,
+containing a full implementation with a coverage of 93.5% (the only missing
+cases are the error cases, which are hard to mocktest at the moment).
+
+[Unreleased]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.1...HEAD
+[0.3.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.3.0...v0.3.1
+[0.3.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.5...v0.3.0
+[0.2.5]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.4...v0.2.5
+[0.2.4]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.3...v0.2.4
+[0.2.3]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.2...v0.2.3
+[0.2.2]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.1...v0.2.2
+[0.2.1]: https://github.com/cyphar/filepath-securejoin/compare/v0.2.0...v0.2.1
+[0.2.0]: https://github.com/cyphar/filepath-securejoin/compare/v0.1.0...v0.2.0
diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE b/vendor/github.com/cyphar/filepath-securejoin/LICENSE
index bec842f29..cb1ab88da 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/LICENSE
+++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE
@@ -1,5 +1,5 @@
Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
-Copyright (C) 2017 SUSE LLC. All rights reserved.
+Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
diff --git a/vendor/github.com/cyphar/filepath-securejoin/README.md b/vendor/github.com/cyphar/filepath-securejoin/README.md
index 4eca0f235..253956f86 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/README.md
+++ b/vendor/github.com/cyphar/filepath-securejoin/README.md
@@ -2,31 +2,24 @@
[![Build Status](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml/badge.svg)](https://github.com/cyphar/filepath-securejoin/actions/workflows/ci.yml)
-An implementation of `SecureJoin`, a [candidate for inclusion in the Go
-standard library][go#20126]. The purpose of this function is to be a "secure"
-alternative to `filepath.Join`, and in particular it provides certain
-guarantees that are not provided by `filepath.Join`.
-
-> **NOTE**: This code is *only* safe if you are not at risk of other processes
-> modifying path components after you've used `SecureJoin`. If it is possible
-> for a malicious process to modify path components of the resolved path, then
-> you will be vulnerable to some fairly trivial TOCTOU race conditions. [There
-> are some Linux kernel patches I'm working on which might allow for a better
-> solution.][lwn-obeneath]
->
-> In addition, with a slightly modified API it might be possible to use
-> `O_PATH` and verify that the opened path is actually the resolved one -- but
-> I have not done that yet. I might add it in the future as a helper function
-> to help users verify the path (we can't just return `/proc/self/fd/`
-> because that doesn't always work transparently for all users).
-
-This is the function prototype:
+### Old API ###
-```go
-func SecureJoin(root, unsafePath string) (string, error)
-```
+This library was originally just an implementation of `SecureJoin` which was
+[intended to be included in the Go standard library][go#20126] as a safer
+`filepath.Join` that would restrict the path lookup to be inside a root
+directory.
+
+The implementation was based on code that existed in several container
+runtimes. Unfortunately, this API is **fundamentally unsafe** against attackers
+that can modify path components after `SecureJoin` returns and before the
+caller uses the path, allowing for some fairly trivial TOCTOU attacks.
+
+`SecureJoin` (and `SecureJoinVFS`) are still provided by this library to
+support legacy users, but new users are strongly suggested to avoid using
+`SecureJoin` and instead use the [new api](#new-api) or switch to
+[libpathrs][libpathrs].
-This library **guarantees** the following:
+With the above limitations in mind, this library guarantees the following:
* If no error is set, the resulting string **must** be a child path of
`root` and will not contain any symlink path components (they will all be
@@ -47,7 +40,7 @@ This library **guarantees** the following:
A (trivial) implementation of this function on GNU/Linux systems could be done
with the following (note that this requires root privileges and is far more
opaque than the implementation in this library, and also requires that
-`readlink` is inside the `root` path):
+`readlink` is inside the `root` path and is trustworthy):
```go
package securejoin
@@ -70,9 +63,105 @@ func SecureJoin(root, unsafePath string) (string, error) {
}
```
-[lwn-obeneath]: https://lwn.net/Articles/767547/
+[libpathrs]: https://github.com/openSUSE/libpathrs
[go#20126]: https://github.com/golang/go/issues/20126
+### New API ###
+
+While we recommend users switch to [libpathrs][libpathrs] as soon as it has a
+stable release, some methods implemented by libpathrs have been ported to this
+library to ease the transition. These APIs are only supported on Linux.
+
+These APIs are implemented such that `filepath-securejoin` will
+opportunistically use certain newer kernel APIs that make these operations far
+more secure. In particular:
+
+* All of the lookup operations will use [`openat2`][openat2.2] on new enough
+ kernels (Linux 5.6 or later) to restrict lookups through magic-links and
+ bind-mounts (for certain operations) and to make use of `RESOLVE_IN_ROOT` to
+ efficiently resolve symlinks within a rootfs.
+
+* The APIs provide hardening against a malicious `/proc` mount to either detect
+ or avoid being tricked by a `/proc` that is not legitimate. This is done
+ using [`openat2`][openat2.2] for all users, and privileged users will also be
+ further protected by using [`fsopen`][fsopen.2] and [`open_tree`][open_tree.2]
+ (Linux 5.2 or later).
+
+[openat2.2]: https://www.man7.org/linux/man-pages/man2/openat2.2.html
+[fsopen.2]: https://github.com/brauner/man-pages-md/blob/main/fsopen.md
+[open_tree.2]: https://github.com/brauner/man-pages-md/blob/main/open_tree.md
+
+#### `OpenInRoot` ####
+
+```go
+func OpenInRoot(root, unsafePath string) (*os.File, error)
+func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error)
+func Reopen(handle *os.File, flags int) (*os.File, error)
+```
+
+`OpenInRoot` is a much safer version of
+
+```go
+path, err := securejoin.SecureJoin(root, unsafePath)
+file, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
+```
+
+that protects against various race attacks that could lead to serious security
+issues, depending on the application. Note that the returned `*os.File` is an
+`O_PATH` file descriptor, which is quite restricted. Callers will probably need
+to use `Reopen` to get a more usable handle (this split is done to provide
+useful features like PTY spawning and to avoid users accidentally opening bad
+inodes that could cause a DoS).
+
+Callers need to be careful in how they use the returned `*os.File`. Usually it
+is only safe to operate on the handle directly, and it is very easy to create a
+security issue. [libpathrs][libpathrs] provides far more helpers to make using
+these handles safer -- there is currently no plan to port them to
+`filepath-securejoin`.
+
+`OpenatInRoot` is like `OpenInRoot` except that the root is provided using an
+`*os.File`. This allows you to ensure that multiple `OpenatInRoot` (or
+`MkdirAllHandle`) calls are operating on the same rootfs.
+
+> **NOTE**: Unlike `SecureJoin`, `OpenInRoot` will error out as soon as it hits
+> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
+> which treated non-existent components as though they were real directories,
+> and would allow for partial resolution of dangling symlinks. These behaviours
+> are at odds with how Linux treats non-existent paths and dangling symlinks,
+> and so these are no longer allowed.
+
+#### `MkdirAll` ####
+
+```go
+func MkdirAll(root, unsafePath string, mode int) error
+func MkdirAllHandle(root *os.File, unsafePath string, mode int) (*os.File, error)
+```
+
+`MkdirAll` is a much safer version of
+
+```go
+path, err := securejoin.SecureJoin(root, unsafePath)
+err = os.MkdirAll(path, mode)
+```
+
+that protects against the same kinds of races that `OpenInRoot` protects
+against.
+
+`MkdirAllHandle` is like `MkdirAll` except that the root is provided using an
+`*os.File` (the reason for this is the same as with `OpenatInRoot`) and an
+`*os.File` of the final created directory is returned (this directory is
+guaranteed to be effectively identical to the directory created by
+`MkdirAllHandle`, which is not possible to ensure by just using `OpenatInRoot`
+after `MkdirAll`).
+
+> **NOTE**: Unlike `SecureJoin`, `MkdirAll` will error out as soon as it hits
+> a dangling symlink or non-existent path. This is in contrast to `SecureJoin`
+> which treated non-existent components as though they were real directories,
+> and would allow for partial resolution of dangling symlinks. These behaviours
+> are at odds with how Linux treats non-existent paths and dangling symlinks,
+> and so these are no longer allowed. This means that `MkdirAll` will not
+> create non-existent directories referenced by a dangling symlink.
+
### License ###
The license of this project is the same as Go, which is a BSD 3-clause license
diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION
index abd410582..9e11b32fc 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/VERSION
+++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION
@@ -1 +1 @@
-0.2.4
+0.3.1
diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go
index aa32b85fb..bd86a48b0 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/join.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/join.go
@@ -1,5 +1,5 @@
// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
-// Copyright (C) 2017 SUSE LLC. All rights reserved.
+// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
@@ -11,7 +11,6 @@
package securejoin
import (
- "bytes"
"errors"
"os"
"path/filepath"
@@ -19,6 +18,8 @@ import (
"syscall"
)
+const maxSymlinkLimit = 255
+
// IsNotExist tells you if err is an error that implies that either the path
// accessed does not exist (or path components don't exist). This is
// effectively a more broad version of os.IsNotExist.
@@ -40,6 +41,12 @@ func IsNotExist(err error) bool {
// replaced with symlinks on the filesystem) after this function has returned.
// Such a symlink race is necessarily out-of-scope of SecureJoin.
//
+// NOTE: Due to the above limitation, Linux users are strongly encouraged to
+// use OpenInRoot instead, which does safely protect against these kinds of
+// attacks. There is no way to solve this problem with SecureJoinVFS because
+// the API is fundamentally wrong (you cannot return a "safe" path string and
+// guarantee it won't be modified afterwards).
+//
// Volume names in unsafePath are always discarded, regardless if they are
// provided via direct input or when evaluating symlinks. Therefore:
//
@@ -51,71 +58,69 @@ func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) {
}
unsafePath = filepath.FromSlash(unsafePath)
- var path bytes.Buffer
- n := 0
- for unsafePath != "" {
- if n > 255 {
- return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP}
+ var (
+ currentPath string
+ remainingPath = unsafePath
+ linksWalked int
+ )
+ for remainingPath != "" {
+ if v := filepath.VolumeName(remainingPath); v != "" {
+ remainingPath = remainingPath[len(v):]
}
- if v := filepath.VolumeName(unsafePath); v != "" {
- unsafePath = unsafePath[len(v):]
- }
-
- // Next path component, p.
- i := strings.IndexRune(unsafePath, filepath.Separator)
- var p string
- if i == -1 {
- p, unsafePath = unsafePath, ""
+ // Get the next path component.
+ var part string
+ if i := strings.IndexRune(remainingPath, filepath.Separator); i == -1 {
+ part, remainingPath = remainingPath, ""
} else {
- p, unsafePath = unsafePath[:i], unsafePath[i+1:]
+ part, remainingPath = remainingPath[:i], remainingPath[i+1:]
}
- // Create a cleaned path, using the lexical semantics of /../a, to
- // create a "scoped" path component which can safely be joined to fullP
- // for evaluation. At this point, path.String() doesn't contain any
- // symlink components.
- cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p)
- if cleanP == string(filepath.Separator) {
- path.Reset()
+ // Apply the component lexically to the path we are building.
+ // currentPath does not contain any symlinks, and we are lexically
+ // dealing with a single component, so it's okay to do a filepath.Clean
+ // here.
+ nextPath := filepath.Join(string(filepath.Separator), currentPath, part)
+ if nextPath == string(filepath.Separator) {
+ currentPath = ""
continue
}
- fullP := filepath.Clean(root + cleanP)
+ fullPath := root + string(filepath.Separator) + nextPath
// Figure out whether the path is a symlink.
- fi, err := vfs.Lstat(fullP)
+ fi, err := vfs.Lstat(fullPath)
if err != nil && !IsNotExist(err) {
return "", err
}
// Treat non-existent path components the same as non-symlinks (we
// can't do any better here).
if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 {
- path.WriteString(p)
- path.WriteRune(filepath.Separator)
+ currentPath = nextPath
continue
}
- // Only increment when we actually dereference a link.
- n++
+ // It's a symlink, so get its contents and expand it by prepending it
+ // to the yet-unparsed path.
+ linksWalked++
+ if linksWalked > maxSymlinkLimit {
+ return "", &os.PathError{Op: "SecureJoin", Path: root + string(filepath.Separator) + unsafePath, Err: syscall.ELOOP}
+ }
- // It's a symlink, expand it by prepending it to the yet-unparsed path.
- dest, err := vfs.Readlink(fullP)
+ dest, err := vfs.Readlink(fullPath)
if err != nil {
return "", err
}
+ remainingPath = dest + string(filepath.Separator) + remainingPath
// Absolute symlinks reset any work we've already done.
if filepath.IsAbs(dest) {
- path.Reset()
+ currentPath = ""
}
- unsafePath = dest + string(filepath.Separator) + unsafePath
}
- // We have to clean path.String() here because it may contain '..'
- // components that are entirely lexical, but would be misleading otherwise.
- // And finally do a final clean to ensure that root is also lexically
- // clean.
- fullP := filepath.Clean(string(filepath.Separator) + path.String())
- return filepath.Clean(root + fullP), nil
+ // There should be no lexical components like ".." left in the path here,
+ // but for safety clean up the path before joining it to the root.
+ finalPath := filepath.Join(string(filepath.Separator), currentPath)
+ return filepath.Join(root, finalPath), nil
}
// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library
diff --git a/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go b/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go
new file mode 100644
index 000000000..290befa15
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/lookup_linux.go
@@ -0,0 +1,389 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "path"
+ "path/filepath"
+ "slices"
+ "strings"
+
+ "golang.org/x/sys/unix"
+)
+
+type symlinkStackEntry struct {
+ // (dir, remainingPath) is what we would've returned if the link didn't
+ // exist. This matches what openat2(RESOLVE_IN_ROOT) would return in
+ // this case.
+ dir *os.File
+ remainingPath string
+ // linkUnwalked is the remaining path components from the original
+ // Readlink which we have yet to walk. When this slice is empty, we
+ // drop the link from the stack.
+ linkUnwalked []string
+}
+
+func (se symlinkStackEntry) String() string {
+ return fmt.Sprintf("<%s>/%s [->%s]", se.dir.Name(), se.remainingPath, strings.Join(se.linkUnwalked, "/"))
+}
+
+func (se symlinkStackEntry) Close() {
+ _ = se.dir.Close()
+}
+
+type symlinkStack []*symlinkStackEntry
+
+func (s *symlinkStack) IsEmpty() bool {
+ return s == nil || len(*s) == 0
+}
+
+func (s *symlinkStack) Close() {
+ if s != nil {
+ for _, link := range *s {
+ link.Close()
+ }
+ // TODO: Switch to clear once we switch to Go 1.21.
+ *s = nil
+ }
+}
+
+var (
+ errEmptyStack = errors.New("[internal] stack is empty")
+ errBrokenSymlinkStack = errors.New("[internal error] broken symlink stack")
+)
+
+func (s *symlinkStack) popPart(part string) error {
+ if s == nil || s.IsEmpty() {
+ // If there is nothing in the symlink stack, then the part was from the
+ // real path provided by the user, and this is a no-op.
+ return errEmptyStack
+ }
+ if part == "." {
+ // "." components are no-ops -- we drop them when doing SwapLink.
+ return nil
+ }
+
+ tailEntry := (*s)[len(*s)-1]
+
+ // Double-check that we are popping the component we expect.
+ if len(tailEntry.linkUnwalked) == 0 {
+ return fmt.Errorf("%w: trying to pop component %q of empty stack entry %s", errBrokenSymlinkStack, part, tailEntry)
+ }
+ headPart := tailEntry.linkUnwalked[0]
+ if headPart != part {
+ return fmt.Errorf("%w: trying to pop component %q but the last stack entry is %s (%q)", errBrokenSymlinkStack, part, tailEntry, headPart)
+ }
+
+ // Drop the component, but keep the entry around in case we are dealing
+ // with a "tail-chained" symlink.
+ tailEntry.linkUnwalked = tailEntry.linkUnwalked[1:]
+ return nil
+}
+
+func (s *symlinkStack) PopPart(part string) error {
+ if err := s.popPart(part); err != nil {
+ if errors.Is(err, errEmptyStack) {
+ // Skip empty stacks.
+ err = nil
+ }
+ return err
+ }
+
+ // Clean up any of the trailing stack entries that are empty.
+ for lastGood := len(*s) - 1; lastGood >= 0; lastGood-- {
+ entry := (*s)[lastGood]
+ if len(entry.linkUnwalked) > 0 {
+ break
+ }
+ entry.Close()
+ (*s) = (*s)[:lastGood]
+ }
+ return nil
+}
+
+func (s *symlinkStack) push(dir *os.File, remainingPath, linkTarget string) error {
+ if s == nil {
+ return nil
+ }
+ // Split the link target and clean up any "" parts.
+ linkTargetParts := slices.DeleteFunc(
+ strings.Split(linkTarget, "/"),
+ func(part string) bool { return part == "" || part == "." })
+
+ // Copy the directory so the caller doesn't close our copy.
+ dirCopy, err := dupFile(dir)
+ if err != nil {
+ return err
+ }
+
+ // Add to the stack.
+ *s = append(*s, &symlinkStackEntry{
+ dir: dirCopy,
+ remainingPath: remainingPath,
+ linkUnwalked: linkTargetParts,
+ })
+ return nil
+}
+
+func (s *symlinkStack) SwapLink(linkPart string, dir *os.File, remainingPath, linkTarget string) error {
+ // If we are currently inside a symlink resolution, remove the symlink
+ // component from the last symlink entry, but don't remove the entry even
+ // if it's empty. If we are a "tail-chained" symlink (a trailing symlink we
+ // hit during a symlink resolution) we need to keep the old symlink until
+ // we finish the resolution.
+ if err := s.popPart(linkPart); err != nil {
+ if !errors.Is(err, errEmptyStack) {
+ return err
+ }
+ // Push the component regardless of whether the stack was empty.
+ }
+ return s.push(dir, remainingPath, linkTarget)
+}
+
+func (s *symlinkStack) PopTopSymlink() (*os.File, string, bool) {
+ if s == nil || s.IsEmpty() {
+ return nil, "", false
+ }
+ tailEntry := (*s)[0]
+ *s = (*s)[1:]
+ return tailEntry.dir, tailEntry.remainingPath, true
+}
+
+// partialLookupInRoot tries to lookup as much of the request path as possible
+// within the provided root (a-la RESOLVE_IN_ROOT) and opens the final existing
+// component of the requested path, returning a file handle to the final
+// existing component and a string containing the remaining path components.
+func partialLookupInRoot(root *os.File, unsafePath string) (*os.File, string, error) {
+ return lookupInRoot(root, unsafePath, true)
+}
+
+func completeLookupInRoot(root *os.File, unsafePath string) (*os.File, error) {
+ handle, remainingPath, err := lookupInRoot(root, unsafePath, false)
+ if remainingPath != "" && err == nil {
+ // should never happen
+ err = fmt.Errorf("[bug] non-empty remaining path when doing a non-partial lookup: %q", remainingPath)
+ }
+ // lookupInRoot(partial=false) will always close the handle if an error is
+ // returned, so no need to double-check here.
+ return handle, err
+}
+
+func lookupInRoot(root *os.File, unsafePath string, partial bool) (Handle *os.File, _ string, _ error) {
+ unsafePath = filepath.ToSlash(unsafePath) // noop
+
+ // This is very similar to SecureJoin, except that we operate on the
+ // components using file descriptors. We then return the last component we
+ // managed to open, along with the remaining path components not opened.
+
+ // Try to use openat2 if possible.
+ if hasOpenat2() {
+ return lookupOpenat2(root, unsafePath, partial)
+ }
+
+ // Get the "actual" root path from /proc/self/fd. This is necessary if the
+ // root is some magic-link like /proc/$pid/root, in which case we want to
+ // make sure when we do checkProcSelfFdPath that we are using the correct
+ // root path.
+ logicalRootPath, err := procSelfFdReadlink(root)
+ if err != nil {
+ return nil, "", fmt.Errorf("get real root path: %w", err)
+ }
+
+ currentDir, err := dupFile(root)
+ if err != nil {
+ return nil, "", fmt.Errorf("clone root fd: %w", err)
+ }
+ defer func() {
+ // If a handle is not returned, close the internal handle.
+ if Handle == nil {
+ _ = currentDir.Close()
+ }
+ }()
+
+ // symlinkStack is used to emulate how openat2(RESOLVE_IN_ROOT) treats
+ // dangling symlinks. If we hit a non-existent path while resolving a
+ // symlink, we need to return the (dir, remainingPath) that we had when we
+ // hit the symlink (treating the symlink as though it were a regular file).
+ // The set of (dir, remainingPath) pairs is stored within the symlinkStack
+ // and we add and remove parts when we hit symlink and non-symlink
+ // components respectively. We need a stack because of recursive symlinks
+ // (symlinks that contain symlink components in their target).
+ //
+ // Note that the stack is ONLY used for book-keeping. All of the actual
+ // path walking logic is still based on currentPath/remainingPath and
+ // currentDir (as in SecureJoin).
+ var symStack *symlinkStack
+ if partial {
+ symStack = new(symlinkStack)
+ defer symStack.Close()
+ }
+
+ var (
+ linksWalked int
+ currentPath string
+ remainingPath = unsafePath
+ )
+ for remainingPath != "" {
+ // Save the current remaining path so if the part is not real we can
+ // return the path including the component.
+ oldRemainingPath := remainingPath
+
+ // Get the next path component.
+ var part string
+ if i := strings.IndexByte(remainingPath, '/'); i == -1 {
+ part, remainingPath = remainingPath, ""
+ } else {
+ part, remainingPath = remainingPath[:i], remainingPath[i+1:]
+ }
+ // If we hit an empty component, we need to treat it as though it is
+ // "." so that trailing "/" and "//" components on a non-directory
+ // correctly return the right error code.
+ if part == "" {
+ part = "."
+ }
+
+ // Apply the component lexically to the path we are building.
+ // currentPath does not contain any symlinks, and we are lexically
+ // dealing with a single component, so it's okay to let path.Join clean
+ // the result here.
+ nextPath := path.Join("/", currentPath, part)
+ // If we logically hit the root, just clone the root rather than
+ // opening the part and doing all of the other checks.
+ if nextPath == "/" {
+ if err := symStack.PopPart(part); err != nil {
+ return nil, "", fmt.Errorf("walking into root with part %q failed: %w", part, err)
+ }
+ // Jump to root.
+ rootClone, err := dupFile(root)
+ if err != nil {
+ return nil, "", fmt.Errorf("clone root fd: %w", err)
+ }
+ _ = currentDir.Close()
+ currentDir = rootClone
+ currentPath = nextPath
+ continue
+ }
+
+ // Try to open the next component.
+ nextDir, err := openatFile(currentDir, part, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
+ switch {
+ case err == nil:
+ st, err := nextDir.Stat()
+ if err != nil {
+ _ = nextDir.Close()
+ return nil, "", fmt.Errorf("stat component %q: %w", part, err)
+ }
+
+ switch st.Mode() & os.ModeType {
+ case os.ModeSymlink:
+ // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See
+ // Linux commit 65cfc6722361 ("readlinkat(), fchownat() and
+ // fstatat() with empty relative pathnames").
+ linkDest, err := readlinkatFile(nextDir, "")
+ // We don't need the handle anymore.
+ _ = nextDir.Close()
+ if err != nil {
+ return nil, "", err
+ }
+
+ linksWalked++
+ if linksWalked > maxSymlinkLimit {
+ return nil, "", &os.PathError{Op: "securejoin.lookupInRoot", Path: logicalRootPath + "/" + unsafePath, Err: unix.ELOOP}
+ }
+
+ // Swap out the symlink's component for the link entry itself.
+ if err := symStack.SwapLink(part, currentDir, oldRemainingPath, linkDest); err != nil {
+ return nil, "", fmt.Errorf("walking into symlink %q failed: push symlink: %w", part, err)
+ }
+
+ // Update our logical remaining path.
+ remainingPath = linkDest + "/" + remainingPath
+ // Absolute symlinks reset any work we've already done.
+ if path.IsAbs(linkDest) {
+ // Jump to root.
+ rootClone, err := dupFile(root)
+ if err != nil {
+ return nil, "", fmt.Errorf("clone root fd: %w", err)
+ }
+ _ = currentDir.Close()
+ currentDir = rootClone
+ currentPath = "/"
+ }
+
+ default:
+ // If we are dealing with a directory, simply walk into it.
+ _ = currentDir.Close()
+ currentDir = nextDir
+ currentPath = nextPath
+
+ // The part was real, so drop it from the symlink stack.
+ if err := symStack.PopPart(part); err != nil {
+ return nil, "", fmt.Errorf("walking into directory %q failed: %w", part, err)
+ }
+
+ // If we are operating on a .., make sure we haven't escaped.
+ // We only have to check for ".." here because walking down
+ // into a regular component cannot cause you to
+ // escape. This mirrors the logic in RESOLVE_IN_ROOT, except we
+ // have to check every ".." rather than only checking after a
+ // rename or mount on the system.
+ if part == ".." {
+ // Make sure the root hasn't moved.
+ if err := checkProcSelfFdPath(logicalRootPath, root); err != nil {
+ return nil, "", fmt.Errorf("root path moved during lookup: %w", err)
+ }
+ // Make sure the path is what we expect.
+ fullPath := logicalRootPath + nextPath
+ if err := checkProcSelfFdPath(fullPath, currentDir); err != nil {
+ return nil, "", fmt.Errorf("walking into %q had unexpected result: %w", part, err)
+ }
+ }
+ }
+
+ default:
+ if !partial {
+ return nil, "", err
+ }
+ // If there are any remaining components in the symlink stack, we
+ // are still within a symlink resolution and thus we hit a dangling
+ // symlink. So pretend that the first symlink in the stack we hit
+ // was an ENOENT (to match openat2).
+ if oldDir, remainingPath, ok := symStack.PopTopSymlink(); ok {
+ _ = currentDir.Close()
+ return oldDir, remainingPath, err
+ }
+ // We have hit a final component that doesn't exist, so we have our
+ // partial open result. Note that we have to use the OLD remaining
+ // path, since the lookup failed.
+ return currentDir, oldRemainingPath, err
+ }
+ }
+
+ // If the unsafePath had a trailing slash, we need to make sure we try to
+ // do a relative "." open so that we will correctly return an error when
+ // the final component is a non-directory (to match openat2). In the
+ // context of openat2, a trailing slash and a trailing "/." are completely
+ // equivalent.
+ if strings.HasSuffix(unsafePath, "/") {
+ nextDir, err := openatFile(currentDir, ".", unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
+ if err != nil {
+ if !partial {
+ _ = currentDir.Close()
+ currentDir = nil
+ }
+ return currentDir, "", err
+ }
+ _ = currentDir.Close()
+ currentDir = nextDir
+ }
+
+ // All of the components existed!
+ return currentDir, "", nil
+}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go b/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
new file mode 100644
index 000000000..ad2bd7973
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/mkdir_linux.go
@@ -0,0 +1,229 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "slices"
+ "strings"
+
+ "golang.org/x/sys/unix"
+)
+
+var (
+ errInvalidMode = errors.New("invalid permission mode")
+ errPossibleAttack = errors.New("possible attack detected")
+)
+
+// MkdirAllHandle is equivalent to MkdirAll, except that it is safer to use in
+// two respects:
+//
+// - The caller provides the root directory as an *os.File (preferably O_PATH)
+// handle. This means that the caller can be sure which root directory is
+// being used. Note that this can be emulated by using /proc/self/fd/... as
+// the root path with MkdirAll.
+//
+// - Once all of the directories have been created, an *os.File (O_PATH) handle
+// to the directory at unsafePath is returned to the caller. This is done in
+// an effectively-race-free way (an attacker would only be able to swap the
+// final directory component), which is not possible to emulate with
+// MkdirAll.
+//
+// In addition, the returned handle is obtained far more efficiently than doing
+// a brand new lookup of unsafePath (such as with SecureJoin or openat2) after
+// doing MkdirAll. If you intend to open the directory after creating it, you
+// should use MkdirAllHandle.
+func MkdirAllHandle(root *os.File, unsafePath string, mode int) (_ *os.File, Err error) {
+ // Make sure there are no os.FileMode bits set.
+ if mode&^0o7777 != 0 {
+ return nil, fmt.Errorf("%w for mkdir 0o%.3o", errInvalidMode, mode)
+ }
+
+ // Try to open as much of the path as possible.
+ currentDir, remainingPath, err := partialLookupInRoot(root, unsafePath)
+ defer func() {
+ if Err != nil {
+ _ = currentDir.Close()
+ }
+ }()
+ if err != nil && !errors.Is(err, unix.ENOENT) {
+ return nil, fmt.Errorf("find existing subpath of %q: %w", unsafePath, err)
+ }
+
+ // If there is an attacker deleting directories as we walk into them,
+ // detect this proactively. Note this is guaranteed to detect if the
+ // attacker deleted any part of the tree up to currentDir.
+ //
+ // Once we walk into a dead directory, partialLookupInRoot would not be
+ // able to walk further down the tree (directories must be empty before
+ // they are deleted), and if the attacker has removed the entire tree we
+ // can be sure that anything that was originally inside a dead directory
+ // must also be deleted and thus is a dead directory in its own right.
+ //
+ // This is mostly a quality-of-life check, because mkdir will simply fail
+ // later if the attacker deletes the tree after this check.
+ if err := isDeadInode(currentDir); err != nil {
+ return nil, fmt.Errorf("finding existing subpath of %q: %w", unsafePath, err)
+ }
+
+ // Re-open the path to match the O_DIRECTORY reopen loop later (so that we
+ // always return a non-O_PATH handle). We also check that we actually got a
+ // directory.
+ if reopenDir, err := Reopen(currentDir, unix.O_DIRECTORY|unix.O_CLOEXEC); errors.Is(err, unix.ENOTDIR) {
+ return nil, fmt.Errorf("cannot create subdirectories in %q: %w", currentDir.Name(), unix.ENOTDIR)
+ } else if err != nil {
+ return nil, fmt.Errorf("re-opening handle to %q: %w", currentDir.Name(), err)
+ } else {
+ _ = currentDir.Close()
+ currentDir = reopenDir
+ }
+
+ remainingParts := strings.Split(remainingPath, string(filepath.Separator))
+ if slices.Contains(remainingParts, "..") {
+ // The path contained ".." components after the end of the "real"
+ // components. We could try to safely resolve ".." here but that would
+ // add a bunch of extra logic for something that it's not clear even
+ // needs to be supported. So just return an error.
+ //
+ // If we do filepath.Clean(remainingPath) then we end up with the
+ // problem that ".." can erase a trailing dangling symlink and produce
+ // a path that doesn't quite match what the user asked for.
+ return nil, fmt.Errorf("%w: yet-to-be-created path %q contains '..' components", unix.ENOENT, remainingPath)
+ }
+
+ // Make sure the mode doesn't have any type bits.
+ mode &^= unix.S_IFMT
+ // What properties do we expect any newly created directories to have?
+ var (
+ // While umask(2) is a per-thread property, and thus this value could
+ // vary between threads, a functioning Go program would LockOSThread
+ // threads with different umasks and so we don't need to LockOSThread
+ // for this entire mkdirat loop (if we are in the locked thread with a
+ // different umask, we are already locked and there's nothing for us to
+ // do -- and if not then it doesn't matter which thread we run on and
+ // there's nothing for us to do).
+ expectedMode = uint32(unix.S_IFDIR | (mode &^ getUmask()))
+
+ // We would want to get the fs[ug]id here, but we can't access those
+ // from userspace. In practice, nobody uses setfs[ug]id() anymore, so
+ // just use the effective [ug]id (which is equivalent to the fs[ug]id
+ // for programs that don't use setfs[ug]id).
+ expectedUid = uint32(unix.Geteuid())
+ expectedGid = uint32(unix.Getegid())
+ )
+
+ // Create the remaining components.
+ for _, part := range remainingParts {
+ switch part {
+ case "", ".":
+ // Skip over no-op paths.
+ continue
+ }
+
+ // NOTE: mkdir(2) will not follow trailing symlinks, so we can safely
+ // create the final component without worrying about symlink-exchange
+ // attacks.
+ if err := unix.Mkdirat(int(currentDir.Fd()), part, uint32(mode)); err != nil {
+ err = &os.PathError{Op: "mkdirat", Path: currentDir.Name() + "/" + part, Err: err}
+ // Make the error a bit nicer if the directory is dead.
+ if err2 := isDeadInode(currentDir); err2 != nil {
+ err = fmt.Errorf("%w (%w)", err, err2)
+ }
+ return nil, err
+ }
+
+ // Get a handle to the next component. O_DIRECTORY means we don't need
+ // to use O_PATH.
+ var nextDir *os.File
+ if hasOpenat2() {
+ nextDir, err = openat2File(currentDir, part, &unix.OpenHow{
+ Flags: unix.O_NOFOLLOW | unix.O_DIRECTORY | unix.O_CLOEXEC,
+ Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_NO_XDEV,
+ })
+ } else {
+ nextDir, err = openatFile(currentDir, part, unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+ }
+ if err != nil {
+ return nil, err
+ }
+ _ = currentDir.Close()
+ currentDir = nextDir
+
+ // Make sure that the directory matches what we expect. An attacker
+ // could have swapped the directory between us making it and opening
+ // it. There's no way for us to be sure that the directory is
+ // _precisely_ the same as the directory we created, but if we are in
+ // an empty directory with the same owner and mode as the one we
+ // created then there is nothing the attacker could do with this new
+ // directory that they couldn't do with the old one.
+ if stat, err := fstat(currentDir); err != nil {
+ return nil, fmt.Errorf("check newly created directory: %w", err)
+ } else {
+ if stat.Mode != expectedMode {
+ return nil, fmt.Errorf("%w: newly created directory %q has incorrect mode 0o%.3o (expected 0o%.3o)", errPossibleAttack, currentDir.Name(), stat.Mode, expectedMode)
+ }
+ if stat.Uid != expectedUid || stat.Gid != expectedGid {
+ return nil, fmt.Errorf("%w: newly created directory %q has incorrect owner %d:%d (expected %d:%d)", errPossibleAttack, currentDir.Name(), stat.Uid, stat.Gid, expectedUid, expectedGid)
+ }
+ // Check that the directory is empty. We only need to check for
+ // a single entry, and we should get EOF if the directory is
+ // empty.
+ _, err := currentDir.Readdirnames(1)
+ if !errors.Is(err, io.EOF) {
+ if err == nil {
+ err = fmt.Errorf("%w: newly created directory %q is non-empty", errPossibleAttack, currentDir.Name())
+ }
+ return nil, fmt.Errorf("check if newly created directory %q is empty: %w", currentDir.Name(), err)
+ }
+ // Reset the offset.
+ _, _ = currentDir.Seek(0, unix.SEEK_SET)
+ }
+ }
+ return currentDir, nil
+}
+
+// MkdirAll is a race-safe alternative to the Go stdlib's os.MkdirAll function,
+// where the new directory is guaranteed to be within the root directory (if an
+// attacker can move directories from inside the root to outside the root, the
+// created directory tree might be outside of the root but the key constraint
+// is that at no point will we walk outside of the directory tree we are
+// creating).
+//
+// Effectively, MkdirAll(root, unsafePath, mode) is equivalent to
+//
+// path, _ := securejoin.SecureJoin(root, unsafePath)
+// err := os.MkdirAll(path, mode)
+//
+// But is much safer. The above implementation is unsafe because if an attacker
+// can modify the filesystem tree between SecureJoin and MkdirAll, it is
+// possible for MkdirAll to resolve unsafe symlink components and create
+// directories outside of the root.
+//
+// If you plan to open the directory after you have created it or want to use
+// an open directory handle as the root, you should use MkdirAllHandle instead.
+// This function is a wrapper around MkdirAllHandle.
+//
+// NOTE: The mode argument must be set to the unix mode bits (unix.S_I...),
+// not the Go generic mode bits (os.Mode...).
+func MkdirAll(root, unsafePath string, mode int) error {
+ rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+ if err != nil {
+ return err
+ }
+ defer rootDir.Close()
+
+ f, err := MkdirAllHandle(rootDir, unsafePath, mode)
+ if err != nil {
+ return err
+ }
+ _ = f.Close()
+ return nil
+}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/open_linux.go b/vendor/github.com/cyphar/filepath-securejoin/open_linux.go
new file mode 100644
index 000000000..52dce76f3
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/open_linux.go
@@ -0,0 +1,101 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+ "fmt"
+ "os"
+ "strconv"
+
+ "golang.org/x/sys/unix"
+)
+
+// OpenatInRoot is equivalent to OpenInRoot, except that the root is provided
+// using an *os.File handle, to ensure that the correct root directory is used.
+func OpenatInRoot(root *os.File, unsafePath string) (*os.File, error) {
+ handle, err := completeLookupInRoot(root, unsafePath)
+ if err != nil {
+ return nil, &os.PathError{Op: "securejoin.OpenInRoot", Path: unsafePath, Err: err}
+ }
+ return handle, nil
+}
+
+// OpenInRoot safely opens the provided unsafePath within the root.
+// Effectively, OpenInRoot(root, unsafePath) is equivalent to
+//
+// path, _ := securejoin.SecureJoin(root, unsafePath)
+// handle, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC)
+//
+// But is much safer. The above implementation is unsafe because if an attacker
+// can modify the filesystem tree between SecureJoin and OpenFile, it is
+// possible for the returned file to be outside of the root.
+//
+// Note that the returned handle is an O_PATH handle, meaning that only a very
+// limited set of operations will work on the handle. This is done to avoid
+// accidentally opening an untrusted file that could cause issues (such as a
+// disconnected TTY that could cause a DoS, or some other issue). In order to
+// use the returned handle, you can "upgrade" it to a proper handle using
+// Reopen.
+func OpenInRoot(root, unsafePath string) (*os.File, error) {
+ rootDir, err := os.OpenFile(root, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+ if err != nil {
+ return nil, err
+ }
+ defer rootDir.Close()
+ return OpenatInRoot(rootDir, unsafePath)
+}
+
+// Reopen takes an *os.File handle and re-opens it through /proc/self/fd.
+// Reopen(file, flags) is effectively equivalent to
+//
+// fdPath := fmt.Sprintf("/proc/self/fd/%d", file.Fd())
+// os.OpenFile(fdPath, flags|unix.O_CLOEXEC)
+//
+// But with some extra hardenings to ensure that we are not tricked by a
+// maliciously-configured /proc mount. While this attack scenario is not
+// common, in container runtimes it is possible for higher-level runtimes to be
+// tricked into configuring an unsafe /proc that can be used to attack file
+// operations. See CVE-2019-19921 for more details.
+func Reopen(handle *os.File, flags int) (*os.File, error) {
+ procRoot, err := getProcRoot()
+ if err != nil {
+ return nil, err
+ }
+
+ // We can't operate on /proc/thread-self/fd/$n directly when doing a
+ // re-open, so we need to open /proc/thread-self/fd and then open a single
+ // final component.
+ procFdDir, closer, err := procThreadSelf(procRoot, "fd/")
+ if err != nil {
+ return nil, fmt.Errorf("get safe /proc/thread-self/fd handle: %w", err)
+ }
+ defer procFdDir.Close()
+ defer closer()
+
+ // Try to detect if there is a mount on top of the magic-link we are about
+ // to open. If we are using unsafeHostProcRoot(), this could change after
+ // we check it (and there's nothing we can do about that) but for
+ // privateProcRoot() this should be guaranteed to be safe (at least since
+ // Linux 5.12[1], when anonymous mount namespaces were completely isolated
+ // from external mounts including mount propagation events).
+ //
+ // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
+ // onto targets that reside on shared mounts").
+ fdStr := strconv.Itoa(int(handle.Fd()))
+ if err := checkSymlinkOvermount(procRoot, procFdDir, fdStr); err != nil {
+ return nil, fmt.Errorf("check safety of /proc/thread-self/fd/%s magiclink: %w", fdStr, err)
+ }
+
+ flags |= unix.O_CLOEXEC
+ // Rather than just wrapping openatFile, open-code it so we can copy
+ // handle.Name().
+ reopenFd, err := unix.Openat(int(procFdDir.Fd()), fdStr, flags, 0)
+ if err != nil {
+ return nil, fmt.Errorf("reopen fd %d: %w", handle.Fd(), err)
+ }
+ return os.NewFile(uintptr(reopenFd), handle.Name()), nil
+}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
new file mode 100644
index 000000000..921b3e1d4
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/openat2_linux.go
@@ -0,0 +1,141 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "testing"
+
+ "golang.org/x/sys/unix"
+)
+
+var (
+ hasOpenat2Bool bool // cached probe result, written inside hasOpenat2Once
+ hasOpenat2Once sync.Once // guards the one-time probe in hasOpenat2
+
+ testingForceHasOpenat2 *bool // if non-nil while testing.Testing(), hasOpenat2 returns this instead
+)
+
+func hasOpenat2() bool { // reports whether an openat2(2) probe succeeded; probed once, then cached
+ if testing.Testing() && testingForceHasOpenat2 != nil {
+ return *testingForceHasOpenat2 // test-only override, bypasses the cached probe
+ }
+ hasOpenat2Once.Do(func() {
+ fd, err := unix.Openat2(unix.AT_FDCWD, ".", &unix.OpenHow{
+ Flags: unix.O_PATH | unix.O_CLOEXEC,
+ Resolve: unix.RESOLVE_NO_SYMLINKS | unix.RESOLVE_IN_ROOT,
+ })
+ if err == nil {
+ hasOpenat2Bool = true
+ _ = unix.Close(fd) // the probe fd is only a witness; discard it
+ }
+ })
+ return hasOpenat2Bool
+}
+
+func scopedLookupShouldRetry(how *unix.OpenHow, err error) bool { // true if err is a retryable scoped-lookup failure
+ // RESOLVE_IN_ROOT (and RESOLVE_BENEATH) can return -EAGAIN if we resolve
+ // ".." while a mount or rename occurs anywhere on the system. This could
+ // happen spuriously, or as the result of an attacker trying to mess with
+ // us during lookup.
+ //
+ // In addition, scoped lookups have a "safety check" at the end of
+ // complete_walk which will return -EXDEV if the final path is not in the
+ // root.
+ return how.Resolve&(unix.RESOLVE_IN_ROOT|unix.RESOLVE_BENEATH) != 0 &&
+ (errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EXDEV))
+}
+
+const scopedLookupMaxRetries = 10 // retryable failures openat2File tolerates before returning errPossibleAttack
+
+func openat2File(dir *os.File, path string, how *unix.OpenHow) (*os.File, error) { // openat2(dir, path, how) with bounded retries
+ fullPath := dir.Name() + "/" + path
+ // Make sure we always set O_CLOEXEC. Note that this writes through the
+ how.Flags |= unix.O_CLOEXEC // pointer, so the caller's OpenHow is modified too.
+ var tries int
+ for tries < scopedLookupMaxRetries {
+ fd, err := unix.Openat2(int(dir.Fd()), path, how)
+ if err != nil {
+ if scopedLookupShouldRetry(how, err) {
+ // We retry a couple of times to avoid the spurious errors, and
+ // if we are being attacked then returning -EAGAIN is the best
+ // we can do.
+ tries++
+ continue
+ }
+ return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: err}
+ }
+ // If we are using RESOLVE_IN_ROOT, the name we generated may be wrong.
+ // NOTE: The procRoot code MUST NOT use RESOLVE_IN_ROOT, otherwise
+ // you'll get infinite recursion here.
+ if how.Resolve&unix.RESOLVE_IN_ROOT == unix.RESOLVE_IN_ROOT {
+ if actualPath, err := rawProcSelfFdReadlink(fd); err == nil {
+ fullPath = actualPath // best effort: on readlink failure the lexical name is kept
+ }
+ }
+ return os.NewFile(uintptr(fd), fullPath), nil
+ }
+ return nil, &os.PathError{Op: "openat2", Path: fullPath, Err: errPossibleAttack} // every attempt hit a retryable error
+}
+
+func lookupOpenat2(root *os.File, unsafePath string, partial bool) (*os.File, string, error) { // resolve unsafePath inside root via openat2
+ if !partial {
+ file, err := openat2File(root, unsafePath, &unix.OpenHow{
+ Flags: unix.O_PATH | unix.O_CLOEXEC,
+ Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
+ })
+ return file, "", err // full lookup: the remaining-path result is always empty
+ }
+ return partialLookupOpenat2(root, unsafePath) // partial lookup: deepest existing ancestor plus the remainder
+}
+
+// partialLookupOpenat2 is an alternative implementation of
+// partialLookupInRoot, using openat2(RESOLVE_IN_ROOT) to more safely get a
+// handle to the deepest existing child of the requested path within the root.
+func partialLookupOpenat2(root *os.File, unsafePath string) (*os.File, string, error) {
+ // TODO: Implement this as a git-bisect-like binary search.
+
+ unsafePath = filepath.ToSlash(unsafePath) // noop on Linux, where the separator is already '/'
+ endIdx := len(unsafePath)
+ var lastError error
+ for endIdx > 0 {
+ subpath := unsafePath[:endIdx] // candidate prefix; shrinks by one component per failed attempt
+
+ handle, err := openat2File(root, subpath, &unix.OpenHow{
+ Flags: unix.O_PATH | unix.O_CLOEXEC,
+ Resolve: unix.RESOLVE_IN_ROOT | unix.RESOLVE_NO_MAGICLINKS,
+ })
+ if err == nil {
+ // Jump over the slash if we have a non-"" remainingPath.
+ if endIdx < len(unsafePath) {
+ endIdx += 1
+ }
+ // We found a subpath! lastError is nil only if the whole path opened.
+ return handle, unsafePath[endIdx:], lastError
+ }
+ if errors.Is(err, unix.ENOENT) || errors.Is(err, unix.ENOTDIR) {
+ // That path doesn't exist, let's try the next directory up.
+ endIdx = strings.LastIndexByte(subpath, '/') // -1 (no slash) or 0 (leading slash) ends the loop
+ lastError = err
+ continue
+ }
+ return nil, "", fmt.Errorf("open subpath: %w", err) // any other error aborts the whole lookup
+ }
+ // If we couldn't open anything, the whole subpath is missing. Return a
+ // copy of the root fd so that the caller doesn't close this one by
+ // accident.
+ rootClone, err := dupFile(root)
+ if err != nil {
+ return nil, "", err
+ }
+ return rootClone, unsafePath, lastError
+}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
new file mode 100644
index 000000000..949fb5f2d
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/openat_linux.go
@@ -0,0 +1,59 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+ "os"
+ "path/filepath"
+
+ "golang.org/x/sys/unix"
+)
+
+func dupFile(f *os.File) (*os.File, error) { // duplicates f's descriptor with close-on-exec set, keeping f.Name()
+ fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
+ if err != nil {
+ return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
+ }
+ return os.NewFile(uintptr(fd), f.Name()), nil
+}
+
+func openatFile(dir *os.File, path string, flags int, mode int) (*os.File, error) { // openat(2) relative to dir, as an *os.File
+ // Make sure we always set O_CLOEXEC.
+ flags |= unix.O_CLOEXEC
+ fd, err := unix.Openat(int(dir.Fd()), path, flags, uint32(mode))
+ if err != nil {
+ return nil, &os.PathError{Op: "openat", Path: dir.Name() + "/" + path, Err: err}
+ }
+ // All of the paths we use with openatFile are guaranteed to be
+ // lexically safe, so we can use filepath.Join here.
+ fullPath := filepath.Join(dir.Name(), path)
+ return os.NewFile(uintptr(fd), fullPath), nil
+}
+
+func fstatatFile(dir *os.File, path string, flags int) (unix.Stat_t, error) { // fstatat(2) relative to dir
+ var stat unix.Stat_t
+ if err := unix.Fstatat(int(dir.Fd()), path, &stat, flags); err != nil {
+ return stat, &os.PathError{Op: "fstatat", Path: dir.Name() + "/" + path, Err: err}
+ }
+ return stat, nil
+}
+
+func readlinkatFile(dir *os.File, path string) (string, error) { // readlinkat(2) relative to dir, growing the buffer as needed
+ size := 4096
+ for {
+ linkBuf := make([]byte, size)
+ n, err := unix.Readlinkat(int(dir.Fd()), path, linkBuf)
+ if err != nil {
+ return "", &os.PathError{Op: "readlinkat", Path: dir.Name() + "/" + path, Err: err}
+ }
+ if n != size { // a completely full buffer may mean the target was cut short
+ return string(linkBuf[:n]), nil
+ }
+ // Possible truncation, resize the buffer.
+ size *= 2
+ }
+}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
new file mode 100644
index 000000000..adf0bd08f
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/procfs_linux.go
@@ -0,0 +1,474 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "runtime"
+ "strconv"
+ "sync"
+
+ "golang.org/x/sys/unix"
+)
+
+func fstat(f *os.File) (unix.Stat_t, error) { // fstat(2) on f; failures are wrapped in *os.PathError
+ var stat unix.Stat_t
+ if err := unix.Fstat(int(f.Fd()), &stat); err != nil {
+ return stat, &os.PathError{Op: "fstat", Path: f.Name(), Err: err}
+ }
+ return stat, nil
+}
+
+func fstatfs(f *os.File) (unix.Statfs_t, error) { // fstatfs(2) on f; failures are wrapped in *os.PathError
+ var statfs unix.Statfs_t
+ if err := unix.Fstatfs(int(f.Fd()), &statfs); err != nil {
+ return statfs, &os.PathError{Op: "fstatfs", Path: f.Name(), Err: err}
+ }
+ return statfs, nil
+}
+
+// The kernel guarantees that the root inode of a procfs mount has an
+// f_type of PROC_SUPER_MAGIC and st_ino of PROC_ROOT_INO (see verifyProcRoot).
+const (
+ procSuperMagic = 0x9fa0 // PROC_SUPER_MAGIC
+ procRootIno = 1 // PROC_ROOT_INO
+)
+
+func verifyProcRoot(procRoot *os.File) error { // nil only if procRoot is the root inode of a procfs mount
+ if statfs, err := fstatfs(procRoot); err != nil {
+ return err
+ } else if statfs.Type != procSuperMagic { // the handle is not on procfs at all
+ return fmt.Errorf("%w: incorrect procfs root filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
+ }
+ if stat, err := fstat(procRoot); err != nil {
+ return err
+ } else if stat.Ino != procRootIno { // procfs, but not its root directory
+ return fmt.Errorf("%w: incorrect procfs root inode number %d", errUnsafeProcfs, stat.Ino)
+ }
+ return nil
+}
+
+var (
+ hasNewMountApiBool bool // cached probe result, written inside hasNewMountApiOnce
+ hasNewMountApiOnce sync.Once // guards the one-time probe in hasNewMountApi
+)
+
+func hasNewMountApi() bool { // reports whether an open_tree(2) probe succeeded; probed once, then cached
+ hasNewMountApiOnce.Do(func() {
+ // All of the pieces of the new mount API we use (fsopen, fsconfig,
+ // fsmount, open_tree) were added together in Linux 5.1[1,2], so we can
+ // just check for one of the syscalls and the others should also be
+ // available.
+ //
+ // Just try to use open_tree(2) to open a file without OPEN_TREE_CLONE.
+ // This is equivalent to openat(2), but tells us if open_tree is
+ // available (and thus all of the other basic new mount API syscalls).
+ // open_tree(2) is the most light-weight syscall to test here.
+ //
+ // [1]: merge commit 400913252d09
+ // [2]:
+ fd, err := unix.OpenTree(-int(unix.EBADF), "/", unix.OPEN_TREE_CLOEXEC)
+ if err == nil {
+ hasNewMountApiBool = true
+ _ = unix.Close(fd) // the probe fd is only a witness; discard it
+ }
+ })
+ return hasNewMountApiBool
+}
+
+func fsopen(fsName string, flags int) (*os.File, error) { // fsopen(2): new filesystem context for fsName
+ // Make sure we always set the close-on-exec flag (FSOPEN_CLOEXEC).
+ flags |= unix.FSOPEN_CLOEXEC
+ fd, err := unix.Fsopen(fsName, flags)
+ if err != nil {
+ return nil, os.NewSyscallError("fsopen "+fsName, err)
+ }
+ return os.NewFile(uintptr(fd), "fscontext:"+fsName), nil
+}
+
+func fsmount(ctx *os.File, flags, mountAttrs int) (*os.File, error) { // fsmount(2): mount handle from the context ctx
+ // Make sure we always set the close-on-exec flag (FSMOUNT_CLOEXEC).
+ flags |= unix.FSMOUNT_CLOEXEC
+ fd, err := unix.Fsmount(int(ctx.Fd()), flags, mountAttrs)
+ if err != nil {
+ return nil, os.NewSyscallError("fsmount "+ctx.Name(), err)
+ }
+ return os.NewFile(uintptr(fd), "fsmount:"+ctx.Name()), nil
+}
+
+func newPrivateProcMount() (*os.File, error) { // builds a fresh procfs mount via fsopen/fsconfig/fsmount
+ procfsCtx, err := fsopen("proc", unix.FSOPEN_CLOEXEC)
+ if err != nil {
+ return nil, err
+ }
+ defer procfsCtx.Close() // only the context is closed here; the fsmount result is returned to the caller
+
+ // Try to configure hidepid=ptraceable,subset=pid if possible, but ignore errors.
+ _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "hidepid", "ptraceable")
+ _ = unix.FsconfigSetString(int(procfsCtx.Fd()), "subset", "pid")
+
+ // Get an actual handle.
+ if err := unix.FsconfigCreate(int(procfsCtx.Fd())); err != nil {
+ return nil, os.NewSyscallError("fsconfig create procfs", err)
+ }
+ return fsmount(procfsCtx, unix.FSMOUNT_CLOEXEC, unix.MS_RDONLY|unix.MS_NODEV|unix.MS_NOEXEC|unix.MS_NOSUID)
+}
+
+func openTree(dir *os.File, path string, flags uint) (*os.File, error) { // open_tree(2); dir may be nil
+ dirFd := -int(unix.EBADF) // nil dir: pass a deliberately invalid dirfd
+ dirName := "."
+ if dir != nil {
+ dirFd = int(dir.Fd())
+ dirName = dir.Name()
+ }
+ // Make sure we always set the close-on-exec flag (OPEN_TREE_CLOEXEC).
+ flags |= unix.OPEN_TREE_CLOEXEC
+ fd, err := unix.OpenTree(dirFd, path, flags)
+ if err != nil {
+ return nil, &os.PathError{Op: "open_tree", Path: path, Err: err} // note: reports path without the dirName prefix
+ }
+ return os.NewFile(uintptr(fd), dirName+"/"+path), nil
+}
+
+func clonePrivateProcMount() (_ *os.File, Err error) { // Err is named so the deferred cleanup below can see it
+ // Try to make a clone without using AT_RECURSIVE if we can. If this works,
+ // we can be sure there are no over-mounts and so if the root is valid then
+ // we're golden. Otherwise, we have to deal with over-mounts.
+ procfsHandle, err := openTree(nil, "/proc", unix.OPEN_TREE_CLONE)
+ if err != nil || testingForcePrivateProcRootOpenTreeAtRecursive(procfsHandle) {
+ procfsHandle, err = openTree(nil, "/proc", unix.OPEN_TREE_CLONE|unix.AT_RECURSIVE)
+ }
+ if err != nil {
+ return nil, fmt.Errorf("creating a detached procfs clone: %w", err)
+ }
+ defer func() {
+ if Err != nil { // don't leak the clone if verification fails
+ _ = procfsHandle.Close()
+ }
+ }()
+ if err := verifyProcRoot(procfsHandle); err != nil {
+ return nil, err
+ }
+ return procfsHandle, nil
+}
+
+func privateProcRoot() (*os.File, error) { // private procfs handle: new mount first, else a clone of /proc
+ if !hasNewMountApi() || testingForceGetProcRootUnsafe() {
+ return nil, fmt.Errorf("new mount api: %w", unix.ENOTSUP)
+ }
+ // Try to create a new procfs mount from scratch if we can. This ensures we
+ // can get a procfs mount even if /proc is fake (for whatever reason).
+ procRoot, err := newPrivateProcMount()
+ if err != nil || testingForcePrivateProcRootOpenTree(procRoot) {
+ // Try to clone /proc then...
+ procRoot, err = clonePrivateProcMount()
+ }
+ return procRoot, err
+}
+
+var (
+ procRootHandle *os.File // cached result of doGetProcRoot, set once by getProcRoot
+ procRootError error // cached error of doGetProcRoot, set once by getProcRoot
+ procRootOnce sync.Once // guards the single doGetProcRoot call
+
+ errUnsafeProcfs = errors.New("unsafe procfs detected") // wrapped by every procfs safety-check failure in this file
+)
+
+func unsafeHostProcRoot() (_ *os.File, Err error) { // opens the host's /proc directly; Err is named for the deferred cleanup
+ procRoot, err := os.OpenFile("/proc", unix.O_PATH|unix.O_NOFOLLOW|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
+ if err != nil {
+ return nil, err
+ }
+ defer func() {
+ if Err != nil { // don't leak the handle if verification fails
+ _ = procRoot.Close()
+ }
+ }()
+ if err := verifyProcRoot(procRoot); err != nil {
+ return nil, err
+ }
+ return procRoot, nil
+}
+
+func doGetProcRoot() (*os.File, error) { // private procfs handle if possible, else the host /proc
+ procRoot, err := privateProcRoot()
+ if err != nil {
+ // Fall back to using a /proc handle if making a private mount failed.
+ // If we have openat2, at least we can avoid some kinds of over-mount
+ // attacks, but without openat2 there's not much we can do.
+ procRoot, err = unsafeHostProcRoot() // the privateProcRoot error is discarded here
+ }
+ return procRoot, err
+}
+
+func getProcRoot() (*os.File, error) { // memoized doGetProcRoot: a failure is cached too and never retried
+ procRootOnce.Do(func() {
+ procRootHandle, procRootError = doGetProcRoot()
+ })
+ return procRootHandle, procRootError // shared handle; nothing in this file closes it
+}
+
+var (
+ haveProcThreadSelf bool // whether "thread-self" could be stat'ed under the first procRoot seen
+ haveProcThreadSelfOnce sync.Once // guards the one-time probe in procThreadSelf
+)
+
+type procThreadSelfCloser func() // cleanup callback returned by procThreadSelf (runtime.UnlockOSThread)
+
+// procThreadSelf returns a handle to /proc/thread-self/<subpath> (or an
+// equivalent handle on older kernels where /proc/thread-self doesn't exist).
+// Once finished with the handle, you must call the returned closer function
+// (runtime.UnlockOSThread). You must not pass the returned *os.File to other
+// Go threads or use the handle after calling the closer.
+//
+// This is similar to ProcThreadSelf from runc, but with extra hardening
+// applied and using *os.File.
+func procThreadSelf(procRoot *os.File, subpath string) (_ *os.File, _ procThreadSelfCloser, Err error) {
+ haveProcThreadSelfOnce.Do(func() {
+ // If the kernel doesn't support thread-self, it doesn't matter which
+ // /proc handle we use.
+ _, err := fstatatFile(procRoot, "thread-self", unix.AT_SYMLINK_NOFOLLOW)
+ haveProcThreadSelf = (err == nil)
+ })
+
+ // We need to lock our thread until the caller is done with the handle
+ // because between getting the handle and using it we could get interrupted
+ // by the Go runtime and hit the case where the underlying thread is
+ // swapped out and the original thread is killed, resulting in
+ // pull-your-hair-out-hard-to-debug issues in the caller.
+ runtime.LockOSThread()
+ defer func() {
+ if Err != nil { // on failure no closer is returned, so unlock here
+ runtime.UnlockOSThread()
+ }
+ }()
+
+ // Figure out what prefix we want to use.
+ threadSelf := "thread-self/"
+ if !haveProcThreadSelf || testingForceProcSelfTask() {
+ // Pre-3.17 kernels don't have /proc/thread-self, so do it manually.
+ threadSelf = "self/task/" + strconv.Itoa(unix.Gettid()) + "/"
+ if _, err := fstatatFile(procRoot, threadSelf, unix.AT_SYMLINK_NOFOLLOW); err != nil || testingForceProcSelf() {
+ // In this case, we are running in a pid namespace that doesn't
+ // match the /proc mount we have. This can happen inside runc.
+ //
+ // Unfortunately, there is no nice way to get the correct TID to
+ // use here because of the age of the kernel, so we have to just
+ // use /proc/self and hope that it works.
+ threadSelf = "self/"
+ }
+ }
+
+ // Grab the handle.
+ var (
+ handle *os.File
+ err error
+ )
+ if hasOpenat2() {
+ // We prefer being able to use RESOLVE_NO_XDEV if we can, to be
+ // absolutely sure we are operating on a clean /proc handle that
+ // doesn't have any cheeky overmounts that could trick us (including
+ // symlink mounts on top of /proc/thread-self). RESOLVE_BENEATH isn't
+ // strictly needed, but just use it since we have it.
+ //
+ // NOTE: /proc/self is technically a magic-link (the contents of the
+ // symlink are generated dynamically), but it doesn't use
+ // nd_jump_link() so RESOLVE_NO_MAGICLINKS allows it.
+ //
+ // NOTE: We MUST NOT use RESOLVE_IN_ROOT here, as openat2File uses
+ // procSelfFdReadlink to clean up the returned f.Name() if we use
+ // RESOLVE_IN_ROOT (which would lead to an infinite recursion).
+ handle, err = openat2File(procRoot, threadSelf+subpath, &unix.OpenHow{
+ Flags: unix.O_PATH | unix.O_NOFOLLOW | unix.O_CLOEXEC,
+ Resolve: unix.RESOLVE_BENEATH | unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_MAGICLINKS,
+ })
+ if err != nil {
+ return nil, nil, fmt.Errorf("%w: %w", errUnsafeProcfs, err)
+ }
+ } else {
+ handle, err = openatFile(procRoot, threadSelf+subpath, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
+ if err != nil {
+ return nil, nil, fmt.Errorf("%w: %w", errUnsafeProcfs, err)
+ }
+ defer func() {
+ if Err != nil { // close the handle if the filesystem check below rejects it
+ _ = handle.Close()
+ }
+ }()
+ // We can't detect bind-mounts of different parts of procfs on top of
+ // /proc (a-la RESOLVE_NO_XDEV), but we can at least be sure that we
+ // aren't on the wrong filesystem here.
+ if statfs, err := fstatfs(handle); err != nil {
+ return nil, nil, err
+ } else if statfs.Type != procSuperMagic {
+ return nil, nil, fmt.Errorf("%w: incorrect /proc/self/fd filesystem type 0x%x", errUnsafeProcfs, statfs.Type)
+ }
+ }
+ return handle, runtime.UnlockOSThread, nil // success: the thread stays locked until the caller invokes the closer
+}
+
+var (
+ hasStatxMountIdBool bool // cached probe result, written inside hasStatxMountIdOnce
+ hasStatxMountIdOnce sync.Once // guards the one-time probe in hasStatxMountId
+)
+
+func hasStatxMountId() bool { // reports whether statx(2) returns a mount ID here; probed once, then cached
+ hasStatxMountIdOnce.Do(func() {
+ var (
+ stx unix.Statx_t
+ // We don't care which mount ID we get. The kernel will give us the
+ // unique one if it is supported.
+ wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
+ )
+ err := unix.Statx(-int(unix.EBADF), "/", 0, int(wantStxMask), &stx)
+ hasStatxMountIdBool = (err == nil && (stx.Mask&wantStxMask != 0)) // either mount-ID bit in the reply counts
+ })
+ return hasStatxMountIdBool
+}
+
+func getMountId(dir *os.File, path string) (uint64, error) { // mount ID of path relative to dir ("" means dir itself)
+ // If we don't have statx(STATX_MNT_ID*) support, we can't do anything.
+ if !hasStatxMountId() {
+ return 0, nil // callers comparing two IDs will then see 0 == 0
+ }
+
+ var (
+ stx unix.Statx_t
+ // We don't care which mount ID we get. The kernel will give us the
+ // unique one if it is supported.
+ wantStxMask uint32 = unix.STATX_MNT_ID_UNIQUE | unix.STATX_MNT_ID
+ )
+
+ err := unix.Statx(int(dir.Fd()), path, unix.AT_EMPTY_PATH|unix.AT_SYMLINK_NOFOLLOW, int(wantStxMask), &stx)
+ if err != nil {
+ err = fmt.Errorf("%w: could not get mount id: %w", errUnsafeProcfs, err) // still treated as an attack, but keep the errno
+ } else if stx.Mask&wantStxMask == 0 {
+ err = fmt.Errorf("%w: could not get mount id", errUnsafeProcfs) // not a kernel limitation (support was probed): assume an attack
+ }
+ if err != nil {
+ return 0, &os.PathError{Op: "statx(STATX_MNT_ID_...)", Path: dir.Name() + "/" + path, Err: err}
+ }
+ return stx.Mnt_id, nil
+}
+
+func checkSymlinkOvermount(procRoot *os.File, dir *os.File, path string) error { // errors if dir/path is on a different mount than procRoot
+ // Get the mntId of our procfs handle.
+ expectedMountId, err := getMountId(procRoot, "")
+ if err != nil {
+ return err
+ }
+ // Get the mntId of the target magic-link.
+ gotMountId, err := getMountId(dir, path)
+ if err != nil {
+ return err
+ }
+ // As long as the directory mount is alive, even with wrapping mount IDs,
+ // we would expect to see a different mount ID here. (Of course, if we're
+ // using unsafeHostProcRoot() then an attacker could change this after we
+ // did this check.)
+ if expectedMountId != gotMountId { // without statx mount-ID support both are 0, so this never trips
+ return fmt.Errorf("%w: symlink %s/%s has an overmount obscuring the real link (mount ids do not match %d != %d)", errUnsafeProcfs, dir.Name(), path, expectedMountId, gotMountId)
+ }
+ return nil
+}
+
+func doRawProcSelfFdReadlink(procRoot *os.File, fd int) (string, error) { // reads the fd/<fd> link through procRoot
+ fdPath := fmt.Sprintf("fd/%d", fd)
+ procFdLink, closer, err := procThreadSelf(procRoot, fdPath)
+ if err != nil {
+ return "", fmt.Errorf("get safe /proc/thread-self/%s handle: %w", fdPath, err)
+ }
+ defer procFdLink.Close()
+ defer closer() // registered last, so it runs before procFdLink.Close() (defers are LIFO)
+
+ // Try to detect if there is a mount on top of the magic-link. Since we
+ // operate on the handle itself (rather than re-resolving its path), this
+ // should be safe in general (a mount on top of the path afterwards would
+ // not affect the handle itself) and will definitely be safe if we are
+ // using privateProcRoot() (at least since Linux 5.12[1], when anonymous
+ // mount namespaces were completely isolated from external mounts including
+ // mount propagation events).
+ //
+ // [1]: Linux commit ee2e3f50629f ("mount: fix mounting of detached mounts
+ // onto targets that reside on shared mounts").
+ if err := checkSymlinkOvermount(procRoot, procFdLink, ""); err != nil {
+ return "", fmt.Errorf("check safety of /proc/thread-self/fd/%d magiclink: %w", fd, err)
+ }
+
+ // readlinkat implies AT_EMPTY_PATH since Linux 2.6.39. See Linux commit
+ // 65cfc6722361 ("readlinkat(), fchownat() and fstatat() with empty
+ // relative pathnames").
+ return readlinkatFile(procFdLink, "")
+}
+
+func rawProcSelfFdReadlink(fd int) (string, error) { // doRawProcSelfFdReadlink against the cached proc root
+ procRoot, err := getProcRoot()
+ if err != nil {
+ return "", err
+ }
+ return doRawProcSelfFdReadlink(procRoot, fd)
+}
+
+func procSelfFdReadlink(f *os.File) (string, error) { // *os.File convenience wrapper
+ return rawProcSelfFdReadlink(int(f.Fd()))
+}
+
+var (
+ errPossibleBreakout = errors.New("possible breakout detected") // handle path differs from the expected path (checkProcSelfFdPath)
+ errInvalidDirectory = errors.New("wandered into deleted directory") // isDeadInode: directory with nlink == 0
+ errDeletedInode = errors.New("cannot verify path of deleted inode") // isDeadInode: non-directory with nlink == 0
+)
+
+func isDeadInode(file *os.File) error { // non-nil if file's inode has a link count of zero
+ // If the nlink of a file drops to 0, there is an attacker deleting
+ // directories during our walk, which could result in weird /proc values.
+ // It's better to error out in this case.
+ stat, err := fstat(file)
+ if err != nil {
+ return fmt.Errorf("check for dead inode: %w", err)
+ }
+ if stat.Nlink == 0 {
+ err := errDeletedInode
+ if stat.Mode&unix.S_IFMT == unix.S_IFDIR { // directories get the more specific sentinel
+ err = errInvalidDirectory
+ }
+ return fmt.Errorf("%w %q", err, file.Name())
+ }
+ return nil
+}
+
+func getUmask() int { // returns the current umask, leaving it unchanged on return
+ // umask is a per-thread property, but it is inherited by children, so we
+ // need to lock our OS thread to make sure that no other goroutine runs in
+ // this thread and no goroutines are spawned from this thread until we
+ // revert to the old umask.
+ //
+ // We could parse /proc/self/status to avoid this get-set problem, but
+ // /proc/thread-self requires LockOSThread anyway, so there's no real
+ // benefit over just using umask(2).
+ runtime.LockOSThread()
+ umask := unix.Umask(0) // the only way to read it: set a new mask and capture the old one
+ unix.Umask(umask) // put the original mask straight back
+ runtime.UnlockOSThread()
+ return umask
+}
+
+func checkProcSelfFdPath(path string, file *os.File) error { // verifies that procfs reports exactly path for file
+ if err := isDeadInode(file); err != nil {
+ return err
+ }
+ actualPath, err := procSelfFdReadlink(file)
+ if err != nil {
+ return fmt.Errorf("get path of handle: %w", err)
+ }
+ if actualPath != path { // plain string comparison, no normalisation of either side
+ return fmt.Errorf("%w: handle path %q doesn't match expected path %q", errPossibleBreakout, actualPath, path)
+ }
+ return nil
+}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/testing_mocks_linux.go b/vendor/github.com/cyphar/filepath-securejoin/testing_mocks_linux.go
new file mode 100644
index 000000000..a3aedf03d
--- /dev/null
+++ b/vendor/github.com/cyphar/filepath-securejoin/testing_mocks_linux.go
@@ -0,0 +1,68 @@
+//go:build linux
+
+// Copyright (C) 2024 SUSE LLC. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package securejoin
+
+import (
+ "os"
+ "testing"
+)
+
+type forceGetProcRootLevel int // ordered levels; the testingForce* helpers compare with >=
+
+const (
+ forceGetProcRootDefault forceGetProcRootLevel = iota // nothing forced
+ forceGetProcRootOpenTree // force open_tree()
+ forceGetProcRootOpenTreeAtRecursive // force open_tree(AT_RECURSIVE)
+ forceGetProcRootUnsafe // force open()
+)
+
+var testingForceGetProcRoot *forceGetProcRootLevel // nil means no override; only honoured while testing.Testing()
+
+func testingCheckClose(check bool, f *os.File) bool { // returns check; when true, also closes f (if non-nil)
+ if check {
+ if f != nil {
+ _ = f.Close() // the caller is about to replace this handle, so release it
+ }
+ return true
+ }
+ return false
+}
+
+func testingForcePrivateProcRootOpenTree(f *os.File) bool { // true (and f closed) if tests force at least the open_tree() level
+ return testing.Testing() && testingForceGetProcRoot != nil &&
+ testingCheckClose(*testingForceGetProcRoot >= forceGetProcRootOpenTree, f)
+}
+
+func testingForcePrivateProcRootOpenTreeAtRecursive(f *os.File) bool { // true (and f closed) if tests force at least open_tree(AT_RECURSIVE)
+ return testing.Testing() && testingForceGetProcRoot != nil &&
+ testingCheckClose(*testingForceGetProcRoot >= forceGetProcRootOpenTreeAtRecursive, f)
+}
+
+func testingForceGetProcRootUnsafe() bool { // true if tests force the plain open() fallback
+ return testing.Testing() && testingForceGetProcRoot != nil &&
+ *testingForceGetProcRoot >= forceGetProcRootUnsafe
+}
+
+type forceProcThreadSelfLevel int // ordered levels; the testingForce* helpers compare with >=
+
+const (
+ forceProcThreadSelfDefault forceProcThreadSelfLevel = iota // nothing forced
+ forceProcSelfTask // force the self/task/<tid>/ prefix in procThreadSelf
+ forceProcSelf // force the self/ prefix in procThreadSelf
+)
+
+var testingForceProcThreadSelf *forceProcThreadSelfLevel // nil means no override; only honoured while testing.Testing()
+
+func testingForceProcSelfTask() bool { // true if tests force at least the self/task/<tid>/ fallback
+ return testing.Testing() && testingForceProcThreadSelf != nil &&
+ *testingForceProcThreadSelf >= forceProcSelfTask
+}
+
+func testingForceProcSelf() bool { // true if tests force the self/ fallback
+ return testing.Testing() && testingForceProcThreadSelf != nil &&
+ *testingForceProcThreadSelf >= forceProcSelf
+}
diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go
index a82a5eae1..6e27c7dd8 100644
--- a/vendor/github.com/cyphar/filepath-securejoin/vfs.go
+++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go
@@ -1,4 +1,4 @@
-// Copyright (C) 2017 SUSE LLC. All rights reserved.
+// Copyright (C) 2017-2024 SUSE LLC. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
diff --git a/vendor/github.com/docker/cli/AUTHORS b/vendor/github.com/docker/cli/AUTHORS
index 483743c99..d6d23b3de 100644
--- a/vendor/github.com/docker/cli/AUTHORS
+++ b/vendor/github.com/docker/cli/AUTHORS
@@ -2,6 +2,7 @@
# This file lists all contributors to the repository.
# See scripts/docs/generate-authors.sh to make modifications.
+A. Lester Buck III
Aanand Prasad
Aaron L. Xu
Aaron Lehmann
@@ -16,6 +17,7 @@ Adolfo Ochagavía
Adrian Plata
Adrien Duermael
Adrien Folie
+Adyanth Hosavalike
Ahmet Alp Balkan
Aidan Feldman
Aidan Hobson Sayers
@@ -26,7 +28,7 @@ Akim Demaille
Alan Thompson
Albert Callarisa
Alberto Roura
-Albin Kerouanton
+Albin Kerouanton
Aleksa Sarai
Aleksander Piotrowski
Alessandro Boch
@@ -34,6 +36,7 @@ Alex Couture-Beil
Alex Mavrogiannis
Alex Mayer
Alexander Boyd
+Alexander Chneerov
Alexander Larsson
Alexander Morozov
Alexander Ryabov
@@ -41,6 +44,7 @@ Alexandre González
Alexey Igrychev
Alexis Couvreur
Alfred Landrum
+Ali Rostami
Alicia Lauerman
Allen Sun
Alvin Deng
@@ -79,7 +83,9 @@ Arko Dasgupta
Arnaud Porterie
Arnaud Rebillout
Arthur Peka
+Ashly Mathew
Ashwini Oruganti
+Aslam Ahemad
Azat Khuyiyakhmetov
Bardia Keyoumarsi
Barnaby Gray
@@ -98,7 +104,9 @@ Bill Wang
Bin Liu
Bingshen Wang
Bishal Das
+Bjorn Neergaard
Boaz Shuster
+Boban Acimovic
Bogdan Anton
Boris Pruessmann
Brad Baker
@@ -109,6 +117,7 @@ Brent Salisbury
Bret Fisher
Brian (bex) Exelbierd
Brian Goff
+Brian Tracy
Brian Wieder
Bruno Sousa
Bryan Bess
@@ -136,6 +145,7 @@ Chen Chuanliang
Chen Hanxiao
Chen Mingjie
Chen Qiu
+Chris Chinchilla
Chris Couzens
Chris Gavin
Chris Gibson
@@ -163,6 +173,8 @@ Conner Crosby
Corey Farrell
Corey Quon
Cory Bennet
+Cory Snider
+Craig Osterhout
Craig Wilhite
Cristian Staretu
Daehyeok Mun
@@ -171,6 +183,7 @@ Daisuke Ito
dalanlan
Damien Nadé
Dan Cotora
+Danial Gharib
Daniel Artine
Daniel Cassidy
Daniel Dao
@@ -210,6 +223,7 @@ Denis Defreyne
Denis Gladkikh
Denis Ollier
Dennis Docter
+dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Derek McGowan
Des Preston
Deshi Xiao
@@ -232,11 +246,13 @@ DongGeon Lee
Doug Davis
Drew Erny
Ed Costello
+Ed Morley <501702+edmorley@users.noreply.github.com>
Elango Sivanandam
Eli Uriegas
Eli Uriegas
Elias Faxö
Elliot Luo <956941328@qq.com>
+Eric Bode
Eric Curtin
Eric Engestrom
Eric G. Noriega
@@ -254,6 +270,7 @@ Eugene Yakubovich
Evan Allrich
Evan Hazlett
Evan Krall
+Evan Lezar
Evelyn Xu
Everett Toews
Fabio Falci
@@ -275,6 +292,7 @@ Frederik Nordahl Jul Sabroe
Frieder Bluemle
Gabriel Gore
Gabriel Nicolas Avellaneda
+Gabriela Georgieva
Gaetan de Villele
Gang Qiao
Gary Schaetz
@@ -288,6 +306,7 @@ Gleb Stsenov
Goksu Toprak
Gou Rao
Govind Rai
+Graeme Wiebe
Grant Reaber
Greg Pflaum
Gsealy
@@ -311,6 +330,7 @@ Hernan Garcia
Hongbin Lu
Hu Keping
Huayi Zhang
+Hugo Chastel
Hugo Gabriel Eyherabide
huqun
Huu Nguyen
@@ -329,9 +349,12 @@ Ivan Grund
Ivan Markin
Jacob Atzen
Jacob Tomlinson
+Jacopo Rigoli
Jaivish Kothari
Jake Lambert
Jake Sanders
+Jake Stokes
+Jakub Panek
James Nesbitt
James Turnbull
Jamie Hannaford
@@ -408,10 +431,12 @@ Josh Chorlton
Josh Hawn
Josh Horwitz
Josh Soref
+Julian
Julien Barbier
Julien Kassar
Julien Maitrehenry
Justas Brazauskas
+Justin Chadwell
Justin Cormack
Justin Simonelis
Justyn Temme
@@ -434,7 +459,7 @@ Kelton Bassingthwaite
Ken Cochrane
Ken ICHIKAWA
Kenfe-Mickaël Laventure
-Kevin Alvarez
+Kevin Alvarez
Kevin Burke
Kevin Feyrer
Kevin Kern
@@ -454,6 +479,7 @@ Kyle Mitofsky
Lachlan Cooper
Lai Jiangshan
Lars Kellogg-Stedman
+Laura Brehm
Laura Frank
Laurent Erignoux
Lee Gaines
@@ -462,10 +488,10 @@ Lennie
Leo Gallucci
Leonid Skorospelov
Lewis Daly
+Li Fu Bang
Li Yi
Li Yi
Liang-Chi Hsieh