Skip to content

Commit 78322e5

Browse files
committed
Add optional NFD dependencies for Helm deployment
If nfd.enabled is set to true, the daemonsets are only deployed to nodes with AMD devices (as identified by the label feature.node.kubernetes.io/pci-0300_1002.present). Also bumps the Helm chart version to 0.2.0.
1 parent d1ffdf9 commit 78322e5

File tree

5 files changed

+37
-4
lines changed

5 files changed

+37
-4
lines changed

helm/amd-gpu/Chart.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,11 @@ keywords:
1515
- gpu
1616

1717
kubeVersion: ">= 1.18.0"
18-
version: 0.1.0
18+
version: 0.2.0
1919
appVersion: "1.18.0"
20+
21+
dependencies:
22+
- name: node-feature-discovery
23+
version: "0.8.1"
24+
repository: "https://kubernetes-sigs.github.io/node-feature-discovery/charts"
25+
condition: nfd.enabled

helm/amd-gpu/README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
# AMD GPU Helm Chart
22

3-
![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.18.0](https://img.shields.io/badge/AppVersion-1.18.0-informational?style=flat-square)
3+
![Version: 0.2.0](https://img.shields.io/badge/Version-0.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.18.0](https://img.shields.io/badge/AppVersion-1.18.0-informational?style=flat-square)
44

55
A Helm chart for deploying the Kubernetes AMD GPU device plugin
66

77
## Requirements
88

99
Kubernetes: `>= 1.18.0`
1010

11+
## Optional Dependencies
12+
13+
| Repository | Name | Version |
14+
|------------|------|---------|
15+
| https://kubernetes-sigs.github.io/node-feature-discovery/charts | node-feature-discovery | 0.8.1 |
16+
1117
## Values
1218

1319
| Key | Type | Default | Description |
@@ -19,6 +25,8 @@ Kubernetes: `>= 1.18.0`
1925
| lbl.image.repository | string | `"docker.io/rocm/k8s-device-plugin"` | |
2026
| lbl.image.tag | string | `"labeller-latest"` | |
2127
| namespace | string | `"kube-system"` | |
28+
| nfd.enabled | bool | `false` | |
29+
| node_selector."feature.node.kubernetes.io/pci-0300_1002.present" | string | `"true"` | |
2230
| securityContext.allowPrivilegeEscalation | bool | `false` | |
2331
| securityContext.capabilities.drop[0] | string | `"ALL"` | |
2432
| tolerations[0].key | string | `"CriticalAddonsOnly"` | |

helm/amd-gpu/templates/deviceplugin-daemonset.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ spec:
1818
imagePullSecrets:
1919
{{- toYaml . | nindent 8 }}
2020
{{- end }}
21+
{{- if .Values.nfd.enabled }}
22+
{{- with .Values.node_selector }}
23+
nodeSelector:
24+
{{- toYaml . | nindent 8 }}
25+
{{- end }}
26+
{{- end }}
2127
{{- with .Values.tolerations }}
2228
tolerations:
2329
{{- toYaml . | nindent 8 }}

helm/amd-gpu/templates/labeller.yaml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,16 @@ spec:
4141
labels:
4242
name: amdgpu-lr-ds
4343
spec:
44+
{{- if .Values.nfd.enabled }}
45+
{{- with .Values.node_selector }}
46+
nodeSelector:
47+
{{- toYaml . | nindent 8 }}
48+
{{- end }}
49+
{{- end }}
50+
{{- with .Values.tolerations }}
4451
tolerations:
45-
- key: CriticalAddonsOnly
46-
operator: Exists
52+
{{- toYaml . | nindent 8 }}
53+
{{- end }}
4754
containers:
4855
- image: {{ .Values.lbl.image.repository }}:{{ .Values.lbl.image.tag }}
4956
name: {{ .Chart.Name }}-lr-cntr

helm/amd-gpu/values.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
nfd:
2+
enabled: false
3+
14
labeller:
25
enabled: false
36

@@ -25,3 +28,6 @@ securityContext:
2528
tolerations:
2629
- key: CriticalAddonsOnly
2730
operator: Exists
31+
32+
node_selector:
33+
feature.node.kubernetes.io/pci-0300_1002.present: "true"

0 commit comments

Comments
 (0)