From 2ad8d8ad346358e9361895251759826e28e58e54 Mon Sep 17 00:00:00 2001 From: limengxuan <391013634@qq.com> Date: Sat, 14 Sep 2024 17:15:51 +0800 Subject: [PATCH 1/3] update docs and yaml Signed-off-by: limengxuan <391013634@qq.com> --- README.md | 21 +++++++++--------- README_cn.md | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 README_cn.md diff --git a/README.md b/README.md index c7d6e06..eb7353f 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,30 @@ # Ascend Device Plugin -## 说明 +## Introduction -基于[HAMi](https://github.com/Project-HAMi/HAMi)调度机制的ascend device plugin。 +This Ascend device plugin is implemented for [HAMi](https://github.com/Project-HAMi/HAMi) scheduling. -支持基于显存调度,显存是基于昇腾的虚拟化模板来切分的,会找到满足显存需求的最小模板来作为容器的显存。 +Memory slicing is supported based on virtualization template, lease available template is automatically used. For detailed information, check [templeate](https://github.com/Project-HAMi/HAMi/blob/master/charts/hami/device-spec/ascend-config.yaml) -启动容器依赖[ascend-docker-runtime](https://gitee.com/ascend/ascend-docker-runtime)。 +## Prequisites -## 编译 +[ascend-docker-runtime](https://gitee.com/ascend/ascend-docker-runtime) -### 编译二进制文件 +## Compile ```bash make all ``` -### 编译镜像 +### Build ```bash docker buildx build -t $IMAGE_NAME . ``` -## 部署 +## Deployment -由于和HAMi的一些依赖关系,部署集成在HAMi的部署中,修改HAMi chart values中的以下部分即可。 +Due to dependencies with HAMi, the deployment is integrated into the HAMi deployment; you need to set 'devices.ascend.enabled=true'. The device plugin is then deployed automatically. For more details, see the 'devices' section in values.yaml. ```yaml devices: @@ -45,7 +45,8 @@ devices: - huawei.com/Ascend310P-memory ``` -## 使用 + +## Usage ```yaml ... 
diff --git a/README_cn.md b/README_cn.md new file mode 100644 index 0000000..c7d6e06 --- /dev/null +++ b/README_cn.md @@ -0,0 +1,60 @@ +# Ascend Device Plugin + +## 说明 + +基于[HAMi](https://github.com/Project-HAMi/HAMi)调度机制的ascend device plugin。 + +支持基于显存调度,显存是基于昇腾的虚拟化模板来切分的,会找到满足显存需求的最小模板来作为容器的显存。 + +启动容器依赖[ascend-docker-runtime](https://gitee.com/ascend/ascend-docker-runtime)。 + +## 编译 + +### 编译二进制文件 + +```bash +make all +``` + +### 编译镜像 + +```bash +docker buildx build -t $IMAGE_NAME . +``` + +## 部署 + +由于和HAMi的一些依赖关系,部署集成在HAMi的部署中,修改HAMi chart values中的以下部分即可。 + +```yaml +devices: + ascend: + enabled: true + image: "ascend-device-plugin:master" + imagePullPolicy: IfNotPresent + extraArgs: [] + nodeSelector: + ascend: "on" + tolerations: [] + resources: + - huawei.com/Ascend910A + - huawei.com/Ascend910A-memory + - huawei.com/Ascend910B + - huawei.com/Ascend910B-memory + - huawei.com/Ascend310P + - huawei.com/Ascend310P-memory +``` + +## 使用 + +```yaml +... + containers: + - name: npu_pod + ... + resources: + limits: + huawei.com/Ascend910B: "1" + # 不填写显存默认使用整张卡 + huawei.com/Ascend910B-memory: "4096" +``` From 6b73fa00958fba7aaabdad952b8bf7f851f07106 Mon Sep 17 00:00:00 2001 From: limengxuan <391013634@qq.com> Date: Sat, 14 Sep 2024 17:17:51 +0800 Subject: [PATCH 2/3] add device_plugin.yaml Signed-off-by: limengxuan <391013634@qq.com> --- README.md | 2 +- ascend-device-plugin.yaml | 105 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 ascend-device-plugin.yaml diff --git a/README.md b/README.md index eb7353f..9bfe55c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This Ascend device plugin is implemented for [HAMi](https://github.com/Project-HAMi/HAMi) scheduling. -Memory slicing is supported based on virtualization template, lease available template is automatically used. 
For detailed information, check [templeate](https://github.com/Project-HAMi/HAMi/blob/master/charts/hami/device-spec/ascend-config.yaml) +Memory slicing is supported based on virtualization templates; the smallest template that satisfies the memory request is used automatically. For detailed information, check the [template](./config.yaml) ## Prequisites diff --git a/ascend-device-plugin.yaml b/ascend-device-plugin.yaml new file mode 100644 index 0000000..435ba4e --- /dev/null +++ b/ascend-device-plugin.yaml @@ -0,0 +1,105 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: hami-ascend +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "update", "watch", "patch"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: hami-ascend +subjects: + - kind: ServiceAccount + name: hami-ascend + namespace: kube-system +roleRef: + kind: ClusterRole + name: hami-ascend + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: hami-ascend-device-plugin + namespace: kube-system + labels: + app.kubernetes.io/component: hami-ascend-device-plugin +spec: + selector: + matchLabels: + app.kubernetes.io/component: hami-ascend-device-plugin + hami.io/webhook: ignore + template: + metadata: + labels: + app.kubernetes.io/component: hami-ascend-device-plugin + hami.io/webhook: ignore + spec: + priorityClassName: "system-node-critical" + serviceAccountName: hami-ascend + containers: + - image: projecthami/ascend-device-plugin:main + imagePullPolicy: IfNotPresent + name: device-plugin + resources: + requests: + memory: 500Mi + cpu: 500m + limits: + memory: 500Mi + cpu: 500m + args: + - --config_file + - /ascend-config.yaml + securityContext: + privileged: true + readOnlyRootFilesystem: false + volumeMounts: + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: pod-resource + mountPath: 
/var/lib/kubelet/pod-resources + - name: hiai-driver + mountPath: /usr/local/Ascend/driver + readOnly: true + - name: log-path + mountPath: /var/log/mindx-dl/devicePlugin + - name: tmp + mountPath: /tmp + - name: device-config + mountPath: /ascend-config.yaml + subPath: ascend-config.yaml + readOnly: true + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: pod-resource + hostPath: + path: /var/lib/kubelet/pod-resources + - name: hiai-driver + hostPath: + path: /usr/local/Ascend/driver + - name: log-path + hostPath: + path: /var/log/mindx-dl/devicePlugin + type: Directory + - name: tmp + hostPath: + path: /tmp + - name: device-config + configMap: + name: hami-scheduler-device + nodeSelector: + ascend: "on" From ced24cb909af7b89c6b19a8af7616021db0985f1 Mon Sep 17 00:00:00 2001 From: limengxuan <391013634@qq.com> Date: Sat, 14 Sep 2024 17:20:39 +0800 Subject: [PATCH 3/3] add device_plugin.yaml Signed-off-by: limengxuan <391013634@qq.com> --- ascend-device-plugin.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ascend-device-plugin.yaml b/ascend-device-plugin.yaml index 435ba4e..32316b1 100644 --- a/ascend-device-plugin.yaml +++ b/ascend-device-plugin.yaml @@ -23,6 +23,14 @@ roleRef: name: hami-ascend apiGroup: rbac.authorization.k8s.io --- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hami-ascend + namespace: kube-system + labels: + app.kubernetes.io/component: "hami-ascend" +--- apiVersion: apps/v1 kind: DaemonSet metadata: