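Add GPU support: upgrade the gpu-operator chart from v23.9.1 to v26.3.1, and remove the custom Helm values and the gpu-node-labels ConfigMap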

This commit is contained in:
2026-05-05 12:01:39 -05:00
parent 7aa07cadd2
commit d8efb8557f
4 changed files with 2 additions and 62 deletions

View File

@@ -1,13 +0,0 @@
# ConfigMap "gpu-node-labels" in the gpu-operator namespace.
# It carries a single data key, labels.yaml, whose value is a YAML-formatted
# string listing label key/value pairs.
# NOTE(review): nothing visible here shows which component reads labels.yaml;
# confirm the consumer before relying on this format.
apiVersion: v1
kind: ConfigMap
metadata:
  name: gpu-node-labels
  namespace: gpu-operator
data:
  # Literal block scalar: everything indented below is stored verbatim as the
  # string value of labels.yaml (it is not parsed as part of this manifest).
  labels.yaml: |
    - key: accelerator
      value: nvidia-tesla
    - key: nvidia.com/gpu.present
      value: "true"
    - key: topology.kubernetes.io/zone
      value: "dgx-spark"

View File

@@ -17,54 +17,8 @@ spec:
chart:
spec:
chart: gpu-operator
version: "v23.9.1"
version: "v26.3.1"
sourceRef:
kind: HelmRepository
name: nvidia
namespace: gpu-operator
values:
driver:
enabled: true
image: "nvcr.io/nvidia/driver"
version: "535.129.03"
operator:
defaultRuntime: nvidia-container-runtime
toolkit:
enabled: true
image: "nvcr.io/nvidia/k8s-device-plugin"
version: "v0.14.0"
env:
- name: CONTAINERD_CONFIG
value: /var/lib/rancher/k3s/agent/etc/containerd/config.toml
- name: CONTAINERD_SOCKET
value: /run/k3s/containerd/containerd.sock
- name: CONTAINERD_RUNTIME_CLASS
value: nvidia
devicePlugin:
enabled: true
image: "nvcr.io/nvidia/k8s-device-plugin"
version: "v0.14.0"
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
cpu: 500m
memory: 500Mi
dcgm:
enabled: true
image: "nvcr.io/nvidia/dcgm-exporter"
version: "3.3.3-3.1.0-ubuntu22.04"
resources:
requests:
cpu: 100m
memory: 100Mi
limits:
cpu: 500m
memory: 500Mi
migManager:
enabled: true
gfd:
enabled: true
node-feature-discovery:
enabled: true

View File

@@ -4,4 +4,3 @@ namespace: gpu-operator
resources:
- gpu-operator-namespace.yaml
- gpu-operator-helmrelease.yaml
- gpu-node-labels.yaml