diff --git a/clusters/k3s-dgx/gpu-support/gpu-node-labels.yaml b/clusters/k3s-dgx/gpu-support/gpu-node-labels.yaml deleted file mode 100644 index 6551695..0000000 --- a/clusters/k3s-dgx/gpu-support/gpu-node-labels.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: gpu-node-labels - namespace: gpu-operator -data: - labels.yaml: | - - key: accelerator - value: nvidia-tesla - - key: nvidia.com/gpu.present - value: "true" - - key: topology.kubernetes.io/zone - value: "dgx-spark" \ No newline at end of file diff --git a/clusters/k3s-dgx/gpu-support/gpu-operator-helmrelease.yaml b/clusters/k3s-dgx/gpu-support/gpu-operator-helmrelease.yaml index 4178071..41fa5fb 100644 --- a/clusters/k3s-dgx/gpu-support/gpu-operator-helmrelease.yaml +++ b/clusters/k3s-dgx/gpu-support/gpu-operator-helmrelease.yaml @@ -17,54 +17,8 @@ spec: chart: spec: chart: gpu-operator - version: "v23.9.1" + version: "v26.3.1" sourceRef: kind: HelmRepository name: nvidia namespace: gpu-operator - values: - driver: - enabled: true - image: "nvcr.io/nvidia/driver" - version: "535.129.03" - operator: - defaultRuntime: nvidia-container-runtime - toolkit: - enabled: true - image: "nvcr.io/nvidia/k8s-device-plugin" - version: "v0.14.0" - env: - - name: CONTAINERD_CONFIG - value: /var/lib/rancher/k3s/agent/etc/containerd/config.toml - - name: CONTAINERD_SOCKET - value: /run/k3s/containerd/containerd.sock - - name: CONTAINERD_RUNTIME_CLASS - value: nvidia - devicePlugin: - enabled: true - image: "nvcr.io/nvidia/k8s-device-plugin" - version: "v0.14.0" - resources: - requests: - cpu: 100m - memory: 100Mi - limits: - cpu: 500m - memory: 500Mi - dcgm: - enabled: true - image: "nvcr.io/nvidia/dcgm-exporter" - version: "3.3.3-3.1.0-ubuntu22.04" - resources: - requests: - cpu: 100m - memory: 100Mi - limits: - cpu: 500m - memory: 500Mi - migManager: - enabled: true - gfd: - enabled: true - node-feature-discovery: - enabled: true \ No newline at end of file diff --git a/clusters/k3s-dgx/gpu-support/kustomization.yaml b/clusters/k3s-dgx/gpu-support/kustomization.yaml index 03363b3..fa84033 100644 --- a/clusters/k3s-dgx/gpu-support/kustomization.yaml +++ b/clusters/k3s-dgx/gpu-support/kustomization.yaml @@ -4,4 +4,3 @@ namespace: gpu-operator resources: - gpu-operator-namespace.yaml - gpu-operator-helmrelease.yaml - - gpu-node-labels.yaml \ No newline at end of file diff --git a/clusters/k3s-dgx/kustomization.yaml b/clusters/k3s-dgx/kustomization.yaml index ccbd846..b0fb5f8 100644 --- a/clusters/k3s-dgx/kustomization.yaml +++ b/clusters/k3s-dgx/kustomization.yaml @@ -2,6 +2,6 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - flux-system - # - gpu-support + - gpu-support # - kserve # - apps