From f132fc5f0c0042552e233bda35f243bd2403aa0d Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Tue, 5 May 2026 11:31:59 -0500 Subject: [PATCH] cleanup --- .gitignore | 40 ++++++++++++++++++ .../k3s-dgx/flux-system/gotk-components.yaml | 2 +- .../gpu-support/nvidia-device-plugin.yaml | 39 ----------------- .../k3s-dgx/gpu-support/runtime-class.yaml | 16 ------- .../k3s-dgx/kserve/gpu-serving-runtime.yaml | 42 ------------------- .../k3s-dgx/kserve/kserve-controller.yaml | 3 -- clusters/k3s-dgx/kserve/kustomization.yaml | 3 -- .../k3s-dgx/kserve/model-storage-pvc.yaml | 12 ------ clusters/k3s-dgx/kserve/storage-config.yaml | 20 --------- 9 files changed, 41 insertions(+), 136 deletions(-) create mode 100644 .gitignore delete mode 100644 clusters/k3s-dgx/gpu-support/nvidia-device-plugin.yaml delete mode 100644 clusters/k3s-dgx/gpu-support/runtime-class.yaml delete mode 100644 clusters/k3s-dgx/kserve/gpu-serving-runtime.yaml delete mode 100644 clusters/k3s-dgx/kserve/model-storage-pvc.yaml delete mode 100644 clusters/k3s-dgx/kserve/storage-config.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ebafdf3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# FluxCD secrets +*.yaml.enc +*.yaml.dec +flux-gitea-key +flux-gitea-key.pub + +# Kubernetes secrets +*-secret.yaml +secrets/ + +# Temporary files +*.tmp +*.bak +*.swp +*~ + +# IDE files +.vscode/ +.idea/ +*.iml + +# OS files +.DS_Store +Thumbs.db + +# Logs +*.log +logs/ + +# Build artifacts +dist/ +build/ +*.tar.gz + +# Environment files +.env +.env.local + +# Helm +charts/*.tgz \ No newline at end of file diff --git a/clusters/k3s-dgx/flux-system/gotk-components.yaml b/clusters/k3s-dgx/flux-system/gotk-components.yaml index bb5facd..ae1f00e 100644 --- a/clusters/k3s-dgx/flux-system/gotk-components.yaml +++ b/clusters/k3s-dgx/flux-system/gotk-components.yaml @@ -47,4 +47,4 @@ metadata: namespace: flux-system spec: interval: 30m - url: https://nvidia.github.io/k8s-device-plugin \ No newline at end of file + url: https://nvidia.github.io/k8s-device-plugin diff --git a/clusters/k3s-dgx/gpu-support/nvidia-device-plugin.yaml b/clusters/k3s-dgx/gpu-support/nvidia-device-plugin.yaml deleted file mode 100644 index 49d19f5..0000000 --- a/clusters/k3s-dgx/gpu-support/nvidia-device-plugin.yaml +++ /dev/null @@ -1,39 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: gpu-operator ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: nvidia-device-plugin-daemonset - namespace: gpu-operator -spec: - selector: - matchLabels: - name: nvidia-device-plugin-ds - template: - metadata: - labels: - name: nvidia-device-plugin-ds - spec: - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0 - name: nvidia-device-plugin-ctr - args: ["--fail-on-init-error=false"] - env: - - name: NVIDIA_VISIBLE_DEVICES - value: "all" - - name: MIG_STRATEGY - value: "single" - volumeMounts: - - name: device-plugin - mountPath: /var/lib/kubelet/device-plugins - volumes: - - name: device-plugin - hostPath: - path: /var/lib/kubelet/device-plugins \ No newline at end of file diff --git a/clusters/k3s-dgx/gpu-support/runtime-class.yaml b/clusters/k3s-dgx/gpu-support/runtime-class.yaml deleted file mode 100644 index adc5838..0000000 --- a/clusters/k3s-dgx/gpu-support/runtime-class.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: node.k8s.io/v1 -kind: RuntimeClass -metadata: - name: nvidia -handler: nvidia -overhead: - podFixed: - memory: "1Gi" - cpu: "500m" -scheduling: - nodeSelector: - nvidia.com/gpu.present: "true" - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule \ No newline at end of file diff --git a/clusters/k3s-dgx/kserve/gpu-serving-runtime.yaml b/clusters/k3s-dgx/kserve/gpu-serving-runtime.yaml deleted file mode 100644 index 84a49d9..0000000 --- a/clusters/k3s-dgx/kserve/gpu-serving-runtime.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: serving.kserve.io/v1beta1 -kind: ServingRuntime -metadata: - name: kserve-gpu-runtime - namespace: kserve -spec: - supportedModelFormats: - - name: tensorflow - version: "2" - autoSelect: true - - name: pytorch - version: "2" - autoSelect: true - - name: sklearn - version: "1" - autoSelect: true - - name: xgboost - version: "1" - autoSelect: true - protocol: v1 - protocolVersions: - - v1 - - v2 - containers: - - name: kserve-container - image: kserve/sklearnserver-gpu:latest - resources: - requests: - cpu: "1" - memory: "2Gi" - nvidia.com/gpu: "1" - limits: - cpu: "4" - memory: "8Gi" - nvidia.com/gpu: "1" - env: - - name: NVIDIA_VISIBLE_DEVICES - value: "all" - - name: NVIDIA_DRIVER_CAPABILITIES - value: "compute,utility" - multiModel: false - disabled: false \ No newline at end of file diff --git a/clusters/k3s-dgx/kserve/kserve-controller.yaml b/clusters/k3s-dgx/kserve/kserve-controller.yaml index 9c4c0ac..8ef3b21 100644 --- a/clusters/k3s-dgx/kserve/kserve-controller.yaml +++ b/clusters/k3s-dgx/kserve/kserve-controller.yaml @@ -34,9 +34,6 @@ spec: config: ingress: className: istio - storage: - initialCapacity: 10Gi - storageClassName: local-path knative: enabled: true istio: diff --git a/clusters/k3s-dgx/kserve/kustomization.yaml b/clusters/k3s-dgx/kserve/kustomization.yaml index 19447b8..3e1f91a 100644 --- a/clusters/k3s-dgx/kserve/kustomization.yaml +++ b/clusters/k3s-dgx/kserve/kustomization.yaml @@ -5,6 +5,3 @@ resources: - kserve-namespace.yaml - kserve-controller.yaml - istio-gateway.yaml - # - gpu-serving-runtime.yaml - # - model-storage-pvc.yaml - # - storage-config.yaml diff --git a/clusters/k3s-dgx/kserve/model-storage-pvc.yaml b/clusters/k3s-dgx/kserve/model-storage-pvc.yaml deleted file mode 100644 index a7e81a8..0000000 --- a/clusters/k3s-dgx/kserve/model-storage-pvc.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: model-storage-pvc - namespace: kserve -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 50Gi - storageClassName: local-path \ No newline at end of file diff --git a/clusters/k3s-dgx/kserve/storage-config.yaml b/clusters/k3s-dgx/kserve/storage-config.yaml deleted file mode 100644 index 7cf2122..0000000 --- a/clusters/k3s-dgx/kserve/storage-config.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: storage-config - namespace: kserve -data: - storageConfig.yaml: | - defaultStorageUri: "pvc://model-storage" - storageSpec: - - name: model-storage - type: pvc - pvcSpec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 50Gi - storageClassName: local-path - modelCacheSize: 10Gi - modelCacheMemory: 2Gi \ No newline at end of file