This commit is contained in:
2026-05-05 11:31:59 -05:00
parent a1b1d936bd
commit f132fc5f0c
9 changed files with 41 additions and 136 deletions

40
.gitignore vendored Normal file
View File

@@ -0,0 +1,40 @@
# ---------------------------------------------------------------------------
# Secret material - must never be committed
# ---------------------------------------------------------------------------

# Flux-related files: the flux-gitea-key pair and encrypted/decrypted YAML copies
flux-gitea-key
flux-gitea-key.pub
*.yaml.enc
*.yaml.dec

# Kubernetes secret manifests and any secrets/ directory
secrets/
*-secret.yaml

# Environment files
.env
.env.local

# ---------------------------------------------------------------------------
# Local noise - editors, operating system, scratch files
# ---------------------------------------------------------------------------

# IDE / editor project files
.idea/
*.iml
.vscode/

# OS-generated files
Thumbs.db
.DS_Store

# Temporary and backup files
*~
*.swp
*.bak
*.tmp

# ---------------------------------------------------------------------------
# Generated output
# ---------------------------------------------------------------------------

# Logs
logs/
*.log

# Build artifacts
build/
dist/
*.tar.gz

# Packaged Helm charts (pattern contains a slash, so it is anchored to the
# directory holding this .gitignore)
charts/*.tgz

View File

@@ -1,39 +0,0 @@
# Namespace referenced by the nvidia-device-plugin-daemonset DaemonSet.
apiVersion: v1
kind: Namespace
metadata:
  name: gpu-operator
---
# DaemonSet that runs the NVIDIA Kubernetes device plugin container in the
# gpu-operator namespace and shares the kubelet device-plugin directory with it.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: gpu-operator
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  template:
    metadata:
      labels:
        # Must stay identical to spec.selector.matchLabels above.
        name: nvidia-device-plugin-ds
    spec:
      tolerations:
        # Tolerate the nvidia.com/gpu NoSchedule taint so the plugin can be
        # scheduled onto nodes carrying it.
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      containers:
        - name: nvidia-device-plugin-ctr
          image: nvcr.io/nvidia/k8s-device-plugin:v0.14.0
          args:
            - "--fail-on-init-error=false"
          env:
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: MIG_STRATEGY
              value: "single"
          # Hardening taken from the upstream static manifest: the plugin does
          # not need privilege escalation or any Linux capabilities.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        # Host directory mounted into the container at the same path.
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins

View File

@@ -1,16 +0,0 @@
# RuntimeClass "nvidia": maps the class name to the container-runtime handler
# of the same name and attaches fixed overhead and scheduling constraints.
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: nvidia
handler: nvidia
# Fixed per-pod resource overhead declared for this runtime class.
overhead:
  podFixed:
    cpu: 500m
    memory: 1Gi
scheduling:
  # Label value must remain a string, hence the quotes.
  nodeSelector:
    nvidia.com/gpu.present: "true"
  tolerations:
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule

View File

@@ -1,42 +0,0 @@
# KServe ServingRuntime in the kserve namespace that requests one NVIDIA GPU
# per replica and auto-selects for the model formats listed below.
#
# ServingRuntime is served under serving.kserve.io/v1alpha1 (v1beta1 is the
# InferenceService group version), so the apiVersion is v1alpha1 here.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: kserve-gpu-runtime
  namespace: kserve
spec:
  supportedModelFormats:
    - name: tensorflow
      version: "2"
      autoSelect: true
    - name: pytorch
      version: "2"
      autoSelect: true
    - name: sklearn
      version: "1"
      autoSelect: true
    - name: xgboost
      version: "1"
      autoSelect: true
  # The former standalone `protocol: v1` key is not part of the ServingRuntime
  # spec; protocolVersions is the field that declares supported protocols.
  protocolVersions:
    - v1
    - v2
  containers:
    - name: kserve-container
      # NOTE(review): floating :latest tag - pin to a specific, verified image
      # tag or digest before deploying.
      image: kserve/sklearnserver-gpu:latest
      resources:
        requests:
          cpu: "1"
          memory: 2Gi
          nvidia.com/gpu: "1"
        limits:
          cpu: "4"
          memory: 8Gi
          # GPU request and limit are kept equal.
          nvidia.com/gpu: "1"
      env:
        - name: NVIDIA_VISIBLE_DEVICES
          value: "all"
        - name: NVIDIA_DRIVER_CAPABILITIES
          value: "compute,utility"
  multiModel: false
  disabled: false

View File

@@ -34,9 +34,6 @@ spec:
config: config:
ingress: ingress:
className: istio className: istio
storage:
initialCapacity: 10Gi
storageClassName: local-path
knative: knative:
enabled: true enabled: true
istio: istio:

View File

@@ -5,6 +5,3 @@ resources:
- kserve-namespace.yaml - kserve-namespace.yaml
- kserve-controller.yaml - kserve-controller.yaml
- istio-gateway.yaml - istio-gateway.yaml
# - gpu-serving-runtime.yaml
# - model-storage-pvc.yaml
# - storage-config.yaml

View File

@@ -1,12 +0,0 @@
# 50Gi ReadWriteOnce claim in the kserve namespace, using the local-path
# storage class.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: model-storage-pvc
  namespace: kserve
spec:
  storageClassName: local-path
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi

View File

@@ -1,20 +0,0 @@
# ConfigMap "storage-config" in the kserve namespace. It carries a single data
# key, storageConfig.yaml, declared as a literal block scalar.
# NOTE(review): indentation was lost in this view. The lines following
# `storageConfig.yaml: |` are presumably the body of that block scalar, and
# the nesting inside it (in particular whether modelCacheSize and
# modelCacheMemory are top-level keys) must be confirmed against the original
# file before this manifest is reused.
# NOTE(review): defaultStorageUri refers to "pvc://model-storage", while the
# PersistentVolumeClaim manifest in this change set is named
# "model-storage-pvc" - verify which name the consumer of this config expects.
apiVersion: v1
kind: ConfigMap
metadata:
name: storage-config
namespace: kserve
data:
storageConfig.yaml: |
defaultStorageUri: "pvc://model-storage"
storageSpec:
- name: model-storage
type: pvc
pvcSpec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 50Gi
storageClassName: local-path
modelCacheSize: 10Gi
modelCacheMemory: 2Gi