Files
edge-gitops/clusters/k3s-dgx/kserve/gpu-serving-runtime.yaml
2026-05-05 11:15:49 -05:00

42 lines
924 B
YAML

---
# KServe ServingRuntime that serves models on a single NVIDIA GPU.
#
# Namespace-scoped (kind: ServingRuntime, not ClusterServingRuntime), so it is
# only selectable by InferenceServices created in the `kserve` namespace.
#
# ServingRuntime is served under the v1alpha1 API group version; v1beta1 is
# the InferenceService version and is rejected for this kind.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: kserve-gpu-runtime
  namespace: kserve
spec:
  # Model formats this runtime advertises. `autoSelect: true` lets an
  # InferenceService that names only a model format (and no explicit runtime)
  # be matched to this runtime automatically.
  # NOTE(review): all four formats are routed to the single sklearn-named
  # image below — confirm that image can actually load TensorFlow, PyTorch
  # and XGBoost models, or split this into one runtime per framework.
  supportedModelFormats:
    - name: tensorflow
      version: "2"
      autoSelect: true
    - name: pytorch
      version: "2"
      autoSelect: true
    - name: sklearn
      version: "1"
      autoSelect: true
    - name: xgboost
      version: "1"
      autoSelect: true
  # Inference protocols exposed by the server. The former scalar
  # `protocol: v1` key was dropped: it is not part of the ServingRuntime spec
  # schema, and v1 is already listed here.
  protocolVersions:
    - v1
    - v2
  containers:
    - name: kserve-container
      # NOTE(review): `latest` is a mutable tag, so rollouts are not
      # reproducible; pin a released version or an image digest.
      # NOTE(review): no `args` are set here — verify the image's default
      # entrypoint picks up the model name and model directory on its own.
      image: kserve/sklearnserver-gpu:latest
      resources:
        requests:
          cpu: "1"
          memory: "2Gi"
          nvidia.com/gpu: "1"
        limits:
          cpu: "4"
          memory: "8Gi"
          # Extended resources such as GPUs cannot be overcommitted, so the
          # limit must equal the request.
          nvidia.com/gpu: "1"
      env:
        # Consumed by the NVIDIA container runtime: which GPUs to expose and
        # which driver capabilities to mount into the container.
        - name: NVIDIA_VISIBLE_DEVICES
          value: "all"
        - name: NVIDIA_DRIVER_CAPABILITIES
          value: "compute,utility"
  # One model per server pod (no multi-model serving).
  multiModel: false
  # Runtime is active and eligible for selection.
  disabled: false