Files
edge-gitops/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
2026-05-05 13:31:20 -05:00

23 lines
474 B
YAML

---
# KServe InferenceService "qwen-llm" in the "kserve" namespace.
# The predictor uses the huggingface model format and loads
# hf://Qwen/Qwen2.5-0.5B-Instruct.
# NOTE(review): the file path says huihui-granite-inference.yaml, but this
# resource is named qwen-llm and points at a Qwen model — confirm intended.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: qwen-llm
  namespace: kserve
spec:
  predictor:
    model:
      modelFormat:
        name: huggingface
      # Extra arguments handed to the model server container.
      # Presumably --model_name sets the name the model is served under;
      # confirm against the KServe Hugging Face runtime docs.
      args:
        - --model_name=qwen
      storageUri: "hf://Qwen/Qwen2.5-0.5B-Instruct"
      resources:
        # CPU and memory limits sit above the requests; the GPU count is
        # the same (one) in both. Quantities stay quoted strings.
        limits:
          cpu: "2"
          memory: 6Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "1"
          memory: 4Gi
          nvidia.com/gpu: "1"