diff --git a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
index 9a77c59..6ff11bf 100644
--- a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
+++ b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
@@ -1,22 +1,18 @@
 apiVersion: "serving.kserve.io/v1beta1"
 kind: "InferenceService"
 metadata:
-  name: "qwen-llm"
+  name: "flower-sample"
   namespace: kserve
 spec:
   predictor:
     model:
       modelFormat:
-        name: huggingface
-      args:
-        - --model_name=qwen
-      storageUri: "hf://Qwen/Qwen2.5-0.5B-Instruct"
+        name: tensorflow
+      storageUri: "gs://kfserving-examples/models/tensorflow/flowers"
       resources:
-        limits:
-          cpu: "2"
-          memory: 6Gi
-          nvidia.com/gpu: "1"
         requests:
+          cpu: "100m"
+          memory: "256Mi"
+        limits:
           cpu: "1"
-          memory: 4Gi
-          nvidia.com/gpu: "1"
+          memory: "1Gi"