From ee5939eef34f9be75e190caa8d5b546d04e06785 Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Tue, 5 May 2026 13:31:20 -0500
Subject: [PATCH] test qwen

---

Notes:
    Repoints the KServe InferenceService in this manifest from
    hf://huihui-ai/Huihui-granite-4.1-30b-abliterated to
    hf://Qwen/Qwen2.5-0.5B-Instruct.

    - metadata.name changes from huihui-granite to qwen-llm, and the
      --model_name argument changes from huihui-granite to qwen.
    - CPU/memory limits drop from 4 / 16Gi to 2 / 6Gi and requests from
      2 / 8Gi to 1 / 4Gi; the single nvidia.com/gpu request and limit
      are unchanged.
    - apiVersion, kind and metadata.name become double-quoted strings.
    - The file gains the trailing newline it was missing.
    - The manifest path keeps its huihui-granite-inference.yaml name.

 .../apps/huihui-granite-inference.yaml | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
index bad3c46..9a77c59 100644
--- a/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
+++ b/clusters/k3s-dgx/apps/huihui-granite-inference.yaml
@@ -1,7 +1,7 @@
-apiVersion: serving.kserve.io/v1beta1
-kind: InferenceService
+apiVersion: "serving.kserve.io/v1beta1"
+kind: "InferenceService"
 metadata:
-  name: huihui-granite
+  name: "qwen-llm"
   namespace: kserve
 spec:
   predictor:
@@ -9,14 +9,14 @@ spec:
       modelFormat:
         name: huggingface
       args:
-        - --model_name=huihui-granite
-      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
+        - --model_name=qwen
+      storageUri: "hf://Qwen/Qwen2.5-0.5B-Instruct"
       resources:
         limits:
-          cpu: "4"
-          memory: 16Gi
+          cpu: "2"
+          memory: 6Gi
           nvidia.com/gpu: "1"
         requests:
-          cpu: "2"
-          memory: 8Gi
-          nvidia.com/gpu: "1"
\ No newline at end of file
+          cpu: "1"
+          memory: 4Gi
+          nvidia.com/gpu: "1"