test qwen

This commit is contained in:
2026-05-05 13:31:20 -05:00
parent 5f7c513090
commit ee5939eef3

View File

@@ -1,7 +1,7 @@
-apiVersion: serving.kserve.io/v1beta1
-kind: InferenceService
+apiVersion: "serving.kserve.io/v1beta1"
+kind: "InferenceService"
 metadata:
-  name: huihui-granite
+  name: "qwen-llm"
   namespace: kserve
 spec:
   predictor:
@@ -9,14 +9,14 @@ spec:
       modelFormat:
         name: huggingface
       args:
-        - --model_name=huihui-granite
-      storageUri: "hf://huihui-ai/Huihui-granite-4.1-30b-abliterated"
+        - --model_name=qwen
+      storageUri: "hf://Qwen/Qwen2.5-0.5B-Instruct"
       resources:
         limits:
-          cpu: "4"
-          memory: 16Gi
+          cpu: "2"
+          memory: 6Gi
           nvidia.com/gpu: "1"
         requests:
-          cpu: "2"
-          memory: 8Gi
+          cpu: "1"
+          memory: 4Gi
           nvidia.com/gpu: "1"