# Patch summary for clusters/k3s-dgx/nim-service/qwen.yaml (two hunks).
#
# Hunk 1 (@@ -27,8 +27,8 @@, under `spec:`): swaps the container image.
#   repository: vllm/vllm-openai  ->  scitrera/dgx-spark-vllm
#   tag:        v0.19.1-cu130     ->  0.17.0-t4
#   The surrounding context (userID/groupID 0, pullPolicy IfNotPresent, env) is unchanged.
#
# Hunk 2 (@@ -74,7 +74,6 @@, under `spec:`): removes the `--disable-log-requests`
#   list entry; the neighbouring entries (`--override-generation-config` and its JSON
#   value, `--attention-backend FLASHINFER`) and `authSecret` are unchanged.
#
# NOTE(review): the new tag carries a lower version number (0.17.0) than the one it
#   replaces (0.19.1) - confirm the downgrade is intended for this image.
# NOTE(review): presumably `--disable-log-requests` is dropped because the new image
#   does not accept it - verify against that image's CLI; request logging behaviour
#   may differ once the flag is gone.
# NOTE(review): this patch text is stored on a single line; the original line breaks
#   and YAML indentation are not recoverable from here and would need to be restored
#   from the repository before the patch can be applied.
diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index f3319b6..82ca2f1 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -27,8 +27,8 @@ spec: userID: 0 groupID: 0 image: - repository: vllm/vllm-openai - tag: v0.19.1-cu130 + repository: scitrera/dgx-spark-vllm + tag: 0.17.0-t4 pullPolicy: IfNotPresent env: - name: VLLM_CACHE_ROOT @@ -74,7 +74,6 @@ spec: - '{"preserve_thinking": true}' - --override-generation-config - '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}' - - --disable-log-requests - --attention-backend - FLASHINFER authSecret: hf-api-secret