From c655bca1bf85405d0d626b670b98dabf3d3fca45 Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Mon, 11 May 2026 13:07:56 -0500 Subject: [PATCH] use vllm optimized for spark --- clusters/k3s-dgx/nim-service/qwen.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index f3319b6..82ca2f1 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -27,8 +27,8 @@ spec: userID: 0 groupID: 0 image: - repository: vllm/vllm-openai - tag: v0.19.1-cu130 + repository: scitrera/dgx-spark-vllm + tag: 0.17.0-t4 pullPolicy: IfNotPresent env: - name: VLLM_CACHE_ROOT @@ -74,7 +74,6 @@ spec: - '{"preserve_thinking": true}' - --override-generation-config - '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}' - - --disable-log-requests - --attention-backend - FLASHINFER authSecret: hf-api-secret