From 0997aa48b73db2ff0d4fbc9470309b146d84999c Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Thu, 7 May 2026 11:19:11 -0500 Subject: [PATCH] add NIM_MAX_MODEL_LEN and remove TOOL_CALL_PARSER --- clusters/k3s-dgx/nim-service/qwen.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 7b22323..7b3344f 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -35,8 +35,6 @@ spec: value: "16384" - name: ENABLE_AUTO_TOOL_CHOICE value: "true" - - name: TOOL_CALL_PARSER - value: "hermes" - name: ENABLE_PREFIX_CACHING value: "true" - name: TRUNCATION_SIDE @@ -45,6 +43,8 @@ spec: value: "INFO" - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN value: "true" + - name: NIM_MAX_MODEL_LEN + value: "32768" image: repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark tag: "1.1.0-variant"