From 0997aa48b73db2ff0d4fbc9470309b146d84999c Mon Sep 17 00:00:00 2001
From: HaimKortovich <haimkortovich88@gmail.com>
Date: Thu, 7 May 2026 11:19:11 -0500
Subject: [PATCH] Add NIM_MAX_MODEL_LEN and remove TOOL_CALL_PARSER

---
 clusters/k3s-dgx/nim-service/qwen.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 7b22323..7b3344f 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -35,8 +35,6 @@ spec:
       value: "16384"
     - name: ENABLE_AUTO_TOOL_CHOICE
       value: "true"
-    - name: TOOL_CALL_PARSER
-      value: "hermes"
     - name: ENABLE_PREFIX_CACHING
       value: "true"
     - name: TRUNCATION_SIDE
@@ -45,6 +43,8 @@ spec:
       value: "INFO"
     - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN
       value: "true"
+    - name: NIM_MAX_MODEL_LEN
+      value: "32768"
   image:
     repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
     tag: "1.1.0-variant"