# Patch summary: under `spec` of the qwen3-32b-instruct resource (namespace
# nim-service), the six commented-out `args` lines are removed and an `env`
# list of nine name/value pairs is added. Every value is a quoted string.
# NOTE(review): this patch was re-flowed from a single-line paste; the
# indentation of context and removed lines is reconstructed -- confirm it
# against the target file before applying.
# NOTE(review): some variable names carry a NIM_ or VLLM_ prefix while others
# do not (e.g. GPU_MEMORY_UTILIZATION, TOOL_CALL_PARSER) -- confirm the
# container image actually reads the unprefixed names.
diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 387d29d..7b22323 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -26,12 +26,25 @@ metadata:
   name: qwen3-32b-instruct
   namespace: nim-service
 spec:
-  # args:
-  #   - --enable-auto-tool-choice
-  #   - --gpu-memory-utilization
-  #   - "0.90"
-  #   - --tool-call-parser
-  #   - hermes
+  env:
+    - name: GPU_MEMORY_UTILIZATION
+      value: "0.90"
+    - name: NIM_MAX_NUM_SEQS
+      value: "32"
+    - name: NIM_MAX_NUM_BATCHED_TOKENS
+      value: "16384"
+    - name: ENABLE_AUTO_TOOL_CHOICE
+      value: "true"
+    - name: TOOL_CALL_PARSER
+      value: "hermes"
+    - name: ENABLE_PREFIX_CACHING
+      value: "true"
+    - name: TRUNCATION_SIDE
+      value: "left"
+    - name: VLLM_LOGGING_LEVEL
+      value: "INFO"
+    - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN
+      value: "true"
   image:
     repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
     tag: "1.1.0-variant"