diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index c2086bb..36738e9 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -26,8 +26,10 @@ metadata: spec: args: - --enable-auto-tool-choice + - --gpu-memory-utilization + - "0.90" - --tool-call-parser - - llama3_json + - hermes image: repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark tag: "1.1.0-variant"