diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 6e655da..e8b80b0 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -45,7 +45,7 @@ spec: - --served-model-name - Qwen/Qwen3.6-27B-FP8 - --gpu-memory-utilization - - "0.90" + - "0.85" - --max-model-len - "256000" - --language-model-only