From b70f8063a836183975f02a2a8ce2e69c90b1034d Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Fri, 8 May 2026 09:42:50 -0500 Subject: [PATCH] fix memory util --- clusters/k3s-dgx/nim-service/qwen.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 6e655da..e8b80b0 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -45,7 +45,7 @@ spec: - --served-model-name - Qwen/Qwen3.6-27B-FP8 - --gpu-memory-utilization - - "0.90" + - "0.85" - --max-model-len - "256000" - --language-model-only