From db6efb188c8d1e156f5940bf0af6fdcbc5be99ec Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Mon, 11 May 2026 11:36:17 -0500 Subject: [PATCH] fix quantization --- clusters/k3s-dgx/nim-service/qwen.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 86ff75a..0f1b79f 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -67,7 +67,7 @@ spec: - --dtype - "float" # Explicit FP8 encoding - --quantization - - "fbgemm-fp8" # Explicit quantization backend + - "fp8" # Explicit quantization backend authSecret: hf-api-secret storage: sharedMemorySizeLimit: 64Gi