From 719a6e3d11d9c667b69cf5e56a6f3d999baf9fee Mon Sep 17 00:00:00 2001
From: HaimKortovich
Date: Mon, 11 May 2026 11:33:45 -0500
Subject: [PATCH] fix list

---
Reviewer note (ignored by git am): this patch turns two bare argument
lines into proper YAML sequence items by prefixing them with "- ".
The line breaks below were restored from a copy in which the whole patch
had been collapsed onto one line. The leading indentation of the YAML
lines is an assumption (2-space nesting) -- TODO confirm against
clusters/k3s-dgx/nim-service/qwen.yaml, or apply with
`git apply --ignore-whitespace`.

 clusters/k3s-dgx/nim-service/qwen.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml
index 71c84bb..f08ef2d 100644
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -66,9 +66,9 @@ spec:
     - --enable-prefix-caching
     - --dtype
     - "float8" # Explicit FP8 encoding
-      --quantization
+    - --quantization
     - "fbgemm-fp8" # Explicit quantization backend
-      --sv2-transformer-bindings # Skip SA checks (small speedup)
+    - --sv2-transformer-bindings # Skip SA checks (small speedup)
   authSecret: hf-api-secret
   storage:
     sharedMemorySizeLimit: 64Gi