From ce65c6435bedbf09abf3641dc94629273d436f08 Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Mon, 11 May 2026 11:35:29 -0500 Subject: [PATCH] remove option --- clusters/k3s-dgx/nim-service/qwen.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 082a330..86ff75a 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -68,7 +68,6 @@ spec: - "float" # Explicit FP8 encoding - --quantization - "fbgemm-fp8" # Explicit quantization backend - - --sv2-transformer-bindings # Skip SA checks (small speedup) authSecret: hf-api-secret storage: sharedMemorySizeLimit: 64Gi