diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 082a330..86ff75a 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -68,7 +68,6 @@ spec: - "float" # Explicit FP8 encoding - --quantization - "fbgemm-fp8" # Explicit quantization backend - - --sv2-transformer-bindings # Skip SA checks (small speedup) authSecret: hf-api-secret storage: sharedMemorySizeLimit: 64Gi