Fix quantization: use the generic "fp8" method instead of "fbgemm-fp8"

2026-05-11 11:36:17 -05:00
parent ce65c6435b
commit db6efb188c
1 changed files with 1 additions and 1 deletions
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -67,7 +67,7 @@ spec:
    - --dtype
    - "float"                # NOTE(review): "float" presumably means float32, not FP8 (FP8 is set via --quantization) - confirm intended dtype
    - --quantization
-    - "fbgemm-fp8"            # Explicit quantization backend
+    - "fp8"            # Explicit quantization backend
  authSecret: hf-api-secret
  storage:
    sharedMemorySizeLimit: 64Gi