Fix quantization: use the "fp8" backend instead of "fbgemm-fp8"

This commit is contained in:
2026-05-11 11:36:17 -05:00
parent ce65c6435b
commit db6efb188c

View File

@@ -67,7 +67,7 @@ spec:
- --dtype - --dtype
- "float" # Explicit FP8 encoding - "float" # Explicit FP8 encoding
- --quantization - --quantization
- "fbgemm-fp8" # Explicit quantization backend - "fp8" # Explicit quantization backend
authSecret: hf-api-secret authSecret: hf-api-secret
storage: storage:
sharedMemorySizeLimit: 64Gi sharedMemorySizeLimit: 64Gi