This commit is contained in:
2026-05-11 11:33:45 -05:00
parent 855cce3c54
commit 719a6e3d11

View File

@@ -66,9 +66,9 @@ spec:
- --enable-prefix-caching - --enable-prefix-caching
- --dtype - --dtype
- "float8" # Explicit FP8 encoding - "float8" # Explicit FP8 encoding
--quantization - --quantization
- "fbgemm-fp8" # Explicit quantization backend - "fbgemm-fp8" # Explicit quantization backend
--sv2-transformer-bindings # Skip SA checks (small speedup) - --sv2-transformer-bindings # Skip SA checks (small speedup)
authSecret: hf-api-secret authSecret: hf-api-secret
storage: storage:
sharedMemorySizeLimit: 64Gi sharedMemorySizeLimit: 64Gi