This commit is contained in:
2026-05-11 11:33:45 -05:00
parent 855cce3c54
commit 719a6e3d11

View File

@@ -66,9 +66,9 @@ spec:
- --enable-prefix-caching
- --dtype
- "float8" # Explicit FP8 encoding
--quantization
- --quantization
- "fbgemm-fp8" # Explicit quantization backend
--sv2-transformer-bindings # Skip SA checks (small speedup)
- --sv2-transformer-bindings # Skip SA checks (small speedup)
authSecret: hf-api-secret
storage:
sharedMemorySizeLimit: 64Gi