use correct dtype

This commit is contained in:
2026-05-11 11:34:39 -05:00
parent 719a6e3d11
commit 08106b6693

View File

@@ -65,7 +65,7 @@ spec:
- "254" # ↑ Allow more concurrent sequences
- --enable-prefix-caching
- --dtype
- "float8" # Explicit FP8 encoding
- "float" # float32 compute dtype (not FP8); FP8 is selected by --quantization below
- --quantization
- "fbgemm-fp8" # Explicit quantization backend
- --sv2-transformer-bindings # Skip SA checks (small speedup)