use correct dtype

This commit is contained in:
2026-05-11 11:34:39 -05:00
parent 719a6e3d11
commit 08106b6693

View File

@@ -65,7 +65,7 @@ spec:
- "254" # ↑ Allow more concurrent sequences - "254" # ↑ Allow more concurrent sequences
- --enable-prefix-caching - --enable-prefix-caching
- --dtype - --dtype
- "float8" # Explicit FP8 encoding - "float" # FP32 compute dtype (vLLM shorthand for float32); FP8 comes from --quantization
- --quantization - --quantization
- "fbgemm-fp8" # Explicit quantization backend - "fbgemm-fp8" # Explicit quantization backend
- --sv2-transformer-bindings # Skip SA checks (small speedup) - --sv2-transformer-bindings # Skip SA checks (small speedup)