This commit is contained in:
2026-05-11 11:32:56 -05:00
parent 9594d6311a
commit 855cce3c54

View File

@@ -63,8 +63,8 @@ spec:
# Tail of a command-line argument list, shown as a side-by-side diff:
# each row holds the "before" text followed by the "after" text of the same line.
# NOTE(review): the flag that the first value below belongs to is above the
# visible hunk — confirm which option "131072" is paired with.
- "131072" # ↑ 32K→128K — larger decode batches = more tokens/sec - "131072" # ↑ 32K→128K — larger decode batches = more tokens/sec
- --max-num-seqs - --max-num-seqs
- "254" # ↑ Allow more concurrent sequences - "254" # ↑ Allow more concurrent sequences
# The next two rows are where the two columns differ: the "after" side adds
# the leading "- " so each flag becomes its own list item.
--enable-prefix-caching - --enable-prefix-caching
--dtype - --dtype
# NOTE(review): verify "float8" is a value the target server accepts for
# --dtype; quantization is configured separately by the flag below.
- "float8" # Explicit FP8 encoding - "float8" # Explicit FP8 encoding
# NOTE(review): this row has no leading "- " in either column, unlike the two
# flags corrected above — looks like the same omission left unfixed; confirm
# against the actual manifest.
--quantization --quantization
# NOTE(review): confirm the exact backend spelling (hyphen vs. underscore)
# against the server's list of accepted --quantization choices.
- "fbgemm-fp8" # Explicit quantization backend - "fbgemm-fp8" # Explicit quantization backend