fix list
This commit is contained in:
@@ -63,8 +63,8 @@ spec:
|
|||||||
- "131072" # ↑ 32K→128K — larger decode batches = more tokens/sec
|
- "131072" # ↑ 32K→128K — larger decode batches = more tokens/sec
|
||||||
- --max-num-seqs
|
- --max-num-seqs
|
||||||
- "254" # ↑ Allow more concurrent sequences
|
- "254" # ↑ Allow more concurrent sequences
|
||||||
--enable-prefix-caching
|
- --enable-prefix-caching
|
||||||
--dtype
|
- --dtype
|
||||||
- "float8" # Explicit FP8 encoding
|
- "float8" # Explicit FP8 encoding
|
||||||
--quantization
|
- --quantization
|
||||||
- "fbgemm-fp8" # Explicit quantization backend
|
- "fbgemm-fp8" # Explicit quantization backend
|
||||||
|
|||||||
Reference in New Issue
Block a user