diff --git a/recipes/qwen35-35b-a3b-fp8.yaml b/recipes/qwen3.5-35b-a3b-fp8.yaml similarity index 98% rename from recipes/qwen35-35b-a3b-fp8.yaml rename to recipes/qwen3.5-35b-a3b-fp8.yaml index 296146a..e2bc05b 100644 --- a/recipes/qwen35-35b-a3b-fp8.yaml +++ b/recipes/qwen3.5-35b-a3b-fp8.yaml @@ -24,7 +24,7 @@ defaults: host: 0.0.0.0 tensor_parallel: 2 gpu_memory_utilization: 0.7 - max_model_len: 131072 + max_model_len: 262144 max_num_batched_tokens: 16384 # Environment variables