From 47a896d722fa43ec6dcc06e361635b465e6ce298 Mon Sep 17 00:00:00 2001 From: eugr Date: Thu, 26 Mar 2026 22:44:48 -0700 Subject: [PATCH] Removed expert-parallel from 3x-node Qwen --- recipes/3x-spark-cluster/qwen3.5-397b-int4-autoround.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/recipes/3x-spark-cluster/qwen3.5-397b-int4-autoround.yaml b/recipes/3x-spark-cluster/qwen3.5-397b-int4-autoround.yaml index 238a129..a208268 100644 --- a/recipes/3x-spark-cluster/qwen3.5-397b-int4-autoround.yaml +++ b/recipes/3x-spark-cluster/qwen3.5-397b-int4-autoround.yaml @@ -28,9 +28,9 @@ defaults: port: 8000 host: 0.0.0.0 pipeline_parallel: 3 - gpu_memory_utilization: 0.6 + gpu_memory_utilization: 0.7 max_model_len: 262144 - max_num_batched_tokens: 4176 + max_num_batched_tokens: 16384 # Environment variables env: @@ -55,7 +55,6 @@ command: | --chat-template unsloth.jinja \ -tp 1 \ -pp {pipeline_parallel} \ - --enable-expert-parallel \ --distributed-executor-backend ray