Fixed qwen3-coder-next-int4-autoround to exclude Ray

This commit is contained in:
Eugene Rakhmatulin
2026-03-11 11:20:56 -07:00
parent 45066e2b16
commit 7ceea85647

View File

@@ -22,7 +22,6 @@ mods:
defaults:
port: 8000
host: 0.0.0.0
- tensor_parallel: 2
gpu_memory_utilization: 0.7
max_model_len: 262144
@@ -40,7 +39,5 @@ command: |
--port {port} \
--load-format fastsafetensors \
--enable-prefix-caching \
- --max-model-len {max_model_len} \
- -tp {tensor_parallel} \
- --distributed-executor-backend ray
+ --max-model-len {max_model_len}