Fixed qwen3-coder-next-int4-autoround to exclude Ray
This commit is contained in:
@@ -22,7 +22,6 @@ mods:
|
|||||||
defaults:
|
defaults:
|
||||||
port: 8000
|
port: 8000
|
||||||
host: 0.0.0.0
|
host: 0.0.0.0
|
||||||
tensor_parallel: 2
|
|
||||||
gpu_memory_utilization: 0.7
|
gpu_memory_utilization: 0.7
|
||||||
max_model_len: 262144
|
max_model_len: 262144
|
||||||
|
|
||||||
@@ -40,7 +39,5 @@ command: |
|
|||||||
--port {port} \
|
--port {port} \
|
||||||
--load-format fastsafetensors \
|
--load-format fastsafetensors \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--max-model-len {max_model_len} \
|
--max-model-len {max_model_len}
|
||||||
-tp {tensor_parallel} \
|
|
||||||
--distributed-executor-backend ray
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user