#!/bin/bash
# PROFILE: MiniMax-M2-AWQ Example
# DESCRIPTION: vLLM serving MiniMax-M2-AWQ with Ray distributed backend

# Launch the vLLM server for the QuantTrio/MiniMax-M2-AWQ model.
# Flags, as passed below:
#   --port / --host                 listen on 0.0.0.0:8000
#   --gpu-memory-utilization 0.7    GPU memory fraction handed to vLLM
#   -tp 2                           tensor-parallel size of 2
#   --distributed-executor-backend  run workers through Ray
#   --max-model-len 128000          maximum model (context) length
#   --load-format fastsafetensors   weight-loading format
#   --enable-auto-tool-choice and
#   --tool-call-parser minimax_m2   tool calling, parsed by minimax_m2
#   --reasoning-parser              minimax_m2_append_think
# NOTE: every line of the command except the last must end in a backslash
# with nothing after it, otherwise the remaining flags are silently dropped
# or treated as separate commands.
vllm serve QuantTrio/MiniMax-M2-AWQ \
  --port 8000 \
  --host 0.0.0.0 \
  --gpu-memory-utilization 0.7 \
  -tp 2 \
  --distributed-executor-backend ray \
  --max-model-len 128000 \
  --load-format fastsafetensors \
  --enable-auto-tool-choice \
  --tool-call-parser minimax_m2 \
  --reasoning-parser minimax_m2_append_think