Files
spark-vllm-docker/profiles/example-vllm-minimax.sh
2026-02-02 10:25:53 -05:00

16 lines
464 B
Bash

#!/bin/bash
# PROFILE: MiniMax-M2-AWQ Example
# DESCRIPTION: vLLM serving MiniMax-M2-AWQ with Ray distributed backend
# Assemble the complete argument vector first, then launch vLLM once with it.
# Keeping the flags in an array lets each group carry its own comment without
# relying on backslash line continuations.
serve_args=(
  # Subcommand and the model to serve
  serve QuantTrio/MiniMax-M2-AWQ

  # Network endpoint the server listens on
  --port 8000
  --host 0.0.0.0

  # GPU memory budget, parallelism degree and distributed executor backend
  --gpu-memory-utilization 0.7
  -tp 2
  --distributed-executor-backend ray

  # Maximum context length and weight-loading format
  --max-model-len 128000
  --load-format fastsafetensors

  # Tool-calling and reasoning output parsers for this model family
  --enable-auto-tool-choice
  --tool-call-parser minimax_m2
  --reasoning-parser minimax_m2_append_think
)

# Last command of the profile: its exit status becomes the script's status.
vllm "${serve_args[@]}"