#!/bin/bash # PROFILE: MiniMax-M2-AWQ Example # DESCRIPTION: vLLM serving MiniMax-M2-AWQ with Ray distributed backend vllm serve QuantTrio/MiniMax-M2-AWQ \ --port 8000 \ --host 0.0.0.0 \ --gpu-memory-utilization 0.7 \ -tp 2 \ --distributed-executor-backend ray \ --max-model-len 128000 \ --load-format fastsafetensors \ --enable-auto-tool-choice \ --tool-call-parser minimax_m2 \ --reasoning-parser minimax_m2_append_think