Adding sample profile and profile loader
This commit is contained in:
15
profiles/example-vllm-minimax.sh
Normal file
15
profiles/example-vllm-minimax.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
|
||||
# PROFILE: MiniMax-M2-AWQ Example
|
||||
# DESCRIPTION: vLLM serving MiniMax-M2-AWQ with Ray distributed backend
|
||||
|
||||
vllm serve QuantTrio/MiniMax-M2-AWQ \
|
||||
--port 8000 \
|
||||
--host 0.0.0.0 \
|
||||
--gpu-memory-utilization 0.7 \
|
||||
-tp 2 \
|
||||
--distributed-executor-backend ray \
|
||||
--max-model-len 128000 \
|
||||
--load-format fastsafetensors \
|
||||
--enable-auto-tool-choice \
|
||||
--tool-call-parser minimax_m2 \
|
||||
--reasoning-parser minimax_m2_append_think
|
||||
Reference in New Issue
Block a user