Adding sample profile and profile loader

2026-01-25 21:22:45 -05:00
parent 133ed9cfb9
commit 751bc5a47a
6 changed files with 390 additions and 8 deletions
--- a/profiles/example-vllm-minimax.sh
+++ b/profiles/example-vllm-minimax.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# PROFILE: MiniMax-M2-AWQ Example
+# DESCRIPTION: vLLM serving MiniMax-M2-AWQ with Ray distributed backend
+# Start the vLLM server for this model: host 0.0.0.0, port 8000, tensor-parallel size 2, Ray executor.
+vllm serve QuantTrio/MiniMax-M2-AWQ \
+    --port 8000 \
+    --host 0.0.0.0 \
+    --gpu-memory-utilization 0.7 \
+    -tp 2 \
+    --distributed-executor-backend ray \
+    --max-model-len 128000 \
+    --load-format fastsafetensors \
+    --enable-auto-tool-choice \
+    --tool-call-parser minimax_m2 \
+    --reasoning-parser minimax_m2_append_think