Adding sample profile and profile loader
This commit is contained in:
17
profiles/vllm-glm-4.7-nvfp4.sh
Normal file
17
profiles/vllm-glm-4.7-nvfp4.sh
Normal file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
# PROFILE: Salyut1/GLM-4.7-NVFP4
# DESCRIPTION: vLLM serving GLM-4.7-NVFP4
#
# NOTE: Launch this profile together with
#       --apply-mod mods/fix-Salyut1-GLM-4.7-NVFP4
#       The mod works around the k/v scales incompatibility of this checkpoint.
# See: https://huggingface.co/Salyut1/GLM-4.7-NVFP4/discussions/3#694ab9b6e2efa04b7ecb0c4b
#
# What the command below configures (flags are grouped in this order):
#   1. Network      - listen on every interface (0.0.0.0), port 8000.
#   2. Parallelism  - tensor-parallel size 2 (--tensor-parallel-size is the
#                     long spelling of -tp), using the Ray executor backend.
#   3. Memory       - GPU memory utilization 0.88, max model length 32000.
#   4. Attention    - FlashInfer attention backend.
#   5. Parsers      - automatic tool choice with the glm47 tool-call parser
#                     and the glm45 reasoning parser (values kept as given).

vllm serve Salyut1/GLM-4.7-NVFP4 \
  --host 0.0.0.0 \
  --port 8000 \
  --tensor-parallel-size 2 \
  --distributed-executor-backend ray \
  --gpu-memory-utilization 0.88 \
  --max-model-len 32000 \
  --attention-config.backend flashinfer \
  --enable-auto-tool-choice \
  --tool-call-parser glm47 \
  --reasoning-parser glm45
|
||||
Reference in New Issue
Block a user