Use vLLM optimized for Spark

This commit is contained in:
2026-05-11 13:07:56 -05:00
parent e991993636
commit c655bca1bf

View File

@@ -27,8 +27,8 @@ spec:
userID: 0 userID: 0
groupID: 0 groupID: 0
image: image:
repository: vllm/vllm-openai repository: scitrera/dgx-spark-vllm
tag: v0.19.1-cu130 tag: 0.17.0-t4
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
env: env:
- name: VLLM_CACHE_ROOT - name: VLLM_CACHE_ROOT
@@ -74,7 +74,6 @@ spec:
- '{"preserve_thinking": true}' - '{"preserve_thinking": true}'
- --override-generation-config - --override-generation-config
- '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}' - '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}'
- --disable-log-requests
- --attention-backend - --attention-backend
- FLASHINFER - FLASHINFER
authSecret: hf-api-secret authSecret: hf-api-secret