Use vLLM optimized for DGX Spark

This commit is contained in:
2026-05-11 13:07:56 -05:00
parent e991993636
commit c655bca1bf

View File

@@ -27,8 +27,8 @@ spec:
userID: 0
groupID: 0
image:
-repository: vllm/vllm-openai
-tag: v0.19.1-cu130
+repository: scitrera/dgx-spark-vllm
+tag: 0.17.0-t4
pullPolicy: IfNotPresent
env:
- name: VLLM_CACHE_ROOT
@@ -74,7 +74,6 @@ spec:
- '{"preserve_thinking": true}'
- --override-generation-config
- '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}'
- --disable-log-requests
- --attention-backend
- FLASHINFER
authSecret: hf-api-secret