Use vLLM optimized for Spark
This commit is contained in:
@@ -27,8 +27,8 @@ spec:
|
||||
userID: 0
|
||||
groupID: 0
|
||||
image:
|
||||
repository: vllm/vllm-openai
|
||||
tag: v0.19.1-cu130
|
||||
repository: scitrera/dgx-spark-vllm
|
||||
tag: 0.17.0-t4
|
||||
pullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: VLLM_CACHE_ROOT
|
||||
@@ -74,7 +74,6 @@ spec:
|
||||
- '{"preserve_thinking": true}'
|
||||
- --override-generation-config
|
||||
- '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}'
|
||||
- --disable-log-requests
|
||||
- --attention-backend
|
||||
- FLASHINFER
|
||||
authSecret: hf-api-secret
|
||||
|
||||
Reference in New Issue
Block a user