use vllm optimized for spark
This commit is contained in:
@@ -27,8 +27,8 @@ spec:
|
|||||||
userID: 0
|
userID: 0
|
||||||
groupID: 0
|
groupID: 0
|
||||||
image:
|
image:
|
||||||
repository: vllm/vllm-openai
|
repository: scitrera/dgx-spark-vllm
|
||||||
tag: v0.19.1-cu130
|
tag: 0.17.0-t4
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
env:
|
env:
|
||||||
- name: VLLM_CACHE_ROOT
|
- name: VLLM_CACHE_ROOT
|
||||||
@@ -74,7 +74,6 @@ spec:
|
|||||||
- '{"preserve_thinking": true}'
|
- '{"preserve_thinking": true}'
|
||||||
- --override-generation-config
|
- --override-generation-config
|
||||||
- '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}'
|
- '{"temperature": 0.6, "top_p": 0.95, "top_k": 20, "min_p": 0.0, "presence_penalty": 0.0, "repetition_penalty": 1.0}'
|
||||||
- --disable-log-requests
|
|
||||||
- --attention-backend
|
- --attention-backend
|
||||||
- FLASHINFER
|
- FLASHINFER
|
||||||
authSecret: hf-api-secret
|
authSecret: hf-api-secret
|
||||||
|
|||||||
Reference in New Issue
Block a user