Set environment variables for the qwen3-32b-instruct service
This commit is contained in:
@@ -26,12 +26,25 @@ metadata:
  name: qwen3-32b-instruct
  namespace: nim-service
spec:
  # args:
  # - --enable-auto-tool-choice
  # - --gpu-memory-utilization
  # - "0.90"
  # - --tool-call-parser
  # - hermes
  env:
    - name: GPU_MEMORY_UTILIZATION
      value: "0.90"
    - name: NIM_MAX_NUM_SEQS
      value: "32"
    - name: NIM_MAX_NUM_BATCHED_TOKENS
      value: "16384"
    - name: ENABLE_AUTO_TOOL_CHOICE
      value: "true"
    - name: TOOL_CALL_PARSER
      value: "hermes"
    - name: ENABLE_PREFIX_CACHING
      value: "true"
    - name: TRUNCATION_SIDE
      value: "left"
    - name: VLLM_LOGGING_LEVEL
      value: "INFO"
    - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN
      value: "true"
  image:
    repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
    tag: "1.1.0-variant"
Reference in New Issue
Block a user