set env
This commit is contained in:
@@ -26,12 +26,25 @@ metadata:
|
|||||||
name: qwen3-32b-instruct
|
name: qwen3-32b-instruct
|
||||||
namespace: nim-service
|
namespace: nim-service
|
||||||
spec:
|
spec:
|
||||||
# args:
|
env:
|
||||||
# - --enable-auto-tool-choice
|
- name: GPU_MEMORY_UTILIZATION
|
||||||
# - --gpu-memory-utilization
|
value: "0.90"
|
||||||
# - "0.90"
|
- name: NIM_MAX_NUM_SEQS
|
||||||
# - --tool-call-parser
|
value: "32"
|
||||||
# - hermes
|
- name: NIM_MAX_NUM_BATCHED_TOKENS
|
||||||
|
value: "16384"
|
||||||
|
- name: ENABLE_AUTO_TOOL_CHOICE
|
||||||
|
value: "true"
|
||||||
|
- name: TOOL_CALL_PARSER
|
||||||
|
value: "hermes"
|
||||||
|
- name: ENABLE_PREFIX_CACHING
|
||||||
|
value: "true"
|
||||||
|
- name: TRUNCATION_SIDE
|
||||||
|
value: "left"
|
||||||
|
- name: VLLM_LOGGING_LEVEL
|
||||||
|
value: "INFO"
|
||||||
|
- name: VLLM_ALLOW_LONG_MAX_MODEL_LEN
|
||||||
|
value: "true"
|
||||||
image:
|
image:
|
||||||
repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
|
repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
|
||||||
tag: "1.1.0-variant"
|
tag: "1.1.0-variant"
|
||||||
|
|||||||
Reference in New Issue
Block a user