This commit is contained in:
2026-05-07 11:13:23 -05:00
parent 1e7630efe8
commit 923679cb29

View File

@@ -26,12 +26,25 @@ metadata:
name: qwen3-32b-instruct
namespace: nim-service
spec:
# Commented-out CLI-flag form of the tool-calling and GPU-memory settings.
# The same three values (auto tool choice, "0.90", hermes) also appear as
# env entries in the list below.
# args:
# - --enable-auto-tool-choice
# - --gpu-memory-utilization
# - "0.90"
# - --tool-call-parser
# - hermes
# Environment entries for the qwen3-32b-instruct resource. Every value is
# written as a quoted string, so numeric- and boolean-looking values such as
# "0.90", "32" and "true" are not retyped by the YAML parser.
env:
# Same value as the commented-out --gpu-memory-utilization flag above.
# NOTE(review): this name, like ENABLE_AUTO_TOOL_CHOICE, TOOL_CALL_PARSER,
# ENABLE_PREFIX_CACHING and TRUNCATION_SIDE below, has neither the NIM_ nor
# the VLLM_ prefix used by the other entries. Confirm the container image
# actually reads these unprefixed names; an unrecognised variable would
# presumably be ignored without any error.
- name: GPU_MEMORY_UTILIZATION
value: "0.90"
# NIM_-prefixed limits; presumably the cap on concurrent sequences and on
# tokens per batch -- TODO confirm against the image's documentation.
- name: NIM_MAX_NUM_SEQS
value: "32"
- name: NIM_MAX_NUM_BATCHED_TOKENS
value: "16384"
# Env counterparts of the commented-out --enable-auto-tool-choice and
# --tool-call-parser hermes flags above.
- name: ENABLE_AUTO_TOOL_CHOICE
value: "true"
- name: TOOL_CALL_PARSER
value: "hermes"
- name: ENABLE_PREFIX_CACHING
value: "true"
# NOTE(review): presumably selects which end of an over-long prompt is
# dropped -- confirm the consumer and the accepted values.
- name: TRUNCATION_SIDE
value: "left"
# VLLM_-prefixed settings: log level, and a switch that presumably relaxes
# the max-model-length check -- TODO confirm this is intended in production.
- name: VLLM_LOGGING_LEVEL
value: "INFO"
- name: VLLM_ALLOW_LONG_MAX_MODEL_LEN
value: "true"
image:
repository: nvcr.io/nim/qwen/qwen3-32b-dgx-spark
tag: "1.1.0-variant"