Pass engine args via NIM_PASSTHROUGH_ARGS env and switch image to nvcr.io/nim/nvidia/llm-nim

This commit is contained in:
2026-05-07 15:03:56 -05:00
parent 1a4e73b755
commit c5db9144a9

View File

@@ -46,23 +46,15 @@ metadata:
   name: qwen36-27b-fp8
   namespace: nim-service
 spec:
-  args:
-    - "--gpu-memory-utilization=0.85"
-    - "--served-model-name=qwen36"
-    - "--max-model-len=256K"
-    - "--language-model-only"
-    - "--reasoning-parser=qwen3"
-    - "--enable-auto-tool-choice"
-    - "--tool-call-parser=qwen3_coder"
-    - "--enable-chunked-prefill"
-    - "--max-num-batched-tokens=32768"
-    - "--max-num-seqs=10"
-    - "--enable-prefix-caching"
-    - '--speculative-config={"method":"mtp","num_speculative_tokens":2}'
+  env:
+    - name: NIM_PASSTHROUGH_ARGS
+      value: "--gpu-memory-utilization 0.85 --served-model-name qwen36 --max-model-len 256K --language-model-only --reasoning-parser qwen3 --enable-auto-tool-choice --tool-call-parser qwen3_coder --enable-chunked-prefill --max-num-batched-tokens 32768 --max-num-seqs 10 --enable-prefix-caching --speculative-config '{\"method\":\"mtp\",\"num_speculative_tokens\":2}'"
   image:
-    repository: vllm/vllm-openai
-    tag: "v0.19.1-cu130"
+    repository: nvcr.io/nim/nvidia/llm-nim
+    tag: "1.12"
     pullPolicy: IfNotPresent
+    pullSecrets:
+      - ngc-secret
   authSecret: hf-api-secret
   storage:
     nimCache: