From 64abcb1483adc3395fd85df685a2f64e5affab3a Mon Sep 17 00:00:00 2001 From: HaimKortovich Date: Thu, 7 May 2026 16:05:19 -0500 Subject: [PATCH] add parameters --- clusters/k3s-dgx/nim-service/qwen.yaml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/clusters/k3s-dgx/nim-service/qwen.yaml b/clusters/k3s-dgx/nim-service/qwen.yaml index 42b0a3a..03dc7df 100644 --- a/clusters/k3s-dgx/nim-service/qwen.yaml +++ b/clusters/k3s-dgx/nim-service/qwen.yaml @@ -55,19 +55,34 @@ spec: command: - python3 - args: - -m - vllm.entrypoints.openai.api_server - --model - /model-store + - --host + - 0.0.0.0 - --served-model-name - qwen36 - --gpu-memory-utilization - "0.85" - --max-model-len - - "262144" + - "256000" + - --language-model-only + - --reasoning-parser + - qwen3 + - --enable-auto-tool-choice + - --tool-call-parser + - qwen3_coder + - --enable-chunked-prefill + - --max-num-batched-tokens + - "32768" + - --max-num-seqs + - "10" - --enable-prefix-caching + - --speculative-config + - '{"method":"mtp","num_speculative_tokens":2}' + authSecret: hf-api-secret storage: nimCache: