add parameters

2026-05-07 16:05:19 -05:00
parent 6ef281e06f
commit 64abcb1483
1 changed files with 17 additions and 2 deletions
--- a/clusters/k3s-dgx/nim-service/qwen.yaml
+++ b/clusters/k3s-dgx/nim-service/qwen.yaml
@@ -55,19 +55,34 @@ spec:
  command:
    - python3
  args:
    - -m
    - vllm.entrypoints.openai.api_server
    - --model
    - /model-store
    - --host
    - 0.0.0.0
    - --served-model-name
    - qwen36
    - --gpu-memory-utilization
    - "0.85"
    - --max-model-len
-    - "262144"
+    - "256000"
    - --language-model-only
    - --reasoning-parser
    - qwen3
    - --enable-auto-tool-choice
    - --tool-call-parser
    - qwen3_coder
    - --enable-chunked-prefill
    - --max-num-batched-tokens
    - "32768"
    - --max-num-seqs
    - "10"
    - --enable-prefix-caching
    - --speculative-config
    - '{"method":"mtp","num_speculative_tokens":2}'
  authSecret: hf-api-secret
  storage:
    nimCache: