add parameters

This commit is contained in:
2026-05-07 16:05:19 -05:00
parent 6ef281e06f
commit 64abcb1483

View File

@@ -55,19 +55,34 @@ spec:
command: command:
- python3 - python3
args: args:
- -m - -m
- vllm.entrypoints.openai.api_server - vllm.entrypoints.openai.api_server
- --model - --model
- /model-store - /model-store
- --host
- 0.0.0.0
- --served-model-name - --served-model-name
- qwen36 - qwen36
- --gpu-memory-utilization - --gpu-memory-utilization
- "0.85" - "0.85"
- --max-model-len - --max-model-len
- "262144" - "256000"
- --language-model-only
- --reasoning-parser
- qwen3
- --enable-auto-tool-choice
- --tool-call-parser
- qwen3_coder
- --enable-chunked-prefill
- --max-num-batched-tokens
- "32768"
- --max-num-seqs
- "10"
- --enable-prefix-caching - --enable-prefix-caching
- --speculative-config
- '{"method":"mtp","num_speculative_tokens":2}'
authSecret: hf-api-secret authSecret: hf-api-secret
storage: storage:
nimCache: nimCache: