Updated Nemotron-3-Super recipe
This commit is contained in:
@@ -9,8 +9,12 @@ container: vllm-node
|
|||||||
cluster_only: false
|
cluster_only: false
|
||||||
solo_only: false
|
solo_only: false
|
||||||
|
|
||||||
mods:
|
# mods:
|
||||||
- mods/nemotron-super
|
# - mods/nemotron-super
|
||||||
|
|
||||||
|
env:
|
||||||
|
VLLM_FLASHINFER_ALLREDUCE_BACKEND: trtllm
|
||||||
|
VLLM_ALLOW_LONG_MAX_MODEL_LEN: 1
|
||||||
|
|
||||||
container: vllm-node
|
container: vllm-node
|
||||||
defaults:
|
defaults:
|
||||||
@@ -35,7 +39,8 @@ command: |
|
|||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
--load-format fastsafetensors \
|
--load-format fastsafetensors \
|
||||||
--tool-call-parser qwen3_coder \
|
--tool-call-parser qwen3_coder \
|
||||||
--reasoning-parser-plugin super_v3_reasoning_parser.py \
|
--reasoning-parser nemotron_v3 \
|
||||||
--reasoning-parser super_v3 \
|
--mamba_ssm_cache_dtype float32 \
|
||||||
--tensor-parallel-size {tensor_parallel} \
|
--tensor-parallel-size {tensor_parallel} \
|
||||||
|
--attention-backend TRITON_ATTN \
|
||||||
--distributed-executor-backend ray
|
--distributed-executor-backend ray
|
||||||
Reference in New Issue
Block a user