Updated Nemotron-3-Super recipe

This commit is contained in:
Eugene Rakhmatulin
2026-03-25 12:44:44 -07:00
parent 3be2fb24a8
commit 3dcd2a90c1

View File

@@ -9,8 +9,12 @@ container: vllm-node
cluster_only: false cluster_only: false
solo_only: false solo_only: false
mods: # mods:
- mods/nemotron-super # - mods/nemotron-super
env:
VLLM_FLASHINFER_ALLREDUCE_BACKEND: trtllm
VLLM_ALLOW_LONG_MAX_MODEL_LEN: 1
container: vllm-node container: vllm-node
defaults: defaults:
@@ -35,7 +39,8 @@ command: |
--enable-auto-tool-choice \ --enable-auto-tool-choice \
--load-format fastsafetensors \ --load-format fastsafetensors \
--tool-call-parser qwen3_coder \ --tool-call-parser qwen3_coder \
--reasoning-parser-plugin super_v3_reasoning_parser.py \ --reasoning-parser nemotron_v3 \
--reasoning-parser super_v3 \ --mamba_ssm_cache_dtype float32 \
--tensor-parallel-size {tensor_parallel} \ --tensor-parallel-size {tensor_parallel} \
--attention-backend TRITON_ATTN \
--distributed-executor-backend ray --distributed-executor-backend ray