From efacbd69f2e1892b00e4788f094893a17b03e350 Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Wed, 25 Mar 2026 12:43:12 -0700
Subject: [PATCH] Updated Nemotron3-Super recipe

---
Reviewer notes (this area is ignored by `git am` / `git apply`):
  * This patch was recovered from a copy collapsed onto a single line.
    Blank context lines and YAML indentation (2 spaces under `env:` and
    `mods:`, 4 spaces for the `command: |` continuation lines) were
    inferred from the hunk line counts. Verify them against
    recipes/nemotron-3-super-nvfp4.yaml before applying.
  * VLLM_ALLOW_LONG_MAX_MODEL_LEN is quoted so that YAML loads it as the
    string "1" rather than the integer 1; environment values are strings.
    The `index` line below still carries the original post-image hash.

 recipes/nemotron-3-super-nvfp4.yaml | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/recipes/nemotron-3-super-nvfp4.yaml b/recipes/nemotron-3-super-nvfp4.yaml
index ec790c2..8e2449d 100644
--- a/recipes/nemotron-3-super-nvfp4.yaml
+++ b/recipes/nemotron-3-super-nvfp4.yaml
@@ -9,8 +9,12 @@ container: vllm-node
 cluster_only: false
 solo_only: false
 
-mods:
-  - mods/nemotron-super
+# mods:
+#   - mods/nemotron-super
+
+env:
+  VLLM_FLASHINFER_ALLREDUCE_BACKEND: trtllm
+  VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1"
 
 container: vllm-node
 defaults:
@@ -35,7 +39,8 @@ command: |
     --enable-auto-tool-choice \
     --load-format fastsafetensors \
     --tool-call-parser qwen3_coder \
-    --reasoning-parser-plugin super_v3_reasoning_parser.py \
-    --reasoning-parser super_v3 \
+    --reasoning-parser nemotron_v3 \
+    --mamba_ssm_cache_dtype float32 \
     --tensor-parallel-size {tensor_parallel} \
+    --attention-backend TRITON_ATTN \
     --distributed-executor-backend ray
\ No newline at end of file