Updated Nemotron-3-Super recipe

2026-03-25 12:44:44 -07:00
parent 3be2fb24a8
commit 3dcd2a90c1
1 changed file with 9 additions and 4 deletions
--- a/recipes/nemotron-3-super-nvfp4.yaml
+++ b/recipes/nemotron-3-super-nvfp4.yaml
@@ -9,8 +9,12 @@ container: vllm-node
 cluster_only: false
 solo_only: false
-mods: 
+# mods: 
-  - mods/nemotron-super
+#   - mods/nemotron-super
 env:
  VLLM_FLASHINFER_ALLREDUCE_BACKEND: trtllm
  VLLM_ALLOW_LONG_MAX_MODEL_LEN: 1
 container: vllm-node
 defaults:
@@ -35,7 +39,8 @@ command: |
  --enable-auto-tool-choice \
  --load-format fastsafetensors \
  --tool-call-parser qwen3_coder \
-  --reasoning-parser-plugin super_v3_reasoning_parser.py \
+  --reasoning-parser nemotron_v3 \
-  --reasoning-parser super_v3 \
+  --mamba_ssm_cache_dtype float32 \
  --tensor-parallel-size {tensor_parallel} \
  --attention-backend TRITON_ATTN \
  --distributed-executor-backend ray