Switched Nemotron-3-Nano-NVFP4 to the Marlin NVFP4 GEMM backend

This commit is contained in:
Eugene Rakhmatulin
2026-03-17 13:10:48 -07:00
parent fa645f3e4b
commit b1eeefc0eb
2 changed files with 9 additions and 10 deletions

View File

@@ -25,12 +25,13 @@ defaults:
host: 0.0.0.0
tensor_parallel: 1
gpu_memory_utilization: 0.7
max_model_len: 131072
max_model_len: 262144
# Environment variables
env:
VLLM_USE_FLASHINFER_MOE_FP4: 1
VLLM_FLASHINFER_MOE_BACKEND: "throughput"
VLLM_NVFP4_GEMM_BACKEND: "marlin"
VLLM_TEST_FORCE_FP8_MARLIN: "1"
VLLM_MARLIN_USE_ATOMIC_ADD: "1"
# The vLLM serve command template
command: |
@@ -44,6 +45,5 @@ command: |
--reasoning-parser nano_v3 \
--kv-cache-dtype fp8 \
--enable-prefix-caching \
--attention-backend flashinfer \
--load-format fastsafetensors \
--gpu-memory-utilization {gpu_memory_utilization}

View File

@@ -470,17 +470,16 @@ def generate_launch_script(recipe: dict[str, Any], overrides: dict[str, Any], is
if '--distributed-executor-backend' not in line
]
command = '\n'.join(filtered_lines)
# Remove trailing backslash if present
if command.endswith('\\\n'):
command = command.rstrip('\\\n').rstrip()
# Append extra args if provided (after --)
if extra_args:
# Join extra args and append to command
extra_args_str = ' '.join(shlex.quote(a) for a in extra_args)
command = command.rstrip()
# Handle multi-line commands with backslash continuations
if command.endswith('\\'):
command = command.rstrip('\\').rstrip() + ' \\\n ' + extra_args_str
else:
command = command + ' ' + extra_args_str
command = command + ' ' + extra_args_str
lines.append("# Run the model")
lines.append(command.strip())