diff --git a/README.md b/README.md index 9cbc8ef..e2ca582 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,8 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi ### 2026-02-02 +#### Nemotron Nano mod + Added a mod for nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B support. It supports all Nemotron Nano models/quants using the same reasoning parser. To use, add `--apply-mod mods/nemotron-nano` to `./launch-cluster.sh` arguments. @@ -172,6 +174,38 @@ For example, to run nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4 on a single node Please note that NVFP4 models on Spark are not fully supported on vLLM (any build) yet, so the performance will not be optimal. You will likely see Flashinfer errors during load. This model is also known to crash sometimes. +#### Ability to use launch-cluster.sh with NVIDIA NGC containers + +Added a new mod that enables using the cluster launch script with NVIDIA NGC vLLM or any other vLLM container that includes InfiniBand libraries and Ray support. + +To use, add `--apply-mod mods/use-ngc-vllm` to `./launch-cluster.sh` arguments. It can be combined with other mods. +For example, to launch Nemotron Nano in the cluster using the NGC container, you can use the following command: + +```bash +./launch-cluster.sh \
+    -t nvcr.io/nvidia/vllm:26.01-py3 \
+    --apply-mod mods/use-ngc-vllm \
+    --apply-mod mods/nemotron-nano \
+    -e VLLM_USE_FLASHINFER_MOE_FP4=1 \
+    -e VLLM_FLASHINFER_MOE_BACKEND=throughput \
+    exec vllm serve nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4 \
+    --max-model-len 262144 \
+    --port 8888 --host 0.0.0.0 \
+    --trust-remote-code \
+    --enable-auto-tool-choice \
+    --tool-call-parser qwen3_coder \
+    --reasoning-parser-plugin nano_v3_reasoning_parser.py \
+    --reasoning-parser nano_v3 \
+    --kv-cache-dtype fp8 \
+    --gpu-memory-utilization 0.7 \
+    --tensor-parallel-size 2 \
+    --distributed-executor-backend ray
+```
+
+Make sure you have the container pulled on both nodes! 
+ +At this point it doesn't seem like the NGC container performs any better for this model than a custom build. + ### 2026-01-29 #### New Parameters for launch-cluster.sh diff --git a/launch-cluster.sh b/launch-cluster.sh index f3e645d..cb01b4f 100755 --- a/launch-cluster.sh +++ b/launch-cluster.sh @@ -403,14 +403,15 @@ apply_mod_to_container() { # 3. Run run.sh echo " Running patch script on $node_ip..." - local exec_cmd="cd $container_dest && chmod +x run.sh && ./run.sh" + local local_exec_cmd="export WORKSPACE_DIR=\$PWD && cd $container_dest && chmod +x run.sh && ./run.sh" + local remote_exec_cmd="export WORKSPACE_DIR=\\\$PWD && cd $container_dest && chmod +x run.sh && ./run.sh" local ret_code=0 if [[ "$is_local" == "true" ]]; then - docker exec "$container" bash -c "$exec_cmd" + docker exec "$container" bash -c "$local_exec_cmd" ret_code=$? else - $cmd_prefix docker exec "$container" bash -c "\"$exec_cmd\"" + $cmd_prefix docker exec "$container" bash -c "\"$remote_exec_cmd\"" ret_code=$? fi diff --git a/mods/nemotron-nano/run.sh b/mods/nemotron-nano/run.sh index 8f2d581..ebecaec 100644 --- a/mods/nemotron-nano/run.sh +++ b/mods/nemotron-nano/run.sh @@ -1,4 +1,4 @@ #!/bin/bash set -e -cd $VLLM_BASE_DIR +cd $WORKSPACE_DIR wget https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-NVFP4/resolve/main/nano_v3_reasoning_parser.py diff --git a/mods/use-ngc-vllm/run-cluster-node.sh b/mods/use-ngc-vllm/run-cluster-node.sh new file mode 100755 index 0000000..4143e4d --- /dev/null +++ b/mods/use-ngc-vllm/run-cluster-node.sh @@ -0,0 +1,117 @@ +#!/bin/bash +set -e + +# Define a function to export immediately AND save to .bashrc for future sessions +export_persist() { + local var_name="$1" + local var_value="$2" + + # 1. Export for the current running process + export "$var_name"="$var_value" + + # 2. Append to .bashrc (idempotent check to avoid duplicate lines) + if ! 
grep -q "export $var_name=" ~/.bashrc; then + echo "export $var_name=\"$var_value\"" >> ~/.bashrc + else + # Optional: Update the existing line if it exists + sed -i "s|export $var_name=.*|export $var_name=\"$var_value\"|" ~/.bashrc + fi +} + +# --- Help Function --- +usage() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Required Arguments:" + echo " -r, --role
: Set the node type" + echo " -h, --host-ip