Merge branch '3-node' of gitlab.home.eugr.net:ai/spark-vllm into 3-node

This commit is contained in:
Eugene Rakhmatulin
2026-03-25 14:18:32 -07:00
3 changed files with 548 additions and 231 deletions

29
.env.example Normal file
View File

@@ -0,0 +1,29 @@
# Example .env configuration file for spark-vllm-docker
# Copy this file to .env and customize for your environment
# (the launch script loads .env from its own directory unless --config is given)
# Cluster configuration
# CLUSTER_NODES: Comma-separated list of node IPs (first node is the head node)
CLUSTER_NODES="192.168.1.1,192.168.1.2,192.168.1.3"
# ETH_IF: Ethernet interface name (optional, auto-detected if not specified)
ETH_IF="eth0"
# IB_IF: InfiniBand interface name (optional, auto-detected if not specified)
IB_IF="ib0"
# MASTER_PORT: Port for cluster coordination (default: 29501)
MASTER_PORT="29501"
# CONTAINER_NAME: Container name (default: vllm_node)
CONTAINER_NAME="vllm_node"
# Container environment variables
# Any variable starting with CONTAINER_ will be converted to -e flags
# Example: CONTAINER_NCCL_DEBUG=INFO becomes -e NCCL_DEBUG=INFO
CONTAINER_NCCL_DEBUG="INFO"
# NOTE: replace the placeholder below with a real token only in your local
# .env copy — never commit a real token to version control.
CONTAINER_HF_TOKEN="your_huggingface_token_here"
CONTAINER_NCCL_IGNORE_CPU_AFFINITY="1"
# Additional container environment variables
# CONTAINER_MAX_JOBS="16"
# CONTAINER_CUDA_VISIBLE_DEVICES="0,1"

View File

@@ -30,6 +30,7 @@ MOD_PATHS=()
MOD_TYPES=() MOD_TYPES=()
LAUNCH_SCRIPT_PATH="" LAUNCH_SCRIPT_PATH=""
SCRIPT_DIR="$(dirname "$(realpath "$0")")" SCRIPT_DIR="$(dirname "$(realpath "$0")")"
CONFIG_FILE="" # Will be set to default after argument parsing
ACTIONS_ARG="" ACTIONS_ARG=""
SOLO_MODE="false" SOLO_MODE="false"
@@ -67,9 +68,27 @@ usage() {
echo " --mem-swap-limit-gb Memory+swap limit in GB (default: mem-limit + 10, only with --non-privileged)" echo " --mem-swap-limit-gb Memory+swap limit in GB (default: mem-limit + 10, only with --non-privileged)"
echo " --pids-limit Process limit (default: 4096, only with --non-privileged)" echo " --pids-limit Process limit (default: 4096, only with --non-privileged)"
echo " --shm-size-gb Shared memory size in GB (default: 64, only with --non-privileged)" echo " --shm-size-gb Shared memory size in GB (default: 64, only with --non-privileged)"
echo " --config Path to .env configuration file (default: .env in script directory)"
echo " action start | stop | status | exec (Default: start). Not compatible with --launch-script." echo " action start | stop | status | exec (Default: start). Not compatible with --launch-script."
echo " command Command to run (only for 'exec' action). Not compatible with --launch-script." echo " command Command to run (only for 'exec' action). Not compatible with --launch-script."
echo "" echo ""
echo "Supported .env file variables:"
echo " CLUSTER_NODES Comma-separated list of node IPs"
echo " ETH_IF Ethernet interface name"
echo " IB_IF InfiniBand interface name"
echo " MASTER_PORT Port for cluster coordination (default: 29501)"
echo " CONTAINER_NAME Container name (default: vllm_node)"
echo " CONTAINER_* Any variable starting with CONTAINER_ becomes -e flag"
echo " Example: CONTAINER_NCCL_DEBUG=INFO -> -e NCCL_DEBUG=INFO"
echo ""
echo "Example .env file:"
echo " CLUSTER_NODES=192.168.1.1,192.168.1.2"
echo " ETH_IF=eth0"
echo " IB_IF=ib0"
echo " MASTER_PORT=29501"
echo " CONTAINER_NCCL_DEBUG=INFO"
echo " CONTAINER_HF_TOKEN=abc123"
echo ""
echo "Launch Script Usage:" echo "Launch Script Usage:"
echo " $0 --launch-script examples/my-script.sh # Script copied to container and executed" echo " $0 --launch-script examples/my-script.sh # Script copied to container and executed"
echo " $0 --launch-script /path/to/script.sh # Uses absolute path to script" echo " $0 --launch-script /path/to/script.sh # Uses absolute path to script"
@@ -108,6 +127,7 @@ while [[ "$#" -gt 0 ]]; do
--shm-size-gb) SHM_SIZE_GB="$2"; shift ;; --shm-size-gb) SHM_SIZE_GB="$2"; shift ;;
-d) DAEMON_MODE="true" ;; -d) DAEMON_MODE="true" ;;
-h|--help) usage ;; -h|--help) usage ;;
--config) CONFIG_FILE="$2"; shift ;;
start|stop|status) start|stop|status)
if [[ -n "$LAUNCH_SCRIPT_PATH" ]]; then if [[ -n "$LAUNCH_SCRIPT_PATH" ]]; then
echo "Error: Action '$1' is not compatible with --launch-script. Please omit the action or not use --launch-script." echo "Error: Action '$1' is not compatible with --launch-script. Please omit the action or not use --launch-script."
@@ -133,6 +153,108 @@ while [[ "$#" -gt 0 ]]; do
shift shift
done done
# Set .env file path (use default if not specified)
if [[ -z "$CONFIG_FILE" ]]; then
    CONFIG_FILE="$SCRIPT_DIR/.env"
fi
# Load .env file if exists
if [[ -f "$CONFIG_FILE" ]]; then
    echo "Loading configuration from .env file..."
    # Validate .env file syntax (key format, duplicates, missing '=').
    # The path is passed as argv[1] and the Python source is a quoted
    # heredoc: nothing from the filesystem is interpolated into the code,
    # so paths containing quotes/backslashes cannot break or inject into
    # the validator. stderr is NOT suppressed (the old 2>/dev/null hid
    # real failures such as python3 being absent).
    if ! python3 - "$CONFIG_FILE" <<'PYEOF'
import re
import sys

env_file = sys.argv[1]
seen_keys = set()
with open(env_file, 'r') as f:
    for line_num, line in enumerate(f, 1):
        line = line.strip()
        # Skip empty lines and comments
        if not line or line.startswith('#'):
            continue
        # Check for key=value format
        if '=' not in line:
            print(f'Error: Invalid syntax at line {line_num}: missing "="')
            sys.exit(1)
        key = line.split('=', 1)[0].strip()
        # Validate key format (alphanumeric + underscore)
        if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', key):
            print(f'Error: Invalid key format at line {line_num}: {key}')
            sys.exit(1)
        # Check for duplicates
        if key in seen_keys:
            print(f'Error: Duplicate key at line {line_num}: {key}')
            sys.exit(1)
        seen_keys.add(key)
sys.exit(0)
PYEOF
    then
        echo "Error: Invalid .env file syntax. Aborting."
        exit 1
    fi
    # Load .env variables with DOTENV_ prefix
    while IFS='=' read -r key value || [[ -n "$key" ]]; do
        # Skip comments and empty lines
        [[ "$key" =~ ^[[:space:]]*# ]] && continue
        [[ -z "$key" ]] && continue
        # Trim whitespace from the key in pure bash. The previous
        # `echo "$key" | xargs` trim mangled keys containing quote
        # characters and spawned a process per line.
        key="${key#"${key%%[![:space:]]*}"}"
        key="${key%"${key##*[![:space:]]}"}"
        # Skip if key is empty after trimming
        [[ -z "$key" ]] && continue
        # Trim whitespace and strip one layer of matching surrounding
        # quotes from the value in pure bash. The previous implementation
        # interpolated the raw value into a `python3 -c` script as
        # value = '''$value''', which broke on values containing quotes
        # or backslashes and allowed a crafted .env file to execute
        # arbitrary Python code.
        value="${value#"${value%%[![:space:]]*}"}"
        value="${value%"${value##*[![:space:]]}"}"
        if [[ ${#value} -ge 2 && ( "$value" == \"*\" || "$value" == \'*\' ) ]]; then
            value="${value:1:${#value}-2}"
        fi
        # Export with DOTENV_ prefix
        export "DOTENV_$key=$value"
    done < "$CONFIG_FILE"
    echo "Loaded .env variables: $(compgen -v DOTENV_ | tr '\n' ' ')"
fi
# Apply .env configuration (CLI args take precedence)
if [[ -z "$NODES_ARG" && -n "$DOTENV_CLUSTER_NODES" ]]; then
    NODES_ARG="$DOTENV_CLUSTER_NODES"
fi
if [[ -z "$ETH_IF" && -n "$DOTENV_ETH_IF" ]]; then
    ETH_IF="$DOTENV_ETH_IF"
fi
if [[ -z "$IB_IF" && -n "$DOTENV_IB_IF" ]]; then
    IB_IF="$DOTENV_IB_IF"
fi
# NOTE(review): comparing against the literal defaults means an explicit
# CLI value that happens to equal the default (e.g. --master-port 29501)
# is still overridden by .env — confirm this is the intended precedence.
if [[ -z "$MASTER_PORT" || "$MASTER_PORT" == "29501" ]] && [[ -n "$DOTENV_MASTER_PORT" ]]; then
    MASTER_PORT="$DOTENV_MASTER_PORT"
fi
if [[ -z "$CONTAINER_NAME" || "$CONTAINER_NAME" == "vllm_node" ]] && [[ -n "$DOTENV_CONTAINER_NAME" ]]; then
    CONTAINER_NAME="$DOTENV_CONTAINER_NAME"
fi
# Validate non-privileged mode flags # Validate non-privileged mode flags
if [[ "$NON_PRIVILEGED_MODE" == "true" ]]; then if [[ "$NON_PRIVILEGED_MODE" == "true" ]]; then
# Set default swap limit if not specified # Set default swap limit if not specified
@@ -163,6 +285,22 @@ if [[ -n "$NCCL_DEBUG_VAL" ]]; then
esac esac
fi fi
# Add container environment variables from .env (CONTAINER_* pattern)
for env_var in $(compgen -v DOTENV_CONTAINER_); do
    # Get the value via indirect expansion
    value="${!env_var}"
    # Extract the actual env var name (remove DOTENV_CONTAINER_ prefix)
    actual_var="${env_var#DOTENV_CONTAINER_}"
    # Shell-quote the value. Pass it as argv rather than interpolating it
    # into the Python source: the old  shlex.quote('$value')  form broke
    # on any value containing a single quote and let a crafted .env value
    # execute arbitrary Python code. Output is identical for safe values.
    escaped_value=$(python3 -c 'import shlex, sys; print(shlex.quote(sys.argv[1]))' "$value")
    # Add to docker args
    DOCKER_ARGS="$DOCKER_ARGS -e $actual_var=$escaped_value"
    echo "Adding container env: $actual_var"
done
# Add build job parallelization environment variables if BUILD_JOBS is set # Add build job parallelization environment variables if BUILD_JOBS is set
if [[ -n "$BUILD_JOBS" ]]; then if [[ -n "$BUILD_JOBS" ]]; then
DOCKER_ARGS="$DOCKER_ARGS -e MAX_JOBS=$BUILD_JOBS" DOCKER_ARGS="$DOCKER_ARGS -e MAX_JOBS=$BUILD_JOBS"

View File

@@ -105,7 +105,7 @@ LAUNCH_SCRIPT = SCRIPT_DIR / "launch-cluster.sh"
BUILD_SCRIPT = SCRIPT_DIR / "build-and-copy.sh" BUILD_SCRIPT = SCRIPT_DIR / "build-and-copy.sh"
DOWNLOAD_SCRIPT = SCRIPT_DIR / "hf-download.sh" DOWNLOAD_SCRIPT = SCRIPT_DIR / "hf-download.sh"
AUTODISCOVER_SCRIPT = SCRIPT_DIR / "autodiscover.sh" AUTODISCOVER_SCRIPT = SCRIPT_DIR / "autodiscover.sh"
ENV_FILE = SCRIPT_DIR / ".env" ENV_FILE = None # Will be set from CLI argument or default
def load_recipe(recipe_path: Path) -> dict[str, Any]: def load_recipe(recipe_path: Path) -> dict[str, Any]:
@@ -187,7 +187,9 @@ def load_recipe(recipe_path: Path) -> dict[str, Any]:
SUPPORTED_VERSIONS = ["1"] SUPPORTED_VERSIONS = ["1"]
recipe_ver = str(recipe["recipe_version"]) recipe_ver = str(recipe["recipe_version"])
if recipe_ver not in SUPPORTED_VERSIONS: if recipe_ver not in SUPPORTED_VERSIONS:
print(f"Warning: Recipe uses schema version '{recipe_ver}', but this run-recipe.py supports: {SUPPORTED_VERSIONS}") print(
f"Warning: Recipe uses schema version '{recipe_ver}', but this run-recipe.py supports: {SUPPORTED_VERSIONS}"
)
print("Some features may not work correctly. Consider updating run-recipe.py.") print("Some features may not work correctly. Consider updating run-recipe.py.")
return recipe return recipe
@@ -269,19 +271,27 @@ def check_image_exists(image: str, host: str | None = None) -> bool:
""" """
if host: if host:
result = subprocess.run( result = subprocess.run(
["ssh", "-o", "BatchMode=yes", "-o", "StrictHostKeyChecking=no", [
host, f"docker image inspect '{image}'"], "ssh",
capture_output=True "-o",
"BatchMode=yes",
"-o",
"StrictHostKeyChecking=no",
host,
f"docker image inspect '{image}'",
],
capture_output=True,
) )
else: else:
result = subprocess.run( result = subprocess.run(
["docker", "image", "inspect", image], ["docker", "image", "inspect", image], capture_output=True
capture_output=True
) )
return result.returncode == 0 return result.returncode == 0
def build_image(image: str, copy_to: list[str] | None = None, build_args: list[str] | None = None) -> bool: def build_image(
image: str, copy_to: list[str] | None = None, build_args: list[str] | None = None
) -> bool:
""" """
Build the container image using build-and-copy.sh. Build the container image using build-and-copy.sh.
@@ -393,7 +403,13 @@ def check_model_exists(model: str) -> bool:
return False return False
def generate_launch_script(recipe: dict[str, Any], overrides: dict[str, Any], is_solo: bool = False, extra_args: list[str] | None = None, no_ray: bool = False) -> str: def generate_launch_script(
recipe: dict[str, Any],
overrides: dict[str, Any],
is_solo: bool = False,
extra_args: list[str] | None = None,
no_ray: bool = False,
) -> str:
""" """
Generate a bash launch script from the recipe. Generate a bash launch script from the recipe.
@@ -446,7 +462,7 @@ def generate_launch_script(recipe: dict[str, Any], overrides: dict[str, Any], is
if env_vars: if env_vars:
lines.append("# Environment variables") lines.append("# Environment variables")
for key, value in env_vars.items(): for key, value in env_vars.items():
lines.append(f"export {key}=\"{value}\"") lines.append(f'export {key}="{value}"')
lines.append("") lines.append("")
# Format the command with parameters # Format the command with parameters
@@ -462,26 +478,24 @@ def generate_launch_script(recipe: dict[str, Any], overrides: dict[str, Any], is
# (not needed for solo; no-ray uses PyTorch distributed instead) # (not needed for solo; no-ray uses PyTorch distributed instead)
if is_solo or no_ray: if is_solo or no_ray:
import re import re
# Remove just the flag and its value, not the whole line # Remove just the flag and its value, not the whole line
command = re.sub(r'--distributed-executor-backend\s+\S+', '', command) command = re.sub(r"--distributed-executor-backend\s+\S+", "", command)
# Remove lines that are now empty or just a backslash continuation # Remove lines that are now empty or just a backslash continuation
lines_list = command.split('\n') lines_list = command.split("\n")
filtered_lines = [ filtered_lines = [line for line in lines_list if line.strip() not in ("", "\\")]
line for line in lines_list command = "\n".join(filtered_lines)
if line.strip() not in ('', '\\')
]
command = '\n'.join(filtered_lines)
# Remove trailing backslash if present # Remove trailing backslash if present
command = command.rstrip() command = command.rstrip()
if command.endswith('\\'): if command.endswith("\\"):
command = command.rstrip('\\\n').rstrip() command = command.rstrip("\\\n").rstrip()
# Append extra args if provided (after --) # Append extra args if provided (after --)
if extra_args: if extra_args:
# Join extra args and append to command # Join extra args and append to command
extra_args_str = ' '.join(shlex.quote(a) for a in extra_args) extra_args_str = " ".join(shlex.quote(a) for a in extra_args)
command = command + ' ' + extra_args_str command = command + " " + extra_args_str
lines.append("# Run the model") lines.append("# Run the model")
lines.append(command.strip()) lines.append(command.strip())
@@ -631,11 +645,7 @@ def run_autodiscover() -> dict[str, str] | None:
echo "IB_IF=$IB_IF" echo "IB_IF=$IB_IF"
""" """
result = subprocess.run( result = subprocess.run(["bash", "-c", script], capture_output=True, text=True)
["bash", "-c", script],
capture_output=True,
text=True
)
if result.returncode != 0: if result.returncode != 0:
print("Autodiscover output:") print("Autodiscover output:")
@@ -649,7 +659,10 @@ def run_autodiscover() -> dict[str, str] | None:
output_lines = result.stdout.strip().split("\n") output_lines = result.stdout.strip().split("\n")
env = {} env = {}
for line in output_lines: for line in output_lines:
if "=" in line and any(line.startswith(k) for k in ["CLUSTER_NODES=", "LOCAL_IP=", "ETH_IF=", "IB_IF="]): if "=" in line and any(
line.startswith(k)
for k in ["CLUSTER_NODES=", "LOCAL_IP=", "ETH_IF=", "IB_IF="]
):
key, _, value = line.partition("=") key, _, value = line.partition("=")
env[key] = value env[key] = value
else: else:
@@ -692,7 +705,9 @@ def run_autodiscover() -> dict[str, str] | None:
print(f"Only one node selected: {selected_nodes[0]}") print(f"Only one node selected: {selected_nodes[0]}")
print("This will run in solo mode (single node).") print("This will run in solo mode (single node).")
else: else:
print(f"Selected {len(selected_nodes)} nodes: {', '.join(selected_nodes)}") print(
f"Selected {len(selected_nodes)} nodes: {', '.join(selected_nodes)}"
)
env["CLUSTER_NODES"] = ",".join(selected_nodes) env["CLUSTER_NODES"] = ",".join(selected_nodes)
print() print()
@@ -757,18 +772,16 @@ Examples:
# Show current .env configuration # Show current .env configuration
%(prog)s --show-env %(prog)s --show-env
""" """,
) )
parser.add_argument( parser.add_argument(
"recipe", "recipe",
nargs="?", nargs="?",
help="Path to recipe YAML file (or just the name without .yaml)" help="Path to recipe YAML file (or just the name without .yaml)",
) )
parser.add_argument( parser.add_argument(
"--list", "-l", "--list", "-l", action="store_true", help="List available recipes"
action="store_true",
help="List available recipes"
) )
# Setup options # Setup options
@@ -776,87 +789,194 @@ Examples:
setup_group.add_argument( setup_group.add_argument(
"--setup", "--setup",
action="store_true", action="store_true",
help="Full setup: build container (if missing) + download model (if missing) + run" help="Full setup: build container (if missing) + download model (if missing) + run",
) )
setup_group.add_argument( setup_group.add_argument(
"--build-only", "--build-only",
action="store_true", action="store_true",
help="Only build/copy the container image, don't run" help="Only build/copy the container image, don't run",
) )
setup_group.add_argument( setup_group.add_argument(
"--download-only", "--download-only",
action="store_true", action="store_true",
help="Only download/copy the model, don't run" help="Only download/copy the model, don't run",
) )
setup_group.add_argument( setup_group.add_argument(
"--force-build", "--force-build", action="store_true", help="Force rebuild even if image exists"
action="store_true",
help="Force rebuild even if image exists"
) )
setup_group.add_argument( setup_group.add_argument(
"--force-download", "--force-download",
action="store_true", action="store_true",
help="Force re-download even if model exists" help="Force re-download even if model exists",
) )
parser.add_argument( parser.add_argument(
"--dry-run", "--dry-run",
action="store_true", action="store_true",
help="Show what would be executed without running" help="Show what would be executed without running",
) )
# Override options # Override options
override_group = parser.add_argument_group("Recipe overrides") override_group = parser.add_argument_group("Recipe overrides")
override_group.add_argument("--port", type=int, help="Override port") override_group.add_argument("--port", type=int, help="Override port")
override_group.add_argument("--host", help="Override host") override_group.add_argument("--host", help="Override host")
override_group.add_argument("--tensor-parallel", "--tp", type=int, dest="tensor_parallel", help="Override tensor parallelism") override_group.add_argument(
override_group.add_argument("--gpu-memory-utilization", "--gpu-mem", type=float, dest="gpu_memory_utilization", help="Override GPU memory utilization") "--tensor-parallel",
override_group.add_argument("--max-model-len", type=int, dest="max_model_len", help="Override max model length") "--tp",
type=int,
dest="tensor_parallel",
help="Override tensor parallelism",
)
override_group.add_argument(
"--gpu-memory-utilization",
"--gpu-mem",
type=float,
dest="gpu_memory_utilization",
help="Override GPU memory utilization",
)
override_group.add_argument(
"--max-model-len",
type=int,
dest="max_model_len",
help="Override max model length",
)
# Launch options (passed to launch-cluster.sh) # Launch options (passed to launch-cluster.sh)
launch_group = parser.add_argument_group("Launch options (passed to launch-cluster.sh)") launch_group = parser.add_argument_group(
launch_group.add_argument("--solo", action="store_true", help="Run in solo mode (single node, no Ray)") "Launch options (passed to launch-cluster.sh)"
launch_group.add_argument("-n", "--nodes", help="Comma-separated list of node IPs (first is head node)") )
launch_group.add_argument("-d", "--daemon", action="store_true", help="Run in daemon mode") launch_group.add_argument(
launch_group.add_argument("-t", "--container", dest="container_override", help="Override container image from recipe") "--solo", action="store_true", help="Run in solo mode (single node, no Ray)"
launch_group.add_argument("--nccl-debug", choices=["VERSION", "WARN", "INFO", "TRACE"], help="NCCL debug level") )
launch_group.add_argument("-e", "--env", action="append", dest="env_vars", default=[], metavar="VAR=VALUE", help="Environment variable to pass to container (e.g. -e HF_TOKEN=xxx). Can be used multiple times.") launch_group.add_argument(
"-n", "--nodes", help="Comma-separated list of node IPs (first is head node)"
)
launch_group.add_argument(
"-d", "--daemon", action="store_true", help="Run in daemon mode"
)
launch_group.add_argument(
"-t",
"--container",
dest="container_override",
help="Override container image from recipe",
)
launch_group.add_argument(
"--nccl-debug",
choices=["VERSION", "WARN", "INFO", "TRACE"],
help="NCCL debug level",
)
launch_group.add_argument(
"-e",
"--env",
action="append",
dest="env_vars",
default=[],
metavar="VAR=VALUE",
help="Environment variable to pass to container (e.g. -e HF_TOKEN=xxx). Can be used multiple times.",
)
launch_group.add_argument( launch_group.add_argument(
"--no-ray", "--no-ray",
action="store_true", action="store_true",
dest="no_ray", dest="no_ray",
help="No-Ray mode: run multi-node vLLM without Ray (uses PyTorch distributed backend)" help="No-Ray mode: run multi-node vLLM without Ray (uses PyTorch distributed backend)",
)
launch_group.add_argument(
"--master-port",
"--head-port",
type=int,
dest="master_port",
help="Port for cluster coordination (Ray head port or PyTorch distributed master port, default: 29501)",
)
launch_group.add_argument(
"--name",
dest="container_name",
help="Override container name (default: vllm_node)",
)
launch_group.add_argument(
"--eth-if",
dest="eth_if",
help="Ethernet interface (overrides .env and auto-detection)",
)
launch_group.add_argument(
"--ib-if",
dest="ib_if",
help="InfiniBand interface (overrides .env and auto-detection)",
)
launch_group.add_argument(
"-j",
dest="build_jobs",
type=int,
metavar="N",
help="Number of parallel build jobs inside container",
)
launch_group.add_argument(
"--no-cache-dirs",
action="store_true",
dest="no_cache_dirs",
help="Do not mount ~/.cache/vllm, ~/.cache/flashinfer, ~/.triton",
)
launch_group.add_argument(
"--non-privileged",
action="store_true",
dest="non_privileged",
help="Run in non-privileged mode (removes --privileged and --ipc=host)",
)
launch_group.add_argument(
"--mem-limit-gb",
type=int,
dest="mem_limit_gb",
help="Memory limit in GB (only with --non-privileged)",
)
launch_group.add_argument(
"--mem-swap-limit-gb",
type=int,
dest="mem_swap_limit_gb",
help="Memory+swap limit in GB (only with --non-privileged)",
)
launch_group.add_argument(
"--pids-limit",
type=int,
dest="pids_limit",
help="Process limit (only with --non-privileged, default: 4096)",
)
launch_group.add_argument(
"--shm-size-gb",
type=int,
dest="shm_size_gb",
help="Shared memory size in GB (only with --non-privileged, default: 64)",
)
# Config file option
parser.add_argument(
"--config",
dest="config_file",
metavar="FILE",
help="Path to .env configuration file (default: .env in script directory)",
) )
launch_group.add_argument("--master-port", "--head-port", type=int, dest="master_port", help="Port for cluster coordination (Ray head port or PyTorch distributed master port, default: 29501)")
launch_group.add_argument("--name", dest="container_name", help="Override container name (default: vllm_node)")
launch_group.add_argument("--eth-if", dest="eth_if", help="Ethernet interface (overrides .env and auto-detection)")
launch_group.add_argument("--ib-if", dest="ib_if", help="InfiniBand interface (overrides .env and auto-detection)")
launch_group.add_argument("-j", dest="build_jobs", type=int, metavar="N", help="Number of parallel build jobs inside container")
launch_group.add_argument("--no-cache-dirs", action="store_true", dest="no_cache_dirs", help="Do not mount ~/.cache/vllm, ~/.cache/flashinfer, ~/.triton")
launch_group.add_argument("--non-privileged", action="store_true", dest="non_privileged", help="Run in non-privileged mode (removes --privileged and --ipc=host)")
launch_group.add_argument("--mem-limit-gb", type=int, dest="mem_limit_gb", help="Memory limit in GB (only with --non-privileged)")
launch_group.add_argument("--mem-swap-limit-gb", type=int, dest="mem_swap_limit_gb", help="Memory+swap limit in GB (only with --non-privileged)")
launch_group.add_argument("--pids-limit", type=int, dest="pids_limit", help="Process limit (only with --non-privileged, default: 4096)")
launch_group.add_argument("--shm-size-gb", type=int, dest="shm_size_gb", help="Shared memory size in GB (only with --non-privileged, default: 64)")
# Cluster discovery options # Cluster discovery options
discover_group = parser.add_argument_group("Cluster discovery") discover_group = parser.add_argument_group("Cluster discovery")
discover_group.add_argument( discover_group.add_argument(
"--discover", "--discover",
action="store_true", action="store_true",
help="Auto-detect cluster nodes and save to .env file" help="Auto-detect cluster nodes and save to .env file",
) )
discover_group.add_argument( discover_group.add_argument(
"--show-env", "--show-env", action="store_true", help="Show current .env configuration"
action="store_true",
help="Show current .env configuration"
) )
# Use parse_known_args to allow extra vLLM arguments after -- # Use parse_known_args to allow extra vLLM arguments after --
args, extra_args = parser.parse_known_args() args, extra_args = parser.parse_known_args()
# Set .env file path (use default if not specified)
global ENV_FILE
if args.config_file:
ENV_FILE = Path(args.config_file).resolve()
else:
ENV_FILE = SCRIPT_DIR / ".env"
# Filter out the -- separator if present # Filter out the -- separator if present
if extra_args and extra_args[0] == '--': if extra_args and extra_args[0] == "--":
extra_args = extra_args[1:] extra_args = extra_args[1:]
# Handle --discover (can be run with or without a recipe) # Handle --discover (can be run with or without a recipe)
@@ -941,7 +1061,13 @@ Examples:
if nodes: if nodes:
# Ask if user wants to save to .env # Ask if user wants to save to .env
print() print()
response = input("Save this configuration to .env for future use? [Y/n]: ").strip().lower() response = (
input(
"Save this configuration to .env for future use? [Y/n]: "
)
.strip()
.lower()
)
if response in ("", "y", "yes"): if response in ("", "y", "yes"):
save_env_file(discovered_env) save_env_file(discovered_env)
print() print()
@@ -963,8 +1089,10 @@ Examples:
solo_only = recipe.get("solo_only", False) solo_only = recipe.get("solo_only", False)
is_solo = args.solo or not is_cluster is_solo = args.solo or not is_cluster
if getattr(args, 'no_ray', False) and is_solo: if getattr(args, "no_ray", False) and is_solo:
print("Error: --no-ray is incompatible with --solo. Solo mode already runs without Ray.") print(
"Error: --no-ray is incompatible with --solo. Solo mode already runs without Ray."
)
return 1 return 1
if cluster_only and is_solo: if cluster_only and is_solo:
@@ -972,7 +1100,9 @@ Examples:
print(f"This model is too large to run on a single node.") print(f"This model is too large to run on a single node.")
print() print()
print("Options:") print("Options:")
print(f" 1. Specify nodes directly: {sys.argv[0]} {args.recipe} -n node1,node2") print(
f" 1. Specify nodes directly: {sys.argv[0]} {args.recipe} -n node1,node2"
)
print(f" 2. Auto-discover and save: {sys.argv[0]} --discover") print(f" 2. Auto-discover and save: {sys.argv[0]} --discover")
print(f" Then run: {sys.argv[0]} {args.recipe}") print(f" Then run: {sys.argv[0]} {args.recipe}")
return 1 return 1
@@ -1007,9 +1137,13 @@ Examples:
print(f" Workers: {', '.join(worker_nodes)}") print(f" Workers: {', '.join(worker_nodes)}")
print(f"Solo mode: {is_solo}") print(f"Solo mode: {is_solo}")
if eth_if: if eth_if:
print(f"Ethernet interface: {eth_if}{' (from .env)' if not args.eth_if else ''}") print(
f"Ethernet interface: {eth_if}{' (from .env)' if not args.eth_if else ''}"
)
if ib_if: if ib_if:
print(f"InfiniBand interface: {ib_if}{' (from .env)' if not args.ib_if else ''}") print(
f"InfiniBand interface: {ib_if}{' (from .env)' if not args.ib_if else ''}"
)
if args.container_name: if args.container_name:
print(f"Container name: {args.container_name}") print(f"Container name: {args.container_name}")
if args.non_privileged: if args.non_privileged:
@@ -1099,7 +1233,7 @@ Examples:
print(f" 2. Build manually: ./build-and-copy.sh -t {container}") print(f" 2. Build manually: ./build-and-copy.sh -t {container}")
print() print()
response = input("Build now? [y/N] ").strip().lower() response = input("Build now? [y/N] ").strip().lower()
if response == 'y': if response == "y":
if not build_image(container, copy_targets, build_args): if not build_image(container, copy_targets, build_args):
print("Error: Failed to build image") print("Error: Failed to build image")
return 1 return 1
@@ -1109,7 +1243,13 @@ Examples:
# Build overrides from CLI args # Build overrides from CLI args
overrides = {} overrides = {}
for key in ["port", "host", "tensor_parallel", "gpu_memory_utilization", "max_model_len"]: for key in [
"port",
"host",
"tensor_parallel",
"gpu_memory_utilization",
"max_model_len",
]:
value = getattr(args, key, None) value = getattr(args, key, None)
if value is not None: if value is not None:
overrides[key] = value overrides[key] = value
@@ -1122,24 +1262,34 @@ Examples:
if extra_args: if extra_args:
# Map vLLM flags to our override keys # Map vLLM flags to our override keys
flag_to_override = { flag_to_override = {
'--port': 'port', "--port": "port",
'--host': 'host', "--host": "host",
'--tensor-parallel-size': 'tensor_parallel', "--tensor-parallel-size": "tensor_parallel",
'-tp': 'tensor_parallel', "-tp": "tensor_parallel",
'--gpu-memory-utilization': 'gpu_memory_utilization', "--gpu-memory-utilization": "gpu_memory_utilization",
'--max-model-len': 'max_model_len', "--max-model-len": "max_model_len",
} }
for i, arg in enumerate(extra_args): for i, arg in enumerate(extra_args):
# Check both exact flag and =value syntax # Check both exact flag and =value syntax
flag = arg.split('=')[0] if '=' in arg else arg flag = arg.split("=")[0] if "=" in arg else arg
if flag in flag_to_override: if flag in flag_to_override:
override_key = flag_to_override[flag] override_key = flag_to_override[flag]
if override_key in overrides: if override_key in overrides:
print(f"Warning: '{arg}' in extra args duplicates --{override_key.replace('_', '-')} override") print(
print(f" vLLM uses last value; extra args appear after template substitution") f"Warning: '{arg}' in extra args duplicates --{override_key.replace('_', '-')} override"
)
print(
f" vLLM uses last value; extra args appear after template substitution"
)
# Generate launch script # Generate launch script
script_content = generate_launch_script(recipe, overrides, is_solo=is_solo, extra_args=extra_args, no_ray=getattr(args, 'no_ray', False)) script_content = generate_launch_script(
recipe,
overrides,
is_solo=is_solo,
extra_args=extra_args,
no_ray=getattr(args, "no_ray", False),
)
if args.dry_run: if args.dry_run:
print("=== Generated Launch Script ===") print("=== Generated Launch Script ===")
@@ -1158,7 +1308,7 @@ Examples:
cmd_parts.append("--solo") cmd_parts.append("--solo")
if args.daemon: if args.daemon:
cmd_parts.append("-d") cmd_parts.append("-d")
if getattr(args, 'no_ray', False): if getattr(args, "no_ray", False):
cmd_parts.append("--no-ray") cmd_parts.append("--no-ray")
if nodes: if nodes:
cmd_parts.extend(["-n", ",".join(nodes)]) cmd_parts.extend(["-n", ",".join(nodes)])
@@ -1195,7 +1345,7 @@ Examples:
return 0 return 0
# Write temporary launch script # Write temporary launch script
with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f: with tempfile.NamedTemporaryFile(mode="w", suffix=".sh", delete=False) as f:
f.write(script_content) f.write(script_content)
temp_script = f.name temp_script = f.name
@@ -1222,7 +1372,7 @@ Examples:
if args.daemon: if args.daemon:
cmd.append("-d") cmd.append("-d")
if getattr(args, 'no_ray', False): if getattr(args, "no_ray", False):
cmd.append("--no-ray") cmd.append("--no-ray")
# Pass nodes to launch-cluster.sh (from command line, .env, or autodiscover) # Pass nodes to launch-cluster.sh (from command line, .env, or autodiscover)