#!/bin/bash
#
# test_recipes.sh - Integration tests for run-recipe.py and launch-cluster.sh
#
# These tests use --dry-run mode to verify compatibility without actually
# running containers. Suitable for CI/CD pipelines.
#
# Usage:
#   ./tests/test_recipes.sh       # Run all tests
#   ./tests/test_recipes.sh -v    # Verbose output
#

# Don't exit on first failure; we want a full summary.
set +e

SCRIPT_DIR="$(dirname "$(realpath "$0")")"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
VERBOSE="${1:-}"

# Load expected commands for README verification
source "$SCRIPT_DIR/expected_commands.sh"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Test counters
TESTS_PASSED=0
TESTS_FAILED=0
TESTS_SKIPPED=0

# ------------------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------------------

# Print a "[TEST]" banner line for the test described by $1.
log_test() {
  echo -e "${YELLOW}[TEST]${NC} $1"
}

# Print a "[PASS]" line for $1 and increment TESTS_PASSED.
log_pass() {
  echo -e "${GREEN}[PASS]${NC} $1"
  TESTS_PASSED=$((TESTS_PASSED + 1))
}

# Print a "[FAIL]" line for $1 and increment TESTS_FAILED.
log_fail() {
  echo -e "${RED}[FAIL]${NC} $1"
  TESTS_FAILED=$((TESTS_FAILED + 1))
}

# Print a "[SKIP]" line for $1 and increment TESTS_SKIPPED.
log_skip() {
  echo -e "${YELLOW}[SKIP]${NC} $1"
  TESTS_SKIPPED=$((TESTS_SKIPPED + 1))
}

# Print $1 (indented) only when the script was started with -v.
log_verbose() {
  if [[ "$VERBOSE" == "-v" ]]; then
    echo "  $1"
  fi
}

# Print the second whitespace-separated field of every line in a recipe file
# that starts (at column 0) with "<flag_name>:".
# Arguments: $1 - key name, $2 - path to the recipe file
get_recipe_flag() {
  local flag_name="$1"
  local recipe_file="$2"
  grep -E "^${flag_name}:" "$recipe_file" | awk '{print $2}'
}

# Print the name (basename without .yaml) of the first recipe under
# $PROJECT_DIR/recipes whose flag $1 is not "true".
# Returns: 0 when a recipe was printed, 1 when none qualifies.
_find_recipe_without_flag() {
  local excluded_flag="$1"
  local recipe
  for recipe in "$PROJECT_DIR/recipes/"*.yaml; do
    # An unmatched glob stays literal; -f filters that case out.
    [[ -f "$recipe" ]] || continue
    if [[ "$(get_recipe_flag "$excluded_flag" "$recipe")" == "true" ]]; then
      continue
    fi
    basename "$recipe" .yaml
    return 0
  done
  return 1
}

# Print the first recipe that is not marked cluster_only; return 1 if none.
find_solo_recipe() {
  _find_recipe_without_flag "cluster_only"
}

# Print the first recipe that is not marked solo_only; return 1 if none.
find_cluster_recipe() {
  _find_recipe_without_flag "solo_only"
}

# Print the first recipe whose top-level "mods:" key is followed by at least
# one "- " list item before the next top-level key; return 1 if none.
find_recipe_with_mods() {
  local recipe has_mods
  for recipe in "$PROJECT_DIR/recipes/"*.yaml; do
    [[ -f "$recipe" ]] || continue
    has_mods=$(awk '
      /^mods:/ {inmods=1; next}
      inmods && /^[[:space:]]*-[[:space:]]/ {print "yes"; exit}
      inmods && /^[^[:space:]]/ {exit}
    ' "$recipe")
    if [[ "$has_mods" == "yes" ]]; then
      basename "$recipe" .yaml
      return 0
    fi
  done
  return 1
}

# Print the mode a recipe should be dry-run in: "cluster" when the recipe
# sets cluster_only: true, otherwise "solo".
# Arguments: $1 - recipe name (file is $PROJECT_DIR/recipes/<name>.yaml)
get_recipe_mode() {
  local recipe_name="$1"
  local recipe_file="$PROJECT_DIR/recipes/${recipe_name}.yaml"
  local cluster_only
  cluster_only=$(get_recipe_flag "cluster_only" "$recipe_file")

  if [[ "$cluster_only" == "true" ]]; then
    echo "cluster"
  else
    echo "solo"
  fi
}

# Dry-run a recipe and print combined stdout/stderr.
# Arguments: $1 - recipe name, $2 - "cluster" (uses two fake node IPs) or
#            anything else (uses --solo)
run_recipe_dry_run() {
  local recipe_name="$1"
  local mode="$2"

  if [[ "$mode" == "cluster" ]]; then
    "$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run -n "10.0.0.1,10.0.0.2" 2>&1
  else
    "$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo 2>&1
  fi
}

# Write inline recipe text to a temporary file, dry-run it, print the combined
# output and always remove the temporary file afterwards.
# Arguments: $1 - recipe YAML text; remaining - extra run-recipe.py arguments
# Returns:   exit status of run-recipe.py (1 if the temp file can't be created)
_dry_run_inline_recipe() {
  local recipe_text="$1"
  shift
  local temp_recipe status
  temp_recipe=$(mktemp) || return 1
  printf '%s\n' "$recipe_text" > "$temp_recipe"
  "$PROJECT_DIR/run-recipe.py" "$temp_recipe" --dry-run "$@" 2>&1
  status=$?
  rm -f -- "$temp_recipe"
  return "$status"
}

# Succeed when version $1 ("MAJOR.MINOR[...]") is >= version $2.
# Components are compared as integers, so 3.9 < 3.10 (a decimal comparison
# would get this wrong). Unparseable input counts as "not at least".
_version_at_least() {
  local have_major have_minor want_major want_minor
  [[ "$1" =~ ^([0-9]+)\.([0-9]+) ]] || return 1
  have_major=${BASH_REMATCH[1]}
  have_minor=${BASH_REMATCH[2]}
  [[ "$2" =~ ^([0-9]+)\.([0-9]+) ]] || return 1
  want_major=${BASH_REMATCH[1]}
  want_minor=${BASH_REMATCH[2]}

  # 10# forces base 10 so a leading zero is never read as octal.
  if (( 10#$have_major != 10#$want_major )); then
    (( 10#$have_major > 10#$want_major ))
    return
  fi
  (( 10#$have_minor >= 10#$want_minor ))
}

# Check prerequisites: python3 on PATH, Python >= 3.10, PyYAML importable.
# Exits the whole script with status 1 when any of them is missing.
check_prerequisites() {
  log_test "Checking prerequisites..."

  if ! command -v python3 &> /dev/null; then
    log_fail "python3 not found"
    exit 1
  fi

  # Check Python version
  local python_version
  python_version=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
  if ! _version_at_least "$python_version" "3.10"; then
    log_fail "Python 3.10+ required, found $python_version"
    exit 1
  fi

  # Check PyYAML
  if ! python3 -c "import yaml" 2>/dev/null; then
    log_fail "PyYAML not installed"
    exit 1
  fi

  log_pass "Prerequisites OK (Python $python_version with PyYAML)"
}

# Test: run-recipe.py exists and is executable
test_run_recipe_exists() {
  log_test "run-recipe.py exists and is executable"
  if [[ -x "$PROJECT_DIR/run-recipe.py" ]]; then
    log_pass "run-recipe.py is executable"
  else
    log_fail "run-recipe.py not found or not executable"
  fi
}

# Test: launch-cluster.sh exists and is executable
test_launch_cluster_exists() {
  log_test "launch-cluster.sh exists and is executable"
  if [[ -x "$PROJECT_DIR/launch-cluster.sh" ]]; then
    log_pass "launch-cluster.sh is executable"
  else
    log_fail "launch-cluster.sh not found or not executable"
  fi
}

# Test: run-recipe.py --list works
test_list_recipes() {
  log_test "run-recipe.py --list"
  local output status
  output=$("$PROJECT_DIR/run-recipe.py" --list 2>&1)
  status=$?
  if (( status == 0 )) && grep -q "Available recipes" <<<"$output"; then
    log_pass "--list shows available recipes"
    log_verbose "Found recipes in output"
  else
    log_fail "--list failed or no recipes found"
    log_verbose "$output"
  fi
}

# Test: All recipes have required recipe_version field
test_recipe_version_required() {
  log_test "All recipes have required recipe_version field"

  local all_valid=true
  local recipe
  for recipe in "$PROJECT_DIR/recipes/"*.yaml; do
    [[ -f "$recipe" ]] || continue
    if ! grep -q "^recipe_version:" "$recipe"; then
      log_verbose "$(basename "$recipe") missing recipe_version"
      all_valid=false
    fi
  done

  if [[ "$all_valid" == "true" ]]; then
    log_pass "All recipes have recipe_version field"
  else
    log_fail "Some recipes missing recipe_version field"
  fi
}

# Test: All recipes load without errors
# A recipe counts as failed when its dry-run output contains "Error:" or when
# it sets both cluster_only and solo_only to true.
test_all_recipes_load() {
  log_test "All recipes load without errors"

  local all_valid=true
  local recipe recipe_name cluster_only solo_only mode output
  for recipe in "$PROJECT_DIR/recipes/"*.yaml; do
    [[ -f "$recipe" ]] || continue
    recipe_name=$(basename "$recipe" .yaml)
    cluster_only=$(get_recipe_flag "cluster_only" "$recipe")
    solo_only=$(get_recipe_flag "solo_only" "$recipe")

    if [[ "$cluster_only" == "true" && "$solo_only" == "true" ]]; then
      log_verbose "$recipe_name has conflicting cluster_only + solo_only"
      all_valid=false
      continue
    fi

    mode="solo"
    if [[ "$cluster_only" == "true" ]]; then
      mode="cluster"
    fi
    output=$(run_recipe_dry_run "$recipe_name" "$mode")

    if grep -q "Error:" <<<"$output"; then
      log_verbose "$recipe_name failed to load"
      all_valid=false
    else
      log_verbose "$recipe_name loads OK"
    fi
  done

  if [[ "$all_valid" == "true" ]]; then
    log_pass "All recipes load successfully"
  else
    log_fail "Some recipes failed to load"
  fi
}

# Test: Dry-run generates valid launch script
test_dry_run_generates_script() {
  log_test "Dry-run generates valid launch script"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$(run_recipe_dry_run "$recipe_name" "solo")

  if grep -q "#!/bin/bash" <<<"$output" && grep -q "vllm serve" <<<"$output"; then
    log_pass "Dry-run generates bash script with vllm serve command"
  else
    log_fail "Dry-run output doesn't contain expected content"
    log_verbose "$output"
  fi
}

# Test: Solo mode sets tensor_parallel=1
test_solo_mode_tp1() {
  log_test "Solo mode sets tensor_parallel=1"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$(run_recipe_dry_run "$recipe_name" "solo")

  # Require "-tp 1" not followed by another digit, so "-tp 16" does not pass.
  if grep -qE -- '-tp 1([^0-9]|$)' <<<"$output"; then
    log_pass "Solo mode correctly sets -tp 1"
  else
    log_fail "Solo mode did not set -tp 1"
    log_verbose "$output"
  fi
}

# Test: Solo mode removes --distributed-executor-backend ray
test_solo_mode_removes_ray() {
  log_test "Solo mode removes --distributed-executor-backend ray"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$(run_recipe_dry_run "$recipe_name" "solo")

  # Check that --distributed-executor-backend is NOT in the output
  if grep -qF -- "--distributed-executor-backend" <<<"$output"; then
    log_fail "Solo mode did not remove --distributed-executor-backend"
    log_verbose "$output"
  else
    log_pass "Solo mode correctly removes --distributed-executor-backend"
  fi
}

# Test: Cluster mode preserves --distributed-executor-backend ray
test_cluster_mode_keeps_ray() {
  log_test "Cluster mode preserves --distributed-executor-backend ray"

  # Use minimax-m2-awq which explicitly has --distributed-executor-backend ray
  if [[ ! -f "$PROJECT_DIR/recipes/minimax-m2-awq.yaml" ]]; then
    log_skip "minimax-m2-awq.yaml not found"
    return
  fi

  local output
  output=$("$PROJECT_DIR/run-recipe.py" minimax-m2-awq --dry-run -n "192.168.1.1,192.168.1.2" 2>&1)

  # Check that --distributed-executor-backend IS in the output for cluster mode
  if grep -qF -- "--distributed-executor-backend ray" <<<"$output"; then
    log_pass "Cluster mode correctly preserves --distributed-executor-backend ray"
  else
    log_fail "Cluster mode did not preserve --distributed-executor-backend"
    log_verbose "$output"
  fi
}

# Test: CLI overrides work (--port)
test_cli_override_port() {
  log_test "CLI override --port works"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo --port 9999 2>&1)

  if grep -qF -- "--port 9999" <<<"$output"; then
    log_pass "--port override correctly applied"
  else
    log_fail "--port override not found in output"
    log_verbose "$output"
  fi
}

# Test: launch-cluster.sh --help works
test_launch_cluster_help() {
  log_test "launch-cluster.sh --help"

  local output
  # The exit status of --help is deliberately ignored; only the text matters.
  output=$("$PROJECT_DIR/launch-cluster.sh" --help 2>&1 || true)

  if grep -q "Usage:" <<<"$output"; then
    log_pass "--help shows usage information"
  else
    log_fail "--help did not show usage"
    log_verbose "$output"
  fi
}

# Test: launch-cluster.sh references examples/ not profiles/
# Records exactly one verdict: fail when examples/ is missing or profiles/ is
# still referenced, pass otherwise.
test_launch_cluster_examples_path() {
  log_test "launch-cluster.sh references examples/ directory"

  if ! grep -q "examples/" "$PROJECT_DIR/launch-cluster.sh"; then
    log_fail "launch-cluster.sh does not reference examples/"
  elif grep -q "profiles/" "$PROJECT_DIR/launch-cluster.sh"; then
    log_fail "launch-cluster.sh still references profiles/"
  else
    log_pass "launch-cluster.sh references examples/"
  fi
}

# Test: Unsupported recipe version shows warning
test_unsupported_recipe_version() {
  log_test "Unsupported recipe_version shows warning"

  # Inline recipe with an unsupported version
  local recipe_yaml='recipe_version: "999"
name: Test Recipe
container: test-container
command: echo "test"'
  local output
  output=$(_dry_run_inline_recipe "$recipe_yaml" --solo)

  if grep -q "Warning.*schema version" <<<"$output"; then
    log_pass "Unsupported recipe_version shows warning"
  else
    log_fail "No warning for unsupported recipe_version"
    log_verbose "$output"
  fi
}

# Test: Missing recipe_version fails
test_missing_recipe_version_fails() {
  log_test "Missing recipe_version field fails"

  # Inline recipe without recipe_version
  local recipe_yaml='name: Test Recipe
container: test-container
command: echo "test"'
  local output
  output=$(_dry_run_inline_recipe "$recipe_yaml" --solo)

  if grep -q "Error.*recipe_version" <<<"$output"; then
    log_pass "Missing recipe_version correctly fails"
  else
    log_fail "Missing recipe_version did not fail as expected"
    log_verbose "$output"
  fi
}

# Test: cluster_only recipe fails in solo mode
test_cluster_only_fails_solo() {
  log_test "cluster_only recipe fails in solo mode"

  local recipe_yaml='recipe_version: "1"
name: Cluster Only Test
container: test-container
cluster_only: true
command: echo "test"'
  local output
  output=$(_dry_run_inline_recipe "$recipe_yaml" --solo)

  if grep -q "requires cluster mode" <<<"$output"; then
    log_pass "cluster_only recipe correctly fails in solo mode"
  else
    log_fail "cluster_only recipe did not fail in solo mode"
    log_verbose "$output"
  fi
}

# Test: solo_only recipe fails in cluster mode
test_solo_only_fails_cluster() {
  log_test "solo_only recipe fails in cluster mode"

  local recipe_yaml='recipe_version: "1"
name: Solo Only Test
container: test-container
solo_only: true
command: echo "test"'
  local output
  output=$(_dry_run_inline_recipe "$recipe_yaml" -n "10.0.0.1,10.0.0.2")

  if grep -q "requires solo mode" <<<"$output"; then
    log_pass "solo_only recipe correctly fails in cluster mode"
  else
    log_fail "solo_only recipe did not fail in cluster mode"
    log_verbose "$output"
  fi
}

# Test: solo_only recipe succeeds in solo mode
test_solo_only_allows_solo() {
  log_test "solo_only recipe succeeds in solo mode"

  local recipe_yaml='recipe_version: "1"
name: Solo Only Test
container: test-container
solo_only: true
command: echo "test"'
  local output
  output=$(_dry_run_inline_recipe "$recipe_yaml" --solo)

  if grep -q "Error: Recipe" <<<"$output"; then
    log_fail "solo_only recipe failed in solo mode"
    log_verbose "$output"
  else
    log_pass "solo_only recipe runs in solo mode"
  fi
}

# Test: cluster_only and solo_only both true fails in any mode
test_conflicting_mode_flags_fail() {
  log_test "cluster_only and solo_only both true fails"

  local recipe_yaml='recipe_version: "1"
name: Conflicting Mode Test
container: test-container
cluster_only: true
solo_only: true
command: echo "test"'
  local output_solo output_cluster
  output_solo=$(_dry_run_inline_recipe "$recipe_yaml" --solo)
  output_cluster=$(_dry_run_inline_recipe "$recipe_yaml" -n "10.0.0.1,10.0.0.2")

  if grep -q "requires cluster mode" <<<"$output_solo" \
    && grep -q "requires solo mode" <<<"$output_cluster"; then
    log_pass "Conflicting flags correctly fail in both modes"
  else
    log_fail "Conflicting flags did not fail as expected"
    log_verbose "solo: $output_solo"
    log_verbose "cluster: $output_cluster"
  fi
}

# ==============================================================================
# Launch-cluster.sh Command Line Verification Tests
# ==============================================================================
# These tests verify that the dry-run output contains the expected
# launch-cluster.sh command line arguments matching the recipe configuration.

# Helper: Extract launch-cluster command from dry-run output
# Prints the (up to five) lines that follow the
# "launch-cluster.sh is called with:" marker in $1, joined with spaces.
extract_launch_cmd() {
  local marker="launch-cluster.sh is called with:"
  # -F: the marker contains '.', which must match literally.
  grep -F -A5 -- "$marker" <<<"$1" \
    | grep -vF -- "$marker" \
    | tr '\n' ' '
}

# Helper: succeed when $2 occurs in command line $1 as a complete
# whitespace-delimited token (so "-d" does not match inside "--nccl-debug").
_launch_cmd_has_flag() {
  local padded=" ${1//$'\n'/ } "
  [[ "$padded" == *[[:space:]]"$2"[[:space:]]* ]]
}

# Helper: succeed when $1 is empty or whitespace only.
_launch_cmd_is_blank() {
  [[ -z "${1//[[:space:]]/}" ]]
}

# Test: Solo mode generates --solo flag in launch-cluster command
test_launch_cmd_solo_flag() {
  log_test "Launch command includes --solo flag in solo mode"

  local recipe_name output launch_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$(run_recipe_dry_run "$recipe_name" "solo")
  launch_cmd=$(extract_launch_cmd "$output")

  if grep -qF -- "--solo" <<<"$launch_cmd"; then
    log_pass "Launch command includes --solo flag"
  else
    log_fail "Launch command missing --solo flag"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: Cluster mode generates -n flag with nodes
test_launch_cmd_nodes_flag() {
  log_test "Launch command includes -n flag with nodes in cluster mode"

  # Same skip rule as the other tests that hard-code this recipe.
  if [[ ! -f "$PROJECT_DIR/recipes/minimax-m2-awq.yaml" ]]; then
    log_skip "minimax-m2-awq.yaml not found"
    return
  fi

  local output launch_cmd
  output=$("$PROJECT_DIR/run-recipe.py" minimax-m2-awq --dry-run -n "10.0.0.1,10.0.0.2" 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  # -F: the dots in the IP addresses must match literally.
  if grep -qF -- "-n 10.0.0.1,10.0.0.2" <<<"$launch_cmd"; then
    log_pass "Launch command includes -n with correct nodes"
  else
    log_fail "Launch command missing or incorrect -n flag"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: Container image from recipe is passed to launch-cluster
test_launch_cmd_container_image() {
  log_test "Launch command includes correct container image (-t)"

  # Use openai-gpt-oss-120b which has a specific container name
  if [[ ! -f "$PROJECT_DIR/recipes/openai-gpt-oss-120b.yaml" ]]; then
    log_skip "openai-gpt-oss-120b.yaml not found"
    return
  fi

  local output launch_cmd
  output=$("$PROJECT_DIR/run-recipe.py" openai-gpt-oss-120b --dry-run --solo 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  # Check the container is vllm-node-mxfp4 (from the recipe)
  if grep -qF -- "-t vllm-node-mxfp4" <<<"$launch_cmd"; then
    log_pass "Launch command includes correct container image"
  else
    log_fail "Launch command has wrong container image"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: Mods from recipe are passed as --apply-mod
test_launch_cmd_mods() {
  log_test "Launch command includes --apply-mod for recipe mods"

  local recipe_name mode output launch_cmd
  recipe_name=$(find_recipe_with_mods)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No recipes with mods found"
    return
  fi

  mode=$(get_recipe_mode "$recipe_name")
  output=$(run_recipe_dry_run "$recipe_name" "$mode")
  launch_cmd=$(extract_launch_cmd "$output")

  if grep -qF -- "--apply-mod" <<<"$launch_cmd"; then
    log_pass "Launch command includes --apply-mod for mods"
  else
    log_fail "Launch command missing --apply-mod"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: Daemon mode flag is passed through
test_launch_cmd_daemon_flag() {
  log_test "Launch command includes -d flag in daemon mode"

  local recipe_name output launch_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -d 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  # Match -d as a whole token: a substring search would also accept
  # "--nccl-debug" or an image name containing "-d".
  if _launch_cmd_has_flag "$launch_cmd" "-d"; then
    log_pass "Launch command includes -d flag"
  else
    log_fail "Launch command missing -d flag"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: NCCL debug level is passed through
test_launch_cmd_nccl_debug() {
  log_test "Launch command includes --nccl-debug when specified"

  local recipe_name output launch_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo --nccl-debug INFO 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  if grep -qF -- "--nccl-debug INFO" <<<"$launch_cmd"; then
    log_pass "Launch command includes --nccl-debug INFO"
  else
    log_fail "Launch command missing --nccl-debug"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: --launch-script is always included
test_launch_cmd_launch_script() {
  log_test "Launch command includes --launch-script"

  local recipe_name output launch_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$(run_recipe_dry_run "$recipe_name" "solo")
  launch_cmd=$(extract_launch_cmd "$output")

  if grep -qF -- "--launch-script" <<<"$launch_cmd"; then
    log_pass "Launch command includes --launch-script"
  else
    log_fail "Launch command missing --launch-script"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: Container override (-t CLI) takes precedence
test_launch_cmd_container_override() {
  log_test "CLI container override (-t) takes precedence"

  local recipe_name output launch_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -t my-custom-image 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  if grep -qF -- "-t my-custom-image" <<<"$launch_cmd"; then
    log_pass "Container override correctly applied"
  else
    log_fail "Container override not applied"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: Cluster mode does NOT include --solo flag
test_launch_cmd_no_solo_in_cluster() {
  log_test "Launch command does NOT include --solo in cluster mode"

  if [[ ! -f "$PROJECT_DIR/recipes/minimax-m2-awq.yaml" ]]; then
    log_skip "minimax-m2-awq.yaml not found"
    return
  fi

  local output launch_cmd
  output=$("$PROJECT_DIR/run-recipe.py" minimax-m2-awq --dry-run -n "10.0.0.1,10.0.0.2" 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  # A missing launch command must not count as "flag correctly omitted".
  if _launch_cmd_is_blank "$launch_cmd"; then
    log_fail "Launch command not found in dry-run output"
    log_verbose "$output"
  elif grep -qF -- "--solo" <<<"$launch_cmd"; then
    log_fail "Cluster mode incorrectly includes --solo flag"
    log_verbose "Launch cmd: $launch_cmd"
  else
    log_pass "Cluster mode correctly omits --solo flag"
  fi
}

# Test: -e / --env passthrough to launch-cluster.sh
test_launch_cmd_env_passthrough() {
  log_test "Launch command includes -e env vars"

  local recipe_name output launch_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -e HF_TOKEN=test123 -e MY_VAR=hello 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  if grep -qF -- "-e HF_TOKEN=test123" <<<"$launch_cmd" \
    && grep -qF -- "-e MY_VAR=hello" <<<"$launch_cmd"; then
    log_pass "Launch command includes -e env vars"
  else
    log_fail "-e env vars not found in launch command"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Test: no -e flags when none specified
test_launch_cmd_no_env_by_default() {
  log_test "Launch command omits -e when no env vars specified"

  local recipe_name output launch_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo 2>&1)
  launch_cmd=$(extract_launch_cmd "$output")

  # A missing launch command must not count as "flag correctly omitted".
  if _launch_cmd_is_blank "$launch_cmd"; then
    log_fail "Launch command not found in dry-run output"
    log_verbose "$output"
  elif grep -qF -- " -e " <<<"$launch_cmd"; then
    log_fail "Unexpected -e flag in launch command"
    log_verbose "Launch cmd: $launch_cmd"
  else
    log_pass "Launch command correctly omits -e when none specified"
  fi
}

# ==============================================================================
# README Documentation Verification Tests
# ==============================================================================
# These tests verify that recipe dry-run output matches the expected commands
# documented in README.md.
# Expected values are defined in expected_commands.sh

# Helper: Extract the generated launch script from dry-run output
# Prints the lines between "=== Generated Launch Script ===" and
# "=== What would be executed ===", dropping marker lines, comment lines
# and empty lines.
extract_vllm_command() {
  sed -n '/=== Generated Launch Script ===/,/=== What would be executed ===/p' <<<"$1" \
    | grep -v "===" \
    | grep -v "^#" \
    | grep -v "^$"
}

# Helper: Verify a recipe contains all expected arguments
# Arguments: $1 - recipe name
#            $2 - model name expected in the generated vLLM command
#            $3 - container expected after -t in the launch command
#            remaining - expected "flag [value]" arguments
# Records one pass, fail or skip (skip when the recipe file does not exist).
verify_recipe_args() {
  local recipe_name="$1"
  local expected_model="$2"
  local expected_container="$3"
  shift 3
  local expected_args=("$@")

  log_test "README match: $recipe_name"

  if [[ ! -f "$PROJECT_DIR/recipes/${recipe_name}.yaml" ]]; then
    log_skip "${recipe_name}.yaml not found"
    return
  fi

  local mode output vllm_cmd launch_cmd
  mode=$(get_recipe_mode "$recipe_name")
  output=$(run_recipe_dry_run "$recipe_name" "$mode")
  vllm_cmd=$(extract_vllm_command "$output")
  launch_cmd=$(extract_launch_cmd "$output")

  local missing_args=()
  local arg flag value missing

  # Check model name. Fixed-string match: model names contain regex
  # metacharacters such as '.'. An empty expectation would match anything,
  # so it is reported as missing instead.
  if [[ -z "$expected_model" ]] || ! grep -qF -- "$expected_model" <<<"$vllm_cmd"; then
    missing_args+=("model: $expected_model")
  fi

  # Check container
  if [[ -z "$expected_container" ]] || ! grep -qF -- "-t $expected_container" <<<"$launch_cmd"; then
    missing_args+=("container: $expected_container")
  fi

  # Check each expected argument. Flag and value are matched separately so
  # small formatting differences between them do not matter.
  for arg in "${expected_args[@]}"; do
    # First word is the flag, the remainder (possibly empty) is the value.
    read -r flag value <<<"$arg"

    # grep -F with -- so a leading "--" is not parsed as a grep option.
    if ! grep -qF -- "$flag" <<<"$vllm_cmd"; then
      missing_args+=("$arg")
    elif [[ -n "$value" && "$value" != "$flag" ]]; then
      # Check if value is present (might be on next line due to formatting)
      if ! grep -qF -- "$value" <<<"$vllm_cmd"; then
        missing_args+=("$arg (flag present, value mismatch)")
      fi
    fi
  done

  if (( ${#missing_args[@]} == 0 )); then
    log_pass "README match: $recipe_name - all expected arguments present"
  else
    log_fail "README match: $recipe_name - missing arguments"
    for missing in "${missing_args[@]}"; do
      log_verbose "  Missing: $missing"
    done
    log_verbose "  vLLM command: $vllm_cmd"
  fi
}

# Test: glm-4.7-flash-awq matches README documentation
test_readme_glm_flash_awq() {
  verify_recipe_args "glm-4.7-flash-awq" \
    "$GLM_FLASH_AWQ_MODEL" \
    "$GLM_FLASH_AWQ_CONTAINER" \
    "${GLM_FLASH_AWQ_ARGS[@]}"
}

# Test: openai-gpt-oss-120b matches README documentation
test_readme_gpt_oss() {
  verify_recipe_args "openai-gpt-oss-120b" \
    "$GPT_OSS_MODEL" \
    "$GPT_OSS_CONTAINER" \
    "${GPT_OSS_ARGS[@]}"
}

# Test: minimax-m2-awq matches expected configuration
test_readme_minimax() {
  verify_recipe_args "minimax-m2-awq" \
    "$MINIMAX_MODEL" \
    "$MINIMAX_CONTAINER" \
    "${MINIMAX_ARGS[@]}"
}

# Test: glm-4.7-flash-awq includes correct mod
test_readme_glm_flash_mod() {
  log_test "README match: glm-4.7-flash-awq mod path"

  if [[ ! -f "$PROJECT_DIR/recipes/glm-4.7-flash-awq.yaml" ]]; then
    log_skip "glm-4.7-flash-awq.yaml not found"
    return
  fi

  local mode output launch_cmd
  mode=$(get_recipe_mode "glm-4.7-flash-awq")
  output=$(run_recipe_dry_run "glm-4.7-flash-awq" "$mode")
  launch_cmd=$(extract_launch_cmd "$output")

  # An empty expectation would match any output, so treat it as a failure.
  if [[ -n "$GLM_FLASH_AWQ_MOD" ]] && grep -qF -- "$GLM_FLASH_AWQ_MOD" <<<"$launch_cmd"; then
    log_pass "README match: glm-4.7-flash-awq has correct mod path"
  else
    log_fail "README match: glm-4.7-flash-awq missing expected mod: $GLM_FLASH_AWQ_MOD"
    log_verbose "Launch cmd: $launch_cmd"
  fi
}

# Helper: Verify cluster mode specific arguments
# Arguments: $1 - recipe name
#            $2 - expected tensor-parallel size
#            remaining - strings that must appear in the vLLM command
verify_cluster_args() {
  local recipe_name="$1"
  local expected_tp="$2"
  shift 2
  local expected_args=("$@")

  log_test "README match (cluster): $recipe_name"

  if [[ ! -f "$PROJECT_DIR/recipes/${recipe_name}.yaml" ]]; then
    log_skip "${recipe_name}.yaml not found"
    return
  fi

  # Use fake nodes for cluster mode
  local output vllm_cmd
  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run -n "10.0.0.1,10.0.0.2" 2>&1)
  vllm_cmd=$(extract_vllm_command "$output")

  local missing_args=()
  local arg missing

  # Check tensor parallel. The trailing group rejects a longer number, so an
  # expected size of 2 is not satisfied by "-tp 20".
  if ! grep -qE -- "(--tensor-parallel-size|-tp) ${expected_tp}([^0-9]|\$)" <<<"$vllm_cmd"; then
    missing_args+=("tensor_parallel: $expected_tp")
  fi

  # Check cluster-specific arguments
  for arg in "${expected_args[@]}"; do
    if ! grep -qF -- "$arg" <<<"$vllm_cmd"; then
      missing_args+=("$arg")
    fi
  done

  if (( ${#missing_args[@]} == 0 )); then
    log_pass "README match (cluster): $recipe_name - cluster args correct"
  else
    log_fail "README match (cluster): $recipe_name - missing cluster arguments"
    for missing in "${missing_args[@]}"; do
      log_verbose "  Missing: $missing"
    done
    log_verbose "  vLLM command: $vllm_cmd"
  fi
}

# Test: openai-gpt-oss-120b cluster mode has correct tensor_parallel and ray backend
test_readme_gpt_oss_cluster() {
  verify_cluster_args "openai-gpt-oss-120b" \
    "$GPT_OSS_CLUSTER_TP" \
    "${GPT_OSS_CLUSTER_ARGS[@]}"
}

# Test: minimax-m2-awq cluster mode has correct tensor_parallel and ray backend
test_readme_minimax_cluster() {
  verify_cluster_args "minimax-m2-awq" \
    "$MINIMAX_CLUSTER_TP" \
    "${MINIMAX_CLUSTER_ARGS[@]}"
}

# Test: glm-4.7-flash-awq cluster mode stays at tp=1 (single GPU model)
test_readme_glm_flash_cluster() {
  log_test "README match (cluster): glm-4.7-flash-awq stays tp=1"

  if [[ ! -f "$PROJECT_DIR/recipes/glm-4.7-flash-awq.yaml" ]]; then
    log_skip "glm-4.7-flash-awq.yaml not found"
    return
  fi

  # Even in cluster mode, this model uses tp=1
  local output vllm_cmd
  output=$("$PROJECT_DIR/run-recipe.py" glm-4.7-flash-awq --dry-run -n "10.0.0.1,10.0.0.2" 2>&1)
  vllm_cmd=$(extract_vllm_command "$output")

  # "1" must not be the start of a longer number such as 16.
  if grep -qE -- '(--tensor-parallel-size|-tp) 1([^0-9]|$)' <<<"$vllm_cmd"; then
    log_pass "README match (cluster): glm-4.7-flash-awq correctly keeps tp=1"
  else
    log_fail "README match (cluster): glm-4.7-flash-awq should have tp=1"
    log_verbose "  vLLM command: $vllm_cmd"
  fi
}

# ==============================================================================
# Extra vLLM Arguments Tests (-- pass-through)
# Tests for GitHub issue #30: ability to pass arbitrary vLLM arguments
# ==============================================================================

# Test: Basic extra args pass-through with --load-format
test_extra_args_load_format() {
  log_test "Extra args: --load-format safetensors"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -- --load-format safetensors 2>&1)

  if grep -qF -- "--load-format safetensors" <<<"$output"; then
    log_pass "Extra args: --load-format correctly appended"
  else
    log_fail "Extra args: --load-format not found in output"
    log_verbose "$output"
  fi
}

# Test: Extra args with --served-model-name
test_extra_args_served_model_name() {
  log_test "Extra args: --served-model-name custom-api-name"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -- --served-model-name custom-api-name 2>&1)

  if grep -qF -- "--served-model-name custom-api-name" <<<"$output"; then
    log_pass "Extra args: --served-model-name correctly appended"
  else
    log_fail "Extra args: --served-model-name not found in output"
    log_verbose "$output"
  fi
}

# Test: Extra args with equals syntax (-cc.cudagraph_mode=PIECEWISE)
test_extra_args_equals_syntax() {
  log_test "Extra args: -cc.cudagraph_mode=PIECEWISE (equals syntax)"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -- -cc.cudagraph_mode=PIECEWISE 2>&1)

  if grep -qF -- "-cc.cudagraph_mode=PIECEWISE" <<<"$output"; then
    log_pass "Extra args: equals syntax correctly appended"
  else
    log_fail "Extra args: equals syntax not found in output"
    log_verbose "$output"
  fi
}

# Test: Multiple extra args
test_extra_args_multiple() {
  log_test "Extra args: multiple arguments"

  local recipe_name output needle
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -- --load-format auto --enforce-eager --seed 42 2>&1)

  local all_found=true
  for needle in "--load-format auto" "--enforce-eager" "--seed 42"; do
    if ! grep -qF -- "$needle" <<<"$output"; then
      all_found=false
    fi
  done

  if [[ "$all_found" == "true" ]]; then
    log_pass "Extra args: multiple arguments correctly appended"
  else
    log_fail "Extra args: not all arguments found in output"
    log_verbose "$output"
  fi
}

# Test: Empty extra args (just -- with nothing after)
test_extra_args_empty() {
  log_test "Extra args: empty (just --)"

  local recipe_name output exit_code
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  # Should not error with just --
  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -- 2>&1)
  exit_code=$?

  if (( exit_code == 0 )) && grep -q "vllm serve" <<<"$output"; then
    log_pass "Extra args: empty -- handled correctly"
  else
    log_fail "Extra args: empty -- caused error"
    log_verbose "$output"
  fi
}

# Test: Duplicate detection warning for --port
test_extra_args_duplicate_port_warning() {
  log_test "Extra args: duplicate --port shows warning"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  # Pass --port via shorthand AND via extra args - should warn
  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo --port 8080 -- --port 9999 2>&1)

  if grep -qiE -- 'warning.*--port|duplicate.*port' <<<"$output"; then
    log_pass "Extra args: duplicate --port warning shown"
  else
    log_fail "Extra args: no warning for duplicate --port"
    log_verbose "$output"
  fi
}

# Test: Duplicate detection warning for --gpu-memory-utilization
test_extra_args_duplicate_gpu_mem_warning() {
  log_test "Extra args: duplicate --gpu-memory-utilization shows warning"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  # Pass --gpu-mem via shorthand AND via extra args - should warn
  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo --gpu-mem 0.8 -- --gpu-memory-utilization 0.95 2>&1)

  if grep -qiE -- 'warning.*gpu-memory-utilization|duplicate.*gpu' <<<"$output"; then
    log_pass "Extra args: duplicate --gpu-memory-utilization warning shown"
  else
    log_fail "Extra args: no warning for duplicate --gpu-memory-utilization"
    log_verbose "$output"
  fi
}

# Test: Duplicate detection warning for --tensor-parallel-size
test_extra_args_duplicate_tp_warning() {
  log_test "Extra args: duplicate --tensor-parallel-size shows warning"

  local recipe_name output
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  # Pass --tp via shorthand AND via extra args - should warn
  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo --tp 2 -- --tensor-parallel-size 4 2>&1)

  if grep -qiE -- 'warning.*tensor-parallel|duplicate.*tensor' <<<"$output"; then
    log_pass "Extra args: duplicate --tensor-parallel-size warning shown"
  else
    log_fail "Extra args: no warning for duplicate --tensor-parallel-size"
    log_verbose "$output"
  fi
}

# Test: Extra args appear after template-substituted command
test_extra_args_ordering() {
  log_test "Extra args: appear at end of vllm command"

  local recipe_name output vllm_cmd flat_cmd
  recipe_name=$(find_solo_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No solo-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -- --my-custom-arg value 2>&1)
  vllm_cmd=$(extract_vllm_command "$output")

  # The custom arg should appear and be at the end of the command
  if grep -qF -- "--my-custom-arg value" <<<"$vllm_cmd"; then
    # Join lines first: the command may be wrapped, and grep matches per line.
    flat_cmd="${vllm_cmd//$'\n'/ }"
    # Check it's near the end (after common args like --port). Under -E the
    # alternation operator is a bare '|'; '\|' would be a literal pipe.
    if grep -qE -- '--(port|host).*--my-custom-arg' <<<"$flat_cmd"; then
      log_pass "Extra args: correctly ordered at end"
    else
      # It's there, just accept it
      log_pass "Extra args: present in command"
    fi
  else
    log_fail "Extra args: --my-custom-arg not found in vllm command"
    log_verbose "$vllm_cmd"
  fi
}

# Test: Extra args work in cluster mode
test_extra_args_cluster_mode() {
  log_test "Extra args: work in cluster mode"

  local recipe_name output
  recipe_name=$(find_cluster_recipe)
  if [[ -z "$recipe_name" ]]; then
    log_skip "No cluster-capable recipes found"
    return
  fi

  output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run -n "10.0.0.1,10.0.0.2" -- --load-format auto 2>&1)

  if grep -qF -- "--load-format auto" <<<"$output"; then
    log_pass "Extra args: work in cluster mode"
  else
    log_fail "Extra args: not found in cluster mode output"
    log_verbose "$output"
  fi
}

# Run all tests
# Runs every test group in order, prints the pass/fail/skip summary and exits
# with status 1 when at least one test failed, 0 otherwise.
main() {
  echo "=============================================="
  echo "  run-recipe.py Integration Tests"
  echo "=============================================="
  echo ""

  if ! cd "$PROJECT_DIR"; then
    echo "Cannot change directory to $PROJECT_DIR" >&2
    exit 1
  fi

  check_prerequisites
  echo ""

  # File existence tests
  test_run_recipe_exists
  test_launch_cluster_exists
  echo ""

  # Basic functionality tests
  test_list_recipes
  test_recipe_version_required
  test_all_recipes_load
  echo ""

  # Dry-run tests
  test_dry_run_generates_script
  test_solo_mode_tp1
  test_solo_mode_removes_ray
  test_cluster_mode_keeps_ray
  test_cli_override_port
  echo ""

  # launch-cluster.sh command line verification tests
  echo "--- Launch Command Verification ---"
  test_launch_cmd_solo_flag
  test_launch_cmd_nodes_flag
  test_launch_cmd_container_image
  test_launch_cmd_mods
  test_launch_cmd_daemon_flag
  test_launch_cmd_nccl_debug
  test_launch_cmd_launch_script
  test_launch_cmd_container_override
  test_launch_cmd_no_solo_in_cluster
  test_launch_cmd_env_passthrough
  test_launch_cmd_no_env_by_default
  echo ""

  # README documentation verification tests
  echo "--- README Documentation Verification (Solo Mode) ---"
  test_readme_glm_flash_awq
  test_readme_gpt_oss
  test_readme_minimax
  test_readme_glm_flash_mod
  echo ""

  # Cluster mode documentation verification tests
  echo "--- README Documentation Verification (Cluster Mode) ---"
  test_readme_gpt_oss_cluster
  test_readme_minimax_cluster
  test_readme_glm_flash_cluster
  echo ""

  # launch-cluster.sh tests
  test_launch_cluster_help
  test_launch_cluster_examples_path
  echo ""

  # Extra vLLM arguments tests (-- pass-through)
  echo "--- Extra vLLM Arguments (-- pass-through) ---"
  test_extra_args_load_format
  test_extra_args_served_model_name
  test_extra_args_equals_syntax
  test_extra_args_multiple
  test_extra_args_empty
  test_extra_args_duplicate_port_warning
  test_extra_args_duplicate_gpu_mem_warning
  test_extra_args_duplicate_tp_warning
  test_extra_args_ordering
  test_extra_args_cluster_mode
  echo ""

  # Validation tests
  test_unsupported_recipe_version
  test_missing_recipe_version_fails
  test_cluster_only_fails_solo
  test_solo_only_fails_cluster
  test_solo_only_allows_solo
  test_conflicting_mode_flags_fail
  echo ""

  # Summary
  echo "=============================================="
  echo "  Test Summary"
  echo "=============================================="
  echo -e "  ${GREEN}Passed:${NC}  $TESTS_PASSED"
  echo -e "  ${RED}Failed:${NC}  $TESTS_FAILED"
  echo -e "  ${YELLOW}Skipped:${NC} $TESTS_SKIPPED"
  echo "=============================================="

  if [[ $TESTS_FAILED -gt 0 ]]; then
    exit 1
  fi
  exit 0
}

main "$@"