Merge pull request #85 from saladinomario/feat/recipe-env-passthrough
Add -e/--env passthrough to run-recipe.py
This commit is contained in:
@@ -819,6 +819,7 @@ Examples:
|
|||||||
launch_group.add_argument("-d", "--daemon", action="store_true", help="Run in daemon mode")
|
launch_group.add_argument("-d", "--daemon", action="store_true", help="Run in daemon mode")
|
||||||
launch_group.add_argument("-t", "--container", dest="container_override", help="Override container image from recipe")
|
launch_group.add_argument("-t", "--container", dest="container_override", help="Override container image from recipe")
|
||||||
launch_group.add_argument("--nccl-debug", choices=["VERSION", "WARN", "INFO", "TRACE"], help="NCCL debug level")
|
launch_group.add_argument("--nccl-debug", choices=["VERSION", "WARN", "INFO", "TRACE"], help="NCCL debug level")
|
||||||
|
launch_group.add_argument("-e", "--env", action="append", dest="env_vars", default=[], metavar="VAR=VALUE", help="Environment variable to pass to container (e.g. -e HF_TOKEN=xxx). Can be used multiple times.")
|
||||||
|
|
||||||
# Cluster discovery options
|
# Cluster discovery options
|
||||||
discover_group = parser.add_argument_group("Cluster discovery")
|
discover_group = parser.add_argument_group("Cluster discovery")
|
||||||
@@ -1119,6 +1120,8 @@ Examples:
|
|||||||
cmd_parts.extend(["-n", ",".join(nodes)])
|
cmd_parts.extend(["-n", ",".join(nodes)])
|
||||||
if args.nccl_debug:
|
if args.nccl_debug:
|
||||||
cmd_parts.extend(["--nccl-debug", args.nccl_debug])
|
cmd_parts.extend(["--nccl-debug", args.nccl_debug])
|
||||||
|
for env_var in args.env_vars:
|
||||||
|
cmd_parts.extend(["-e", env_var])
|
||||||
cmd_parts.extend(["\\", "\n --launch-script", "/tmp/tmpXXXXXX.sh"])
|
cmd_parts.extend(["\\", "\n --launch-script", "/tmp/tmpXXXXXX.sh"])
|
||||||
print(" ".join(cmd_parts))
|
print(" ".join(cmd_parts))
|
||||||
print()
|
print()
|
||||||
@@ -1160,6 +1163,9 @@ Examples:
|
|||||||
if args.nccl_debug:
|
if args.nccl_debug:
|
||||||
cmd.extend(["--nccl-debug", args.nccl_debug])
|
cmd.extend(["--nccl-debug", args.nccl_debug])
|
||||||
|
|
||||||
|
for env_var in args.env_vars:
|
||||||
|
cmd.extend(["-e", env_var])
|
||||||
|
|
||||||
# Add launch script
|
# Add launch script
|
||||||
cmd.extend(["--launch-script", temp_script])
|
cmd.extend(["--launch-script", temp_script])
|
||||||
|
|
||||||
|
|||||||
@@ -728,6 +728,48 @@ test_launch_cmd_no_solo_in_cluster() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Test: -e / --env passthrough to launch-cluster.sh
#
# Runs run-recipe.py in --dry-run --solo mode with two "-e VAR=VALUE" options
# and verifies that both show up as "-e VAR=VALUE" in the extracted launch
# command. The test is skipped when find_solo_recipe yields no recipe name.
test_launch_cmd_env_passthrough() {
    log_test "Launch command includes -e env vars"

    recipe_name=$(find_solo_recipe)
    if [[ -z "$recipe_name" ]]; then
        log_skip "No solo-capable recipes found"
        return
    fi

    output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo -e HF_TOKEN=test123 -e MY_VAR=hello 2>&1)
    launch_cmd=$(extract_launch_cmd "$output")

    # Match literally (-F) and end option parsing with "--" so the leading
    # dash of the needle is not taken as a grep option. This avoids the
    # "\-" escape, which is undefined in POSIX basic regular expressions and
    # triggers a "stray \" warning in recent GNU grep releases.
    if grep -qF -- "-e HF_TOKEN=test123" <<<"$launch_cmd" \
        && grep -qF -- "-e MY_VAR=hello" <<<"$launch_cmd"; then
        log_pass "Launch command includes -e env vars"
    else
        log_fail "-e env vars not found in launch command"
        log_verbose "Launch cmd: $launch_cmd"
    fi
}
|
||||||
|
|
||||||
|
# Test: no -e flags when none specified
#
# Runs run-recipe.py in --dry-run --solo mode without any -e/--env option and
# verifies that the extracted launch command contains no "-e" flag. The test
# is skipped when find_solo_recipe yields no recipe name.
test_launch_cmd_no_env_by_default() {
    log_test "Launch command omits -e when no env vars specified"

    recipe_name=$(find_solo_recipe)
    if [[ -z "$recipe_name" ]]; then
        log_skip "No solo-capable recipes found"
        return
    fi

    output=$("$PROJECT_DIR/run-recipe.py" "$recipe_name" --dry-run --solo 2>&1)
    launch_cmd=$(extract_launch_cmd "$output")

    # An empty launch command would trivially "contain no -e flag"; treat it
    # as a failure so a broken dry-run cannot make this negative test pass.
    if [[ -z "$launch_cmd" ]]; then
        log_fail "No launch command found in dry-run output"
        log_verbose "Output: $output"
        return
    fi

    # Look for -e as a standalone word, including at the start or end of a
    # line, rather than only when surrounded by spaces on both sides.
    if grep -qE '(^|[[:space:]])-e([[:space:]]|$)' <<<"$launch_cmd"; then
        log_fail "Unexpected -e flag in launch command"
        log_verbose "Launch cmd: $launch_cmd"
    else
        log_pass "Launch command correctly omits -e when none specified"
    fi
}
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# README Documentation Verification Tests
|
# README Documentation Verification Tests
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
@@ -1203,6 +1245,8 @@ main() {
|
|||||||
test_launch_cmd_launch_script
|
test_launch_cmd_launch_script
|
||||||
test_launch_cmd_container_override
|
test_launch_cmd_container_override
|
||||||
test_launch_cmd_no_solo_in_cluster
|
test_launch_cmd_no_solo_in_cluster
|
||||||
|
test_launch_cmd_env_passthrough
|
||||||
|
test_launch_cmd_no_env_by_default
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# README documentation verification tests
|
# README documentation verification tests
|
||||||
|
|||||||
Reference in New Issue
Block a user