From 8b7c02aa252dc6e3215ea2ee46712248146d07bb Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Wed, 25 Mar 2026 22:47:02 -0700
Subject: [PATCH] add .env support to build-and-copy.sh

---
Review notes (this section is dropped by `git am`):
- Reflowed from a whitespace-collapsed copy of the patch. Indentation and
  blank context lines are reconstructed and must be checked against the
  target tree before applying.
- The post-image blob ids on the `index` lines predate the review changes.
- load_env_if_exists: accepts "export KEY=VALUE", tolerates CRLF, splits on
  the first '=', strips only matching quotes, skips invalid keys.
- build-and-copy.sh: trims whitespace around COPY_HOSTS entries taken from
  .env and rejects `--config` without a value.

 .env.example      |  4 +++
 autodiscover.sh   | 76 +++++++++++++++++++++++++++++++++++++++++++++
 build-and-copy.sh | 75 +++++++++++++++++++++++++++++++++++---------
 3 files changed, 140 insertions(+), 15 deletions(-)

diff --git a/.env.example b/.env.example
index bc6f2dc..d1cea6c 100644
--- a/.env.example
+++ b/.env.example
@@ -29,6 +29,10 @@ CONTAINER_NCCL_DEBUG="INFO"
 CONTAINER_HF_TOKEN="your_huggingface_token_here"
 CONTAINER_NCCL_IGNORE_CPU_AFFINITY="1"

+# COPY_HOSTS: Comma-separated list of hosts for build-and-copy.sh (optional)
+# Used by build-and-copy.sh to distribute images across cluster
+COPY_HOSTS="192.168.177.12"
+
 # Additional container environment variables
 # CONTAINER_MAX_JOBS="16"
 # CONTAINER_CUDA_VISIBLE_DEVICES="0,1"
diff --git a/autodiscover.sh b/autodiscover.sh
index 43e622f..54ee4e0 100644
--- a/autodiscover.sh
+++ b/autodiscover.sh
@@ -1,5 +1,68 @@
 #!/bin/bash

+# Load .env file if it exists (for shared configuration).
+# This is called early so that DOTENV_* variables are available to all functions.
+#
+# Globals:
+#   CONFIG_FILE  (read)     path of the env file; when unset or empty, the
+#                           .env next to this script is used
+#   DOTENV_<KEY> (exported) one variable per KEY=VALUE line of the env file
+# Parsing rules:
+#   - blank lines, comment lines and lines without '=' are skipped
+#   - an optional leading "export " is accepted
+#   - a trailing CR and whitespace around key and value are removed
+#   - one pair of matching surrounding quotes is removed from the value
+#   - keys that are not valid shell identifiers are skipped
+load_env_if_exists() {
+    local env_file="${CONFIG_FILE:-}"
+    local script_dir line key value
+
+    # If CONFIG_FILE is not set, check default location
+    if [[ -z "$env_file" ]]; then
+        script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
+        env_file="$script_dir/.env"
+    fi
+
+    # The .env file is optional; nothing to do when it is absent
+    [[ -f "$env_file" ]] || return 0
+
+    while IFS= read -r line || [[ -n "$line" ]]; do
+        # Drop a trailing CR (CRLF files), then trim surrounding whitespace
+        line=${line%$'\r'}
+        line="${line#"${line%%[![:space:]]*}"}"
+        line="${line%"${line##*[![:space:]]}"}"
+
+        # Skip blank lines, comments and anything that is not KEY=VALUE
+        [[ -z "$line" || "$line" == \#* || "$line" != *=* ]] && continue
+
+        # Accept the "export KEY=VALUE" form
+        if [[ "$line" == export[[:space:]]* ]]; then
+            line="${line#export}"
+            line="${line#"${line%%[![:space:]]*}"}"
+        fi
+
+        # Split on the first '=' so the value may itself contain '='
+        key="${line%%=*}"
+        value="${line#*=}"
+        key="${key%"${key##*[![:space:]]}"}"
+        value="${value#"${value%%[![:space:]]*}"}"
+
+        # Only valid identifiers can be exported; ignore anything else
+        [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue
+
+        # Remove one pair of matching surrounding quotes from the value
+        if [[ "$value" == \"*\" || "$value" == \'*\' ]]; then
+            value="${value:1:${#value}-2}"
+        fi
+
+        # Export with DOTENV_ prefix
+        export "DOTENV_$key=$value"
+    done < "$env_file"
+}
+
+# Load .env file
+load_env_if_exists
+
 # Function to detect IB and Ethernet interfaces
 detect_interfaces() {
     # If both interfaces are already set, nothing to do
@@ -110,6 +173,19 @@ detect_nodes() {
         done
         return 0
     fi
+
+    # Try to use COPY_HOSTS from .env
+    if [[ -n "$DOTENV_COPY_HOSTS" ]]; then
+        echo " Using COPY_HOSTS from .env: $DOTENV_COPY_HOSTS"
+        PEER_NODES=()
+        IFS=',' read -ra ALL_NODES <<< "$DOTENV_COPY_HOSTS"
+        for node in "${ALL_NODES[@]}"; do
+            node=$(echo "$node" | xargs)
+            PEER_NODES+=("$node")
+        done
+        NODES_ARG="$DOTENV_COPY_HOSTS"
+        return 0
+    fi

     echo "Auto-detecting nodes..."

diff --git a/build-and-copy.sh b/build-and-copy.sh
index 628b9c0..1aa3628 100755
--- a/build-and-copy.sh
+++ b/build-and-copy.sh
@@ -28,6 +28,7 @@ VLLM_RELEASE_TAG="prebuilt-vllm-current"
 # Space-separated list of GPU architectures for which prebuilt wheels are available
 PREBUILT_WHEELS_SUPPORTED_ARCHS="12.1a"
 CLEANUP_MODE="false"
+CONFIG_FILE=""

 cleanup() {
     if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
@@ -280,11 +281,37 @@ usage() {
     echo " --no-build : Skip building, only copy image (requires --copy-to)"
     echo " --network : Docker network to use during build"
     echo " --cleanup : Remove all *.whl and *.-commit files in wheels directory"
+    echo " --config : Path to .env configuration file (default: .env in script directory)"
     echo " -h, --help : Show this help message"
     exit 1
 }

-# Argument parsing
+# Set default CONFIG_FILE
+SCRIPT_DIR="$(dirname "$(realpath "$0")")"
+export CONFIG_FILE="$SCRIPT_DIR/.env"
+
+# Parse --config argument first
+i=1
+while [[ $i -le $# ]]; do
+    arg="${!i}"
+    if [[ "$arg" == "--config" ]]; then
+        # --config as the last argument has no value to consume
+        if [[ $i -ge $# ]]; then
+            echo "Error: --config requires a file path" >&2
+            exit 1
+        fi
+        next_i=$((i+1))
+        CONFIG_FILE="${!next_i}"
+        export CONFIG_FILE
+        break
+    fi
+    i=$((i+1))
+done
+
+# Source autodiscover.sh now so DOTENV_* values are available while parsing
+source "$(dirname "$0")/autodiscover.sh"
+
+# Now parse all arguments normally
 while [[ "$#" -gt 0 ]]; do
     case $1 in
         -t|--tag) IMAGE_TAG="$2"; shift ;;
@@ -300,24 +327,37 @@ while [[ "$#" -gt 0 ]]; do
             done

             if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then
-                echo "No hosts specified. Using autodiscovery..."
-                source "$(dirname "$0")/autodiscover.sh"
+                # Try to use COPY_HOSTS from .env first
+                if [[ -n "$DOTENV_COPY_HOSTS" ]]; then
+                    echo "Using COPY_HOSTS from .env: $DOTENV_COPY_HOSTS"
+                    IFS=',' read -ra HOSTS_FROM_ENV <<< "$DOTENV_COPY_HOSTS"
+                    COPY_HOSTS=()
+                    for env_host in "${HOSTS_FROM_ENV[@]}"; do
+                        # Trim whitespace and drop empty entries ("a, b,,c")
+                        env_host="${env_host#"${env_host%%[![:space:]]*}"}"
+                        env_host="${env_host%"${env_host##*[![:space:]]}"}"
+                        [[ -n "$env_host" ]] && COPY_HOSTS+=("$env_host")
+                    done
+                else
+                    echo "No hosts specified. Using autodiscovery..."
+                    source "$(dirname "$0")/autodiscover.sh"

-                detect_nodes
-                if [ $? -ne 0 ]; then
-                    echo "Error: Autodiscovery failed."
-                    exit 1
-                fi
+                    detect_nodes
+                    if [ $? -ne 0 ]; then
+                        echo "Error: Autodiscovery failed."
+                        exit 1
+                    fi

-                if [ ${#PEER_NODES[@]} -gt 0 ]; then
-                    COPY_HOSTS=("${PEER_NODES[@]}")
-                fi
+                    if [ ${#PEER_NODES[@]} -gt 0 ]; then
+                        COPY_HOSTS=("${PEER_NODES[@]}")
+                    fi

-                if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then
-                    echo "Error: Autodiscovery found no other nodes."
-                    exit 1
+                    if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then
+                        echo "Error: Autodiscovery found no other nodes."
+                        exit 1
+                    fi
+                    echo "Autodiscovered hosts: ${COPY_HOSTS[*]}"
                 fi
-                echo "Autodiscovered hosts: ${COPY_HOSTS[*]}"
             fi
             continue
             ;;
@@ -351,12 +391,17 @@ while [[ "$#" -gt 0 ]]; do
                 exit 1
             fi
             ;;
+        --config) CONFIG_FILE="$2"; shift ;;
         -h|--help) usage ;;
         *) echo "Unknown parameter passed: $1"; usage ;;
     esac
     shift
 done

+# Set CONFIG_FILE and source autodiscover.sh to load .env
+export CONFIG_FILE
+source "$(dirname "$0")/autodiscover.sh"
+
 # Validate flag combinations
 if [ -n "$VLLM_PRS" ]; then
     if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi