add .env support to build-and-copy.sh

This commit is contained in:
Eugene Rakhmatulin
2026-03-25 22:47:02 -07:00
parent 2f5ff0211e
commit 8b7c02aa25
3 changed files with 105 additions and 15 deletions

View File

@@ -29,6 +29,10 @@ CONTAINER_NCCL_DEBUG="INFO"
CONTAINER_HF_TOKEN="your_huggingface_token_here"
CONTAINER_NCCL_IGNORE_CPU_AFFINITY="1"
# COPY_HOSTS: Comma-separated list of hosts for build-and-copy.sh (optional)
# Used by build-and-copy.sh to distribute images across cluster
COPY_HOSTS="192.168.177.12"
# Additional container environment variables
# CONTAINER_MAX_JOBS="16"
# CONTAINER_CUDA_VISIBLE_DEVICES="0,1"

View File

@@ -1,5 +1,44 @@
#!/bin/bash
# Load an optional .env file (shared configuration) into the environment.
# This is called early so that DOTENV_* variables are available to all functions.
#
# Globals:
#   CONFIG_FILE (read)  - path of the .env file; when unset or empty, the file
#                         named ".env" next to this script is used instead.
#   DOTENV_<KEY> (exported) - one variable per KEY=VALUE line in the file.
# Arguments: none
# Returns:   0 always; a missing file is not an error.
#
# Accepted line format:
#   - blank lines and lines whose first non-blank character is '#' are skipped
#   - an optional leading "export " is ignored
#   - whitespace around the key and around the value is trimmed
#   - one pair of matching surrounding quotes ("..." or '...') is removed
#   - lines whose key is not a valid shell identifier are skipped
#   - the file is parsed as data only; nothing in it is evaluated
load_env_if_exists() {
    local env_file="${CONFIG_FILE:-}"
    local script_dir line key value quote

    # If CONFIG_FILE is not set, check the default location
    if [[ -z "$env_file" ]]; then
        script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
        env_file="$script_dir/.env"
    fi

    [[ -f "$env_file" ]] || return 0

    # Read whole lines and split on the first '=' ourselves: letting `read`
    # split with IFS='=' silently drops a trailing '=' from the value, which
    # corrupts base64-padded tokens. The `|| [[ -n ... ]]` keeps a final line
    # that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        # Tolerate CRLF line endings
        line="${line%$'\r'}"
        # Trim leading whitespace
        line="${line#"${line%%[![:space:]]*}"}"

        # Skip comments and empty lines
        [[ -z "$line" || "$line" == \#* ]] && continue

        # Allow the common "export KEY=VALUE" form
        if [[ "$line" =~ ^export[[:space:]]+(.*)$ ]]; then
            line="${BASH_REMATCH[1]}"
        fi

        if [[ "$line" == *=* ]]; then
            key="${line%%=*}"
            value="${line#*=}"
        else
            # A bare KEY line defines an empty value
            key="$line"
            value=""
        fi

        # Trim trailing whitespace from the key (leading was trimmed above)
        key="${key%"${key##*[![:space:]]}"}"

        # `export` would fail noisily on anything that is not an identifier
        [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue

        # Trim whitespace around the value
        value="${value#"${value%%[![:space:]]*}"}"
        value="${value%"${value##*[![:space:]]}"}"

        # Remove one pair of matching surrounding quotes
        if (( ${#value} >= 2 )); then
            quote="${value:0:1}"
            if [[ ( "$quote" == '"' || "$quote" == "'" ) && "${value: -1}" == "$quote" ]]; then
                value="${value:1:${#value}-2}"
            fi
        fi

        # Export with DOTENV_ prefix
        export "DOTENV_${key}=${value}"
    done < "$env_file"

    return 0
}
# Load .env file
load_env_if_exists
# Function to detect IB and Ethernet interfaces
detect_interfaces() {
# If both interfaces are already set, nothing to do
@@ -111,6 +150,19 @@ detect_nodes() {
return 0
fi
# Try to use COPY_HOSTS from .env
if [[ -n "$DOTENV_COPY_HOSTS" ]]; then
echo " Using COPY_HOSTS from .env: $DOTENV_COPY_HOSTS"
PEER_NODES=()
IFS=',' read -ra ALL_NODES <<< "$DOTENV_COPY_HOSTS"
for node in "${ALL_NODES[@]}"; do
node=$(echo "$node" | xargs)
PEER_NODES+=("$node")
done
NODES_ARG="$DOTENV_COPY_HOSTS"
return 0
fi
echo "Auto-detecting nodes..."
if ! command -v nc &> /dev/null; then

View File

@@ -28,6 +28,7 @@ VLLM_RELEASE_TAG="prebuilt-vllm-current"
# Space-separated list of GPU architectures for which prebuilt wheels are available
PREBUILT_WHEELS_SUPPORTED_ARCHS="12.1a"
CLEANUP_MODE="false"
CONFIG_FILE=""
cleanup() {
if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
@@ -280,11 +281,32 @@ usage() {
echo " --no-build : Skip building, only copy image (requires --copy-to)"
echo " --network <network> : Docker network to use during build"
echo " --cleanup : Remove all *.whl and *.-commit files in wheels directory"
echo " --config : Path to .env configuration file (default: .env in script directory)"
echo " -h, --help : Show this help message"
exit 1
}
# Argument parsing
# Set default CONFIG_FILE: ".env" in the directory that holds this script.
# realpath resolves $0 first, so the directory is that of the resolved path.
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
export CONFIG_FILE="$SCRIPT_DIR/.env"
# Parse --config argument first.
# This scan reads the positional parameters without shifting them, so the
# full argument list is still intact afterwards. Only the first --config
# found is used here; the scan stops at it.
i=1
while [[ $i -le $# ]]; do
# ${!i} is indirect expansion: the value of the i-th positional parameter
arg="${!i}"
if [[ "$arg" == "--config" ]]; then
next_i=$((i+1))
# NOTE(review): when --config is the last argument, next_i is past $# and
# this expands to an empty string, so CONFIG_FILE is exported empty
# without any error message.
CONFIG_FILE="${!next_i}"
export CONFIG_FILE
break
fi
i=$((i+1))
done
# Source autodiscover.sh to load .env file (CONFIG_FILE is exported above so
# the sourced script can read it).
# NOTE(review): this path uses dirname "$0" without realpath, unlike
# SCRIPT_DIR above; the two can differ when the script is run via a symlink.
source "$(dirname "$0")/autodiscover.sh"
# Now parse all arguments normally
while [[ "$#" -gt 0 ]]; do
case $1 in
-t|--tag) IMAGE_TAG="$2"; shift ;;
@@ -300,24 +322,31 @@ while [[ "$#" -gt 0 ]]; do
done
if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then
echo "No hosts specified. Using autodiscovery..."
source "$(dirname "$0")/autodiscover.sh"
# Try to use COPY_HOSTS from .env first
if [[ -n "$DOTENV_COPY_HOSTS" ]]; then
echo "Using COPY_HOSTS from .env: $DOTENV_COPY_HOSTS"
IFS=',' read -ra HOSTS_FROM_ENV <<< "$DOTENV_COPY_HOSTS"
COPY_HOSTS=("${HOSTS_FROM_ENV[@]}")
else
echo "No hosts specified. Using autodiscovery..."
source "$(dirname "$0")/autodiscover.sh"
detect_nodes
if [ $? -ne 0 ]; then
echo "Error: Autodiscovery failed."
exit 1
fi
detect_nodes
if [ $? -ne 0 ]; then
echo "Error: Autodiscovery failed."
exit 1
fi
if [ ${#PEER_NODES[@]} -gt 0 ]; then
COPY_HOSTS=("${PEER_NODES[@]}")
fi
if [ ${#PEER_NODES[@]} -gt 0 ]; then
COPY_HOSTS=("${PEER_NODES[@]}")
fi
if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then
echo "Error: Autodiscovery found no other nodes."
exit 1
if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then
echo "Error: Autodiscovery found no other nodes."
exit 1
fi
echo "Autodiscovered hosts: ${COPY_HOSTS[*]}"
fi
echo "Autodiscovered hosts: ${COPY_HOSTS[*]}"
fi
continue
;;
@@ -351,12 +380,17 @@ while [[ "$#" -gt 0 ]]; do
exit 1
fi
;;
--config) CONFIG_FILE="$2"; shift ;;
-h|--help) usage ;;
*) echo "Unknown parameter passed: $1"; usage ;;
esac
shift
done
# Set CONFIG_FILE and source autodiscover.sh to load .env
export CONFIG_FILE
source "$(dirname "$0")/autodiscover.sh"
# Validate flag combinations
if [ -n "$VLLM_PRS" ]; then
if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi