Merge branch '3-node' of gitlab.home.eugr.net:ai/spark-vllm into 3-node
This commit is contained in:
10
.env.example
10
.env.example
@@ -3,13 +3,17 @@
|
||||
|
||||
# Cluster configuration
|
||||
# CLUSTER_NODES: Comma-separated list of node IPs (first node is the head node)
|
||||
CLUSTER_NODES="192.168.1.1,192.168.1.2,192.168.1.3"
|
||||
CLUSTER_NODES="192.168.177.11,192.168.177.12"
|
||||
|
||||
# ETH_IF: Ethernet interface name (optional, auto-detected if not specified)
|
||||
ETH_IF="eth0"
|
||||
ETH_IF="enp1s0f1np1"
|
||||
|
||||
# IB_IF: InfiniBand interface name (optional, auto-detected if not specified)
|
||||
IB_IF="ib0"
|
||||
IB_IF="rocep1s0f1,roceP2p1s0f1"
|
||||
|
||||
# LOCAL_IP: Local IP address (optional; auto-detected if not specified, except in solo mode, where it falls back to 127.0.0.1)
|
||||
# Useful for solo mode or overriding auto-detection
|
||||
LOCAL_IP="192.168.177.11"
|
||||
|
||||
# MASTER_PORT: Port for cluster coordination (default: 29501)
|
||||
MASTER_PORT="29501"
|
||||
|
||||
@@ -78,6 +78,7 @@ usage() {
|
||||
echo " IB_IF InfiniBand interface name"
|
||||
echo " MASTER_PORT Port for cluster coordination (default: 29501)"
|
||||
echo " CONTAINER_NAME Container name (default: vllm_node)"
|
||||
echo " LOCAL_IP Local IP address (for solo mode or override auto-detection)"
|
||||
echo " CONTAINER_* Any variable starting with CONTAINER_ (except CONTAINER_NAME)"
|
||||
echo " becomes -e flag. Example: CONTAINER_NCCL_DEBUG=INFO -> -e NCCL_DEBUG=INFO"
|
||||
echo ""
|
||||
@@ -87,6 +88,7 @@ usage() {
|
||||
echo " IB_IF=ib0"
|
||||
echo " MASTER_PORT=29501"
|
||||
echo " CONTAINER_NAME=vllm_node"
|
||||
echo " LOCAL_IP=192.168.1.1"
|
||||
echo " CONTAINER_NCCL_DEBUG=INFO"
|
||||
echo " CONTAINER_HF_TOKEN=abc123"
|
||||
echo ""
|
||||
@@ -256,6 +258,10 @@ if [[ -z "$CONTAINER_NAME" || "$CONTAINER_NAME" == "vllm_node" ]] && [[ -n "$DOT
|
||||
CONTAINER_NAME="$DOTENV_CONTAINER_NAME"
|
||||
fi
|
||||
|
||||
if [[ -n "$DOTENV_LOCAL_IP" ]]; then
|
||||
export LOCAL_IP="$DOTENV_LOCAL_IP"
|
||||
fi
|
||||
|
||||
# Validate non-privileged mode flags
|
||||
if [[ "$NON_PRIVILEGED_MODE" == "true" ]]; then
|
||||
# Set default swap limit if not specified
|
||||
@@ -408,7 +414,10 @@ if [[ "$SOLO_MODE" == "true" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
# Solo mode: skip node detection, just get local IP
|
||||
# Use LOCAL_IP if it is already set (e.g. loaded from .env); otherwise default to 127.0.0.1
|
||||
if [[ -z "$LOCAL_IP" ]]; then
|
||||
LOCAL_IP="127.0.0.1"
|
||||
fi
|
||||
NODES_ARG="$LOCAL_IP"
|
||||
PEER_NODES=()
|
||||
echo "Solo mode enabled. Skipping node detection."
|
||||
|
||||
Reference in New Issue
Block a user