Added LOCAL_IP support
This commit is contained in:
10
.env.example
10
.env.example
@@ -3,13 +3,17 @@
|
|||||||
|
|
||||||
# Cluster configuration
|
# Cluster configuration
|
||||||
# CLUSTER_NODES: Comma-separated list of node IPs (first node is the head node)
|
# CLUSTER_NODES: Comma-separated list of node IPs (first node is the head node)
|
||||||
CLUSTER_NODES="192.168.1.1,192.168.1.2,192.168.1.3"
|
CLUSTER_NODES="192.168.177.11,192.168.177.12"
|
||||||
|
|
||||||
# ETH_IF: Ethernet interface name (optional, auto-detected if not specified)
|
# ETH_IF: Ethernet interface name (optional, auto-detected if not specified)
|
||||||
ETH_IF="eth0"
|
ETH_IF="enp1s0f1np1"
|
||||||
|
|
||||||
# IB_IF: InfiniBand interface name (optional, auto-detected if not specified)
|
# IB_IF: InfiniBand interface name (optional, auto-detected if not specified)
|
||||||
IB_IF="ib0"
|
IB_IF="rocep1s0f1,roceP2p1s0f1"
|
||||||
|
|
||||||
|
# LOCAL_IP: Local IP address (optional, auto-detected if not specified)
|
||||||
|
# Useful for solo mode or overriding auto-detection
|
||||||
|
LOCAL_IP="192.168.177.11"
|
||||||
|
|
||||||
# MASTER_PORT: Port for cluster coordination (default: 29501)
|
# MASTER_PORT: Port for cluster coordination (default: 29501)
|
||||||
MASTER_PORT="29501"
|
MASTER_PORT="29501"
|
||||||
|
|||||||
@@ -78,6 +78,7 @@ usage() {
|
|||||||
echo " IB_IF InfiniBand interface name"
|
echo " IB_IF InfiniBand interface name"
|
||||||
echo " MASTER_PORT Port for cluster coordination (default: 29501)"
|
echo " MASTER_PORT Port for cluster coordination (default: 29501)"
|
||||||
echo " CONTAINER_NAME Container name (default: vllm_node)"
|
echo " CONTAINER_NAME Container name (default: vllm_node)"
|
||||||
|
echo " LOCAL_IP Local IP address (for solo mode or override auto-detection)"
|
||||||
echo " CONTAINER_* Any variable starting with CONTAINER_ (except CONTAINER_NAME)"
|
echo " CONTAINER_* Any variable starting with CONTAINER_ (except CONTAINER_NAME)"
|
||||||
echo " becomes -e flag. Example: CONTAINER_NCCL_DEBUG=INFO -> -e NCCL_DEBUG=INFO"
|
echo " becomes -e flag. Example: CONTAINER_NCCL_DEBUG=INFO -> -e NCCL_DEBUG=INFO"
|
||||||
echo ""
|
echo ""
|
||||||
@@ -87,6 +88,7 @@ usage() {
|
|||||||
echo " IB_IF=ib0"
|
echo " IB_IF=ib0"
|
||||||
echo " MASTER_PORT=29501"
|
echo " MASTER_PORT=29501"
|
||||||
echo " CONTAINER_NAME=vllm_node"
|
echo " CONTAINER_NAME=vllm_node"
|
||||||
|
echo " LOCAL_IP=192.168.1.1"
|
||||||
echo " CONTAINER_NCCL_DEBUG=INFO"
|
echo " CONTAINER_NCCL_DEBUG=INFO"
|
||||||
echo " CONTAINER_HF_TOKEN=abc123"
|
echo " CONTAINER_HF_TOKEN=abc123"
|
||||||
echo ""
|
echo ""
|
||||||
@@ -256,6 +258,10 @@ if [[ -z "$CONTAINER_NAME" || "$CONTAINER_NAME" == "vllm_node" ]] && [[ -n "$DOT
|
|||||||
CONTAINER_NAME="$DOTENV_CONTAINER_NAME"
|
CONTAINER_NAME="$DOTENV_CONTAINER_NAME"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ -n "$DOTENV_LOCAL_IP" ]]; then
|
||||||
|
export LOCAL_IP="$DOTENV_LOCAL_IP"
|
||||||
|
fi
|
||||||
|
|
||||||
# Validate non-privileged mode flags
|
# Validate non-privileged mode flags
|
||||||
if [[ "$NON_PRIVILEGED_MODE" == "true" ]]; then
|
if [[ "$NON_PRIVILEGED_MODE" == "true" ]]; then
|
||||||
# Set default swap limit if not specified
|
# Set default swap limit if not specified
|
||||||
@@ -408,7 +414,10 @@ if [[ "$SOLO_MODE" == "true" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
# Solo mode: skip node detection, just get local IP
|
# Solo mode: skip node detection, just get local IP
|
||||||
|
# Use LOCAL_IP from .env if set, otherwise default to 127.0.0.1
|
||||||
|
if [[ -z "$LOCAL_IP" ]]; then
|
||||||
LOCAL_IP="127.0.0.1"
|
LOCAL_IP="127.0.0.1"
|
||||||
|
fi
|
||||||
NODES_ARG="$LOCAL_IP"
|
NODES_ARG="$LOCAL_IP"
|
||||||
PEER_NODES=()
|
PEER_NODES=()
|
||||||
echo "Solo mode enabled. Skipping node detection."
|
echo "Solo mode enabled. Skipping node detection."
|
||||||
|
|||||||
Reference in New Issue
Block a user