Fixed cluster script and small fix for Dockerfile
This commit is contained in:
@@ -56,7 +56,7 @@ RUN pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/wh
|
|||||||
pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130
|
pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130
|
||||||
|
|
||||||
# Install fast safetensors to improve loading speeds
|
# Install fast safetensors to improve loading speeds
|
||||||
RUN pip install fastsafetensors>=0.1.10
|
RUN pip install fastsafetensors
|
||||||
|
|
||||||
# --- VLLM SOURCE CACHE BUSTER ---
|
# --- VLLM SOURCE CACHE BUSTER ---
|
||||||
# Change THIS argument to force a fresh git clone and rebuild of vLLM
|
# Change THIS argument to force a fresh git clone and rebuild of vLLM
|
||||||
|
|||||||
@@ -82,24 +82,24 @@ fi
|
|||||||
|
|
||||||
echo "Configuring environment for [$NODE_TYPE] at $HOST_IP..."
|
echo "Configuring environment for [$NODE_TYPE] at $HOST_IP..."
|
||||||
|
|
||||||
export_persist VLLM_HOST_IP=$HOST_IP
|
export_persist VLLM_HOST_IP "$HOST_IP"
|
||||||
export_persist RAY_NODE_IP_ADDRESS=$VLLM_HOST_IP
|
export_persist RAY_NODE_IP_ADDRESS "$HOST_IP"
|
||||||
export_persist RAY_OVERRIDE_NODE_IP_ADDRESS=$VLLM_HOST_IP
|
export_persist RAY_OVERRIDE_NODE_IP_ADDRESS "$HOST_IP"
|
||||||
|
|
||||||
# Network Interface
|
# Network Interface
|
||||||
export_persist MN_IF_NAME=$ETH_IF_NAME
|
export_persist MN_IF_NAME "$ETH_IF_NAME"
|
||||||
export_persist UCX_NET_DEVICES=$MN_IF_NAME
|
export_persist UCX_NET_DEVICES "$ETH_IF_NAME"
|
||||||
export_persist NCCL_SOCKET_IFNAME=$MN_IF_NAME
|
export_persist NCCL_SOCKET_IFNAME "$ETH_IF_NAME"
|
||||||
|
|
||||||
# InfiniBand
|
# InfiniBand
|
||||||
export_persist NCCL_IB_HCA=$IB_IF_NAME
|
export_persist NCCL_IB_HCA "$IB_IF_NAME"
|
||||||
export_persist NCCL_IB_DISABLE=0
|
export_persist NCCL_IB_DISABLE "0"
|
||||||
|
|
||||||
# Sockets/Transport
|
# Sockets/Transport
|
||||||
export_persist OMPI_MCA_btl_tcp_if_include=$MN_IF_NAME
|
export_persist OMPI_MCA_btl_tcp_if_include "$ETH_IF_NAME"
|
||||||
export_persist GLOO_SOCKET_IFNAME=$MN_IF_NAME
|
export_persist GLOO_SOCKET_IFNAME "$ETH_IF_NAME"
|
||||||
export_persist TP_SOCKET_IFNAME=$MN_IF_NAME
|
export_persist TP_SOCKET_IFNAME "$ETH_IF_NAME"
|
||||||
export_persist RAY_memory_monitor_refresh_ms=0
|
export_persist RAY_memory_monitor_refresh_ms "0"
|
||||||
|
|
||||||
# --- Execution ---
|
# --- Execution ---
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user