Enhance launch-cluster script with improved SSH connectivity checks for worker nodes
This commit is contained in:
@@ -77,7 +77,7 @@ fi
|
|||||||
if [[ -z "$ETH_IF" || -z "$IB_IF" ]]; then
|
if [[ -z "$ETH_IF" || -z "$IB_IF" ]]; then
|
||||||
echo "Auto-detecting interfaces..."
|
echo "Auto-detecting interfaces..."
|
||||||
|
|
||||||
# Get all Up interfaces: "mlx5_0 port 1 ==> enp1s0f0np0 (Up)"
|
# Get all Up interfaces: "rocep1s0f1 port 1 ==> enp1s0f1np1 (Up)"
|
||||||
# We capture: IB_DEV, NET_DEV
|
# We capture: IB_DEV, NET_DEV
|
||||||
mapfile -t IB_NET_PAIRS < <(ibdev2netdev | awk '/Up\)/ {print $1 " " $5}')
|
mapfile -t IB_NET_PAIRS < <(ibdev2netdev | awk '/Up\)/ {print $1 " " $5}')
|
||||||
|
|
||||||
@@ -229,6 +229,21 @@ echo "Worker Nodes: ${WORKER_NODES[*]}"
|
|||||||
echo "Container Name: $CONTAINER_NAME"
|
echo "Container Name: $CONTAINER_NAME"
|
||||||
echo "Action: $ACTION"
|
echo "Action: $ACTION"
|
||||||
|
|
||||||
|
# Check SSH connectivity to worker nodes.
# Runs only when ACTION is "start" or "exec", or when CHECK_CONFIG is "true",
# and only if WORKER_NODES is non-empty. Each worker is probed by running
# `true` over ssh; the first failing worker prints an error and the script
# exits with status 1. Every successful probe prints an "OK" line.
|
||||||
|
if [[ "$ACTION" == "start" || "$ACTION" == "exec" || "$CHECK_CONFIG" == "true" ]]; then
|
||||||
|
if [ ${#WORKER_NODES[@]} -gt 0 ]; then
|
||||||
|
echo "Checking SSH connectivity to worker nodes..."
|
||||||
|
for worker in "${WORKER_NODES[@]}"; do
|
||||||
|
# BatchMode=yes: ssh never prompts for a password, so a missing key fails
# instead of hanging. ConnectTimeout=5: give up on an unreachable host after
# 5 seconds. StrictHostKeyChecking=no: unknown host keys are accepted without
# prompting. ssh's stderr is discarded; only its exit status is used.
if ! ssh -o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$worker" true 2>/dev/null; then
|
||||||
|
echo "Error: Passwordless SSH to $worker failed."
|
||||||
|
echo " Please ensure SSH keys are configured and the host is reachable."
|
||||||
|
# Abort on the first unreachable worker; remaining workers are not probed.
exit 1
|
||||||
|
fi
|
||||||
|
echo " SSH to $worker: OK"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ "$CHECK_CONFIG" == "true" ]]; then
|
if [[ "$CHECK_CONFIG" == "true" ]]; then
|
||||||
echo "Configuration Check Complete."
|
echo "Configuration Check Complete."
|
||||||
echo " Image Name: $IMAGE_NAME"
|
echo " Image Name: $IMAGE_NAME"
|
||||||
|
|||||||
Reference in New Issue
Block a user