Enhance launch-cluster script with improved SSH connectivity checks for worker nodes
This commit is contained in:
@@ -77,7 +77,7 @@ fi
|
||||
if [[ -z "$ETH_IF" || -z "$IB_IF" ]]; then
|
||||
echo "Auto-detecting interfaces..."
|
||||
|
||||
# Get all Up interfaces: "mlx5_0 port 1 ==> enp1s0f0np0 (Up)"
|
||||
# Get all Up interfaces: "rocep1s0f1 port 1 ==> enp1s0f1np1 (Up)"
|
||||
# We capture: IB_DEV, NET_DEV
|
||||
mapfile -t IB_NET_PAIRS < <(ibdev2netdev | awk '/Up\)/ {print $1 " " $5}')
|
||||
|
||||
@@ -229,6 +229,21 @@ echo "Worker Nodes: ${WORKER_NODES[*]}"
|
||||
echo "Container Name: $CONTAINER_NAME"
|
||||
echo "Action: $ACTION"
|
||||
|
||||
# Check SSH connectivity to worker nodes

#######################################
# Verify passwordless SSH to each given host, stopping at the first failure.
# Arguments: worker host names/addresses, one per argument (may be none)
# Outputs:   one "OK" line per reachable host on stdout;
#            failure explanation (including ssh's own message) on stderr
# Returns:   0 if every host accepted the connection, 1 on the first failure
#######################################
check_worker_ssh() {
  local worker ssh_err
  for worker in "$@"; do
    # -n              keeps ssh from reading (and draining) the script's stdin.
    # BatchMode=yes   makes ssh fail instead of prompting for a password.
    # ConnectTimeout  bounds the wait on an unreachable host to 5 seconds.
    # NOTE(review): StrictHostKeyChecking=no disables host key verification;
    # kept as-is to preserve behavior, but consider "accept-new" if the
    # deployed OpenSSH version supports it.
    # ssh's stderr is captured (its stdout is discarded) so the reason for a
    # failure can be shown to the user rather than silently dropped.
    if ! ssh_err=$(ssh -n -o BatchMode=yes -o ConnectTimeout=5 \
      -o StrictHostKeyChecking=no "$worker" true 2>&1 >/dev/null); then
      echo "Error: Passwordless SSH to $worker failed." >&2
      echo " Please ensure SSH keys are configured and the host is reachable." >&2
      if [[ -n "$ssh_err" ]]; then
        printf ' ssh: %s\n' "$ssh_err" >&2
      fi
      return 1
    fi
    echo " SSH to $worker: OK"
  done
  return 0
}

# Only actions that need the workers (start/exec) or an explicit config check
# probe them; a run with no worker nodes skips the probe entirely.
if [[ "$ACTION" == "start" || "$ACTION" == "exec" || "$CHECK_CONFIG" == "true" ]]; then
  if (( ${#WORKER_NODES[@]} > 0 )); then
    echo "Checking SSH connectivity to worker nodes..."
    check_worker_ssh "${WORKER_NODES[@]}" || exit 1
  fi
fi
|
||||
|
||||
if [[ "$CHECK_CONFIG" == "true" ]]; then
|
||||
echo "Configuration Check Complete."
|
||||
echo " Image Name: $IMAGE_NAME"
|
||||
|
||||
Reference in New Issue
Block a user