Bugfix: don't shut down the cluster on exit if it was already running when the script started

This commit is contained in:
Eugene Rakhmatulin
2025-12-18 23:12:39 -08:00
parent 2a2f8f24e2
commit 0377e9badf

View File

@@ -22,6 +22,7 @@ COMMAND_TO_RUN=""
# Default run settings.
# NOTE(review): presumably overridden by argument parsing elsewhere in the
# script; that code is not visible here, so confirm before relying on it.
DAEMON_MODE='false'
CHECK_CONFIG='false'
ACTION='start'
# Flipped to "true" by check_cluster_running when the cluster containers are
# already up; cleanup and start_cluster test it and return early in that case.
CLUSTER_WAS_RUNNING='false'
# Function to print usage
usage() {
@@ -275,6 +276,11 @@ cleanup() {
# Remove traps to prevent nested cleanup
trap - EXIT INT TERM HUP
if [[ "$CLUSTER_WAS_RUNNING" == "true" ]]; then
echo "Cluster was already running when script started. Skipping cleanup."
return
fi
echo ""
echo "Stopping cluster..."
@@ -347,8 +353,9 @@ check_cluster_running() {
done
if [[ "$running" == "true" ]]; then
echo "Cluster containers are already running. Please stop them first or use a different name."
exit 1
echo "Cluster containers are already running. Skipping launch."
CLUSTER_WAS_RUNNING="true"
return 0
fi
}
@@ -356,6 +363,10 @@ check_cluster_running() {
start_cluster() {
check_cluster_running
if [[ "$CLUSTER_WAS_RUNNING" == "true" ]]; then
return
fi
# Start Head Node
echo "Starting Head Node on $HEAD_IP..."
docker run -d --privileged --gpus all --rm \