Autodiscover tweaks
This commit is contained in:
@@ -182,6 +182,12 @@ detect_interfaces() {
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Export mesh NCCL settings directly so launch-cluster.sh picks them up
|
||||||
|
# even if the user declines to save config to .env
|
||||||
|
export DOTENV_CONTAINER_NCCL_NET_PLUGIN=none
|
||||||
|
export DOTENV_CONTAINER_NCCL_IB_SUBNET_AWARE_ROUTING=1
|
||||||
|
export DOTENV_CONTAINER_NCCL_IB_MERGE_NICS=0
|
||||||
|
|
||||||
else
|
else
|
||||||
echo "Error: Unexpected number of active CX7 interfaces ($num_up). Expected 2 (non-mesh) or 4 (mesh)."
|
echo "Error: Unexpected number of active CX7 interfaces ($num_up). Expected 2 (non-mesh) or 4 (mesh)."
|
||||||
return 1
|
return 1
|
||||||
@@ -411,6 +417,12 @@ save_config() {
|
|||||||
echo "LOCAL_IP=$LOCAL_IP"
|
echo "LOCAL_IP=$LOCAL_IP"
|
||||||
echo "ETH_IF=$ETH_IF"
|
echo "ETH_IF=$ETH_IF"
|
||||||
echo "IB_IF=$IB_IF"
|
echo "IB_IF=$IB_IF"
|
||||||
|
if [[ "$MESH_MODE" == "true" ]]; then
|
||||||
|
echo "# Mesh mode NCCL settings"
|
||||||
|
echo "CONTAINER_NCCL_NET_PLUGIN=none"
|
||||||
|
echo "CONTAINER_NCCL_IB_SUBNET_AWARE_ROUTING=1"
|
||||||
|
echo "CONTAINER_NCCL_IB_MERGE_NICS=0"
|
||||||
|
fi
|
||||||
} > "$env_file"
|
} > "$env_file"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Saved to $env_file"
|
echo "Saved to $env_file"
|
||||||
|
|||||||
Reference in New Issue
Block a user