From 11355677f6ec4ca2799fbda0d7d6b833bcfbf1ad Mon Sep 17 00:00:00 2001 From: Eric Lewis Date: Thu, 18 Dec 2025 01:24:48 -0500 Subject: [PATCH] Add parallel copy option to build-and-copy.sh Introduced the --copy-parallel flag to enable concurrent copying of Docker images to multiple hosts. Updated the README with usage instructions and details about the new option. Refactored the script to support both serial and parallel copy modes for improved efficiency. --- README.md | 10 +++++++++- build-and-copy.sh | 50 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 114b75d..8040546 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,9 @@ The Dockerfile builds from the main branch of VLLM, so depending on when you run ### 2025-12-18 -Updated `build-and-copy.sh` to support copying to multiple hosts. +Updated `build-and-copy.sh` to support copying to multiple hosts. - Added `-c, --copy-to` (accepts space- or comma-separated host lists) and kept `--copy-to-host` as a backward-compatible alias. +- Added `--copy-parallel` to copy to all hosts concurrently. - Short `-h` is now used for help. ### 2025-12-15 @@ -99,6 +100,12 @@ Copy to multiple hosts (space- or comma-separated after the flag): ./build-and-copy.sh --copy-to 192.168.177.12 192.168.177.13 ``` +Copy to multiple hosts in parallel: + +```bash +./build-and-copy.sh --copy-to 192.168.177.12 192.168.177.13 --copy-parallel +``` + Using a different username: ```bash @@ -146,6 +153,7 @@ Using a different username: | `--vllm-ref ` | vLLM commit SHA, branch or tag (default: 'main') | | `-c, --copy-to ` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). | | `--copy-to-host` | Alias for `--copy-to` (backwards compatibility). | +| `--copy-parallel` | Copy to all specified hosts concurrently. | | `-u, --user ` | Username for SSH connection (default: current user) | | `--no-build` | Skip building, only copy existing image (requires `--copy-to`) | | `-h, --help` | Show help message | diff --git a/build-and-copy.sh b/build-and-copy.sh index ddc1b5f..8bf46f6 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -14,6 +14,7 @@ NO_BUILD=false TRITON_REF="v3.5.1" VLLM_REF="main" TMP_IMAGE="" +PARALLEL_COPY=false cleanup() { if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then @@ -36,6 +37,21 @@ add_copy_hosts() { done } +copy_to_host() { + local host="$1" + echo "Loading image into ${SSH_USER}@${host}..." + local host_copy_start host_copy_end host_copy_time + host_copy_start=$(date +%s) + if cat "$TMP_IMAGE" | ssh "${SSH_USER}@${host}" "docker load"; then + host_copy_end=$(date +%s) + host_copy_time=$((host_copy_end - host_copy_start)) + printf "Copy to %s completed in %02d:%02d:%02d\n" "$host" $((host_copy_time/3600)) $((host_copy_time%3600/60)) $((host_copy_time%60)) + else + echo "Copy to $host failed." + return 1 + fi +} + # Help function usage() { echo "Usage: $0 [OPTIONS]" @@ -46,6 +62,7 @@ usage() { echo " --vllm-ref : vLLM commit SHA, branch or tag (default: 'main')" echo " -c, --copy-to : Host(s) to copy the image to. Accepts comma or space-delimited lists after the flag." echo " --copy-to-host : Alias for --copy-to (backwards compatibility)." + echo " --copy-parallel : Copy to all hosts in parallel instead of serially." echo " -u, --user : Username for ssh command (default: \$USER)" echo " --no-build : Skip building, only copy image (requires --copy-to)" echo " -h, --help : Show this help message" @@ -78,6 +95,7 @@ while [[ "$#" -gt 0 ]]; do continue ;; -u|--user) SSH_USER="$2"; shift ;; + --copy-parallel) PARALLEL_COPY=true ;; --no-build) NO_BUILD=true ;; -h|--help) usage ;; *) echo "Unknown parameter passed: $1"; usage ;; @@ -130,20 +148,36 @@ fi COPY_TIME=0 if [ "${#COPY_HOSTS[@]}" -gt 0 ]; then echo "Copying image '$IMAGE_TAG' to ${#COPY_HOSTS[@]} host(s): ${COPY_HOSTS[*]}" + if [ "$PARALLEL_COPY" = true ]; then + echo "Parallel copy enabled." + fi COPY_START=$(date +%s) TMP_IMAGE=$(mktemp -t vllm_image.XXXXXX) echo "Saving image locally to $TMP_IMAGE..." docker save -o "$TMP_IMAGE" "$IMAGE_TAG" - for host in "${COPY_HOSTS[@]}"; do - echo "Loading image into ${SSH_USER}@${host}..." - HOST_COPY_START=$(date +%s) - cat "$TMP_IMAGE" | ssh "${SSH_USER}@${host}" "docker load" - HOST_COPY_END=$(date +%s) - HOST_COPY_TIME=$((HOST_COPY_END - HOST_COPY_START)) - printf "Copy to %s completed in %02d:%02d:%02d\n" "$host" $((HOST_COPY_TIME/3600)) $((HOST_COPY_TIME%3600/60)) $((HOST_COPY_TIME%60)) - done + if [ "$PARALLEL_COPY" = true ]; then + PIDS=() + for host in "${COPY_HOSTS[@]}"; do + copy_to_host "$host" & + PIDS+=($!) + done + COPY_FAILURE=0 + for pid in "${PIDS[@]}"; do + if ! wait "$pid"; then + COPY_FAILURE=1 + fi + done + if [ "$COPY_FAILURE" -ne 0 ]; then + echo "One or more copies failed." + exit 1 + fi + else + for host in "${COPY_HOSTS[@]}"; do + copy_to_host "$host" + done + fi COPY_END=$(date +%s) COPY_TIME=$((COPY_END - COPY_START))