diff --git a/README.md b/README.md
index 114b75d..8040546 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,9 @@ The Dockerfile builds from the main branch of VLLM, so depending on when you run
 
 ### 2025-12-18
 
-Updated `build-and-copy.sh` to support copying to multiple hosts.
+Updated `build-and-copy.sh` to support copying to multiple hosts.
 - Added `-c, --copy-to` (accepts space- or comma-separated host lists) and kept `--copy-to-host` as a backward-compatible alias.
+- Added `--copy-parallel` to copy to all hosts concurrently.
 - Short `-h` is now used for help.
 
 ### 2025-12-15
@@ -99,6 +100,12 @@ Copy to multiple hosts (space- or comma-separated after the flag):
 ./build-and-copy.sh --copy-to 192.168.177.12 192.168.177.13
 ```
 
+Copy to multiple hosts in parallel:
+
+```bash
+./build-and-copy.sh --copy-to 192.168.177.12 192.168.177.13 --copy-parallel
+```
+
 Using a different username:
 
 ```bash
@@ -146,6 +153,7 @@ Using a different username:
 | `--vllm-ref ` | vLLM commit SHA, branch or tag (default: 'main') |
 | `-c, --copy-to ` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). |
 | `--copy-to-host` | Alias for `--copy-to` (backwards compatibility). |
+| `--copy-parallel` | Copy to all specified hosts concurrently. |
 | `-u, --user ` | Username for SSH connection (default: current user) |
 | `--no-build` | Skip building, only copy existing image (requires `--copy-to`) |
 | `-h, --help` | Show help message |
diff --git a/build-and-copy.sh b/build-and-copy.sh
index ddc1b5f..8bf46f6 100755
--- a/build-and-copy.sh
+++ b/build-and-copy.sh
@@ -14,6 +14,7 @@ NO_BUILD=false
 TRITON_REF="v3.5.1"
 VLLM_REF="main"
 TMP_IMAGE=""
+PARALLEL_COPY=false
 
 cleanup() {
     if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
@@ -36,6 +37,27 @@ add_copy_hosts() {
     done
 }
 
+# Send the saved image file ($TMP_IMAGE) to one host over ssh and load it there.
+# Arguments: $1 - host to copy the image to
+# Globals:   SSH_USER, TMP_IMAGE (read)
+# Outputs:   progress and elapsed time on stdout; failure message on stderr
+# Returns:   0 if the ssh "docker load" command succeeded, 1 otherwise
+copy_to_host() {
+    local host="$1"
+    echo "Loading image into ${SSH_USER}@${host}..."
+    local host_copy_start host_copy_end host_copy_time
+    host_copy_start=$(date +%s)
+    # Feed the image to ssh on stdin directly; no extra cat process is needed.
+    if ssh "${SSH_USER}@${host}" "docker load" < "$TMP_IMAGE"; then
+        host_copy_end=$(date +%s)
+        host_copy_time=$((host_copy_end - host_copy_start))
+        printf "Copy to %s completed in %02d:%02d:%02d\n" "$host" $((host_copy_time/3600)) $((host_copy_time%3600/60)) $((host_copy_time%60))
+    else
+        echo "Copy to $host failed." >&2
+        return 1
+    fi
+}
+
 # Help function
 usage() {
     echo "Usage: $0 [OPTIONS]"
@@ -46,6 +68,7 @@ usage() {
     echo " --vllm-ref : vLLM commit SHA, branch or tag (default: 'main')"
     echo " -c, --copy-to : Host(s) to copy the image to. Accepts comma or space-delimited lists after the flag."
     echo " --copy-to-host : Alias for --copy-to (backwards compatibility)."
+    echo " --copy-parallel : Copy to all hosts in parallel instead of serially."
     echo " -u, --user : Username for ssh command (default: \$USER)"
     echo " --no-build : Skip building, only copy image (requires --copy-to)"
     echo " -h, --help : Show this help message"
@@ -78,6 +101,7 @@ while [[ "$#" -gt 0 ]]; do
             continue
             ;;
         -u|--user) SSH_USER="$2"; shift ;;
+        --copy-parallel) PARALLEL_COPY=true ;;
         --no-build) NO_BUILD=true ;;
         -h|--help) usage ;;
         *) echo "Unknown parameter passed: $1"; usage ;;
@@ -130,20 +154,39 @@ fi
 COPY_TIME=0
 if [ "${#COPY_HOSTS[@]}" -gt 0 ]; then
     echo "Copying image '$IMAGE_TAG' to ${#COPY_HOSTS[@]} host(s): ${COPY_HOSTS[*]}"
+    if [ "$PARALLEL_COPY" = true ]; then
+        echo "Parallel copy enabled."
+    fi
     COPY_START=$(date +%s)
 
     TMP_IMAGE=$(mktemp -t vllm_image.XXXXXX)
     echo "Saving image locally to $TMP_IMAGE..."
     docker save -o "$TMP_IMAGE" "$IMAGE_TAG"
 
-    for host in "${COPY_HOSTS[@]}"; do
-        echo "Loading image into ${SSH_USER}@${host}..."
-        HOST_COPY_START=$(date +%s)
-        cat "$TMP_IMAGE" | ssh "${SSH_USER}@${host}" "docker load"
-        HOST_COPY_END=$(date +%s)
-        HOST_COPY_TIME=$((HOST_COPY_END - HOST_COPY_START))
-        printf "Copy to %s completed in %02d:%02d:%02d\n" "$host" $((HOST_COPY_TIME/3600)) $((HOST_COPY_TIME%3600/60)) $((HOST_COPY_TIME%60))
-    done
+    if [ "$PARALLEL_COPY" = true ]; then
+        # Start one background copy per host and remember each PID.
+        PIDS=()
+        for host in "${COPY_HOSTS[@]}"; do
+            copy_to_host "$host" &
+            PIDS+=("$!")
+        done
+        # Wait for every copy so all hosts are attempted before failing.
+        COPY_FAILURE=0
+        for pid in "${PIDS[@]}"; do
+            if ! wait "$pid"; then
+                COPY_FAILURE=1
+            fi
+        done
+        if [ "$COPY_FAILURE" -ne 0 ]; then
+            echo "One or more copies failed." >&2
+            exit 1
+        fi
+    else
+        for host in "${COPY_HOSTS[@]}"; do
+            # Abort explicitly on the first failed host instead of relying on errexit.
+            copy_to_host "$host" || exit 1
+        done
+    fi
 
     COPY_END=$(date +%s)
     COPY_TIME=$((COPY_END - COPY_START))