Add parallel copy option to build-and-copy.sh
Introduced the --copy-parallel flag to enable concurrent copying of Docker images to multiple hosts. Updated the README with usage instructions and details about the new option. Refactored the script to support both serial and parallel copy modes for improved efficiency.
This commit is contained in:
@@ -16,6 +16,7 @@ The Dockerfile builds from the main branch of VLLM, so depending on when you run
|
|||||||
|
|
||||||
Updated `build-and-copy.sh` to support copying to multiple hosts.
|
Updated `build-and-copy.sh` to support copying to multiple hosts.
|
||||||
- Added `-c, --copy-to` (accepts space- or comma-separated host lists) and kept `--copy-to-host` as a backward-compatible alias.
|
- Added `-c, --copy-to` (accepts space- or comma-separated host lists) and kept `--copy-to-host` as a backward-compatible alias.
|
||||||
|
- Added `--copy-parallel` to copy to all hosts concurrently.
|
||||||
- Short `-h` is now used for help.
|
- Short `-h` is now used for help.
|
||||||
|
|
||||||
### 2025-12-15
|
### 2025-12-15
|
||||||
@@ -99,6 +100,12 @@ Copy to multiple hosts (space- or comma-separated after the flag):
|
|||||||
./build-and-copy.sh --copy-to 192.168.177.12 192.168.177.13
|
./build-and-copy.sh --copy-to 192.168.177.12 192.168.177.13
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Copy to multiple hosts in parallel:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./build-and-copy.sh --copy-to 192.168.177.12 192.168.177.13 --copy-parallel
|
||||||
|
```
|
||||||
|
|
||||||
Using a different username:
|
Using a different username:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -146,6 +153,7 @@ Using a different username:
|
|||||||
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: 'main') |
|
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: 'main') |
|
||||||
| `-c, --copy-to <host[,host...] or host host...>` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). |
|
| `-c, --copy-to <host[,host...] or host host...>` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). |
|
||||||
| `--copy-to-host` | Alias for `--copy-to` (backwards compatibility). |
|
| `--copy-to-host` | Alias for `--copy-to` (backwards compatibility). |
|
||||||
|
| `--copy-parallel` | Copy to all specified hosts concurrently. |
|
||||||
| `-u, --user <user>` | Username for SSH connection (default: current user) |
|
| `-u, --user <user>` | Username for SSH connection (default: current user) |
|
||||||
| `--no-build` | Skip building, only copy existing image (requires `--copy-to`) |
|
| `--no-build` | Skip building, only copy existing image (requires `--copy-to`) |
|
||||||
| `-h, --help` | Show help message |
|
| `-h, --help` | Show help message |
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ NO_BUILD=false
|
|||||||
TRITON_REF="v3.5.1"
|
TRITON_REF="v3.5.1"
|
||||||
VLLM_REF="main"
|
VLLM_REF="main"
|
||||||
TMP_IMAGE=""
|
TMP_IMAGE=""
|
||||||
|
PARALLEL_COPY=false
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
|
if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
|
||||||
@@ -36,6 +37,21 @@ add_copy_hosts() {
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Stream the locally saved image archive to one remote host and load it
# into that host's Docker daemon over ssh.
# Globals:
#   TMP_IMAGE (read) - path of the `docker save` archive to send
#   SSH_USER  (read) - user name used for the ssh connection
# Arguments:
#   $1 - destination host
# Outputs:
#   Progress and elapsed time (HH:MM:SS) on stdout; failure message on stderr.
# Returns:
#   0 when the remote `docker load` succeeds, 1 otherwise.
#######################################
copy_to_host() {
    local host="$1"
    local host_copy_start host_copy_end host_copy_time

    echo "Loading image into ${SSH_USER}@${host}..."
    host_copy_start=$(date +%s)

    # Feed the archive through a redirect instead of `cat | ssh`: unless
    # pipefail is set, a pipeline reports only the status of its last stage,
    # so a failure to read the archive would not be detected directly. With a
    # redirect, an archive that cannot be opened fails the command itself.
    if ssh "${SSH_USER}@${host}" "docker load" < "$TMP_IMAGE"; then
        host_copy_end=$(date +%s)
        host_copy_time=$((host_copy_end - host_copy_start))
        printf "Copy to %s completed in %02d:%02d:%02d\n" "$host" \
            $((host_copy_time / 3600)) \
            $((host_copy_time % 3600 / 60)) \
            $((host_copy_time % 60))
    else
        # Diagnostics belong on stderr so they stay separate from progress
        # output, which matters when several copies run in the background.
        echo "Copy to $host failed." >&2
        return 1
    fi
}
|
||||||
|
|
||||||
# Help function
|
# Help function
|
||||||
usage() {
|
usage() {
|
||||||
echo "Usage: $0 [OPTIONS]"
|
echo "Usage: $0 [OPTIONS]"
|
||||||
@@ -46,6 +62,7 @@ usage() {
|
|||||||
echo " --vllm-ref <ref> : vLLM commit SHA, branch or tag (default: 'main')"
|
echo " --vllm-ref <ref> : vLLM commit SHA, branch or tag (default: 'main')"
|
||||||
echo " -c, --copy-to <hosts> : Host(s) to copy the image to. Accepts comma or space-delimited lists after the flag."
|
echo " -c, --copy-to <hosts> : Host(s) to copy the image to. Accepts comma or space-delimited lists after the flag."
|
||||||
echo " --copy-to-host : Alias for --copy-to (backwards compatibility)."
|
echo " --copy-to-host : Alias for --copy-to (backwards compatibility)."
|
||||||
|
echo " --copy-parallel : Copy to all hosts in parallel instead of serially."
|
||||||
echo " -u, --user <user> : Username for ssh command (default: \$USER)"
|
echo " -u, --user <user> : Username for ssh command (default: \$USER)"
|
||||||
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
||||||
echo " -h, --help : Show this help message"
|
echo " -h, --help : Show this help message"
|
||||||
@@ -78,6 +95,7 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
continue
|
continue
|
||||||
;;
|
;;
|
||||||
-u|--user) SSH_USER="$2"; shift ;;
|
-u|--user) SSH_USER="$2"; shift ;;
|
||||||
|
--copy-parallel) PARALLEL_COPY=true ;;
|
||||||
--no-build) NO_BUILD=true ;;
|
--no-build) NO_BUILD=true ;;
|
||||||
-h|--help) usage ;;
|
-h|--help) usage ;;
|
||||||
*) echo "Unknown parameter passed: $1"; usage ;;
|
*) echo "Unknown parameter passed: $1"; usage ;;
|
||||||
@@ -130,20 +148,36 @@ fi
|
|||||||
COPY_TIME=0
|
COPY_TIME=0
|
||||||
if [ "${#COPY_HOSTS[@]}" -gt 0 ]; then
|
if [ "${#COPY_HOSTS[@]}" -gt 0 ]; then
|
||||||
echo "Copying image '$IMAGE_TAG' to ${#COPY_HOSTS[@]} host(s): ${COPY_HOSTS[*]}"
|
echo "Copying image '$IMAGE_TAG' to ${#COPY_HOSTS[@]} host(s): ${COPY_HOSTS[*]}"
|
||||||
|
if [ "$PARALLEL_COPY" = true ]; then
|
||||||
|
echo "Parallel copy enabled."
|
||||||
|
fi
|
||||||
COPY_START=$(date +%s)
|
COPY_START=$(date +%s)
|
||||||
|
|
||||||
TMP_IMAGE=$(mktemp -t vllm_image.XXXXXX)
|
TMP_IMAGE=$(mktemp -t vllm_image.XXXXXX)
|
||||||
echo "Saving image locally to $TMP_IMAGE..."
|
echo "Saving image locally to $TMP_IMAGE..."
|
||||||
docker save -o "$TMP_IMAGE" "$IMAGE_TAG"
|
docker save -o "$TMP_IMAGE" "$IMAGE_TAG"
|
||||||
|
|
||||||
for host in "${COPY_HOSTS[@]}"; do
|
if [ "$PARALLEL_COPY" = true ]; then
|
||||||
echo "Loading image into ${SSH_USER}@${host}..."
|
PIDS=()
|
||||||
HOST_COPY_START=$(date +%s)
|
for host in "${COPY_HOSTS[@]}"; do
|
||||||
cat "$TMP_IMAGE" | ssh "${SSH_USER}@${host}" "docker load"
|
copy_to_host "$host" &
|
||||||
HOST_COPY_END=$(date +%s)
|
PIDS+=($!)
|
||||||
HOST_COPY_TIME=$((HOST_COPY_END - HOST_COPY_START))
|
done
|
||||||
printf "Copy to %s completed in %02d:%02d:%02d\n" "$host" $((HOST_COPY_TIME/3600)) $((HOST_COPY_TIME%3600/60)) $((HOST_COPY_TIME%60))
|
COPY_FAILURE=0
|
||||||
done
|
for pid in "${PIDS[@]}"; do
|
||||||
|
if ! wait "$pid"; then
|
||||||
|
COPY_FAILURE=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ "$COPY_FAILURE" -ne 0 ]; then
|
||||||
|
echo "One or more copies failed."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
for host in "${COPY_HOSTS[@]}"; do
|
||||||
|
copy_to_host "$host"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
COPY_END=$(date +%s)
|
COPY_END=$(date +%s)
|
||||||
COPY_TIME=$((COPY_END - COPY_START))
|
COPY_TIME=$((COPY_END - COPY_START))
|
||||||
|
|||||||
Reference in New Issue
Block a user