From a13a9f6806520bdc458a6ac19b39deae860bf5f4 Mon Sep 17 00:00:00 2001 From: Christopher Owen Date: Thu, 18 Dec 2025 13:31:54 +0100 Subject: [PATCH 1/3] Limit build parallelism to reduce OOM situations --- Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Dockerfile b/Dockerfile index 78bdd9d..2285d93 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,20 @@ # syntax=docker/dockerfile:1.6 +# Limit build parallelism to reduce OOM situations +ARG BUILD_JOBS=16 + # ========================================================= # STAGE 1: Base Image (Installs Dependencies) # ========================================================= FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 AS base +# Build parallelism +ARG BUILD_JOBS +ENV MAX_JOBS=${BUILD_JOBS} +ENV CMAKE_BUILD_PARALLEL_LEVEL=${BUILD_JOBS} +ENV NINJAFLAGS="-j${BUILD_JOBS}" +ENV MAKEFLAGS="-j${BUILD_JOBS}" + # Set non-interactive frontend to prevent apt prompts ENV DEBIAN_FRONTEND=noninteractive From 442f7369addf23644da54413ba79a1854a453bc5 Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Thu, 18 Dec 2025 22:02:04 -0800 Subject: [PATCH 2/3] Updated build script to handle BUILD_JOBS argument --- build-and-copy.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/build-and-copy.sh b/build-and-copy.sh index 19a2c11..01bfd7e 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -13,6 +13,7 @@ SSH_USER="$USER" NO_BUILD=false TRITON_REF="v3.5.1" VLLM_REF="main" +BUILD_JOBS="16" # Help function usage() { @@ -22,6 +23,7 @@ usage() { echo " --rebuild-vllm : Set cache bust for vllm" echo " --triton-ref : Triton commit SHA, branch or tag (default: 'v3.5.1')" echo " --vllm-ref : vLLM commit SHA, branch or tag (default: 'main')" + echo " -j, --build-jobs : Number of concurrent build jobs (default: \${BUILD_JOBS})" echo " -h, --copy-to-host : Host address to copy the image to (if not set, don't copy)" echo " -u, --user : Username for ssh command (default: \$USER)" echo " --no-build : Skip building, 
only copy image (requires --copy-to-host)" @@ -37,6 +39,7 @@ while [[ "$#" -gt 0 ]]; do --rebuild-vllm) REBUILD_VLLM=true ;; --triton-ref) TRITON_REF="$2"; shift ;; --vllm-ref) VLLM_REF="$2"; shift ;; + -j|--build-jobs) BUILD_JOBS="$2"; shift ;; -h|--copy-to-host) COPY_HOST="$2"; shift ;; -u|--user) SSH_USER="$2"; shift ;; --no-build) NO_BUILD=true ;; @@ -74,6 +77,9 @@ if [ "$NO_BUILD" = false ]; then # Add VLLM_REF to build arguments CMD+=("--build-arg" "VLLM_REF=$VLLM_REF") + # Add BUILD_JOBS to build arguments + CMD+=("--build-arg" "BUILD_JOBS=$BUILD_JOBS") + # Add build context CMD+=(".") From cf9da89545ef7c26dcad284ed00425e70034153f Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Thu, 18 Dec 2025 22:03:46 -0800 Subject: [PATCH 3/3] Updated README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 2bdbe5f..40b5bf1 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ The Dockerfile builds from the main branch of VLLM, so depending on when you run Added `launch-cluster.sh` convenience script for basic cluster management - see details below. +Added `-j` / `--build-jobs` argument to `build-and-copy.sh` to control build parallelism. + ### 2025-12-15 Updated `build-and-copy.sh` flags: @@ -79,6 +81,7 @@ Using a provided build script is recommended, but if you want to build using `do | `CACHEBUST_VLLM` | `1` | Change this to force a fresh git clone and rebuild of vLLM source code. | | `TRITON_REF` | `v3.5.1` | Triton commit SHA, branch, or tag to build. | | `VLLM_REF` | `main` | vLLM commit SHA, branch, or tag to build. | +| `BUILD_JOBS` | `16` | Number of parallel build jobs (default: 16). 
| ### Using the Build Script (Recommended) @@ -149,6 +152,7 @@ Using a different username: | `--rebuild-vllm` | Force rebuild vLLM source only (sets CACHEBUST_VLLM) | | `--triton-ref ` | Triton commit SHA, branch or tag (default: 'v3.5.1') | | `--vllm-ref ` | vLLM commit SHA, branch or tag (default: 'main') | +| `-j, --build-jobs ` | Number of parallel build jobs (default: 16) | | `-h, --copy-to-host ` | Host address to copy the image to after building | | `-u, --user ` | Username for SSH connection (default: current user) | | `--no-build` | Skip building, only copy existing image (requires `--copy-to-host`) |