Merge pull request #6

This commit is contained in:
Eugene Rakhmatulin
2025-12-18 22:17:12 -08:00
3 changed files with 20 additions and 0 deletions

View File

@@ -1,10 +1,20 @@
# syntax=docker/dockerfile:1.6
# Limit build parallelism to reduce OOM situations.
# Global build argument; override with `--build-arg BUILD_JOBS=<n>` and lower
# it if the build runs out of memory.
ARG BUILD_JOBS=16
# =========================================================
# STAGE 1: Base Image (Installs Dependencies)
# =========================================================
FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 AS base
# Build parallelism: an ARG declared before FROM is not visible inside a
# stage, so it is redeclared here (without a value) to pick up the global
# default or the value passed via --build-arg.
ARG BUILD_JOBS
# Publish the same job limit under several variable names so each build tool
# sees the cap: CMake reads CMAKE_BUILD_PARALLEL_LEVEL and make reads
# MAKEFLAGS. MAX_JOBS and NINJAFLAGS are presumably consumed by the
# Python/Ninja build steps in later stages -- TODO confirm against those steps.
# These are ENV (not ARG), so stages built FROM base inherit them.
ENV MAX_JOBS=${BUILD_JOBS}
ENV CMAKE_BUILD_PARALLEL_LEVEL=${BUILD_JOBS}
ENV NINJAFLAGS="-j${BUILD_JOBS}"
ENV MAKEFLAGS="-j${BUILD_JOBS}"
# Set non-interactive frontend to prevent apt prompts
# NOTE(review): as an ENV this value also persists into the environment of
# any image derived from this stage -- confirm that is intended.
ENV DEBIAN_FRONTEND=noninteractive

View File

@@ -29,6 +29,8 @@ The Dockerfile builds from the main branch of VLLM, so depending on when you run
Added `launch-cluster.sh` convenience script for basic cluster management - see details below.
Added `-j` / `--build-jobs` argument to `build-and-copy.sh` to control build parallelism.
### 2025-12-15
Updated `build-and-copy.sh` flags:
@@ -79,6 +81,7 @@ Using a provided build script is recommended, but if you want to build using `do
| `CACHEBUST_VLLM` | `1` | Change this to force a fresh git clone and rebuild of vLLM source code. |
| `TRITON_REF` | `v3.5.1` | Triton commit SHA, branch, or tag to build. |
| `VLLM_REF` | `main` | vLLM commit SHA, branch, or tag to build. |
| `BUILD_JOBS` | `16` | Number of parallel build jobs. Lower this value if the build runs out of memory. |
### Using the Build Script (Recommended)
@@ -149,6 +152,7 @@ Using a different username:
| `--rebuild-vllm` | Force rebuild vLLM source only (sets CACHEBUST_VLLM) |
| `--triton-ref <ref>` | Triton commit SHA, branch or tag (default: 'v3.5.1') |
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: 'main') |
| `-j, --build-jobs <jobs>` | Number of parallel build jobs (default: 16) |
| `-h, --copy-to-host <host>` | Host address to copy the image to after building |
| `-u, --user <user>` | Username for SSH connection (default: current user) |
| `--no-build` | Skip building, only copy existing image (requires `--copy-to-host`) |

View File

@@ -13,6 +13,7 @@ SSH_USER="$USER"
NO_BUILD=false
TRITON_REF="v3.5.1"
VLLM_REF="main"
BUILD_JOBS="16"
# Help function
usage() {
@@ -22,6 +23,7 @@ usage() {
echo " --rebuild-vllm : Set cache bust for vllm"
echo " --triton-ref <ref> : Triton commit SHA, branch or tag (default: 'v3.5.1')"
echo " --vllm-ref <ref> : vLLM commit SHA, branch or tag (default: 'main')"
# Print the effective default for -j/--build-jobs. ${BUILD_JOBS} is expanded
# on purpose so the help text shows the actual number rather than the literal
# variable name.
echo " -j, --build-jobs <jobs> : Number of concurrent build jobs (default: ${BUILD_JOBS})"
echo " -h, --copy-to-host <host> : Host address to copy the image to (if not set, don't copy)"
echo " -u, --user <user> : Username for ssh command (default: \$USER)"
echo " --no-build : Skip building, only copy image (requires --copy-to-host)"
@@ -37,6 +39,7 @@ while [[ "$#" -gt 0 ]]; do
--rebuild-vllm) REBUILD_VLLM=true ;;
--triton-ref) TRITON_REF="$2"; shift ;;
--vllm-ref) VLLM_REF="$2"; shift ;;
-j|--build-jobs) BUILD_JOBS="$2"; shift ;;
-h|--copy-to-host) COPY_HOST="$2"; shift ;;
-u|--user) SSH_USER="$2"; shift ;;
--no-build) NO_BUILD=true ;;
@@ -74,6 +77,9 @@ if [ "$NO_BUILD" = false ]; then
# Add VLLM_REF to build arguments
CMD+=("--build-arg" "VLLM_REF=$VLLM_REF")
# Add BUILD_JOBS to build arguments
CMD+=("--build-arg" "BUILD_JOBS=$BUILD_JOBS")
# Add build context
CMD+=(".")