Merge pull request #6
This commit is contained in:
10
Dockerfile
10
Dockerfile
@@ -1,10 +1,20 @@
|
||||
# syntax=docker/dockerfile:1.6
|
||||
|
||||
# Limit build parallelism to reduce OOM situations
|
||||
ARG BUILD_JOBS=16
|
||||
|
||||
# =========================================================
|
||||
# STAGE 1: Base Image (Installs Dependencies)
|
||||
# =========================================================
|
||||
FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 AS base
|
||||
|
||||
# Build parallelism
|
||||
ARG BUILD_JOBS
|
||||
ENV MAX_JOBS=${BUILD_JOBS}
|
||||
ENV CMAKE_BUILD_PARALLEL_LEVEL=${BUILD_JOBS}
|
||||
ENV NINJAFLAGS="-j${BUILD_JOBS}"
|
||||
ENV MAKEFLAGS="-j${BUILD_JOBS}"
|
||||
|
||||
# Set non-interactive frontend to prevent apt prompts
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
|
||||
@@ -29,6 +29,8 @@ The Dockerfile builds from the main branch of VLLM, so depending on when you run
|
||||
|
||||
Added `launch-cluster.sh` convenience script for basic cluster management - see details below.
|
||||
|
||||
Added `-j` / `--build-jobs` argument to `build-and-copy.sh` to control build parallelism.
|
||||
|
||||
### 2025-12-15
|
||||
|
||||
Updated `build-and-copy.sh` flags:
|
||||
@@ -79,6 +81,7 @@ Using a provided build script is recommended, but if you want to build using `do
|
||||
| `CACHEBUST_VLLM` | `1` | Change this to force a fresh git clone and rebuild of vLLM source code. |
|
||||
| `TRITON_REF` | `v3.5.1` | Triton commit SHA, branch, or tag to build. |
|
||||
| `VLLM_REF` | `main` | vLLM commit SHA, branch, or tag to build. |
|
||||
| `BUILD_JOBS` | `16` | Number of parallel build jobs (default: 16). |
|
||||
|
||||
### Using the Build Script (Recommended)
|
||||
|
||||
@@ -149,6 +152,7 @@ Using a different username:
|
||||
| `--rebuild-vllm` | Force rebuild vLLM source only (sets CACHEBUST_VLLM) |
|
||||
| `--triton-ref <ref>` | Triton commit SHA, branch or tag (default: 'v3.5.1') |
|
||||
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: 'main') |
|
||||
| `-j, --build-jobs <jobs>` | Number of parallel build jobs (default: 16) |
|
||||
| `-h, --copy-to-host <host>` | Host address to copy the image to after building |
|
||||
| `-u, --user <user>` | Username for SSH connection (default: current user) |
|
||||
| `--no-build` | Skip building, only copy existing image (requires `--copy-to-host`) |
|
||||
|
||||
@@ -13,6 +13,7 @@ SSH_USER="$USER"
|
||||
NO_BUILD=false
|
||||
TRITON_REF="v3.5.1"
|
||||
VLLM_REF="main"
|
||||
BUILD_JOBS="16"
|
||||
|
||||
# Help function
|
||||
usage() {
|
||||
@@ -22,6 +23,7 @@ usage() {
|
||||
echo " --rebuild-vllm : Set cache bust for vllm"
|
||||
echo " --triton-ref <ref> : Triton commit SHA, branch or tag (default: 'v3.5.1')"
|
||||
echo " --vllm-ref <ref> : vLLM commit SHA, branch or tag (default: 'main')"
|
||||
# Print the concrete default (keep in sync with the BUILD_JOBS default set at the top of the script); the escaped form printed the literal text "${BUILD_JOBS}".
echo " -j, --build-jobs <jobs> : Number of concurrent build jobs (default: 16)"
|
||||
echo " -h, --copy-to-host <host> : Host address to copy the image to (if not set, don't copy)"
|
||||
echo " -u, --user <user> : Username for ssh command (default: \$USER)"
|
||||
echo " --no-build : Skip building, only copy image (requires --copy-to-host)"
|
||||
@@ -37,6 +39,7 @@ while [[ "$#" -gt 0 ]]; do
|
||||
--rebuild-vllm) REBUILD_VLLM=true ;;
|
||||
--triton-ref) TRITON_REF="$2"; shift ;;
|
||||
--vllm-ref) VLLM_REF="$2"; shift ;;
|
||||
-j|--build-jobs) BUILD_JOBS="$2"; shift ;;
|
||||
-h|--copy-to-host) COPY_HOST="$2"; shift ;;
|
||||
-u|--user) SSH_USER="$2"; shift ;;
|
||||
--no-build) NO_BUILD=true ;;
|
||||
@@ -74,6 +77,9 @@ if [ "$NO_BUILD" = false ]; then
|
||||
# Add VLLM_REF to build arguments
|
||||
CMD+=("--build-arg" "VLLM_REF=$VLLM_REF")
|
||||
|
||||
# Add BUILD_JOBS to build arguments
|
||||
CMD+=("--build-arg" "BUILD_JOBS=$BUILD_JOBS")
|
||||
|
||||
# Add build context
|
||||
CMD+=(".")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user