Refactor Triton and vLLM reference handling in Dockerfile and build script

This commit is contained in:
Eugene Rakhmatulin
2025-12-14 23:28:08 -08:00
parent 4551795908
commit 0606b1b984
2 changed files with 23 additions and 18 deletions

View File

@@ -74,17 +74,17 @@ WORKDIR $VLLM_BASE_DIR
# Initial Triton repo clone (cached forever) # Initial Triton repo clone (cached forever)
RUN git clone https://github.com/triton-lang/triton.git RUN git clone https://github.com/triton-lang/triton.git
# We expect TRITON_SHA to be passed from the command line to break the cache # We expect TRITON_REF to be passed from the command line to break the cache
# Set to v3.5.1 commit by default # Set to v3.5.1 tag by default
ARG TRITON_SHA=0add68262ab0a2e33b84524346cb27cbb2787356 ARG TRITON_REF=v3.5.1
WORKDIR $VLLM_BASE_DIR/triton WORKDIR $VLLM_BASE_DIR/triton
# This only runs if TRITON_SHA differs from the last build # This only runs if TRITON_REF differs from the last build
RUN --mount=type=cache,id=ccache,target=/root/.ccache \ RUN --mount=type=cache,id=ccache,target=/root/.ccache \
--mount=type=cache,id=pip-cache,target=/root/.cache/pip \ --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
git fetch origin && \ git fetch origin && \
git checkout ${TRITON_SHA} && \ git checkout ${TRITON_REF} && \
git submodule sync && \ git submodule sync && \
git submodule update --init --recursive && \ git submodule update --init --recursive && \
pip install -r python/requirements.txt && \ pip install -r python/requirements.txt && \
@@ -102,6 +102,9 @@ FROM base AS builder
# without re-installing the dependencies above. # without re-installing the dependencies above.
ARG CACHEBUST_VLLM=1 ARG CACHEBUST_VLLM=1
# Git reference (branch, tag, or SHA) to check out
ARG VLLM_REF=main
# 4. Smart Git Clone (Fetch changes instead of full re-clone) # 4. Smart Git Clone (Fetch changes instead of full re-clone)
# We mount a cache at /repo-cache. This directory persists on your host machine. # We mount a cache at /repo-cache. This directory persists on your host machine.
RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
@@ -115,7 +118,10 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
echo "Cache hit: Fetching updates..." && \ echo "Cache hit: Fetching updates..." && \
cd vllm && \ cd vllm && \
git fetch --all && \ git fetch --all && \
git reset --hard origin/main && \ git checkout ${VLLM_REF} && \
if [ "${VLLM_REF}" = "main" ]; then \
git reset --hard origin/main; \
fi && \
git submodule update --init --recursive; \ git submodule update --init --recursive; \
fi && \ fi && \
# 3. Copy the updated code from the cache to the actual container workspace # 3. Copy the updated code from the cache to the actual container workspace

View File

@@ -11,7 +11,8 @@ REBUILD_VLLM=false
COPY_HOST="" COPY_HOST=""
SSH_USER="$USER" SSH_USER="$USER"
NO_BUILD=false NO_BUILD=false
TRITON_SHA="" TRITON_REF="v3.5.1"
VLLM_REF="main"
# Help function # Help function
usage() { usage() {
@@ -19,7 +20,8 @@ usage() {
echo " -t, --tag <tag> : Image tag (default: 'vllm-node')" echo " -t, --tag <tag> : Image tag (default: 'vllm-node')"
echo " --rebuild-deps : Set cache bust for dependencies" echo " --rebuild-deps : Set cache bust for dependencies"
echo " --rebuild-vllm : Set cache bust for vllm" echo " --rebuild-vllm : Set cache bust for vllm"
echo " --triton-sha <sha> : Triton commit SHA (default: auto-detect latest main)" echo " --triton-ref <ref> : Triton commit SHA, branch or tag (default: 'v3.5.1')"
echo " --vllm-ref <ref> : vLLM commit SHA, branch or tag (default: 'main')"
echo " -h, --copy-to-host <host> : Host address to copy the image to (if not set, don't copy)" echo " -h, --copy-to-host <host> : Host address to copy the image to (if not set, don't copy)"
echo " -u, --user <user> : Username for ssh command (default: \$USER)" echo " -u, --user <user> : Username for ssh command (default: \$USER)"
echo " --no-build : Skip building, only copy image (requires --copy-to-host)" echo " --no-build : Skip building, only copy image (requires --copy-to-host)"
@@ -33,7 +35,8 @@ while [[ "$#" -gt 0 ]]; do
-t|--tag) IMAGE_TAG="$2"; shift ;; -t|--tag) IMAGE_TAG="$2"; shift ;;
--rebuild-deps) REBUILD_DEPS=true ;; --rebuild-deps) REBUILD_DEPS=true ;;
--rebuild-vllm) REBUILD_VLLM=true ;; --rebuild-vllm) REBUILD_VLLM=true ;;
--triton-sha) TRITON_SHA="$2"; shift ;; --triton-ref) TRITON_REF="$2"; shift ;;
--vllm-ref) VLLM_REF="$2"; shift ;;
-h|--copy-to-host) COPY_HOST="$2"; shift ;; -h|--copy-to-host) COPY_HOST="$2"; shift ;;
-u|--user) SSH_USER="$2"; shift ;; -u|--user) SSH_USER="$2"; shift ;;
--no-build) NO_BUILD=true ;; --no-build) NO_BUILD=true ;;
@@ -52,13 +55,6 @@ fi
# Build image (unless --no-build is set) # Build image (unless --no-build is set)
BUILD_TIME=0 BUILD_TIME=0
if [ "$NO_BUILD" = false ]; then if [ "$NO_BUILD" = false ]; then
# Auto-detect TRITON_SHA if not provided
if [ -z "$TRITON_SHA" ]; then
echo "Auto-detecting Triton commit for v3.5.1..."
TRITON_SHA=$(git ls-remote https://github.com/triton-lang/triton.git refs/tags/v3.5.1 | cut -f1)
echo "Detected TRITON_SHA: $TRITON_SHA"
fi
# Construct build command # Construct build command
CMD=("docker" "build" "-t" "$IMAGE_TAG") CMD=("docker" "build" "-t" "$IMAGE_TAG")
@@ -72,8 +68,11 @@ if [ "$NO_BUILD" = false ]; then
CMD+=("--build-arg" "CACHEBUST_VLLM=$(date +%s)") CMD+=("--build-arg" "CACHEBUST_VLLM=$(date +%s)")
fi fi
# Add TRITON_SHA to build arguments # Add TRITON_REF to build arguments
CMD+=("--build-arg" "TRITON_SHA=$TRITON_SHA") CMD+=("--build-arg" "TRITON_REF=$TRITON_REF")
# Add VLLM_REF to build arguments
CMD+=("--build-arg" "VLLM_REF=$VLLM_REF")
# Add build context # Add build context
CMD+=(".") CMD+=(".")