Optimized triton caching

This commit is contained in:
eugr
2025-12-14 09:26:10 -08:00
parent 02f842e1fd
commit 25f759fec8

View File

@@ -3,7 +3,7 @@
# =========================================================
# STAGE 1: Builder (Builds vLLM from Source)
# =========================================================
FROM nvidia/cuda:13.0.2-devel-ubuntu24.04 AS builder
FROM nvidia/cuda:13.0.2-devel-ubuntu24.04 AS base
# Set non-interactive frontend to prevent apt prompts
ENV DEBIAN_FRONTEND=noninteractive
@@ -38,6 +38,9 @@ WORKDIR $VLLM_BASE_DIR
ENV TORCH_CUDA_ARCH_LIST=12.1a
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
# Initial Triton repo clone - placed before all cache busters so this layer stays cached when they change
RUN git clone https://github.com/triton-lang/triton.git
# --- CACHE BUSTER ---
# Change this argument to force a re-download of PyTorch/FlashInfer
ARG CACHEBUST_DEPS=1
@@ -112,16 +115,13 @@ RUN --mount=type=cache,id=ccache,target=/root/.ccache \
# Install Triton from the cloned repo at the commit given by TRITON_SHA - overrides the version pulled in by dependencies
# Initial clone (Cached forever)
RUN git clone https://github.com/triton-lang/triton.git
# Pass TRITON_SHA on the command line (--build-arg TRITON_SHA=<commit>) to break the cache for the steps below
# Defaults to the v3.5.1 commit
ARG TRITON_SHA=0add68262ab0a2e33b84524346cb27cbb2787356
# This step re-runs only when TRITON_SHA differs from the last build (or an earlier layer was rebuilt)
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/ccache \
RUN --mount=type=cache,id=ccache,target=/root/.ccache \
--mount=type=cache,id=pip-cache,target=/root/.cache/pip \
cd triton && \
git fetch origin && \
git checkout ${TRITON_SHA} && \