Switch to uv in the main Dockerfile

This commit is contained in:
Eugene Rakhmatulin
2025-12-21 13:28:40 -08:00
parent bbd3469549
commit 11db634aad
3 changed files with 44 additions and 35 deletions

View File

@@ -21,6 +21,13 @@ ENV DEBIAN_FRONTEND=noninteractive
 # Allow pip to install globally on Ubuntu 24.04 without a venv
 ENV PIP_BREAK_SYSTEM_PACKAGES=1

+# Set pip cache directory
+ENV PIP_CACHE_DIR=/root/.cache/pip
+ENV UV_CACHE_DIR=/root/.cache/uv
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_BREAK_SYSTEM_PACKAGES=1
+ENV UV_LINK_MODE=copy
+
 # Set the base directory environment variable
 ENV VLLM_BASE_DIR=/workspace/vllm
@@ -33,7 +40,8 @@ RUN apt update && apt upgrade -y \
     python3-dev python3-pip git wget \
     libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \
     ccache \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* \
+    && pip install uv

 # Configure Ccache for CUDA/C++
 ENV PATH=/usr/lib/ccache:$PATH
@@ -61,25 +69,21 @@ ARG CACHEBUST_DEPS=1
 # Using --mount=type=cache ensures that even if this layer invalidates,
 # pip reuses previously downloaded wheels.
-# Set pip cache directory
-ENV PIP_CACHE_DIR=/root/.cache/pip
-
-RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130
+RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130

 # Install additional dependencies
-RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install xgrammar fastsafetensors
+RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install xgrammar fastsafetensors

 ARG FLASHINFER_PRE=""

 # Install FlashInfer packages
-RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install ${FLASHINFER_PRE} flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \
-    pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
-    pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
-    pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
+RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install ${FLASHINFER_PRE} flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \
+    uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
+    uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
+    uv pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate

 # =========================================================
 # STAGE 2: Triton Builder (Compiles Triton independently)
 # =========================================================
@@ -98,15 +102,16 @@ WORKDIR $VLLM_BASE_DIR/triton
 # This only runs if TRITON_REF differs from the last build
 RUN --mount=type=cache,id=ccache,target=/root/.ccache \
-    --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
+    --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
     git fetch origin && \
     git checkout ${TRITON_REF} && \
     git submodule sync && \
     git submodule update --init --recursive && \
-    pip install -r python/requirements.txt && \
+    uv pip install -r python/requirements.txt && \
     mkdir -p /workspace/wheels && \
-    pip wheel --no-build-isolation . --wheel-dir=/workspace/wheels -v && \
-    pip wheel --no-build-isolation python/triton_kernels --no-deps --wheel-dir=/workspace/wheels
+    rm -rf .git && \
+    uv build --no-build-isolation --out-dir=/workspace/wheels -v . && \
+    uv build --no-build-isolation --no-index --out-dir=/workspace/wheels python/triton_kernels

 # =========================================================
 # STAGE 3: vLLM Builder (Builds vLLM from Source)
 # =========================================================
@@ -149,12 +154,12 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
 WORKDIR $VLLM_BASE_DIR/vllm

 # Prepare build requirements
-RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
+RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
     python3 use_existing_torch.py && \
     sed -i "/flashinfer/d" requirements/cuda.txt && \
     sed -i '/^triton\b/d' requirements/test.txt && \
     sed -i '/^fastsafetensors\b/d' requirements/test.txt && \
-    pip install -r requirements/build.txt
+    uv pip install -r requirements/build.txt

 # Apply Patches
 # TEMPORARY PATCH for fastsafetensors loading in cluster setup - tracking https://github.com/foundation-model-stack/fastsafetensors/issues/36
@@ -165,13 +170,13 @@ RUN patch -p1 < fastsafetensors.patch
 # We mount the ccache directory here. Ideally, map this to a host volume for persistence
 # across totally separate `docker build` invocations.
 RUN --mount=type=cache,id=ccache,target=/root/.ccache \
-    --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install --no-build-isolation . -v
+    --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install --no-build-isolation . -v

 # Install custom Triton from triton-builder
 COPY --from=triton-builder /workspace/wheels /workspace/wheels
-RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install /workspace/wheels/*.whl
+RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install /workspace/wheels/*.whl

 # =========================================================
 # STAGE 4: Runner (Transfers only necessary artifacts)
@@ -184,6 +189,10 @@ ENV VLLM_BASE_DIR=/workspace/vllm
 # Set pip cache directory
 ENV PIP_CACHE_DIR=/root/.cache/pip
+ENV UV_CACHE_DIR=/root/.cache/uv
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_BREAK_SYSTEM_PACKAGES=1
+ENV UV_LINK_MODE=copy

 # Install minimal runtime dependencies (NCCL, Python)
 # Note: "devel" tools like cmake/gcc are NOT installed here to save space
@@ -219,6 +228,5 @@ COPY run-cluster-node.sh $VLLM_BASE_DIR/
 RUN chmod +x $VLLM_BASE_DIR/run-cluster-node.sh

 # Final extra deps
-RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install ray[default]
+RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install ray[default]

View File

@@ -19,6 +19,7 @@ ENV PIP_CACHE_DIR=/root/.cache/pip
 ENV UV_CACHE_DIR=/root/.cache/uv
 ENV UV_SYSTEM_PYTHON=1
 ENV UV_LINK_MODE=copy
+ENV UV_BREAK_SYSTEM_PACKAGES=1

 # Install minimal runtime dependencies (NCCL, Python)
 # Note: "devel" tools like cmake/gcc are NOT installed here to save space
@@ -43,7 +44,7 @@ COPY fastsafetensors.patch .
 # Install fastsafetensors
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install --system --break-system-packages -U fastsafetensors
+    uv pip install -U fastsafetensors

 # --- VLLM SOURCE CACHE BUSTER ---
 # Change THIS argument to force a fresh git clone and rebuild of vLLM
@@ -57,9 +58,9 @@ ARG WHEELS_FROM_GITHUB_RELEASE=0
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
     if [ "$WHEELS_FROM_GITHUB_RELEASE" = "1" ]; then \
         export VLLM_VERSION=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq -r .tag_name | sed 's/^v//') && \
-        uv pip install --system --break-system-packages -U https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cu130-cp38-abi3-manylinux_2_35_aarch64.whl --torch-backend=auto; \
+        uv pip install -U https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cu130-cp38-abi3-manylinux_2_35_aarch64.whl --torch-backend=auto; \
     else \
-        uv pip install --system --break-system-packages -U vllm \
+        uv pip install -U vllm \
         --torch-backend=auto \
         --extra-index-url https://wheels.vllm.ai/nightly/cu130; \
     fi
@@ -72,9 +73,9 @@ ARG FLASHINFER_PRE=""
 # Install flashinfer helper packages
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install --system --break-system-packages ${FLASHINFER_PRE} flashinfer-python -U --no-deps --index-url https://flashinfer.ai/whl && \
-    uv pip install --system --break-system-packages ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
-    uv pip install --system --break-system-packages ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130
+    uv pip install ${FLASHINFER_PRE} flashinfer-python -U --no-deps --index-url https://flashinfer.ai/whl && \
+    uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
+    uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130

 # Setup Env for Runtime
 ENV TORCH_CUDA_ARCH_LIST=12.1a
@@ -87,5 +88,5 @@ RUN chmod +x $VLLM_BASE_DIR/run-cluster-node.sh
 # Final extra deps
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install --system --break-system-packages ray[default]
+    uv pip install ray[default]

View File

@@ -257,4 +257,4 @@ if [ "$COPY_TIME" -gt 0 ]; then
 fi
 echo "Total Time: $(printf '%02d:%02d:%02d' $((TOTAL_TIME/3600)) $((TOTAL_TIME%3600/60)) $((TOTAL_TIME%60)))"
 echo "========================================="
-echo "Done."
+echo "Done building $IMAGE_TAG."