Dockerfile.mxfp4: build on the NGC PyTorch image instead of a uv virtualenv

Both the `base` and `runner` stages move from
nvidia/cuda:13.1.0-devel-ubuntu24.04 to nvcr.io/nvidia/pytorch:25.12-py3.
What the hunks below change as a result:

  * UV_SYSTEM_PYTHON and UV_BREAK_SYSTEM_PACKAGES are switched on, and the
    /workspace/python-venv virtualenv (creation, PATH activation, and the
    commented-out entrypoint that sourced it) is removed.
  * The explicit torch/torchvision/torchaudio install from the cu130 wheel
    index is dropped.
  * The apt steps no longer run `apt upgrade` and no longer request the
    cuDNN, NCCL, ibverbs/rdma, python3-*, cmake, build-essential or wget
    packages (presumably provided by the new base image; confirm).
  * flash-attn is removed with `pip uninstall -y` in both stages.
  * The runner copies /usr/local/lib/python3.12/dist-packages and
    /usr/local/bin from the builder stage instead of the virtualenv.
  * The added RUN lines call `apt-get`, not `apt`: apt(8) documents its
    command line as an interactive front end whose interface is not
    guaranteed stable for scripts.

NOTE(review): leading indentation of continuation lines is written as four
spaces here. If the target file indents differently, apply with a
whitespace-tolerant option (e.g. `git apply --ignore-whitespace`).
NOTE(review): the abbreviated blob hashes on the `index` line were carried
over as-is and have not been recomputed for this revision of the patch.

diff --git a/Dockerfile.mxfp4 b/Dockerfile.mxfp4
index 2abc16a..ecdd08e 100644
--- a/Dockerfile.mxfp4
+++ b/Dockerfile.mxfp4
@@ -6,7 +6,7 @@ ARG BUILD_JOBS=16
 # =========================================================
 # STAGE 1: Base Image (Installs Dependencies)
 # =========================================================
-FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 AS base
+FROM nvcr.io/nvidia/pytorch:25.12-py3 AS base
 
 # Build parallemism
 ARG BUILD_JOBS
@@ -36,8 +36,8 @@ ENV PIP_BREAK_SYSTEM_PACKAGES=1
 # Set pip cache directory
 ENV PIP_CACHE_DIR=/root/.cache/pip
 ENV UV_CACHE_DIR=/root/.cache/uv
-#ENV UV_SYSTEM_PYTHON=1
-#ENV UV_BREAK_SYSTEM_PACKAGES=1
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_BREAK_SYSTEM_PACKAGES=1
 ENV UV_LINK_MODE=copy
 
 # Set the base directory environment variable
@@ -45,15 +45,12 @@ ENV VLLM_BASE_DIR=/workspace/vllm
 
 # 1. Install Build Dependencies & Ccache
 # Added ccache to enable incremental compilation caching
-RUN apt update && apt upgrade -y \
-    && apt install -y --allow-change-held-packages --no-install-recommends \
-    curl vim cmake build-essential ninja-build \
-    libcudnn9-cuda-13 libcudnn9-dev-cuda-13 \
-    python3-dev python3-pip git wget \
-    libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl vim ninja-build git \
     ccache \
     && rm -rf /var/lib/apt/lists/* \
-    && pip install uv
+    && pip install uv && pip uninstall -y flash-attn
 
 # Configure Ccache for CUDA/C++
 ENV PATH=/usr/lib/ccache:$PATH
@@ -77,18 +74,6 @@ ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
 # Change this argument to force a re-download of PyTorch/FlashInfer
 ARG CACHEBUST_DEPS=1
 
-# Initialize virtual environment
-ENV VIRTUAL_ENV=/workspace/python-venv
-RUN uv venv /workspace/python-venv
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
-# 3. Install Python Dependencies with Cache Mounts
-# Using --mount=type=cache ensures that even if this layer invalidates,
-# pip reuses previously downloaded wheels.
-
-RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130
-
 # Install additional dependencies
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
     uv pip install fastsafetensors
@@ -238,7 +223,7 @@ RUN --mount=type=cache,id=ccache,target=/root/.ccache \
 # =========================================================
 # STAGE 4: Runner (Transfers only necessary artifacts)
 # =========================================================
-FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 AS runner
+FROM nvcr.io/nvidia/pytorch:25.12-py3 AS runner
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PIP_BREAK_SYSTEM_PACKAGES=1
@@ -247,20 +232,18 @@ ENV VLLM_BASE_DIR=/workspace/vllm
 # Set pip cache directory
 ENV PIP_CACHE_DIR=/root/.cache/pip
 ENV UV_CACHE_DIR=/root/.cache/uv
-#ENV UV_SYSTEM_PYTHON=1
-#ENV UV_BREAK_SYSTEM_PACKAGES=1
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_BREAK_SYSTEM_PACKAGES=1
 ENV UV_LINK_MODE=copy
 
 # Install minimal runtime dependencies (NCCL, Python)
 # Note: "devel" tools like cmake/gcc are NOT installed here to save space
-RUN apt update && apt upgrade -y \
-    && apt install -y --allow-change-held-packages --no-install-recommends \
-    python3 python3-pip python3-dev vim curl git wget \
-    libcudnn9-cuda-13 \
-    libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl vim git \
     libxcb1 \
     && rm -rf /var/lib/apt/lists/* \
-    && pip install uv
+    && pip install uv && pip uninstall -y flash-attn
 
 # Set final working directory
 WORKDIR $VLLM_BASE_DIR
@@ -273,11 +256,8 @@ RUN mkdir -p tiktoken_encodings && \
 # Copy artifacts from Builder Stage
 # We copy the python packages and executables
 # No need to copy source code, as it's already in the site-packages
-COPY --from=builder /workspace/python-venv /workspace/python-venv
-
-# Activate virtual environment
-ENV VIRTUAL_ENV=/workspace/python-venv
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+COPY --from=builder /usr/local/lib/python3.12/dist-packages /usr/local/lib/python3.12/dist-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
 
 # Setup Env for Runtime
 ENV TORCH_CUDA_ARCH_LIST="12.0;12.1"
@@ -293,10 +273,3 @@ RUN chmod +x $VLLM_BASE_DIR/run-cluster-node.sh
 # Final extra deps
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
     uv pip install ray[default]
-
-# Create entrypoint script to activate venv
-# RUN echo '#!/bin/bash\nsource /workspace/python-venv/bin/activate\nexec "$@"' > /entrypoint.sh && \
-#     chmod +x /entrypoint.sh
-# ENTRYPOINT ["/entrypoint.sh"]
-
-# CMD ["bash"]
\ No newline at end of file