From ef0f996df62c50992079237ea63eab29e91cbdaa Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Thu, 29 Jan 2026 23:14:43 -0800 Subject: [PATCH 1/5] Bumped base image version; reverted Triton to 3.5.1 --- Dockerfile | 6 +++--- Dockerfile.mxfp4 | 3 ++- Dockerfile.wheels | 2 +- build-and-copy.sh | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index cda048c..5dc3f1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ ARG BUILD_JOBS=16 # ========================================================= # STAGE 1: Base Image (Installs Dependencies) # ========================================================= -FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 AS base +FROM nvidia/cuda:13.1.1-devel-ubuntu24.04 AS base # Build parallemism ARG BUILD_JOBS @@ -102,7 +102,7 @@ RUN git clone https://github.com/triton-lang/triton.git # We expect TRITON_REF to be passed from the command line to break the cache # Set to v3.5.1 tag by default -ARG TRITON_REF=v3.6.0 +ARG TRITON_REF=v3.5.1 WORKDIR $VLLM_BASE_DIR/triton @@ -193,7 +193,7 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ # ========================================================= # STAGE 4: Runner (Transfers only necessary artifacts) # ========================================================= -FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 AS runner +FROM nvidia/cuda:13.1.1-devel-ubuntu24.04 AS runner ENV DEBIAN_FRONTEND=noninteractive ENV PIP_BREAK_SYSTEM_PACKAGES=1 diff --git a/Dockerfile.mxfp4 b/Dockerfile.mxfp4 index 11af230..b528f8d 100644 --- a/Dockerfile.mxfp4 +++ b/Dockerfile.mxfp4 @@ -258,7 +258,8 @@ RUN mkdir -p tiktoken_encodings && \ # No need to copy source code, as it's already in the site-packages COPY --from=builder /workspace/wheels /workspace/wheels RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ - uv pip install /workspace/wheels/*.whl + uv pip install /workspace/wheels/*.whl && \ + rm -rf /workspace/wheels # Setup Env for Runtime ENV TORCH_CUDA_ARCH_LIST="12.0;12.1" diff --git a/Dockerfile.wheels b/Dockerfile.wheels index 77dd45f..e287f1c 100644 --- a/Dockerfile.wheels +++ b/Dockerfile.wheels @@ -1,6 +1,6 @@ # syntax=docker/dockerfile:1.6 -FROM nvidia/cuda:13.1.0-devel-ubuntu24.04 +FROM nvidia/cuda:13.1.1-devel-ubuntu24.04 ENV DEBIAN_FRONTEND=noninteractive ENV PIP_BREAK_SYSTEM_PACKAGES=1 diff --git a/build-and-copy.sh b/build-and-copy.sh index 2030909..0b45eb2 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -11,7 +11,7 @@ REBUILD_VLLM=false COPY_HOSTS=() SSH_USER="$USER" NO_BUILD=false -TRITON_REF="v3.6.0" +TRITON_REF="v3.5.1" VLLM_REF="main" TMP_IMAGE="" PARALLEL_COPY=false From 34bd3ae39c8c546df3d8eaa4b6f9426cf4af0595 Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Fri, 30 Jan 2026 09:07:01 -0800 Subject: [PATCH 2/5] Fixed fetching vllm source code in MXFP4 version. --- Dockerfile.mxfp4 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile.mxfp4 b/Dockerfile.mxfp4 index b528f8d..cb75d8e 100644 --- a/Dockerfile.mxfp4 +++ b/Dockerfile.mxfp4 @@ -175,7 +175,8 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ # 2. Logic: Clone if missing, otherwise Fetch & Reset if [ ! -d "vllm-mxfp4" ]; then \ echo "Cache miss: Cloning vLLM from scratch..." && \ - git clone --recursive ${VLLM_REPO} vllm-mxfp4; \ + git clone --recursive ${VLLM_REPO} vllm-mxfp4 && \ + git checkout ${VLLM_SHA} ; \ else \ echo "Cache hit: Fetching updates..." && \ cd vllm-mxfp4 && \ From 7d232a305a65ad2478be3f7bbe4ae428d6605f31 Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Fri, 30 Jan 2026 10:43:12 -0800 Subject: [PATCH 3/5] Reverted to Torch 2.9.1 in the source build to address #24 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5dc3f1e..67d9748 100644 --- a/Dockerfile +++ b/Dockerfile @@ -70,7 +70,7 @@ ARG CACHEBUST_DEPS=1 # pip reuses previously downloaded wheels. RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ - uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130 + uv pip install torch==2.9.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130 # Install additional dependencies RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ From 3a68e1ca464c2c36cc3c6028811826a24ec0674e Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Fri, 30 Jan 2026 11:20:29 -0800 Subject: [PATCH 4/5] Fixed #25 --- Dockerfile.mxfp4 | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile.mxfp4 b/Dockerfile.mxfp4 index cb75d8e..cea96af 100644 --- a/Dockerfile.mxfp4 +++ b/Dockerfile.mxfp4 @@ -176,6 +176,7 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ if [ ! -d "vllm-mxfp4" ]; then \ echo "Cache miss: Cloning vLLM from scratch..." && \ git clone --recursive ${VLLM_REPO} vllm-mxfp4 && \ + cd vllm-mxfp4 && \ git checkout ${VLLM_SHA} ; \ else \ echo "Cache hit: Fetching updates..." && \ From be19675980d512f3b7420a549df7e179f0fa9727 Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Fri, 30 Jan 2026 11:24:51 -0800 Subject: [PATCH 5/5] Fixed initial vllm source fetch if not using main branch --- Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile b/Dockerfile index 67d9748..a8cb9f3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -141,6 +141,10 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ if [ ! -d "vllm" ]; then \ echo "Cache miss: Cloning vLLM from scratch..." && \ git clone --recursive https://github.com/vllm-project/vllm.git; \ + if [ "$VLLM_REF" != "main" ]; then \ + cd vllm && \ + git checkout ${VLLM_REF}; \ + fi; \ else \ echo "Cache hit: Fetching updates..." && \ cd vllm && \