From 8385506c5e1fe9003108d770bd20a84de58bfcef Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Fri, 20 Mar 2026 23:51:21 -0700
Subject: [PATCH] Fixes

---
 Dockerfile | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f664c95..2a56775 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -43,6 +43,11 @@ RUN apt update && \
     && rm -rf /var/lib/apt/lists/* \
     && pip install uv
 
+# Additional deps
+RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
+    uv pip install torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/nightly/cu130 && \
+    uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" filelock pynvml requests tqdm
+
 # Configure Ccache for CUDA/C++
 ENV PATH=/usr/lib/ccache:$PATH
 ENV CCACHE_DIR=/root/.ccache
@@ -76,9 +81,6 @@ ARG FLASHINFER_REF=main
 # Change this argument to force a re-download of FlashInfer
 ARG CACHEBUST_FLASHINFER=1
 
-RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2"
-
 # Smart Git Clone (Fetch changes instead of full re-clone)
 RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
     cd /repo-cache && \
@@ -135,10 +137,6 @@ ARG TORCH_CUDA_ARCH_LIST="12.1a"
 ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
 WORKDIR $VLLM_BASE_DIR
 
-RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130 && \
-    uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" triton
-
 # --- VLLM SOURCE CACHE BUSTER ---
 ARG CACHEBUST_VLLM=1
 
@@ -256,10 +254,10 @@ RUN mkdir -p tiktoken_encodings && \
 
 ARG PRE_TRANSFORMERS=0
 
-# Install dependencies
+# Install deps
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130 && \
-    uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" triton
+    uv pip install torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/nightly/cu130 && \
+    uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2"
 
 # Install wheels from host ./wheels/ (bind-mounted from build context — no layer bloat)
 # With --tf5: override vLLM's transformers<5 constraint to get transformers>=5