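Fixes: install PyTorch nightly (cu130) and the shared Python dependencies once, early in the build; remove the duplicate installs from the later FlashInfer and vLLM steps; switch the remaining install to the nightly cu130 index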
This commit is contained in:
18
Dockerfile
18
Dockerfile
@@ -43,6 +43,11 @@ RUN apt update && \
|
|||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& pip install uv
|
&& pip install uv
|
||||||
|
|
||||||
|
# Additional deps
|
||||||
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
|
uv pip install torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/nightly/cu130 && \
|
||||||
|
uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" filelock pynvml requests tqdm
|
||||||
|
|
||||||
# Configure Ccache for CUDA/C++
|
# Configure Ccache for CUDA/C++
|
||||||
ENV PATH=/usr/lib/ccache:$PATH
|
ENV PATH=/usr/lib/ccache:$PATH
|
||||||
ENV CCACHE_DIR=/root/.ccache
|
ENV CCACHE_DIR=/root/.ccache
|
||||||
@@ -76,9 +81,6 @@ ARG FLASHINFER_REF=main
|
|||||||
# Change this argument to force a re-download of FlashInfer
|
# Change this argument to force a re-download of FlashInfer
|
||||||
ARG CACHEBUST_FLASHINFER=1
|
ARG CACHEBUST_FLASHINFER=1
|
||||||
|
|
||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
|
||||||
uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2"
|
|
||||||
|
|
||||||
# Smart Git Clone (Fetch changes instead of full re-clone)
|
# Smart Git Clone (Fetch changes instead of full re-clone)
|
||||||
RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
|
RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
|
||||||
cd /repo-cache && \
|
cd /repo-cache && \
|
||||||
@@ -135,10 +137,6 @@ ARG TORCH_CUDA_ARCH_LIST="12.1a"
|
|||||||
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
|
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
|
||||||
WORKDIR $VLLM_BASE_DIR
|
WORKDIR $VLLM_BASE_DIR
|
||||||
|
|
||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
|
||||||
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130 && \
|
|
||||||
uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" triton
|
|
||||||
|
|
||||||
# --- VLLM SOURCE CACHE BUSTER ---
|
# --- VLLM SOURCE CACHE BUSTER ---
|
||||||
ARG CACHEBUST_VLLM=1
|
ARG CACHEBUST_VLLM=1
|
||||||
|
|
||||||
@@ -256,10 +254,10 @@ RUN mkdir -p tiktoken_encodings && \
|
|||||||
|
|
||||||
ARG PRE_TRANSFORMERS=0
|
ARG PRE_TRANSFORMERS=0
|
||||||
|
|
||||||
# Install dependencies
|
# Install deps
|
||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu130 && \
|
uv pip install torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/nightly/cu130 && \
|
||||||
uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" triton
|
uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2"
|
||||||
|
|
||||||
# Install wheels from host ./wheels/ (bind-mounted from build context — no layer bloat)
|
# Install wheels from host ./wheels/ (bind-mounted from build context — no layer bloat)
|
||||||
# With --tf5: override vLLM's transformers<5 constraint to get transformers>=5
|
# With --tf5: override vLLM's transformers<5 constraint to get transformers>=5
|
||||||
|
|||||||
Reference in New Issue
Block a user