Merge branch 'flashinfer-0.6.0-pre'

This commit is contained in:
Eugene Rakhmatulin
2025-12-20 23:02:58 -08:00

View File

@@ -38,6 +38,10 @@ RUN apt update && apt upgrade -y \
# Configure Ccache for CUDA/C++ # Configure Ccache for CUDA/C++
ENV PATH=/usr/lib/ccache:$PATH ENV PATH=/usr/lib/ccache:$PATH
ENV CCACHE_DIR=/root/.ccache ENV CCACHE_DIR=/root/.ccache
# Cap the ccache size at 50G to prevent unbounded growth
ENV CCACHE_MAXSIZE=50G
# Enable ccache compression to save disk space
ENV CCACHE_COMPRESS=1
# Tell CMake to use ccache for compilation # Tell CMake to use ccache for compilation
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache
@@ -69,9 +73,9 @@ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
# Install FlashInfer packages # Install FlashInfer packages
RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \ pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl --pre && \
pip install flashinfer-cubin --index-url https://flashinfer.ai/whl && \ pip install flashinfer-cubin --index-url https://flashinfer.ai/whl --pre && \
pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \ pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 --pre && \
pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
# ========================================================= # =========================================================
@@ -132,7 +136,9 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
if [ "${VLLM_REF}" = "main" ]; then \ if [ "${VLLM_REF}" = "main" ]; then \
git reset --hard origin/main; \ git reset --hard origin/main; \
fi && \ fi && \
git submodule update --init --recursive; \ git submodule update --init --recursive && \
# Let git garbage-collect the cached repo when it deems it necessary, to limit its size
git gc --auto; \
fi && \ fi && \
# 3. Copy the updated code from the cache to the actual container workspace # 3. Copy the updated code from the cache to the actual container workspace
# We use 'cp -a' to preserve permissions # We use 'cp -a' to preserve permissions