Merge branch 'flashinfer-0.6.0-pre'
This commit is contained in:
14
Dockerfile
14
Dockerfile
@@ -38,6 +38,10 @@ RUN apt update && apt upgrade -y \
 # Configure Ccache for CUDA/C++
 ENV PATH=/usr/lib/ccache:$PATH
 ENV CCACHE_DIR=/root/.ccache
 # Limit ccache size to prevent unbounded growth (e.g. 50G)
 ENV CCACHE_MAXSIZE=50G
 # Enable compression to save space
 ENV CCACHE_COMPRESS=1
 # Tell CMake to use ccache for compilation
 ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
 ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache
@@ -69,9 +73,9 @@ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
 
 # Install FlashInfer packages
 RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
+    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 --pre && \
     pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
 
 # =========================================================
@@ -132,7 +136,9 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
     if [ "${VLLM_REF}" = "main" ]; then \
         git reset --hard origin/main; \
     fi && \
-    git submodule update --init --recursive; \
+    git submodule update --init --recursive && \
+    # Optimize git repo size
+    git gc --auto; \
     fi && \
     # 3. Copy the updated code from the cache to the actual container workspace
     # We use 'cp -a' to preserve permissions
Reference in New Issue
Block a user