From fbb1bf73d58f616b3edd60c1f66248db5eeece90 Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Sat, 20 Dec 2025 13:28:06 -0800
Subject: [PATCH 1/2] Switching to flashinfer 0.6.x pre-release wheels

---
Reviewer note: line breaks, blank context lines and indentation were
reconstructed from the hunk headers. If this does not apply cleanly, retry
with `git am --ignore-whitespace` and verify against the target Dockerfile.

 Dockerfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 2285d93..b257b93 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -69,9 +69,9 @@ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
 
 # Install FlashInfer packages
 RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
+    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 --pre && \
     pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
 
 # =========================================================

From a83200573a700eb90f4a76d4fa26c2be020b551c Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Sat, 20 Dec 2025 15:29:37 -0800
Subject: [PATCH 2/2] Enhance Dockerfile: limit ccache size, enable compression, and optimize git repo size

---
Reviewer note: line breaks and indentation were reconstructed from the hunk
headers. If this does not apply cleanly, retry with
`git am --ignore-whitespace` and verify against the target Dockerfile.

 Dockerfile | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index b257b93..b15b439 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,6 +38,10 @@ RUN apt update && apt upgrade -y \
 # Configure Ccache for CUDA/C++
 ENV PATH=/usr/lib/ccache:$PATH
 ENV CCACHE_DIR=/root/.ccache
+# Limit ccache size to prevent unbounded growth (e.g. 50G)
+ENV CCACHE_MAXSIZE=50G
+# Enable compression to save space
+ENV CCACHE_COMPRESS=1
 # Tell CMake to use ccache for compilation
 ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
 ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache
@@ -132,7 +136,9 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
         if [ "${VLLM_REF}" = "main" ]; then \
             git reset --hard origin/main; \
         fi && \
-        git submodule update --init --recursive; \
+        git submodule update --init --recursive && \
+        # Optimize git repo size
+        git gc --auto; \
     fi && \
     # 3. Copy the updated code from the cache to the actual container workspace
     # We use 'cp -a' to preserve permissions