# Patch summary (text before the "diff --git" header is ignored by git apply / patch):
#   1. Cap the ccache directory at 50G and turn on ccache compression.
#   2. Add --pre to the three FlashInfer pip installs so pre-release wheels are accepted.
#   3. Run `git gc --auto` on the cached repo after updating submodules.
# NOTE(review): this patch was recovered from a copy whose newlines and leading
# indentation had been collapsed. Line breaks are rebuilt from the hunk headers;
# indentation of context lines is a best guess -- confirm against the target
# Dockerfile, or apply with `git apply --ignore-whitespace`.
diff --git a/Dockerfile b/Dockerfile
index 2285d93..b15b439 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,6 +38,10 @@ RUN apt update && apt upgrade -y \
 # Configure Ccache for CUDA/C++
 ENV PATH=/usr/lib/ccache:$PATH
 ENV CCACHE_DIR=/root/.ccache
+# Limit ccache size to prevent unbounded growth (e.g. 50G)
+ENV CCACHE_MAXSIZE=50G
+# Enable compression to save space
+ENV CCACHE_COMPRESS=1
 # Tell CMake to use ccache for compilation
 ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
 ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache
@@ -69,9 +73,9 @@ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
 
 # Install FlashInfer packages
 RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
+    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 --pre && \
     pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
 
 # =========================================================
@@ -132,7 +136,9 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
         if [ "${VLLM_REF}" = "main" ]; then \
             git reset --hard origin/main; \
         fi && \
-        git submodule update --init --recursive; \
+        git submodule update --init --recursive && \
+        # Optimize git repo size
+        git gc --auto; \
     fi && \
     # 3. Copy the updated code from the cache to the actual container workspace
     # We use 'cp -a' to preserve permissions