diff --git a/Dockerfile b/Dockerfile
index 2285d93..b15b439 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,6 +38,10 @@ RUN apt update && apt upgrade -y \
 # Configure Ccache for CUDA/C++
 ENV PATH=/usr/lib/ccache:$PATH
 ENV CCACHE_DIR=/root/.ccache
+# Cap the ccache size at 50G to prevent unbounded growth
+ENV CCACHE_MAXSIZE=50G
+# Enable compression to save space
+ENV CCACHE_COMPRESS=1
 # Tell CMake to use ccache for compilation
 ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
 ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache
@@ -69,9 +73,9 @@ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
 
 # Install FlashInfer packages
 RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
-    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl && \
-    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
+    pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-cubin --index-url https://flashinfer.ai/whl --pre && \
+    pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 --pre && \
     pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
 
 # =========================================================
@@ -132,7 +136,9 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
         if [ "${VLLM_REF}" = "main" ]; then \
             git reset --hard origin/main; \
         fi && \
-        git submodule update --init --recursive; \
+        git submodule update --init --recursive && \
+        # Repack if needed, in the foreground so gc is done before the copy
+        git -c gc.autoDetach=false gc --auto; \
     fi && \
     # 3. Copy the updated code from the cache to the actual container workspace
     # We use 'cp -a' to preserve permissions