diff --git a/Dockerfile b/Dockerfile index b15b439..c9465db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,11 +71,13 @@ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \ RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \ pip install xgrammar fastsafetensors +ARG FLASHINFER_PRE="" + # Install FlashInfer packages RUN --mount=type=cache,id=pip-cache,target=/root/.cache/pip \ - pip install flashinfer-python --no-deps --index-url https://flashinfer.ai/whl --pre && \ - pip install flashinfer-cubin --index-url https://flashinfer.ai/whl --pre && \ - pip install flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 --pre && \ + pip install ${FLASHINFER_PRE} flashinfer-python --no-deps --index-url https://flashinfer.ai/whl && \ + pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \ + pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \ pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate # ========================================================= diff --git a/Dockerfile.wheels b/Dockerfile.wheels index da6ed1f..4dbc20d 100644 --- a/Dockerfile.wheels +++ b/Dockerfile.wheels @@ -61,11 +61,13 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ # Apply in site-packages RUN patch -p1 -d /usr/local/lib/python3.12/dist-packages < ${VLLM_BASE_DIR}/fastsafetensors.patch +ARG FLASHINFER_PRE="" + # Install flashinfer helper packages RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ - uv pip install --system --break-system-packages flashinfer-python -U --no-deps --index-url https://flashinfer.ai/whl && \ - uv pip install --system --break-system-packages flashinfer-cubin --index-url https://flashinfer.ai/whl && \ - uv pip install --system --break-system-packages flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 + uv pip install --system --break-system-packages ${FLASHINFER_PRE} flashinfer-python -U 
--no-deps --index-url https://flashinfer.ai/whl && \ + uv pip install --system --break-system-packages ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \ + uv pip install --system --break-system-packages ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 # Setup Env for Runtime ENV TORCH_CUDA_ARCH_LIST=12.1a diff --git a/README.md b/README.md index dbac2b1..fdbbc44 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ The Dockerfile builds from the main branch of VLLM, so depending on when you run ### 2025-12-20 +- Limited ccache to 50G when building from source to reduce build cache size. +- Added `--pre-flashinfer` flag to `build-and-copy.sh` to use pre-release versions of FlashInfer. - Added `--use-wheels [mode]` flag to `build-and-copy.sh`. - Allows building the container using pre-built vLLM wheels instead of compiling from source. - The resulting Docker container size is reduced considerably (14GB vs 24GB) @@ -187,6 +189,7 @@ Using a different username: | `--triton-ref <ref>` | Triton commit SHA, branch or tag (default: 'v3.5.1') | | `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: 'main') | | `--use-wheels [mode]` | Use pre-built vLLM wheels. Mode: `nightly` (default) or `release`. | +| `--pre-flashinfer` | Use pre-release versions of FlashInfer. | | `-c, --copy-to <hosts>` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). | | `--copy-to-host` | Alias for `--copy-to` (backwards compatibility). | | `--copy-parallel` | Copy to all specified hosts concurrently. 
| diff --git a/build-and-copy.sh b/build-and-copy.sh index eb26cc5..01b6d16 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -16,6 +16,7 @@ VLLM_REF="main" TMP_IMAGE="" PARALLEL_COPY=false USE_WHEELS_MODE="" +PRE_FLASHINFER=false cleanup() { if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then @@ -69,6 +70,7 @@ usage() { echo " -j, --build-jobs : Number of concurrent build jobs (default: \${BUILD_JOBS})" echo " -u, --user : Username for ssh command (default: \$USER)" echo " --use-wheels [mode] : Use prebuilt vLLM wheels. Mode can be 'nightly' (default) or 'release'." + echo " --pre-flashinfer : Use pre-release versions of FlashInfer" echo " --no-build : Skip building, only copy image (requires --copy-to)" echo " -h, --help : Show this help message" exit 1 @@ -129,6 +131,7 @@ while [[ "$#" -gt 0 ]]; do USE_WHEELS_MODE="nightly" fi ;; + --pre-flashinfer) PRE_FLASHINFER=true ;; --no-build) NO_BUILD=true ;; -h|--help) usage ;; *) echo "Unknown parameter passed: $1"; usage ;; @@ -181,6 +184,11 @@ if [ "$NO_BUILD" = false ]; then # Add BUILD_JOBS to build arguments CMD+=("--build-arg" "BUILD_JOBS=$BUILD_JOBS") + if [ "$PRE_FLASHINFER" = true ]; then + echo "Using pre-release FlashInfer..." + CMD+=("--build-arg" "FLASHINFER_PRE=--pre") + fi + # Add build context CMD+=(".")