diff --git a/Dockerfile b/Dockerfile
index 6221a4c..ce376be 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@
 # =========================================================
-# STAGE 1: Builder (Builds vLLM from Source)
+# STAGE 1: Base (Builds vLLM from Source)
 # =========================================================
-FROM nvidia/cuda:13.0.2-devel-ubuntu24.04 AS builder
+FROM nvidia/cuda:13.0.2-devel-ubuntu24.04 AS base
 
 # Set non-interactive frontend to prevent apt prompts
 ENV DEBIAN_FRONTEND=noninteractive
@@ -38,6 +38,9 @@ WORKDIR $VLLM_BASE_DIR
 ENV TORCH_CUDA_ARCH_LIST=12.1a
 ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
 
+# Initial Triton repo clone - placed before all cache busters so this layer stays cached; the wanted commit is fetched and checked out later
+RUN git clone https://github.com/triton-lang/triton.git
+
 # --- CACHE BUSTER ---
 # Change this argument to force a re-download of PyTorch/FlashInfer
 ARG CACHEBUST_DEPS=1
@@ -112,16 +115,13 @@ RUN --mount=type=cache,id=ccache,target=/root/.ccache \
 
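-# Install latest Triton from main - override version pulled from dependencies
+# Install Triton at the TRITON_SHA commit (e.g. latest main) - override version pulled from dependencies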
 
-# Initial clone (Cached forever)
-RUN git clone https://github.com/triton-lang/triton.git
-
 # We expect TRITON_SHA to be passed from the command line to break the cache
 # Set to v3.5.1 commit by default
 ARG TRITON_SHA=0add68262ab0a2e33b84524346cb27cbb2787356
 
-# This only runs if TRITON_SHA differs from the last build
+# This re-runs whenever TRITON_SHA (or any earlier layer) differs from the last build
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/ccache \
+RUN --mount=type=cache,id=ccache,target=/root/.ccache \
+    --mount=type=cache,id=pip-cache,target=/root/.cache/pip \
     cd triton && \
     git fetch origin && \
     git checkout ${TRITON_SHA} && \