Cache cubins during build for reuse

2026-02-13 19:30:28 -08:00
parent 3470345624
commit 4214d4fefe
4 changed files with 34 additions and 2 deletions
--- a/Dockerfile.mxfp4
+++ b/Dockerfile.mxfp4
@@ -276,7 +276,7 @@ RUN chmod +x $VLLM_BASE_DIR/run-cluster-node.sh

 # Final extra deps
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
-    uv pip install ray[default] fastsafetensors
+    uv pip install ray[default] fastsafetensors nvidia-nvshmem-cu13

 # If not compiling Triton
 # remove triton-kernels as they are not compatible with this vLLM version yet