Caching cubins during build for reuse
This commit is contained in:
@@ -276,7 +276,7 @@ RUN chmod +x $VLLM_BASE_DIR/run-cluster-node.sh
|
||||
|
||||
# Final extra deps
|
||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||
-uv pip install ray[default] fastsafetensors
|
||||
+uv pip install ray[default] fastsafetensors nvidia-nvshmem-cu13
|
||||
|
||||
# If not compiling Triton
|
||||
# remove triton-kernels as they are not compatible with this vLLM version yet
|
||||
|
||||
Reference in New Issue
Block a user