Use mesh-optimized NCCL

This commit is contained in:
Eugene Rakhmatulin
2026-03-23 15:43:18 -07:00
parent 9e089acf2b
commit 990a7b3837

View File

@@ -39,7 +39,7 @@ RUN apt update && \
libcudnn9-cuda-13 libcudnn9-dev-cuda-13 \ libcudnn9-cuda-13 libcudnn9-dev-cuda-13 \
python3-dev python3-pip git wget \ python3-dev python3-pip git wget \
libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \ libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \
ccache \ ccache devscripts debhelper fakeroot \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& pip install uv && pip install uv
@@ -59,14 +59,19 @@ ENV CCACHE_COMPRESS=1
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache
# Setup Workspace
WORKDIR $VLLM_BASE_DIR
# 2. Set Environment Variables # 2. Set Environment Variables
ARG TORCH_CUDA_ARCH_LIST="12.1a" ARG TORCH_CUDA_ARCH_LIST="12.1a"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
# Setup Workspace
WORKDIR $VLLM_BASE_DIR
# Build NCCL with mesh support (TODO: only do this when the target arch is 12.1) - the .deb artifacts will be in $VLLM_BASE_DIR/nccl/build/pkg/deb
RUN git clone -b dgxspark-3node-ring https://github.com/zyang-dev/nccl.git && \
cd nccl && make -j ${BUILD_JOBS} src.build NVCC_GENCODE="-gencode=arch=compute_121,code=sm_121" && \
make pkg.debian.build && apt install -y --no-install-recommends ./build/pkg/deb/*.deb
# ========================================================= # =========================================================
# STAGE 2: FlashInfer Builder # STAGE 2: FlashInfer Builder
# ========================================================= # =========================================================
@@ -234,13 +239,16 @@ ENV UV_SYSTEM_PYTHON=1
ENV UV_BREAK_SYSTEM_PACKAGES=1 ENV UV_BREAK_SYSTEM_PACKAGES=1
ENV UV_LINK_MODE=copy ENV UV_LINK_MODE=copy
# Bind-mount the NCCL .deb packages built in the base stage so they can be installed over the distro NCCL
# Install runtime dependencies # Install runtime dependencies
RUN apt update && \ RUN --mount=type=bind,from=base,source=/workspace/vllm/nccl/build/pkg/deb,target=/workspace/nccl-pkg \
apt update && \
apt install -y --no-install-recommends \ apt install -y --no-install-recommends \
python3 python3-pip python3-dev vim curl git wget \ python3 python3-pip python3-dev vim curl git wget \
libcudnn9-cuda-13 \ libcudnn9-cuda-13 \
libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \ libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \
libxcb1 \ libxcb1 \
&& cd /workspace/nccl-pkg && apt install -y --no-install-recommends --allow-downgrades ./*.deb \
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& pip install uv && pip install uv