Use mesh-optimized NCCL
This commit is contained in:
18
Dockerfile
18
Dockerfile
@@ -39,7 +39,7 @@ RUN apt update && \
|
||||
libcudnn9-cuda-13 libcudnn9-dev-cuda-13 \
|
||||
python3-dev python3-pip git wget \
|
||||
libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \
|
||||
ccache \
|
||||
ccache devscripts debhelper fakeroot \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& pip install uv
|
||||
|
||||
@@ -59,14 +59,19 @@ ENV CCACHE_COMPRESS=1
|
||||
ENV CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||
ENV CMAKE_CUDA_COMPILER_LAUNCHER=ccache
|
||||
|
||||
# Setup Workspace
|
||||
WORKDIR $VLLM_BASE_DIR
|
||||
|
||||
# 2. Set Environment Variables
|
||||
ARG TORCH_CUDA_ARCH_LIST="12.1a"
|
||||
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
|
||||
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
|
||||
|
||||
# Setup Workspace
|
||||
WORKDIR $VLLM_BASE_DIR
|
||||
|
||||
# Build NCCL with mesh support (TODO: only do this when the target arch is 12.1) - the .deb artifacts end up in $VLLM_BASE_DIR/nccl/build/pkg/deb
|
||||
RUN git clone -b dgxspark-3node-ring https://github.com/zyang-dev/nccl.git && \
|
||||
cd nccl && make -j ${BUILD_JOBS} src.build NVCC_GENCODE="-gencode=arch=compute_121,code=sm_121" && \
|
||||
make pkg.debian.build && apt install -y --no-install-recommends ./build/pkg/deb/*.deb
|
||||
|
||||
# =========================================================
|
||||
# STAGE 2: FlashInfer Builder
|
||||
# =========================================================
|
||||
@@ -234,13 +239,16 @@ ENV UV_SYSTEM_PYTHON=1
|
||||
ENV UV_BREAK_SYSTEM_PACKAGES=1
|
||||
ENV UV_LINK_MODE=copy
|
||||
|
||||
# Mount the NCCL .deb packages built in the base builder image
|
||||
# Install runtime dependencies
|
||||
RUN apt update && \
|
||||
RUN --mount=type=bind,from=base,source=/workspace/vllm/nccl/build/pkg/deb,target=/workspace/nccl-pkg \
|
||||
apt update && \
|
||||
apt install -y --no-install-recommends \
|
||||
python3 python3-pip python3-dev vim curl git wget \
|
||||
libcudnn9-cuda-13 \
|
||||
libnccl-dev libnccl2 libibverbs1 libibverbs-dev rdma-core \
|
||||
libxcb1 \
|
||||
&& cd /workspace/nccl-pkg && apt install -y --no-install-recommends --allow-downgrades ./*.deb \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& pip install uv
|
||||
|
||||
|
||||
Reference in New Issue
Block a user