Merge branch 'main' into 3-node
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
.env
|
.env
|
||||||
|
build-metadata.yaml
|
||||||
@@ -250,7 +250,7 @@ RUN --mount=type=bind,from=base,source=/workspace/vllm/nccl/build/pkg/deb,target
|
|||||||
libxcb1 \
|
libxcb1 \
|
||||||
&& cd /workspace/nccl-pkg && apt install -y --no-install-recommends --allow-downgrades ./*.deb \
|
&& cd /workspace/nccl-pkg && apt install -y --no-install-recommends --allow-downgrades ./*.deb \
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& pip install uv
|
&& pip install uv
|
||||||
|
|
||||||
# Set final working directory
|
# Set final working directory
|
||||||
WORKDIR $VLLM_BASE_DIR
|
WORKDIR $VLLM_BASE_DIR
|
||||||
@@ -265,7 +265,7 @@ ARG PRE_TRANSFORMERS=0
|
|||||||
# Install deps
|
# Install deps
|
||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
uv pip install torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/nightly/cu130 && \
|
uv pip install torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/nightly/cu130 && \
|
||||||
uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2"
|
uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2"
|
||||||
|
|
||||||
# Install wheels from host ./wheels/ (bind-mounted from build context — no layer bloat)
|
# Install wheels from host ./wheels/ (bind-mounted from build context — no layer bloat)
|
||||||
# With --tf5: override vLLM's transformers<5 constraint to get transformers>=5
|
# With --tf5: override vLLM's transformers<5 constraint to get transformers>=5
|
||||||
@@ -294,4 +294,7 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
|||||||
|
|
||||||
# Fix NCCL
|
# Fix NCCL
|
||||||
RUN rm /usr/local/lib/python3.12/dist-packages/nvidia/nccl/lib/libnccl.so.2 && \
|
RUN rm /usr/local/lib/python3.12/dist-packages/nvidia/nccl/lib/libnccl.so.2 && \
|
||||||
ln -s /usr/lib/aarch64-linux-gnu/libnccl.so.2 /usr/local/lib/python3.12/dist-packages/nvidia/nccl/lib/libnccl.so.2
|
ln -s /usr/lib/aarch64-linux-gnu/libnccl.so.2 /usr/local/lib/python3.12/dist-packages/nvidia/nccl/lib/libnccl.so.2
|
||||||
|
|
||||||
|
# Build metadata (generated by build-and-copy.sh)
|
||||||
|
COPY build-metadata.yaml /workspace/build-metadata.yaml
|
||||||
|
|||||||
@@ -274,6 +274,9 @@ ENV PATH=$VLLM_BASE_DIR:$PATH
|
|||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
uv pip install ray[default] fastsafetensors nvidia-nvshmem-cu13
|
uv pip install ray[default] fastsafetensors nvidia-nvshmem-cu13
|
||||||
|
|
||||||
|
# Build metadata (generated by build-and-copy.sh)
|
||||||
|
COPY build-metadata.yaml /workspace/build-metadata.yaml
|
||||||
|
|
||||||
# If not compiling Triton
|
# If not compiling Triton
|
||||||
# remove triton-kernels as they are not compatible with this vLLM version yet
|
# remove triton-kernels as they are not compatible with this vLLM version yet
|
||||||
RUN uv pip uninstall triton-kernels
|
RUN uv pip uninstall triton-kernels
|
||||||
|
|||||||
@@ -33,10 +33,42 @@ cleanup() {
|
|||||||
echo "Cleaning up temporary image $TMP_IMAGE"
|
echo "Cleaning up temporary image $TMP_IMAGE"
|
||||||
rm -f "$TMP_IMAGE"
|
rm -f "$TMP_IMAGE"
|
||||||
fi
|
fi
|
||||||
|
rm -f ./build-metadata.yaml
|
||||||
}
|
}
|
||||||
|
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
|
# Render $1 as a double-quoted YAML scalar on stdout.
# Backslashes and double quotes are escaped so the value always parses as a
# string (never as a float, int, bool, alias or comment).
_yaml_quote() {
    local value="$1"
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    printf '"%s"' "$value"
}

# Write ./build-metadata.yaml (relative to the current directory) describing
# the image that is about to be built.
#
# Arguments:
#   $1 - Dockerfile whose "FROM <image> AS runner" line names the base image
#   $2 - vLLM version        (empty -> "unknown")
#   $3 - vLLM commit         (empty -> "unknown")
#   $4 - FlashInfer commit   (empty -> "unknown")
#   $5 - vLLM ref used for the build
#   $6 - transformers-5 flag, written verbatim
#   $7 - experimental MXFP4 flag, written verbatim
#   $8 - vLLM PR list
# Globals read:
#   GPU_ARCH_LIST (empty/unset -> "unknown"), BUILD_JOBS
# Outputs:
#   ./build-metadata.yaml; a confirmation line on stdout; warnings on stderr.
# Returns:
#   0 on success, 1 if the metadata file could not be written.
generate_build_metadata() {
    local dockerfile="$1"
    local vllm_version="$2"
    local vllm_commit="$3"
    local flashinfer_commit="$4"
    local vllm_ref="$5"
    local pre_transformers="$6"
    local exp_mxfp4="$7"
    local vllm_prs="$8"

    # Base image of the first "... AS runner" stage. FROM may carry option
    # flags (e.g. --platform=...) before the image, so skip fields that start
    # with "--" instead of blindly taking field 2.
    local base_image=""
    if [ -r "$dockerfile" ]; then
        base_image=$(awk '/^FROM .* AS runner/ {
            for (i = 2; i <= NF; i++) {
                if ($i !~ /^--/) { print $i; break }
            }
            exit
        }' "$dockerfile")
    else
        echo "Warning: cannot read $dockerfile; base_image will be recorded as unknown" >&2
    fi

    # Decide the fallback outside the substitution: on failure git may still
    # print text to stdout, which must not end up in the recorded value.
    local build_script_commit
    build_script_commit=$(git rev-parse HEAD 2>/dev/null) || build_script_commit="unknown"

    # String fields are quoted so values such as "12.0", "0.11" or a short
    # SHA like "1234e56" are not re-typed as numbers by YAML parsers.
    # build_date and the flag/number fields are deliberately left bare.
    if ! cat > ./build-metadata.yaml <<EOF
build_date: $(date -u +"%Y-%m-%dT%H:%M:%SZ")
build_script_commit: $(_yaml_quote "$build_script_commit")
vllm_version: $(_yaml_quote "${vllm_version:-unknown}")
vllm_commit: $(_yaml_quote "${vllm_commit:-unknown}")
flashinfer_commit: $(_yaml_quote "${flashinfer_commit:-unknown}")
gpu_arch: $(_yaml_quote "${GPU_ARCH_LIST:-unknown}")
base_image: $(_yaml_quote "${base_image:-unknown}")
build_args:
  vllm_ref: $(_yaml_quote "$vllm_ref")
  transformers_5: ${pre_transformers}
  exp_mxfp4: ${exp_mxfp4}
  vllm_prs: $(_yaml_quote "$vllm_prs")
  build_jobs: ${BUILD_JOBS:-}
EOF
    then
        echo "Error: failed to write ./build-metadata.yaml" >&2
        return 1
    fi

    echo "Generated build-metadata.yaml"
}
|
||||||
|
|
||||||
add_copy_hosts() {
|
add_copy_hosts() {
|
||||||
local token part
|
local token part
|
||||||
for token in "$@"; do
|
for token in "$@"; do
|
||||||
@@ -365,6 +397,13 @@ RUNNER_BUILD_TIME=0
|
|||||||
if [ "$NO_BUILD" = false ]; then
|
if [ "$NO_BUILD" = false ]; then
|
||||||
if [ "$EXP_MXFP4" = true ]; then
|
if [ "$EXP_MXFP4" = true ]; then
|
||||||
echo "Building with experimental MXFP4 support..."
|
echo "Building with experimental MXFP4 support..."
|
||||||
|
|
||||||
|
# Generate build metadata YAML for mxfp4 build
|
||||||
|
MXFP4_VLLM_SHA=$(grep -m1 '^ARG VLLM_SHA=' Dockerfile.mxfp4 | cut -d= -f2)
|
||||||
|
MXFP4_FLASHINFER_SHA=$(grep -m1 '^ARG FLASHINFER_SHA=' Dockerfile.mxfp4 | cut -d= -f2)
|
||||||
|
generate_build_metadata Dockerfile.mxfp4 "unknown" "$MXFP4_VLLM_SHA" "$MXFP4_FLASHINFER_SHA" \
|
||||||
|
"mxfp4-pinned" "false" "true" ""
|
||||||
|
|
||||||
CMD=("docker" "build" "-t" "$IMAGE_TAG" "${COMMON_BUILD_FLAGS[@]}" "-f" "Dockerfile.mxfp4" ".")
|
CMD=("docker" "build" "-t" "$IMAGE_TAG" "${COMMON_BUILD_FLAGS[@]}" "-f" "Dockerfile.mxfp4" ".")
|
||||||
echo "Building image with command: ${CMD[*]}"
|
echo "Building image with command: ${CMD[*]}"
|
||||||
BUILD_START=$(date +%s)
|
BUILD_START=$(date +%s)
|
||||||
@@ -496,6 +535,15 @@ if [ "$NO_BUILD" = false ]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Generate build metadata YAML
|
||||||
|
VLLM_VERSION=$(ls ./wheels/vllm-*.whl 2>/dev/null | head -1 | sed 's|.*/vllm-||;s|-.*||')
|
||||||
|
VLLM_COMMIT=""
|
||||||
|
[ -f "./wheels/.vllm-commit" ] && VLLM_COMMIT=$(cat ./wheels/.vllm-commit)
|
||||||
|
FLASHINFER_COMMIT=""
|
||||||
|
[ -f "./wheels/.flashinfer-commit" ] && FLASHINFER_COMMIT=$(cat ./wheels/.flashinfer-commit)
|
||||||
|
generate_build_metadata Dockerfile "$VLLM_VERSION" "$VLLM_COMMIT" "$FLASHINFER_COMMIT" \
|
||||||
|
"$VLLM_REF" "$PRE_TRANSFORMERS" "false" "$VLLM_PRS"
|
||||||
|
|
||||||
RUNNER_CMD=("docker" "build"
|
RUNNER_CMD=("docker" "build"
|
||||||
"-t" "$IMAGE_TAG"
|
"-t" "$IMAGE_TAG"
|
||||||
"${COMMON_BUILD_FLAGS[@]}")
|
"${COMMON_BUILD_FLAGS[@]}")
|
||||||
|
|||||||
Reference in New Issue
Block a user