diff --git a/.gitignore b/.gitignore index 2eea525..6a0a8ca 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -.env \ No newline at end of file +.env +build-metadata.yaml \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index a9a2485..8401e85 100644 --- a/Dockerfile +++ b/Dockerfile @@ -250,7 +250,7 @@ RUN --mount=type=bind,from=base,source=/workspace/vllm/nccl/build/pkg/deb,target libxcb1 \ && cd /workspace/nccl-pkg && apt install -y --no-install-recommends --allow-downgrades ./*.deb \ && rm -rf /var/lib/apt/lists/* \ - && pip install uv + && pip install uv # Set final working directory WORKDIR $VLLM_BASE_DIR @@ -265,7 +265,7 @@ ARG PRE_TRANSFORMERS=0 # Install deps RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ uv pip install torch torchvision torchaudio triton --index-url https://download.pytorch.org/whl/nightly/cu130 && \ - uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" + uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" # Install wheels from host ./wheels/ (bind-mounted from build context — no layer bloat) # With --tf5: override vLLM's transformers<5 constraint to get transformers>=5 @@ -294,4 +294,7 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ # Fix NCCL RUN rm /usr/local/lib/python3.12/dist-packages/nvidia/nccl/lib/libnccl.so.2 && \ - ln -s /usr/lib/aarch64-linux-gnu/libnccl.so.2 /usr/local/lib/python3.12/dist-packages/nvidia/nccl/lib/libnccl.so.2 \ No newline at end of file + ln -s /usr/lib/aarch64-linux-gnu/libnccl.so.2 /usr/local/lib/python3.12/dist-packages/nvidia/nccl/lib/libnccl.so.2 + +# Build metadata (generated by build-and-copy.sh) +COPY build-metadata.yaml /workspace/build-metadata.yaml diff --git a/Dockerfile.mxfp4 b/Dockerfile.mxfp4 index 4b4a895..bedcf4a 100644 --- a/Dockerfile.mxfp4 +++ b/Dockerfile.mxfp4 @@ -274,6 +274,9 @@ ENV PATH=$VLLM_BASE_DIR:$PATH RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ uv pip install ray[default] fastsafetensors nvidia-nvshmem-cu13 +# Build metadata (generated by build-and-copy.sh) +COPY build-metadata.yaml /workspace/build-metadata.yaml + # If not compiling Triton # remove triton-kernels as they are not compatible with this vLLM version yet RUN uv pip uninstall triton-kernels diff --git a/build-and-copy.sh b/build-and-copy.sh index a954fc5..d019ea0 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -33,10 +33,42 @@ cleanup() { echo "Cleaning up temporary image $TMP_IMAGE" rm -f "$TMP_IMAGE" fi + rm -f ./build-metadata.yaml } trap cleanup EXIT +generate_build_metadata() { + local dockerfile="$1" + local vllm_version="$2" + local vllm_commit="$3" + local flashinfer_commit="$4" + local vllm_ref="$5" + local pre_transformers="$6" + local exp_mxfp4="$7" + local vllm_prs="$8" + + local base_image + base_image=$(grep -m1 '^FROM .* AS runner' "$dockerfile" | awk '{print $2}') + + cat > ./build-metadata.yaml </dev/null || echo "unknown") +vllm_version: ${vllm_version:-unknown} +vllm_commit: ${vllm_commit:-unknown} +flashinfer_commit: ${flashinfer_commit:-unknown} +gpu_arch: ${GPU_ARCH_LIST} +base_image: ${base_image:-unknown} +build_args: + vllm_ref: ${vllm_ref} + transformers_5: ${pre_transformers} + exp_mxfp4: ${exp_mxfp4} + vllm_prs: "${vllm_prs}" + build_jobs: ${BUILD_JOBS} +EOF + echo "Generated build-metadata.yaml" +} + add_copy_hosts() { local token part for token in "$@"; do @@ -365,6 +397,13 @@ RUNNER_BUILD_TIME=0 if [ "$NO_BUILD" = false ]; then if [ "$EXP_MXFP4" = true ]; then echo "Building with experimental MXFP4 support..." + + # Generate build metadata YAML for mxfp4 build + MXFP4_VLLM_SHA=$(grep -m1 '^ARG VLLM_SHA=' Dockerfile.mxfp4 | cut -d= -f2) + MXFP4_FLASHINFER_SHA=$(grep -m1 '^ARG FLASHINFER_SHA=' Dockerfile.mxfp4 | cut -d= -f2) + generate_build_metadata Dockerfile.mxfp4 "unknown" "$MXFP4_VLLM_SHA" "$MXFP4_FLASHINFER_SHA" \ + "mxfp4-pinned" "false" "true" "" + CMD=("docker" "build" "-t" "$IMAGE_TAG" "${COMMON_BUILD_FLAGS[@]}" "-f" "Dockerfile.mxfp4" ".") echo "Building image with command: ${CMD[*]}" BUILD_START=$(date +%s) @@ -496,6 +535,15 @@ if [ "$NO_BUILD" = false ]; then exit 1 fi + # Generate build metadata YAML + VLLM_VERSION=$(ls ./wheels/vllm-*.whl 2>/dev/null | head -1 | sed 's|.*/vllm-||;s|-.*||') + VLLM_COMMIT="" + [ -f "./wheels/.vllm-commit" ] && VLLM_COMMIT=$(cat ./wheels/.vllm-commit) + FLASHINFER_COMMIT="" + [ -f "./wheels/.flashinfer-commit" ] && FLASHINFER_COMMIT=$(cat ./wheels/.flashinfer-commit) + generate_build_metadata Dockerfile "$VLLM_VERSION" "$VLLM_COMMIT" "$FLASHINFER_COMMIT" \ + "$VLLM_REF" "$PRE_TRANSFORMERS" "false" "$VLLM_PRS" + RUNNER_CMD=("docker" "build" "-t" "$IMAGE_TAG" "${COMMON_BUILD_FLAGS[@]}")