diff --git a/Dockerfile b/Dockerfile index 8b52576..6d1e11a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -60,36 +60,9 @@ ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas # ========================================================= -# STAGE 2: Builder (Builds Triton, Flashinfer and vLLM from Source) +# STAGE 2: FlashInfer Builder # ========================================================= -FROM base AS builder - - -# # ======= Triton Build ========== - -# # Initial Triton repo clone (cached forever) -# RUN git clone https://github.com/triton-lang/triton.git - -# # We expect TRITON_REF to be passed from the command line to break the cache -# # Set to v3.6.0 by default -# ARG TRITON_REF=v3.6.0 - -# WORKDIR $VLLM_BASE_DIR/triton - -# # This only runs if TRITON_REF differs from the last build -# RUN --mount=type=cache,id=ccache,target=/root/.ccache \ -# --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ -# git fetch origin && \ -# git checkout ${TRITON_REF} && \ -# git submodule sync && \ -# git submodule update --init --recursive && \ -# uv pip install -r python/requirements.txt && \ -# mkdir -p /workspace/wheels && \ -# rm -rf .git && \ -# uv build --no-build-isolation --wheel --out-dir=/workspace/wheels -v . && \ -# uv build --no-build-isolation --wheel --no-index --out-dir=/workspace/wheels python/triton_kernels - -# ======= FlashInfer Build ========== +FROM base AS flashinfer-builder ARG FLASHINFER_CUDA_ARCH_LIST="12.1a" ENV FLASHINFER_CUDA_ARCH_LIST=${FLASHINFER_CUDA_ARCH_LIST} @@ -98,17 +71,14 @@ ARG FLASHINFER_REF=main # --- CACHE BUSTER --- # Change this argument to force a re-download of FlashInfer -ARG CACHEBUST_DEPS=1 +ARG CACHEBUST_FLASHINFER=1 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" -# 4. Smart Git Clone (Fetch changes instead of full re-clone) -# We mount a cache at /repo-cache. This directory persists on your host machine. 
+# Smart Git Clone (Fetch changes instead of full re-clone) RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ - # 1. Go into the persistent cache directory cd /repo-cache && \ - # 2. Logic: Clone if missing, otherwise Fetch & Reset if [ ! -d "flashinfer" ]; then \ echo "Cache miss: Cloning FlashInfer from scratch..." && \ git clone --recursive https://github.com/flashinfer-ai/flashinfer.git; \ @@ -124,55 +94,54 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ (git checkout --detach origin/${FLASHINFER_REF} 2>/dev/null || git checkout ${FLASHINFER_REF}) && \ git submodule update --init --recursive && \ git clean -fdx && \ - # Optimize git repo size git gc --auto; \ fi && \ - # 3. Copy the updated code from the cache to the actual container workspace - # We use 'cp -a' to preserve permissions cp -a /repo-cache/flashinfer /workspace/flashinfer -# Build FlashInfer wheels - WORKDIR /workspace/flashinfer # Apply patch to avoid re-downloading existing cubins COPY flashinfer_cache.patch . -RUN patch -p1 < flashinfer_cache.patch - -# flashinfer-python RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ --mount=type=cache,id=ccache,target=/root/.ccache \ --mount=type=cache,id=cubins-cache,target=/workspace/flashinfer/flashinfer-cubin/flashinfer_cubin/cubins \ + patch -p1 < flashinfer_cache.patch && \ + # flashinfer-python sed -i -e 's/license = "Apache-2.0"/license = { text = "Apache-2.0" }/' -e '/license-files/d' pyproject.toml && \ + uv build --no-build-isolation --wheel . --out-dir=/workspace/wheels -v && \ + # flashinfer-cubin + cd flashinfer-cubin && uv build --no-build-isolation --wheel . --out-dir=/workspace/wheels -v && \ + # flashinfer-jit-cache + cd ../flashinfer-jit-cache && \ uv build --no-build-isolation --wheel . 
--out-dir=/workspace/wheels -v -# flashinfer-cubin -RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ - --mount=type=cache,id=ccache,target=/root/.ccache \ - --mount=type=cache,id=cubins-cache,target=/workspace/flashinfer/flashinfer-cubin/flashinfer_cubin/cubins \ - cd flashinfer-cubin && uv build --no-build-isolation --wheel . --out-dir=/workspace/wheels -v +# ========================================================= +# STAGE 3: FlashInfer Wheel Export +# ========================================================= +FROM scratch AS flashinfer-export +COPY --from=flashinfer-builder /workspace/wheels / + +# ========================================================= +# STAGE 4: vLLM Builder +# ========================================================= +FROM base AS vllm-builder + +ARG TORCH_CUDA_ARCH_LIST="12.1a" +ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} +WORKDIR $VLLM_BASE_DIR -# flashinfer-jit-cache RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ - --mount=type=cache,id=ccache,target=/root/.ccache \ - --mount=type=cache,id=cubins-cache,target=/workspace/flashinfer/flashinfer-cubin/flashinfer_cubin/cubins \ - cd flashinfer-jit-cache && \ - uv build --no-build-isolation --wheel . --out-dir=/workspace/wheels -v + uv pip install nvidia-nvshmem-cu13 "apache-tvm-ffi<0.2" # --- VLLM SOURCE CACHE BUSTER --- -# Change THIS argument to force a fresh git clone and rebuild of vLLM -# without re-installing the dependencies above. ARG CACHEBUST_VLLM=1 # Git reference (branch, tag, or SHA) to checkout ARG VLLM_REF=main -# 4. Smart Git Clone (Fetch changes instead of full re-clone) -# We mount a cache at /repo-cache. This directory persists on your host machine. +# Smart Git Clone (Fetch changes instead of full re-clone) RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ - # 1. Go into the persistent cache directory cd /repo-cache && \ - # 2. Logic: Clone if missing, otherwise Fetch & Reset if [ ! 
-d "vllm" ]; then \ echo "Cache miss: Cloning vLLM from scratch..." && \ git clone --recursive https://github.com/vllm-project/vllm.git; \ @@ -188,11 +157,8 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ (git checkout --detach origin/${VLLM_REF} 2>/dev/null || git checkout ${VLLM_REF}) && \ git submodule update --init --recursive && \ git clean -fdx && \ - # Optimize git repo size git gc --auto; \ fi && \ - # 3. Copy the updated code from the cache to the actual container workspace - # We use 'cp -a' to preserve permissions cp -a /repo-cache/vllm $VLLM_BASE_DIR/ WORKDIR $VLLM_BASE_DIR/vllm @@ -231,19 +197,18 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ # fi # Final Compilation -# We mount the ccache directory here. Ideally, map this to a host volume for persistence -# across totally separate `docker build` invocations. RUN --mount=type=cache,id=ccache,target=/root/.ccache \ --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ uv build --no-build-isolation --wheel . 
--out-dir=/workspace/wheels -v -# # Install custom Triton from triton-builder -# COPY --from=triton-builder /workspace/wheels /workspace/wheels -# RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ -# uv pip install /workspace/wheels/*.whl +# ========================================================= +# STAGE 5: vLLM Wheel Export +# ========================================================= +FROM scratch AS vllm-export +COPY --from=vllm-builder /workspace/wheels / # ========================================================= -# STAGE 4: Runner (Transfers only necessary artifacts) +# STAGE 6: Runner (Installs wheels from host ./wheels/) # ========================================================= FROM nvcr.io/nvidia/pytorch:26.01-py3 AS runner @@ -282,10 +247,10 @@ RUN mkdir -p tiktoken_encodings && \ wget -O tiktoken_encodings/o200k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/o200k_base.tiktoken" && \ wget -O tiktoken_encodings/cl100k_base.tiktoken "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" -# Copy artifacts from Builder Stage -RUN --mount=type=bind,from=builder,source=/workspace/wheels,target=/mount/wheels \ +# Install wheels from host ./wheels/ (bind-mounted from build context — no layer bloat) +RUN --mount=type=bind,source=wheels,target=/workspace/wheels \ --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ - uv pip install /mount/wheels/*.whl + uv pip install /workspace/wheels/*.whl ARG PRE_TRANSFORMERS=0 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ diff --git a/build-and-copy.sh b/build-and-copy.sh index b302b34..b21b673 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -6,23 +6,21 @@ START_TIME=$(date +%s) # Default values IMAGE_TAG="vllm-node" -REBUILD_DEPS=false +REBUILD_FLASHINFER=false REBUILD_VLLM=false COPY_HOSTS=() SSH_USER="$USER" NO_BUILD=false -TRITON_REF="v3.6.0" VLLM_REF="main" TMP_IMAGE="" PARALLEL_COPY=false -USE_WHEELS_MODE="" -PRE_FLASHINFER=false 
-PRE_TRANSFORMERS=false EXP_MXFP4=false -TRITON_REF_SET=false VLLM_REF_SET=false VLLM_PRS="" +PRE_TRANSFORMERS=false FULL_LOG=false +BUILD_JOBS="16" +GPU_ARCH_LIST="12.1a" cleanup() { if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then @@ -60,31 +58,26 @@ copy_to_host() { return 1 fi } -BUILD_JOBS="16" -GPU_ARCH_LIST="12.1a" # Help function usage() { echo "Usage: $0 [OPTIONS]" - echo " -t, --tag : Image tag (default: 'vllm-node')" - echo " --gpu-arch : GPU architecture (default: '12.1a')" - echo " --rebuild-deps : Set cache bust for dependencies" - echo " --rebuild-vllm : Set cache bust for vllm" - echo " --triton-ref : Triton commit SHA, branch or tag (default: 'v3.5.1')" - echo " --vllm-ref : vLLM commit SHA, branch or tag (default: 'main')" - echo " -c, --copy-to : Host(s) to copy the image to. Accepts comma or space-delimited lists after the flag." - echo " --copy-to-host : Alias for --copy-to (backwards compatibility)." - echo " --copy-parallel : Copy to all hosts in parallel instead of serially." - echo " -j, --build-jobs : Number of concurrent build jobs (default: \${BUILD_JOBS})" - echo " -u, --user : Username for ssh command (default: \$USER)" - echo " --use-wheels [mode] : Use prebuilt vLLM wheels. Mode can be 'nightly' (default) or 'release'." - echo " --pre-flashinfer : Use pre-release versions of FlashInfer" - echo " --pre-tf, --pre-transformers : Install transformers 5.0.0rc0 or higher" + echo " -t, --tag : Image tag (default: 'vllm-node')" + echo " --gpu-arch : GPU architecture (default: '12.1a')" + echo " --rebuild-flashinfer : Force rebuild of FlashInfer wheels (ignore cached wheels)" + echo " --rebuild-vllm : Force rebuild of vLLM wheels (ignore cached wheels)" + echo " --vllm-ref : vLLM commit SHA, branch or tag (default: 'main')" + echo " -c, --copy-to : Host(s) to copy the image to. Accepts comma or space-delimited lists." + echo " --copy-to-host : Alias for --copy-to (backwards compatibility)." 
+ echo " --copy-parallel : Copy to all hosts in parallel instead of serially." + echo " -j, --build-jobs : Number of concurrent build jobs (default: ${BUILD_JOBS})" + echo " -u, --user : Username for ssh command (default: \$USER)" + echo " --pre-tf, --pre-transformers : Install transformers 5.0.0rc0 or higher" echo " --exp-mxfp4, --experimental-mxfp4 : Build with experimental native MXFP4 support" - echo " --apply-vllm-pr : Apply a specific PR patch to vLLM source code. Can be specified multiple times." - echo " --full-log : Enable full build logging (--progress=plain)" - echo " --no-build : Skip building, only copy image (requires --copy-to)" - echo " -h, --help : Show this help message" + echo " --apply-vllm-pr : Apply a specific PR patch to vLLM source. Can be specified multiple times." + echo " --full-log : Enable full build logging (--progress=plain)" + echo " --no-build : Skip building, only copy image (requires --copy-to)" + echo " -h, --help : Show this help message" exit 1 } @@ -93,34 +86,30 @@ while [[ "$#" -gt 0 ]]; do case $1 in -t|--tag) IMAGE_TAG="$2"; shift ;; --gpu-arch) GPU_ARCH_LIST="$2"; shift ;; - --rebuild-deps) REBUILD_DEPS=true ;; + --rebuild-flashinfer) REBUILD_FLASHINFER=true ;; --rebuild-vllm) REBUILD_VLLM=true ;; - --triton-ref) TRITON_REF="$2"; TRITON_REF_SET=true; shift ;; --vllm-ref) VLLM_REF="$2"; VLLM_REF_SET=true; shift ;; -c|--copy-to|--copy-to-host|--copy-to-hosts) shift - # Consume arguments until the next flag or end of args while [[ "$#" -gt 0 && "$1" != -* ]]; do add_copy_hosts "$1" shift done - # If no hosts specified, use autodiscovery if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then echo "No hosts specified. Using autodiscovery..." source "$(dirname "$0")/autodiscover.sh" - + detect_nodes if [ $? -ne 0 ]; then echo "Error: Autodiscovery failed." 
exit 1 fi - - # Use PEER_NODES directly + if [ ${#PEER_NODES[@]} -gt 0 ]; then COPY_HOSTS=("${PEER_NODES[@]}") fi - + if [ "${#COPY_HOSTS[@]}" -eq 0 ]; then echo "Error: Autodiscovery found no other nodes." exit 1 @@ -132,19 +121,6 @@ while [[ "$#" -gt 0 ]]; do -j|--build-jobs) BUILD_JOBS="$2"; shift ;; -u|--user) SSH_USER="$2"; shift ;; --copy-parallel) PARALLEL_COPY=true ;; - --use-wheels) - if [[ "$2" != -* && -n "$2" ]]; then - if [[ "$2" != "nightly" && "$2" != "release" ]]; then - echo "Error: --use-wheels argument must be 'nightly' or 'release'." - exit 1 - fi - USE_WHEELS_MODE="$2" - shift - else - USE_WHEELS_MODE="nightly" - fi - ;; - --pre-flashinfer) PRE_FLASHINFER=true ;; --pre-tf|--pre-transformers) PRE_TRANSFORMERS=true ;; --exp-mxfp4|--experimental-mxfp4) EXP_MXFP4=true ;; --apply-vllm-pr) @@ -168,25 +144,16 @@ while [[ "$#" -gt 0 ]]; do shift done +# Validate flag combinations if [ -n "$VLLM_PRS" ]; then if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi - if [ -n "$USE_WHEELS_MODE" ]; then echo "Error: --apply-vllm-pr is incompatible with --use-wheels"; exit 1; fi fi if [ "$EXP_MXFP4" = true ]; then - if [ "$TRITON_REF_SET" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --triton-ref"; exit 1; fi if [ "$VLLM_REF_SET" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --vllm-ref"; exit 1; fi - if [ -n "$USE_WHEELS_MODE" ]; then echo "Error: --exp-mxfp4 is incompatible with --use-wheels"; exit 1; fi - if [ "$PRE_FLASHINFER" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --pre-flashinfer"; exit 1; fi if [ "$PRE_TRANSFORMERS" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --pre-transformers"; exit 1; fi -fi - -if [ -n "$USE_WHEELS_MODE" ]; then - read -p "!!! Wheels build is known not to work properly with all models after migration to Torch 2.10! Full build is recommended. Do you want to continue (y/N)? 
" choice - case "$choice" in - y|Y ) echo "Continuing...";; - * ) echo "Aborting."; exit 1;; - esac + if [ "$REBUILD_FLASHINFER" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-flashinfer"; exit 1; fi + if [ "$REBUILD_VLLM" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-vllm"; exit 1; fi fi # Validate --no-build usage @@ -195,81 +162,147 @@ if [ "$NO_BUILD" = true ] && [ "${#COPY_HOSTS[@]}" -eq 0 ]; then exit 1 fi -# Build image (unless --no-build is set) -BUILD_TIME=0 +# Ensure wheels directory exists +mkdir -p ./wheels + +# Common build flags shared by every docker build below, including the MXFP4 build +COMMON_BUILD_FLAGS=() +if [ "$FULL_LOG" = true ]; then + COMMON_BUILD_FLAGS+=("--progress=plain") +fi +COMMON_BUILD_FLAGS+=("--build-arg" "BUILD_JOBS=$BUILD_JOBS") +COMMON_BUILD_FLAGS+=("--build-arg" "TORCH_CUDA_ARCH_LIST=$GPU_ARCH_LIST") +COMMON_BUILD_FLAGS+=("--build-arg" "FLASHINFER_CUDA_ARCH_LIST=$GPU_ARCH_LIST") + +# ===================================================== +# Build image (unless --no-build is set; --exp-mxfp4 uses a single Dockerfile.mxfp4 build) +# ===================================================== +FLASHINFER_BUILD_TIME=0 +VLLM_BUILD_TIME=0 +RUNNER_BUILD_TIME=0 + if [ "$NO_BUILD" = false ]; then - # Construct build command - CMD=("docker" "build" "-t" "$IMAGE_TAG") - - if [ "$FULL_LOG" = true ]; then - CMD+=("--progress=plain") - fi - if [ "$EXP_MXFP4" = true ]; then echo "Building with experimental MXFP4 support..." 
- CMD+=("-f" "Dockerfile.mxfp4") - elif [ -n "$USE_WHEELS_MODE" ]; then - echo "Using pre-built vLLM wheels (mode: $USE_WHEELS_MODE)" - CMD+=("-f" "Dockerfile.wheels") - if [ "$USE_WHEELS_MODE" = "release" ]; then - CMD+=("--build-arg" "WHEELS_FROM_GITHUB_RELEASE=1") - fi + CMD=("docker" "build" "-t" "$IMAGE_TAG" "${COMMON_BUILD_FLAGS[@]}" "-f" "Dockerfile.mxfp4" ".") + echo "Building image with command: ${CMD[*]}" + BUILD_START=$(date +%s) + "${CMD[@]}" + BUILD_END=$(date +%s) + RUNNER_BUILD_TIME=$((BUILD_END - BUILD_START)) else - echo "Building vLLM from source" + # ---------------------------------------------------------- + # Phase 1: FlashInfer wheels + # ---------------------------------------------------------- + FLASHINFER_WHEELS_EXIST=false + if compgen -G "./wheels/flashinfer*.whl" > /dev/null 2>&1; then + FLASHINFER_WHEELS_EXIST=true + fi + + if [ "$REBUILD_FLASHINFER" = true ] || [ "$FLASHINFER_WHEELS_EXIST" = false ]; then + if [ "$REBUILD_FLASHINFER" = true ]; then + echo "Rebuilding FlashInfer wheels (--rebuild-flashinfer specified)..." + else + echo "No FlashInfer wheels found in ./wheels/ — building..." + fi + + FI_CMD=("docker" "build" + "--target" "flashinfer-export" + "--output" "type=local,dest=./wheels" + "${COMMON_BUILD_FLAGS[@]}") + + if [ "$REBUILD_FLASHINFER" = true ]; then + FI_CMD+=("--build-arg" "CACHEBUST_FLASHINFER=$(date +%s)") + fi + + FI_CMD+=(".") + + echo "FlashInfer build command: ${FI_CMD[*]}" + FI_START=$(date +%s) + "${FI_CMD[@]}" + FI_END=$(date +%s) + FLASHINFER_BUILD_TIME=$((FI_END - FI_START)) + else + echo "FlashInfer wheels already present in ./wheels/ — skipping build." 
+ fi + + # ---------------------------------------------------------- + # Phase 2: vLLM wheels + # ---------------------------------------------------------- + VLLM_WHEELS_EXIST=false + if compgen -G "./wheels/vllm*.whl" > /dev/null 2>&1; then + VLLM_WHEELS_EXIST=true + fi + + if [ "$REBUILD_VLLM" = true ] || [ "$VLLM_WHEELS_EXIST" = false ]; then + if [ "$REBUILD_VLLM" = true ]; then + echo "Rebuilding vLLM wheels (--rebuild-vllm specified)..." + else + echo "No vLLM wheels found in ./wheels/ — building..." + fi + + VLLM_CMD=("docker" "build" + "--target" "vllm-export" + "--output" "type=local,dest=./wheels" + "${COMMON_BUILD_FLAGS[@]}" + "--build-arg" "VLLM_REF=$VLLM_REF") + + if [ "$REBUILD_VLLM" = true ]; then + VLLM_CMD+=("--build-arg" "CACHEBUST_VLLM=$(date +%s)") + fi + + if [ -n "$VLLM_PRS" ]; then + echo "Applying vLLM PRs: $VLLM_PRS" + VLLM_CMD+=("--build-arg" "VLLM_PRS=$VLLM_PRS") + fi + + if [ "$PRE_TRANSFORMERS" = true ]; then + echo "Using transformers>=5.0.0..." + VLLM_CMD+=("--build-arg" "PRE_TRANSFORMERS=1") + fi + + VLLM_CMD+=(".") + + echo "vLLM build command: ${VLLM_CMD[*]}" + VLLM_START=$(date +%s) + "${VLLM_CMD[@]}" + VLLM_END=$(date +%s) + VLLM_BUILD_TIME=$((VLLM_END - VLLM_START)) + else + echo "vLLM wheels already present in ./wheels/ — skipping build." + fi + + # ---------------------------------------------------------- + # Phase 3: Runner image + # ---------------------------------------------------------- + if ! compgen -G "./wheels/*.whl" > /dev/null 2>&1; then + echo "Error: No wheel files found in ./wheels/ — cannot build runner image." 
+ exit 1 + fi + + RUNNER_CMD=("docker" "build" + "-t" "$IMAGE_TAG" + "${COMMON_BUILD_FLAGS[@]}") + + if [ "$PRE_TRANSFORMERS" = true ]; then + RUNNER_CMD+=("--build-arg" "PRE_TRANSFORMERS=1") + fi + + RUNNER_CMD+=(".") + + echo "Building runner image with command: ${RUNNER_CMD[*]}" + RUNNER_START=$(date +%s) + "${RUNNER_CMD[@]}" + RUNNER_END=$(date +%s) + RUNNER_BUILD_TIME=$((RUNNER_END - RUNNER_START)) fi - - if [ "$REBUILD_DEPS" = true ]; then - echo "Setting CACHEBUST_DEPS..." - CMD+=("--build-arg" "CACHEBUST_DEPS=$(date +%s)") - fi - - if [ "$REBUILD_VLLM" = true ]; then - echo "Setting CACHEBUST_VLLM..." - CMD+=("--build-arg" "CACHEBUST_VLLM=$(date +%s)") - fi - - # Add TRITON_REF to build arguments - CMD+=("--build-arg" "TRITON_REF=$TRITON_REF") - - # Add VLLM_REF to build arguments - CMD+=("--build-arg" "VLLM_REF=$VLLM_REF") - - # Add BUILD_JOBS to build arguments - CMD+=("--build-arg" "BUILD_JOBS=$BUILD_JOBS") - - # Add GPU architecture to build arguments - CMD+=("--build-arg" "TORCH_CUDA_ARCH_LIST=$GPU_ARCH_LIST") - CMD+=("--build-arg" "FLASHINFER_CUDA_ARCH_LIST=$GPU_ARCH_LIST") - - if [ "$PRE_FLASHINFER" = true ]; then - echo "Using pre-release FlashInfer..." - CMD+=("--build-arg" "FLASHINFER_PRE=--pre") - fi - - if [ -n "$VLLM_PRS" ]; then - echo "Applying vLLM PRs: $VLLM_PRS" - CMD+=("--build-arg" "VLLM_PRS=$VLLM_PRS") - fi - - if [ "$PRE_TRANSFORMERS" = true ]; then - echo "Using transformers>=5.0.0..." 
- CMD+=("--build-arg" "PRE_TRANSFORMERS=1") - fi - - # Add build context - CMD+=(".") - - # Execute build - echo "Building image with command: ${CMD[*]}" - BUILD_START=$(date +%s) - "${CMD[@]}" - BUILD_END=$(date +%s) - BUILD_TIME=$((BUILD_END - BUILD_START)) else echo "Skipping build (--no-build specified)" fi -# Copy to host if requested +# ===================================================== +# Copy to host(s) if requested +# ===================================================== COPY_TIME=0 if [ "${#COPY_HOSTS[@]}" -gt 0 ]; then echo "Copying image '$IMAGE_TAG' to ${#COPY_HOSTS[@]} host(s): ${COPY_HOSTS[*]}" @@ -320,12 +353,18 @@ echo "" echo "=========================================" echo " TIMING STATISTICS" echo "=========================================" -if [ "$BUILD_TIME" -gt 0 ]; then - echo "Docker Build: $(printf '%02d:%02d:%02d' $((BUILD_TIME/3600)) $((BUILD_TIME%3600/60)) $((BUILD_TIME%60)))" +if [ "$FLASHINFER_BUILD_TIME" -gt 0 ]; then + echo "FlashInfer Build: $(printf '%02d:%02d:%02d' $((FLASHINFER_BUILD_TIME/3600)) $((FLASHINFER_BUILD_TIME%3600/60)) $((FLASHINFER_BUILD_TIME%60)))" +fi +if [ "$VLLM_BUILD_TIME" -gt 0 ]; then + echo "vLLM Build: $(printf '%02d:%02d:%02d' $((VLLM_BUILD_TIME/3600)) $((VLLM_BUILD_TIME%3600/60)) $((VLLM_BUILD_TIME%60)))" +fi +if [ "$RUNNER_BUILD_TIME" -gt 0 ]; then + echo "Runner Build: $(printf '%02d:%02d:%02d' $((RUNNER_BUILD_TIME/3600)) $((RUNNER_BUILD_TIME%3600/60)) $((RUNNER_BUILD_TIME%60)))" fi if [ "$COPY_TIME" -gt 0 ]; then - echo "Image Copy: $(printf '%02d:%02d:%02d' $((COPY_TIME/3600)) $((COPY_TIME%3600/60)) $((COPY_TIME%60)))" + echo "Image Copy: $(printf '%02d:%02d:%02d' $((COPY_TIME/3600)) $((COPY_TIME%3600/60)) $((COPY_TIME%60)))" fi -echo "Total Time: $(printf '%02d:%02d:%02d' $((TOTAL_TIME/3600)) $((TOTAL_TIME%3600/60)) $((TOTAL_TIME%60)))" +echo "Total Time: $(printf '%02d:%02d:%02d' $((TOTAL_TIME/3600)) $((TOTAL_TIME%3600/60)) $((TOTAL_TIME%60)))" echo "=========================================" echo 
"Done building $IMAGE_TAG." diff --git a/wheels/.gitkeep b/wheels/.gitkeep new file mode 100644 index 0000000..e69de29