From a3201f88734de40a64eaf0487400502828e46869 Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Sun, 29 Mar 2026 22:40:35 -0700 Subject: [PATCH] --flashinfer-ref / --apply-flashinfer-pr --- Dockerfile | 10 ++++++++++ README.md | 15 +++++++++++++++ build-and-copy.sh | 48 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index cabd991..4b22f61 100644 --- a/Dockerfile +++ b/Dockerfile @@ -110,6 +110,16 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ WORKDIR /workspace/flashinfer +ARG FLASHINFER_PRS="" + +RUN if [ -n "$FLASHINFER_PRS" ]; then \ + echo "Applying PRs: $FLASHINFER_PRS"; \ + for pr in $FLASHINFER_PRS; do \ + echo "Fetching and applying PR #$pr..."; \ + curl -fL "https://github.com/flashinfer-ai/flashinfer/pull/${pr}.diff" | git apply -v || exit 1; \ + done; \ + fi + # Apply patch to avoid re-downloading existing cubins COPY flashinfer_cache.patch . RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ diff --git a/README.md b/README.md index d10894a..125f0b6 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ We will expand the selection of models we test in the pipeline, but since vLLM i If you want to build the latest from main branch, you can specify `--rebuild-vllm` flag. Or you can target a specific vLLM release by setting `--vllm-ref` parameter. +Similarly, `--rebuild-flashinfer`, `--flashinfer-ref`, and `--apply-flashinfer-pr` control the FlashInfer build in the same way. + ## QUICK START ### Build @@ -149,6 +151,17 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi ## CHANGELOG +### 2026-03-29 + +#### Flags to specify FlashInfer ref and apply PRs + +`build-and-copy.sh` gains two new flags that mirror the existing vLLM equivalents: + +- `--flashinfer-ref ` — build FlashInfer from a specific commit SHA, branch, or tag instead of `main`. Forces a local FlashInfer build (skips prebuilt wheel download). 
+- `--apply-flashinfer-pr ` — fetch and apply a FlashInfer GitHub PR patch before building. Can be specified multiple times. Forces a local FlashInfer build. + +Both flags are incompatible with `--exp-mxfp4`. + ### 2026-03-27 #### Default image tag in `build-and-copy.sh` @@ -981,7 +994,9 @@ Using a different username: | `--rebuild-flashinfer` | Skip prebuilt wheel download; force a fresh local FlashInfer build | | `--rebuild-vllm` | Force rebuild vLLM from source | | `--vllm-ref ` | vLLM commit SHA, branch or tag (default: `main`) | +| `--flashinfer-ref ` | FlashInfer commit SHA, branch or tag (default: `main`) | | `--apply-vllm-pr ` | Apply a vLLM PR patch during build. Can be specified multiple times. | +| `--apply-flashinfer-pr ` | Apply a FlashInfer PR patch during build. Can be specified multiple times. | | `--tf5` | Install transformers v5 (5.0.0 or higher). Aliases: `--pre-tf, --pre-transformers`. | | `--exp-mxfp4` | Build with experimental native MXFP4 support. Alias: `--experimental-mxfp4`. | | `-c, --copy-to ` | Host(s) to copy the image to after building (space- or comma-separated). | diff --git a/build-and-copy.sh b/build-and-copy.sh index 064990e..3526dbb 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -14,11 +14,14 @@ COPY_TO_FLAG=false SSH_USER="$USER" NO_BUILD=false VLLM_REF="main" +VLLM_REF_SET=false +FLASHINFER_REF="main" +FLASHINFER_REF_SET=false TMP_IMAGE="" PARALLEL_COPY=false EXP_MXFP4=false -VLLM_REF_SET=false VLLM_PRS="" +FLASHINFER_PRS="" PRE_TRANSFORMERS=false FULL_LOG=false BUILD_JOBS="16" @@ -271,6 +274,7 @@ usage() { echo " --rebuild-flashinfer : Force rebuild of FlashInfer wheels (ignore cached wheels)" echo " --rebuild-vllm : Force rebuild of vLLM wheels (ignore cached wheels)" echo " --vllm-ref : vLLM commit SHA, branch or tag (default: 'main')" + echo " --flashinfer-ref : FlashInfer commit SHA, branch or tag (default: 'main')" echo " -c, --copy-to : Host(s) to copy the image to. Accepts comma or space-delimited lists." 
echo " --copy-to-host : Alias for --copy-to (backwards compatibility)." echo " --copy-parallel : Copy to all hosts in parallel instead of serially." @@ -279,6 +283,7 @@ usage() { echo " --tf5 : Install transformers>=5 (aliases: --pre-tf, --pre-transformers)" echo " --exp-mxfp4, --experimental-mxfp4 : Build with experimental native MXFP4 support" echo " --apply-vllm-pr : Apply a specific PR patch to vLLM source. Can be specified multiple times." + echo " --apply-flashinfer-pr : Apply a specific PR patch to FlashInfer source. Can be specified multiple times." echo " --full-log : Enable full build logging (--progress=plain)" echo " --no-build : Skip building, only copy image (requires --copy-to)" echo " --network : Docker network to use during build" @@ -298,6 +303,7 @@ while [[ "$#" -gt 0 ]]; do --rebuild-flashinfer) REBUILD_FLASHINFER=true ;; --rebuild-vllm) REBUILD_VLLM=true ;; --vllm-ref) VLLM_REF="$2"; VLLM_REF_SET=true; shift ;; + --flashinfer-ref) if [ -z "$2" ] || [[ "$2" == -* ]]; then echo "Error: --flashinfer-ref requires a ref (commit SHA, branch or tag)."; exit 1; fi; FLASHINFER_REF="$2"; FLASHINFER_REF_SET=true; shift ;; -c|--copy-to|--copy-to-host|--copy-to-hosts) COPY_TO_FLAG=true shift @@ -325,6 +331,19 @@ while [[ "$#" -gt 0 ]]; do exit 1 fi ;; + --apply-flashinfer-pr) + if [[ "$2" =~ ^[0-9]+$ ]]; then + if [ -n "$FLASHINFER_PRS" ]; then + FLASHINFER_PRS="$FLASHINFER_PRS $2" + else + FLASHINFER_PRS="$2" + fi + shift + else + echo "Error: --apply-flashinfer-pr requires a PR number." 
+ exit 1 + fi + ;; --full-log) FULL_LOG=true ;; --no-build) NO_BUILD=true ;; --cleanup) CLEANUP_MODE=true ;; @@ -399,8 +418,13 @@ if [ -n "$VLLM_PRS" ]; then if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi fi +if [ -n "$FLASHINFER_PRS" ]; then + if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-flashinfer-pr is incompatible with --exp-mxfp4"; exit 1; fi +fi + if [ "$EXP_MXFP4" = true ]; then if [ "$VLLM_REF_SET" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --vllm-ref"; exit 1; fi + if [ "$FLASHINFER_REF_SET" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --flashinfer-ref"; exit 1; fi if [ "$PRE_TRANSFORMERS" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --tf5"; exit 1; fi if [ "$REBUILD_FLASHINFER" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-flashinfer"; exit 1; fi if [ "$REBUILD_VLLM" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-vllm"; exit 1; fi @@ -478,9 +502,21 @@ if [ "$NO_BUILD" = false ]; then # ---------------------------------------------------------- # Phase 1: FlashInfer wheels # ---------------------------------------------------------- + if [ "$FLASHINFER_REF_SET" = true ] || [ -n "$FLASHINFER_PRS" ]; then + REBUILD_FLASHINFER=true + fi + BUILD_FLASHINFER=false if [ "$REBUILD_FLASHINFER" = true ]; then - echo "Rebuilding FlashInfer wheels (--rebuild-flashinfer specified)..." + if [ "$FLASHINFER_REF_SET" = true ] && [ -n "$FLASHINFER_PRS" ]; then + echo "Rebuilding FlashInfer wheels (--flashinfer-ref and --apply-flashinfer-pr specified)..." + elif [ "$FLASHINFER_REF_SET" = true ]; then + echo "Rebuilding FlashInfer wheels (--flashinfer-ref specified)..." + elif [ -n "$FLASHINFER_PRS" ]; then + echo "Rebuilding FlashInfer wheels (--apply-flashinfer-pr specified)..." + else + echo "Rebuilding FlashInfer wheels (--rebuild-flashinfer specified)..." 
+ fi BUILD_FLASHINFER=true elif try_download_wheels "$FLASHINFER_RELEASE_TAG" "flashinfer"; then echo "FlashInfer wheels ready." @@ -502,12 +538,18 @@ if [ "$NO_BUILD" = false ]; then FI_CMD=("docker" "build" "--target" "flashinfer-export" "--output" "type=local,dest=./wheels" - "${COMMON_BUILD_FLAGS[@]}") + "${COMMON_BUILD_FLAGS[@]}" + "--build-arg" "FLASHINFER_REF=$FLASHINFER_REF") if [ "$REBUILD_FLASHINFER" = true ]; then FI_CMD+=("--build-arg" "CACHEBUST_FLASHINFER=$(date +%s)") fi + if [ -n "$FLASHINFER_PRS" ]; then + echo "Applying FlashInfer PRs: $FLASHINFER_PRS" + FI_CMD+=("--build-arg" "FLASHINFER_PRS=$FLASHINFER_PRS") + fi + FI_CMD+=(".") echo "FlashInfer build command: ${FI_CMD[*]}"