--flashinfer-ref / --apply-flashinfer-pr
This commit is contained in:
10
Dockerfile
10
Dockerfile
@@ -110,6 +110,16 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
|
|||||||
|
|
||||||
WORKDIR /workspace/flashinfer
|
WORKDIR /workspace/flashinfer
|
||||||
|
|
||||||
|
ARG FLASHINFER_PRS=""
|
||||||
|
|
||||||
|
# Optionally apply upstream FlashInfer pull requests to the source tree in the current WORKDIR.
# FLASHINFER_PRS is a space-separated list of PR numbers; an empty value applies nothing.
# `set -e` aborts the build as soon as any download or `git apply` fails. Without it, only the
# exit status of the last loop iteration decides whether this RUN step succeeds, so an earlier
# PR that failed to apply would be silently skipped.
# Each diff is saved to a temporary file first, so a download that curl reports as failed is
# never handed to `git apply`.
RUN set -e; \
    if [ -n "$FLASHINFER_PRS" ]; then \
        echo "Applying PRs: $FLASHINFER_PRS"; \
        for pr in $FLASHINFER_PRS; do \
            echo "Fetching and applying PR #$pr..."; \
            patch_file="/tmp/flashinfer-pr-${pr}.diff"; \
            curl -fL -o "$patch_file" "https://github.com/flashinfer-ai/flashinfer/pull/${pr}.diff"; \
            git apply -v "$patch_file"; \
            rm -f "$patch_file"; \
        done; \
    fi
|
||||||
|
|
||||||
# Apply patch to avoid re-downloading existing cubins
|
# Apply patch to avoid re-downloading existing cubins
|
||||||
COPY flashinfer_cache.patch .
|
COPY flashinfer_cache.patch .
|
||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
|
|||||||
15
README.md
15
README.md
@@ -31,6 +31,8 @@ We will expand the selection of models we test in the pipeline, but since vLLM i
|
|||||||
|
|
||||||
If you want to build the latest from main branch, you can specify `--rebuild-vllm` flag. Or you can target a specific vLLM release by setting `--vllm-ref` parameter.
|
If you want to build the latest from main branch, you can specify `--rebuild-vllm` flag. Or you can target a specific vLLM release by setting `--vllm-ref` parameter.
|
||||||
|
|
||||||
|
Similarly, `--rebuild-flashinfer`, `--flashinfer-ref`, and `--apply-flashinfer-pr` control the FlashInfer build in the same way.
|
||||||
|
|
||||||
## QUICK START
|
## QUICK START
|
||||||
|
|
||||||
### Build
|
### Build
|
||||||
@@ -149,6 +151,17 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi
|
|||||||
|
|
||||||
## CHANGELOG
|
## CHANGELOG
|
||||||
|
|
||||||
|
### 2026-03-29
|
||||||
|
|
||||||
|
#### Flags to specify FlashInfer ref and apply PRs
|
||||||
|
|
||||||
|
`build-and-copy.sh` gains two new flags that mirror the existing vLLM equivalents:
|
||||||
|
|
||||||
|
- `--flashinfer-ref <ref>` — build FlashInfer from a specific commit SHA, branch, or tag instead of `main`. Forces a local FlashInfer build (skips prebuilt wheel download).
|
||||||
|
- `--apply-flashinfer-pr <pr-num>` — fetch and apply a FlashInfer GitHub PR patch before building. Can be specified multiple times. Forces a local FlashInfer build.
|
||||||
|
|
||||||
|
Both flags are incompatible with `--exp-mxfp4`.
|
||||||
|
|
||||||
### 2026-03-27
|
### 2026-03-27
|
||||||
|
|
||||||
#### Default image tag in `build-and-copy.sh`
|
#### Default image tag in `build-and-copy.sh`
|
||||||
@@ -981,7 +994,9 @@ Using a different username:
|
|||||||
| `--rebuild-flashinfer` | Skip prebuilt wheel download; force a fresh local FlashInfer build |
|
| `--rebuild-flashinfer` | Skip prebuilt wheel download; force a fresh local FlashInfer build |
|
||||||
| `--rebuild-vllm` | Force rebuild vLLM from source |
|
| `--rebuild-vllm` | Force rebuild vLLM from source |
|
||||||
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: `main`) |
|
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: `main`) |
|
||||||
|
| `--flashinfer-ref <ref>` | FlashInfer commit SHA, branch or tag (default: `main`) |
|
||||||
| `--apply-vllm-pr <pr-num>` | Apply a vLLM PR patch during build. Can be specified multiple times. |
|
| `--apply-vllm-pr <pr-num>` | Apply a vLLM PR patch during build. Can be specified multiple times. |
|
||||||
|
| `--apply-flashinfer-pr <pr-num>` | Apply a FlashInfer PR patch during build. Can be specified multiple times. |
|
||||||
| `--tf5` | Install transformers v5 (5.0.0 or higher). Aliases: `--pre-tf, --pre-transformers`. |
|
| `--tf5` | Install transformers v5 (5.0.0 or higher). Aliases: `--pre-tf, --pre-transformers`. |
|
||||||
| `--exp-mxfp4` | Build with experimental native MXFP4 support. Alias: `--experimental-mxfp4`. |
|
| `--exp-mxfp4` | Build with experimental native MXFP4 support. Alias: `--experimental-mxfp4`. |
|
||||||
| `-c, --copy-to <hosts>` | Host(s) to copy the image to after building (space- or comma-separated). |
|
| `-c, --copy-to <hosts>` | Host(s) to copy the image to after building (space- or comma-separated). |
|
||||||
|
|||||||
@@ -14,11 +14,14 @@ COPY_TO_FLAG=false
|
|||||||
SSH_USER="$USER"
|
SSH_USER="$USER"
|
||||||
NO_BUILD=false
|
NO_BUILD=false
|
||||||
VLLM_REF="main"
|
VLLM_REF="main"
|
||||||
|
VLLM_REF_SET=false
|
||||||
|
FLASHINFER_REF="main"
|
||||||
|
FLASHINFER_REF_SET=false
|
||||||
TMP_IMAGE=""
|
TMP_IMAGE=""
|
||||||
PARALLEL_COPY=false
|
PARALLEL_COPY=false
|
||||||
EXP_MXFP4=false
|
EXP_MXFP4=false
|
||||||
VLLM_REF_SET=false
|
|
||||||
VLLM_PRS=""
|
VLLM_PRS=""
|
||||||
|
FLASHINFER_PRS=""
|
||||||
PRE_TRANSFORMERS=false
|
PRE_TRANSFORMERS=false
|
||||||
FULL_LOG=false
|
FULL_LOG=false
|
||||||
BUILD_JOBS="16"
|
BUILD_JOBS="16"
|
||||||
@@ -271,6 +274,7 @@ usage() {
|
|||||||
echo " --rebuild-flashinfer : Force rebuild of FlashInfer wheels (ignore cached wheels)"
|
echo " --rebuild-flashinfer : Force rebuild of FlashInfer wheels (ignore cached wheels)"
|
||||||
echo " --rebuild-vllm : Force rebuild of vLLM wheels (ignore cached wheels)"
|
echo " --rebuild-vllm : Force rebuild of vLLM wheels (ignore cached wheels)"
|
||||||
echo " --vllm-ref <ref> : vLLM commit SHA, branch or tag (default: 'main')"
|
echo " --vllm-ref <ref> : vLLM commit SHA, branch or tag (default: 'main')"
|
||||||
|
echo " --flashinfer-ref <ref> : FlashInfer commit SHA, branch or tag (default: 'main')"
|
||||||
echo " -c, --copy-to <hosts> : Host(s) to copy the image to. Accepts comma or space-delimited lists."
|
echo " -c, --copy-to <hosts> : Host(s) to copy the image to. Accepts comma or space-delimited lists."
|
||||||
echo " --copy-to-host : Alias for --copy-to (backwards compatibility)."
|
echo " --copy-to-host : Alias for --copy-to (backwards compatibility)."
|
||||||
echo " --copy-parallel : Copy to all hosts in parallel instead of serially."
|
echo " --copy-parallel : Copy to all hosts in parallel instead of serially."
|
||||||
@@ -279,6 +283,7 @@ usage() {
|
|||||||
echo " --tf5 : Install transformers>=5 (aliases: --pre-tf, --pre-transformers)"
|
echo " --tf5 : Install transformers>=5 (aliases: --pre-tf, --pre-transformers)"
|
||||||
echo " --exp-mxfp4, --experimental-mxfp4 : Build with experimental native MXFP4 support"
|
echo " --exp-mxfp4, --experimental-mxfp4 : Build with experimental native MXFP4 support"
|
||||||
echo " --apply-vllm-pr <pr-num> : Apply a specific PR patch to vLLM source. Can be specified multiple times."
|
echo " --apply-vllm-pr <pr-num> : Apply a specific PR patch to vLLM source. Can be specified multiple times."
|
||||||
|
echo " --apply-flashinfer-pr <pr-num>: Apply a specific PR patch to FlashInfer source. Can be specified multiple times."
|
||||||
echo " --full-log : Enable full build logging (--progress=plain)"
|
echo " --full-log : Enable full build logging (--progress=plain)"
|
||||||
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
||||||
echo " --network <network> : Docker network to use during build"
|
echo " --network <network> : Docker network to use during build"
|
||||||
@@ -298,6 +303,7 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
--rebuild-flashinfer) REBUILD_FLASHINFER=true ;;
|
--rebuild-flashinfer) REBUILD_FLASHINFER=true ;;
|
||||||
--rebuild-vllm) REBUILD_VLLM=true ;;
|
--rebuild-vllm) REBUILD_VLLM=true ;;
|
||||||
--vllm-ref) VLLM_REF="$2"; VLLM_REF_SET=true; shift ;;
|
--vllm-ref) VLLM_REF="$2"; VLLM_REF_SET=true; shift ;;
|
||||||
|
--flashinfer-ref) FLASHINFER_REF="$2"; FLASHINFER_REF_SET=true; shift ;;
|
||||||
-c|--copy-to|--copy-to-host|--copy-to-hosts)
|
-c|--copy-to|--copy-to-host|--copy-to-hosts)
|
||||||
COPY_TO_FLAG=true
|
COPY_TO_FLAG=true
|
||||||
shift
|
shift
|
||||||
@@ -325,6 +331,19 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
--apply-flashinfer-pr)
    # Accept only a plain decimal PR number. The value is later placed into a
    # GitHub pull-request URL and the collected list is word-split by the
    # Dockerfile loop, so a missing value, another flag, or any non-numeric
    # string is rejected here instead of failing deep inside the image build.
    if [[ "${2:-}" =~ ^[0-9]+$ ]]; then
        # Build a space-separated list; ${var:+...} emits the separator only
        # when the list already holds at least one entry.
        FLASHINFER_PRS="${FLASHINFER_PRS:+$FLASHINFER_PRS }$2"
        shift  # consume the PR number value
    else
        echo "Error: --apply-flashinfer-pr requires a PR number."
        exit 1
    fi
    ;;
|
||||||
--full-log) FULL_LOG=true ;;
|
--full-log) FULL_LOG=true ;;
|
||||||
--no-build) NO_BUILD=true ;;
|
--no-build) NO_BUILD=true ;;
|
||||||
--cleanup) CLEANUP_MODE=true ;;
|
--cleanup) CLEANUP_MODE=true ;;
|
||||||
@@ -399,8 +418,13 @@ if [ -n "$VLLM_PRS" ]; then
|
|||||||
if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi
|
if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Reject the unsupported combination of --apply-flashinfer-pr and --exp-mxfp4
# before any build work starts.
if [[ -n "$FLASHINFER_PRS" && "$EXP_MXFP4" == true ]]; then
    echo "Error: --apply-flashinfer-pr is incompatible with --exp-mxfp4"
    exit 1
fi
|
||||||
|
|
||||||
if [ "$EXP_MXFP4" = true ]; then
|
if [ "$EXP_MXFP4" = true ]; then
|
||||||
if [ "$VLLM_REF_SET" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --vllm-ref"; exit 1; fi
|
if [ "$VLLM_REF_SET" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --vllm-ref"; exit 1; fi
|
||||||
|
if [ "$FLASHINFER_REF_SET" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --flashinfer-ref"; exit 1; fi
|
||||||
if [ "$PRE_TRANSFORMERS" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --tf5"; exit 1; fi
|
if [ "$PRE_TRANSFORMERS" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --tf5"; exit 1; fi
|
||||||
if [ "$REBUILD_FLASHINFER" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-flashinfer"; exit 1; fi
|
if [ "$REBUILD_FLASHINFER" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-flashinfer"; exit 1; fi
|
||||||
if [ "$REBUILD_VLLM" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-vllm"; exit 1; fi
|
if [ "$REBUILD_VLLM" = true ]; then echo "Error: --exp-mxfp4 is incompatible with --rebuild-vllm"; exit 1; fi
|
||||||
@@ -478,9 +502,21 @@ if [ "$NO_BUILD" = false ]; then
|
|||||||
# ----------------------------------------------------------
|
# ----------------------------------------------------------
|
||||||
# Phase 1: FlashInfer wheels
|
# Phase 1: FlashInfer wheels
|
||||||
# ----------------------------------------------------------
|
# ----------------------------------------------------------
|
||||||
|
# An explicit FlashInfer ref or any requested PR patch implies a local rebuild:
# turn on REBUILD_FLASHINFER so the prebuilt-wheel download path is not taken.
if [[ "$FLASHINFER_REF_SET" == true || -n "$FLASHINFER_PRS" ]]; then
    REBUILD_FLASHINFER=true
fi
|
||||||
|
|
||||||
BUILD_FLASHINFER=false
|
BUILD_FLASHINFER=false
|
||||||
if [ "$REBUILD_FLASHINFER" = true ]; then
|
if [ "$REBUILD_FLASHINFER" = true ]; then
|
||||||
|
if [ "$FLASHINFER_REF_SET" = true ] && [ -n "$FLASHINFER_PRS" ]; then
|
||||||
|
echo "Rebuilding FlashInfer wheels (--flashinfer-ref and --apply-flashinfer-pr specified)..."
|
||||||
|
elif [ "$FLASHINFER_REF_SET" = true ]; then
|
||||||
|
echo "Rebuilding FlashInfer wheels (--flashinfer-ref specified)..."
|
||||||
|
elif [ -n "$FLASHINFER_PRS" ]; then
|
||||||
|
echo "Rebuilding FlashInfer wheels (--apply-flashinfer-pr specified)..."
|
||||||
|
else
|
||||||
echo "Rebuilding FlashInfer wheels (--rebuild-flashinfer specified)..."
|
echo "Rebuilding FlashInfer wheels (--rebuild-flashinfer specified)..."
|
||||||
|
fi
|
||||||
BUILD_FLASHINFER=true
|
BUILD_FLASHINFER=true
|
||||||
elif try_download_wheels "$FLASHINFER_RELEASE_TAG" "flashinfer"; then
|
elif try_download_wheels "$FLASHINFER_RELEASE_TAG" "flashinfer"; then
|
||||||
echo "FlashInfer wheels ready."
|
echo "FlashInfer wheels ready."
|
||||||
@@ -502,12 +538,18 @@ if [ "$NO_BUILD" = false ]; then
|
|||||||
FI_CMD=("docker" "build"
|
FI_CMD=("docker" "build"
|
||||||
"--target" "flashinfer-export"
|
"--target" "flashinfer-export"
|
||||||
"--output" "type=local,dest=./wheels"
|
"--output" "type=local,dest=./wheels"
|
||||||
"${COMMON_BUILD_FLAGS[@]}")
|
"${COMMON_BUILD_FLAGS[@]}"
|
||||||
|
"--build-arg" "FLASHINFER_REF=$FLASHINFER_REF")
|
||||||
|
|
||||||
if [ "$REBUILD_FLASHINFER" = true ]; then
|
if [ "$REBUILD_FLASHINFER" = true ]; then
|
||||||
FI_CMD+=("--build-arg" "CACHEBUST_FLASHINFER=$(date +%s)")
|
FI_CMD+=("--build-arg" "CACHEBUST_FLASHINFER=$(date +%s)")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# When PRs were requested, announce them and pass the space-separated list to
# the Docker build as the FLASHINFER_PRS build argument.
if [[ -n "${FLASHINFER_PRS}" ]]; then
    echo "Applying FlashInfer PRs: $FLASHINFER_PRS"
    FI_CMD+=(
        "--build-arg"
        "FLASHINFER_PRS=${FLASHINFER_PRS}"
    )
fi
|
||||||
|
|
||||||
FI_CMD+=(".")
|
FI_CMD+=(".")
|
||||||
|
|
||||||
echo "FlashInfer build command: ${FI_CMD[*]}"
|
echo "FlashInfer build command: ${FI_CMD[*]}"
|
||||||
|
|||||||
Reference in New Issue
Block a user