From 1139a37324481030375fcd7d8cf73496fb033583 Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Sun, 21 Dec 2025 22:41:03 -0800 Subject: [PATCH] Added transformers v5 support --- Dockerfile | 12 ++++++++++++ Dockerfile.wheels | 6 ++++++ README.md | 8 ++++++-- build-and-copy.sh | 8 ++++++++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6dcd576..afdc97b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -84,6 +84,12 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \ uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \ uv pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate + +ARG PRE_TRANSFORMERS=0 +RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ + if [ "$PRE_TRANSFORMERS" = "1" ]; then \ + uv pip install -U transformers --pre; \ + fi # ========================================================= # STAGE 2: Triton Builder (Compiles Triton independently) # ========================================================= @@ -153,12 +159,18 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \ WORKDIR $VLLM_BASE_DIR/vllm +ARG PRE_TRANSFORMERS=0 + # Prepare build requirements RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ python3 use_existing_torch.py && \ sed -i "/flashinfer/d" requirements/cuda.txt && \ sed -i '/^triton\b/d' requirements/test.txt && \ sed -i '/^fastsafetensors\b/d' requirements/test.txt && \ + if [ "$PRE_TRANSFORMERS" = "1" ]; then \ + sed -i '/^transformers\b/d' requirements/common.txt && \ + sed -i '/^transformers\b/d' requirements/test.txt; \ + fi && \ uv pip install -r requirements/build.txt # Apply Patches diff --git a/Dockerfile.wheels b/Dockerfile.wheels index 9469b7e..8fa30da 100644 --- a/Dockerfile.wheels +++ b/Dockerfile.wheels @@ -77,6 +77,12 @@ RUN 
--mount=type=cache,id=uv-cache,target=/root/.cache/uv \ uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \ uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 +ARG PRE_TRANSFORMERS=0 +RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \ + if [ "$PRE_TRANSFORMERS" = "1" ]; then \ + uv pip install -U transformers --pre; \ + fi + # Setup Env for Runtime ENV TORCH_CUDA_ARCH_LIST=12.1a ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas diff --git a/README.md b/README.md index 2cb1c2f..2939f9d 100644 --- a/README.md +++ b/README.md @@ -116,8 +116,9 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi ### 2025-12-21 -Pre-built wheels now support release versions. Use with `--use-wheels release`. -Using nightly wheels or building from source is recommended for better performance. +- Added `--pre-tf` / `--pre-transformers` flag to `build-and-copy.sh` to install pre-release transformers (5.0.0rc or higher). Use it if you need to run GLM 4.6 Air or any other model that requires transformers 5.0. It may cause issues with other models, so you may want to stick to the release version for everything else. +- Pre-built wheels now support release versions. Use with `--use-wheels release`. +- Using nightly wheels or building from source is recommended for better performance. ### 2025-12-20 @@ -194,6 +195,7 @@ Using a provided build script is recommended, but if you want to build using `do | `VLLM_REF` | `main` | vLLM commit SHA, branch, or tag to build. | | `BUILD_JOBS` | `16` | Number of parallel build jobs (default: 16). | | `FLASHINFER_PRE` | `""` | Set to `--pre` to use pre-release versions of FlashInfer. | +| `PRE_TRANSFORMERS` | `0` | Set to `1` to install pre-release transformers (5.0.0rc or higher). 
| ### Building Manually using Wheels @@ -211,6 +213,7 @@ Supported build arguments for `Dockerfile.wheels`: | `CACHEBUST_VLLM` | `1` | Change this to force a re-download of vLLM wheels. | | `WHEELS_FROM_GITHUB_RELEASE` | `0` | Set to `1` to use GitHub release wheels instead of nightly wheels. | | `FLASHINFER_PRE` | `""` | Set to `--pre` to use pre-release versions of FlashInfer. | +| `PRE_TRANSFORMERS` | `0` | Set to `1` to install pre-release transformers (5.0.0rc or higher). | ### Using the Build Script (Recommended) @@ -301,6 +304,7 @@ Using a different username: | `--rebuild-vllm` | Force rebuild vLLM source only (sets CACHEBUST_VLLM) | | `--triton-ref ` | Triton commit SHA, branch or tag (default: 'v3.5.1') | | `--vllm-ref ` | vLLM commit SHA, branch or tag (default: 'main') | +| `--pre-tf` | Install pre-release transformers (5.0.0rc or higher). Alias: `--pre-transformers`. | | `--use-wheels [mode]` | Use pre-built vLLM wheels. Mode: `nightly` (default) or `release`. | | `--pre-flashinfer` | Use pre-release versions of FlashInfer. | | `-c, --copy-to ` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). | diff --git a/build-and-copy.sh b/build-and-copy.sh index 535eed1..d707d72 100755 --- a/build-and-copy.sh +++ b/build-and-copy.sh @@ -17,6 +17,7 @@ TMP_IMAGE="" PARALLEL_COPY=false USE_WHEELS_MODE="" PRE_FLASHINFER=false +PRE_TRANSFORMERS=false cleanup() { if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then @@ -71,6 +72,7 @@ usage() { echo " -u, --user : Username for ssh command (default: \$USER)" echo " --use-wheels [mode] : Use prebuilt vLLM wheels. Mode can be 'nightly' (default) or 'release'." 
echo " --pre-flashinfer : Use pre-release versions of FlashInfer" + echo " --pre-tf, --pre-transformers : Install transformers 5.0.0rc0 or higher" echo " --no-build : Skip building, only copy image (requires --copy-to)" echo " -h, --help : Show this help message" exit 1 @@ -132,6 +134,7 @@ while [[ "$#" -gt 0 ]]; do fi ;; --pre-flashinfer) PRE_FLASHINFER=true ;; + --pre-tf|--pre-transformers) PRE_TRANSFORMERS=true ;; --no-build) NO_BUILD=true ;; -h|--help) usage ;; *) echo "Unknown parameter passed: $1"; usage ;; @@ -185,6 +188,11 @@ if [ "$NO_BUILD" = false ]; then CMD+=("--build-arg" "FLASHINFER_PRE=--pre") fi + if [ "$PRE_TRANSFORMERS" = true ]; then + echo "Using transformers>=5.0.0..." + CMD+=("--build-arg" "PRE_TRANSFORMERS=1") + fi + # Add build context CMD+=(".")