Added transformers v5 support
This commit is contained in:
12
Dockerfile
12
Dockerfile
@@ -84,6 +84,12 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
|||||||
uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
|
uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
|
||||||
uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
|
uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130 && \
|
||||||
uv pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
|
uv pip install apache-tvm-ffi nvidia-cudnn-frontend nvidia-cutlass-dsl nvidia-ml-py tabulate
|
||||||
|
|
||||||
|
ARG PRE_TRANSFORMERS=0
|
||||||
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
|
if [ "$PRE_TRANSFORMERS" = "1" ]; then \
|
||||||
|
uv pip install -U transformers --pre; \
|
||||||
|
fi
|
||||||
# =========================================================
|
# =========================================================
|
||||||
# STAGE 2: Triton Builder (Compiles Triton independently)
|
# STAGE 2: Triton Builder (Compiles Triton independently)
|
||||||
# =========================================================
|
# =========================================================
|
||||||
@@ -153,12 +159,18 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
|
|||||||
|
|
||||||
WORKDIR $VLLM_BASE_DIR/vllm
|
WORKDIR $VLLM_BASE_DIR/vllm
|
||||||
|
|
||||||
|
ARG PRE_TRANSFORMERS=0
|
||||||
|
|
||||||
# Prepare build requirements
|
# Prepare build requirements
|
||||||
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
python3 use_existing_torch.py && \
|
python3 use_existing_torch.py && \
|
||||||
sed -i "/flashinfer/d" requirements/cuda.txt && \
|
sed -i "/flashinfer/d" requirements/cuda.txt && \
|
||||||
sed -i '/^triton\b/d' requirements/test.txt && \
|
sed -i '/^triton\b/d' requirements/test.txt && \
|
||||||
sed -i '/^fastsafetensors\b/d' requirements/test.txt && \
|
sed -i '/^fastsafetensors\b/d' requirements/test.txt && \
|
||||||
|
if [ "$PRE_TRANSFORMERS" = "1" ]; then \
|
||||||
|
sed -i '/^transformers\b/d' requirements/common.txt; \
|
||||||
|
sed -i '/^transformers\b/d' requirements/test.txt; \
|
||||||
|
fi && \
|
||||||
uv pip install -r requirements/build.txt
|
uv pip install -r requirements/build.txt
|
||||||
|
|
||||||
# Apply Patches
|
# Apply Patches
|
||||||
|
|||||||
@@ -77,6 +77,12 @@ RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
|||||||
uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
|
uv pip install ${FLASHINFER_PRE} flashinfer-cubin --index-url https://flashinfer.ai/whl && \
|
||||||
uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130
|
uv pip install ${FLASHINFER_PRE} flashinfer-jit-cache --index-url https://flashinfer.ai/whl/cu130
|
||||||
|
|
||||||
|
ARG PRE_TRANSFORMERS=0
|
||||||
|
RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
|
||||||
|
if [ "$PRE_TRANSFORMERS" = "1" ]; then \
|
||||||
|
uv pip install -U transformers --pre; \
|
||||||
|
fi
|
||||||
|
|
||||||
# Setup Env for Runtime
|
# Setup Env for Runtime
|
||||||
ENV TORCH_CUDA_ARCH_LIST=12.1a
|
ENV TORCH_CUDA_ARCH_LIST=12.1a
|
||||||
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
|
ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
|
||||||
|
|||||||
@@ -116,8 +116,9 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi
|
|||||||
|
|
||||||
### 2025-12-21
|
### 2025-12-21
|
||||||
|
|
||||||
Pre-built wheels now support release versions. Use with `--use-wheels release`.
|
- Added `--pre-tf` / `--pre-transformers` flag to `build-and-copy.sh` to install pre-release transformers (5.0.0rc or higher). Use it if you need to run GLM 4.6 Air or any other model that requires transformers 5.0. It may cause issues with other models, so you may want to stick to the release version for everything else.
|
||||||
Using nightly wheels or building from source is recommended for better performance.
|
- Pre-built wheels now support release versions. Use with `--use-wheels release`.
|
||||||
|
- Using nightly wheels or building from source is recommended for better performance.
|
||||||
|
|
||||||
### 2025-12-20
|
### 2025-12-20
|
||||||
|
|
||||||
@@ -194,6 +195,7 @@ Using a provided build script is recommended, but if you want to build using `do
|
|||||||
| `VLLM_REF` | `main` | vLLM commit SHA, branch, or tag to build. |
|
| `VLLM_REF` | `main` | vLLM commit SHA, branch, or tag to build. |
|
||||||
| `BUILD_JOBS` | `16` | Number of parallel build jobs (default: 16). |
|
| `BUILD_JOBS` | `16` | Number of parallel build jobs (default: 16). |
|
||||||
| `FLASHINFER_PRE` | `""` | Set to `--pre` to use pre-release versions of FlashInfer. |
|
| `FLASHINFER_PRE` | `""` | Set to `--pre` to use pre-release versions of FlashInfer. |
|
||||||
|
| `PRE_TRANSFORMERS` | `0` | Set to `1` to install pre-release transformers (5.0.0rc or higher). |
|
||||||
|
|
||||||
### Building Manually using Wheels
|
### Building Manually using Wheels
|
||||||
|
|
||||||
@@ -211,6 +213,7 @@ Supported build arguments for `Dockerfile.wheels`:
|
|||||||
| `CACHEBUST_VLLM` | `1` | Change this to force a re-download of vLLM wheels. |
|
| `CACHEBUST_VLLM` | `1` | Change this to force a re-download of vLLM wheels. |
|
||||||
| `WHEELS_FROM_GITHUB_RELEASE` | `0` | Set to `1` to use GitHub release wheels instead of nightly wheels. |
|
| `WHEELS_FROM_GITHUB_RELEASE` | `0` | Set to `1` to use GitHub release wheels instead of nightly wheels. |
|
||||||
| `FLASHINFER_PRE` | `""` | Set to `--pre` to use pre-release versions of FlashInfer. |
|
| `FLASHINFER_PRE` | `""` | Set to `--pre` to use pre-release versions of FlashInfer. |
|
||||||
|
| `PRE_TRANSFORMERS` | `0` | Set to `1` to install pre-release transformers (5.0.0rc or higher). |
|
||||||
|
|
||||||
### Using the Build Script (Recommended)
|
### Using the Build Script (Recommended)
|
||||||
|
|
||||||
@@ -301,6 +304,7 @@ Using a different username:
|
|||||||
| `--rebuild-vllm` | Force rebuild vLLM source only (sets CACHEBUST_VLLM) |
|
| `--rebuild-vllm` | Force rebuild vLLM source only (sets CACHEBUST_VLLM) |
|
||||||
| `--triton-ref <ref>` | Triton commit SHA, branch or tag (default: 'v3.5.1') |
|
| `--triton-ref <ref>` | Triton commit SHA, branch or tag (default: 'v3.5.1') |
|
||||||
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: 'main') |
|
| `--vllm-ref <ref>` | vLLM commit SHA, branch or tag (default: 'main') |
|
||||||
|
| `--pre-tf` | Install pre-release transformers (5.0.0rc or higher). Alias: `--pre-transformers`. |
|
||||||
| `--use-wheels [mode]` | Use pre-built vLLM wheels. Mode: `nightly` (default) or `release`. |
|
| `--use-wheels [mode]` | Use pre-built vLLM wheels. Mode: `nightly` (default) or `release`. |
|
||||||
| `--pre-flashinfer` | Use pre-release versions of FlashInfer. |
|
| `--pre-flashinfer` | Use pre-release versions of FlashInfer. |
|
||||||
| `-c, --copy-to <host[,host...] or host host...>` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). |
|
| `-c, --copy-to <host[,host...] or host host...>` | Host(s) to copy the image to after building (space- or comma-separated list after the flag). |
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ TMP_IMAGE=""
|
|||||||
PARALLEL_COPY=false
|
PARALLEL_COPY=false
|
||||||
USE_WHEELS_MODE=""
|
USE_WHEELS_MODE=""
|
||||||
PRE_FLASHINFER=false
|
PRE_FLASHINFER=false
|
||||||
|
PRE_TRANSFORMERS=false
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
|
if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
|
||||||
@@ -71,6 +72,7 @@ usage() {
|
|||||||
echo " -u, --user <user> : Username for ssh command (default: \$USER)"
|
echo " -u, --user <user> : Username for ssh command (default: \$USER)"
|
||||||
echo " --use-wheels [mode] : Use prebuilt vLLM wheels. Mode can be 'nightly' (default) or 'release'."
|
echo " --use-wheels [mode] : Use prebuilt vLLM wheels. Mode can be 'nightly' (default) or 'release'."
|
||||||
echo " --pre-flashinfer : Use pre-release versions of FlashInfer"
|
echo " --pre-flashinfer : Use pre-release versions of FlashInfer"
|
||||||
|
echo " --pre-tf, --pre-transformers : Install transformers 5.0.0rc0 or higher"
|
||||||
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
||||||
echo " -h, --help : Show this help message"
|
echo " -h, --help : Show this help message"
|
||||||
exit 1
|
exit 1
|
||||||
@@ -132,6 +134,7 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
--pre-flashinfer) PRE_FLASHINFER=true ;;
|
--pre-flashinfer) PRE_FLASHINFER=true ;;
|
||||||
|
--pre-tf|--pre-transformers) PRE_TRANSFORMERS=true ;;
|
||||||
--no-build) NO_BUILD=true ;;
|
--no-build) NO_BUILD=true ;;
|
||||||
-h|--help) usage ;;
|
-h|--help) usage ;;
|
||||||
*) echo "Unknown parameter passed: $1"; usage ;;
|
*) echo "Unknown parameter passed: $1"; usage ;;
|
||||||
@@ -185,6 +188,11 @@ if [ "$NO_BUILD" = false ]; then
|
|||||||
CMD+=("--build-arg" "FLASHINFER_PRE=--pre")
|
CMD+=("--build-arg" "FLASHINFER_PRE=--pre")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "$PRE_TRANSFORMERS" = true ]; then
|
||||||
|
echo "Using transformers>=5.0.0..."
|
||||||
|
CMD+=("--build-arg" "PRE_TRANSFORMERS=1")
|
||||||
|
fi
|
||||||
|
|
||||||
# Add build context
|
# Add build context
|
||||||
CMD+=(".")
|
CMD+=(".")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user