vLLM build-time PRs support
This commit is contained in:
10
Dockerfile
10
Dockerfile
@@ -163,6 +163,16 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
|
|||||||
|
|
||||||
WORKDIR $VLLM_BASE_DIR/vllm
|
WORKDIR $VLLM_BASE_DIR/vllm
|
||||||
|
|
||||||
|
ARG VLLM_PRS=""
|
||||||
|
|
||||||
|
RUN if [ -n "$VLLM_PRS" ]; then \
|
||||||
|
echo "Applying PRs: $VLLM_PRS"; \
|
||||||
|
for pr in $VLLM_PRS; do \
|
||||||
|
echo "Fetching and applying PR #$pr..."; \
|
||||||
|
curl -fL "https://github.com/vllm-project/vllm/pull/${pr}.diff" | git apply -v; \
|
||||||
|
done; \
|
||||||
|
fi
|
||||||
|
|
||||||
ARG PRE_TRANSFORMERS=0
|
ARG PRE_TRANSFORMERS=0
|
||||||
|
|
||||||
# Prepare build requirements
|
# Prepare build requirements
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ PRE_TRANSFORMERS=false
|
|||||||
EXP_MXFP4=false
|
EXP_MXFP4=false
|
||||||
TRITON_REF_SET=false
|
TRITON_REF_SET=false
|
||||||
VLLM_REF_SET=false
|
VLLM_REF_SET=false
|
||||||
|
VLLM_PRS=""
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
|
if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
|
||||||
@@ -77,6 +78,7 @@ usage() {
|
|||||||
echo " --pre-flashinfer : Use pre-release versions of FlashInfer"
|
echo " --pre-flashinfer : Use pre-release versions of FlashInfer"
|
||||||
echo " --pre-tf, --pre-transformers : Install transformers 5.0.0rc0 or higher"
|
echo " --pre-tf, --pre-transformers : Install transformers 5.0.0rc0 or higher"
|
||||||
echo " --exp-mxfp4, --experimental-mxfp4 : Build with experimental native MXFP4 support"
|
echo " --exp-mxfp4, --experimental-mxfp4 : Build with experimental native MXFP4 support"
|
||||||
|
echo " --apply-vllm-pr <pr-num> : Apply a specific PR patch to vLLM source code. Can be specified multiple times."
|
||||||
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
echo " --no-build : Skip building, only copy image (requires --copy-to)"
|
||||||
echo " -h, --help : Show this help message"
|
echo " -h, --help : Show this help message"
|
||||||
exit 1
|
exit 1
|
||||||
@@ -140,8 +142,26 @@ while [[ "$#" -gt 0 ]]; do
|
|||||||
--pre-flashinfer) PRE_FLASHINFER=true ;;
|
--pre-flashinfer) PRE_FLASHINFER=true ;;
|
||||||
--pre-tf|--pre-transformers) PRE_TRANSFORMERS=true ;;
|
--pre-tf|--pre-transformers) PRE_TRANSFORMERS=true ;;
|
||||||
--exp-mxfp4|--experimental-mxfp4) EXP_MXFP4=true ;;
|
--exp-mxfp4|--experimental-mxfp4) EXP_MXFP4=true ;;
|
||||||
|
--apply-vllm-pr)
|
||||||
|
if [ -n "$2" ] && [[ "$2" != -* ]]; then
|
||||||
|
if [ -n "$VLLM_PRS" ]; then
|
||||||
|
VLLM_PRS="$VLLM_PRS $2"
|
||||||
|
else
|
||||||
|
VLLM_PRS="$2"
|
||||||
|
fi
|
||||||
|
shift
|
||||||
|
else
|
||||||
|
echo "Error: --apply-vllm-pr requires a PR number."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
--no-build) NO_BUILD=true ;;
|
--no-build) NO_BUILD=true ;;
|
||||||
-h|--help) usage ;;
|
-h|--help) usage ;;
|
||||||
|
if [ -n "$VLLM_PRS" ]; then
|
||||||
|
if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi
|
||||||
|
if [ -n "$USE_WHEELS_MODE" ]; then echo "Error: --apply-vllm-pr is incompatible with --use-wheels"; exit 1; fi
|
||||||
|
fi
|
||||||
|
|
||||||
*) echo "Unknown parameter passed: $1"; usage ;;
|
*) echo "Unknown parameter passed: $1"; usage ;;
|
||||||
esac
|
esac
|
||||||
shift
|
shift
|
||||||
@@ -204,6 +224,11 @@ if [ "$NO_BUILD" = false ]; then
|
|||||||
CMD+=("--build-arg" "FLASHINFER_PRE=--pre")
|
CMD+=("--build-arg" "FLASHINFER_PRE=--pre")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -n "$VLLM_PRS" ]; then
|
||||||
|
echo "Applying vLLM PRs: $VLLM_PRS"
|
||||||
|
CMD+=("--build-arg" "VLLM_PRS=$VLLM_PRS")
|
||||||
|
fi
|
||||||
|
|
||||||
if [ "$PRE_TRANSFORMERS" = true ]; then
|
if [ "$PRE_TRANSFORMERS" = true ]; then
|
||||||
echo "Using transformers>=5.0.0..."
|
echo "Using transformers>=5.0.0..."
|
||||||
CMD+=("--build-arg" "PRE_TRANSFORMERS=1")
|
CMD+=("--build-arg" "PRE_TRANSFORMERS=1")
|
||||||
|
|||||||
Reference in New Issue
Block a user