diff --git a/Dockerfile b/Dockerfile
index 2980c2b..36708b1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -190,6 +190,23 @@ RUN --mount=type=cache,id=repo-cache,target=/repo-cache \
 
 WORKDIR $VLLM_BASE_DIR/vllm
 
+# Optional space-separated list of vLLM pull request numbers. Each PR's diff is
+# fetched from GitHub and applied with `git apply` in the current WORKDIR.
+ARG VLLM_PRS=""
+
+# Download each diff to a file before applying it and abort on the first
+# failure: a bare `curl | git apply` inside a loop hides curl errors behind the
+# pipe, and the loop only reports the exit status of its last iteration.
+RUN if [ -n "$VLLM_PRS" ]; then \
+        echo "Applying PRs: $VLLM_PRS"; \
+        for pr in $VLLM_PRS; do \
+            echo "Fetching and applying PR #$pr..."; \
+            curl -fL -o "/tmp/vllm-pr-${pr}.diff" "https://github.com/vllm-project/vllm/pull/${pr}.diff" || exit 1; \
+            git apply -v "/tmp/vllm-pr-${pr}.diff" || exit 1; \
+            rm -f "/tmp/vllm-pr-${pr}.diff"; \
+        done; \
+    fi
+
 ARG PRE_TRANSFORMERS=0
 
 # Prepare build requirements
diff --git a/README.md b/README.md
index 0548cfd..8d82e78 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,7 @@ This error happens if vLLM nightly build fails for aarch64 platform, but succeed
 **On DGX Spark cluster:**
 
 Make sure you connect your Sparks together and enable passwordless SSH as described in NVidia's [Connect Two Sparks Playbook](https://build.nvidia.com/spark/connect-two-sparks/stacked-sparks).
+You can also check out our new [Networking Guide](docs/NETWORKING.md).
 
 Then run the following command that will build and distribute image across the cluster.
 
diff --git a/build-and-copy.sh b/build-and-copy.sh
index 0b45eb2..bff9105 100755
--- a/build-and-copy.sh
+++ b/build-and-copy.sh
@@ -21,6 +21,7 @@ PRE_TRANSFORMERS=false
 EXP_MXFP4=false
 TRITON_REF_SET=false
 VLLM_REF_SET=false
+VLLM_PRS=""
 
 cleanup() {
     if [ -n "$TMP_IMAGE" ] && [ -f "$TMP_IMAGE" ]; then
@@ -77,6 +78,7 @@ usage() {
     echo " --pre-flashinfer : Use pre-release versions of FlashInfer"
     echo " --pre-tf, --pre-transformers : Install transformers 5.0.0rc0 or higher"
     echo " --exp-mxfp4, --experimental-mxfp4 : Build with experimental native MXFP4 support"
+    echo " --apply-vllm-pr <pr-number> : Apply a specific PR patch to vLLM source code. Can be specified multiple times."
echo " --no-build : Skip building, only copy image (requires --copy-to)" echo " -h, --help : Show this help message" exit 1 @@ -140,8 +142,26 @@ while [[ "$#" -gt 0 ]]; do --pre-flashinfer) PRE_FLASHINFER=true ;; --pre-tf|--pre-transformers) PRE_TRANSFORMERS=true ;; --exp-mxfp4|--experimental-mxfp4) EXP_MXFP4=true ;; + --apply-vllm-pr) + if [ -n "$2" ] && [[ "$2" != -* ]]; then + if [ -n "$VLLM_PRS" ]; then + VLLM_PRS="$VLLM_PRS $2" + else + VLLM_PRS="$2" + fi + shift + else + echo "Error: --apply-vllm-pr requires a PR number." + exit 1 + fi + ;; --no-build) NO_BUILD=true ;; -h|--help) usage ;; +if [ -n "$VLLM_PRS" ]; then + if [ "$EXP_MXFP4" = true ]; then echo "Error: --apply-vllm-pr is incompatible with --exp-mxfp4"; exit 1; fi + if [ -n "$USE_WHEELS_MODE" ]; then echo "Error: --apply-vllm-pr is incompatible with --use-wheels"; exit 1; fi +fi + *) echo "Unknown parameter passed: $1"; usage ;; esac shift @@ -204,6 +224,11 @@ if [ "$NO_BUILD" = false ]; then CMD+=("--build-arg" "FLASHINFER_PRE=--pre") fi + if [ -n "$VLLM_PRS" ]; then + echo "Applying vLLM PRs: $VLLM_PRS" + CMD+=("--build-arg" "VLLM_PRS=$VLLM_PRS") + fi + if [ "$PRE_TRANSFORMERS" = true ]; then echo "Using transformers>=5.0.0..." CMD+=("--build-arg" "PRE_TRANSFORMERS=1")