From 29d5904b8029fa0714742783f6b5fb354baf636b Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin
Date: Fri, 8 May 2026 12:56:28 -0700
Subject: [PATCH] Fix performance regression

Revert vLLM commit c51df430 (PR #41524) at image build time when the
FlashInfer autotune default is found disabled in vllm/config/vllm.py.

The build fails if that file is missing, or if the revert does not
re-enable autotune, so the regression cannot slip through silently.

---
NOTE(review): line breaks and indentation were reconstructed by hand.
Verify context-line whitespace against the target Dockerfile before
applying; the index post-image hash below was not regenerated.

 Dockerfile | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 12d850e..f723afc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -231,6 +231,29 @@ RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pul
     fi \
     && rm pr35568.diff
 
+# TEMPORARY PATCH: revert vLLM PR #41524 / commit c51df430,
+# which disables FlashInfer autotune and regresses DGX Spark throughput.
+# The step fails the build if the config file is missing or if the revert
+# does not re-enable autotune; it is a no-op when the regression is absent.
+RUN set -eux; \
+    patch_commit="c51df43005726a09c6eb7348e8c1b00501c70a8e"; \
+    target="vllm/config/vllm.py"; \
+    disabled_marker='"enable_flashinfer_autotune": False'; \
+    if [ ! -f "$target" ]; then \
+        echo "ERROR: ${target} not found; cannot check FlashInfer autotune default" >&2; \
+        exit 1; \
+    fi; \
+    if grep -qF "$disabled_marker" "$target"; then \
+        echo "FlashInfer autotune default is disabled; reverting ${patch_commit}"; \
+        git revert --no-commit "$patch_commit"; \
+        if grep -qF "$disabled_marker" "$target"; then \
+            echo "ERROR: revert applied but FlashInfer autotune still appears disabled" >&2; \
+            exit 1; \
+        fi; \
+    else \
+        echo "FlashInfer autotune regression not present; skipping revert"; \
+    fi
+
 # Prepare build requirements
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
     python3 use_existing_torch.py && \