Re-enable flashinfer_cutlass

This commit is contained in:
Eugene Rakhmatulin
2026-04-16 16:40:56 -07:00
parent 6b7f8dace6
commit d49fac1b8b
2 changed files with 8 additions and 17 deletions

View File

@@ -221,25 +221,15 @@ RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pul
     fi \
     && rm pr35568.diff
-# TEMPORARY PATCH for broken compilation - https://github.com/vllm-project/vllm/pull/38919
-RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/38919.diff -o pr38919.diff \
-    && if git apply --reverse --check pr38919.diff 2>/dev/null; then \
-        echo "PR 38919 already applied, skipping."; \
-    else \
-        echo "Applying PR 38919..."; \
-        git apply -v pr38919.diff; \
-    fi \
-    && rm pr38919.diff
+# TEMPORARY PATCH to re-enable Flashinfer 0.6.8 - https://github.com/vllm-project/vllm/pull/39959
+RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/39959.diff -o pr39959.diff \
+    && if git apply --reverse --check pr39959.diff 2>/dev/null; then \
+        echo "PR 39959 already applied, skipping."; \
+    else \
+        echo "Applying PR 39959..."; \
+        git apply -v pr39959.diff; \
+    fi \
+    && rm pr39959.diff
-# TEMPORARY PATCH for broken MiniMax M2.5 parser - https://github.com/vllm-project/vllm/pull/39861
-RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/39861.diff -o pr39861.diff \
-    && if git apply --reverse --check pr39861.diff 2>/dev/null; then \
-        echo "PR 39861 already applied, skipping."; \
-    else \
-        echo "Applying PR 39861..."; \
-        git apply -v pr39861.diff; \
-    fi \
-    && rm pr39861.diff
 # Prepare build requirements
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \

View File

@@ -55,6 +55,7 @@ command: |
   --max-num-batched-tokens {max_num_batched_tokens} \
   --trust-remote-code \
   --chat-template unsloth.jinja \
+  --load-format instanttensor \
   -tp {tensor_parallel} \
   --distributed-executor-backend ray