diff --git a/Dockerfile b/Dockerfile
index ef2b5ce..22f1ed8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -221,25 +221,15 @@ RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pul
        fi \
     && rm pr35568.diff
 
-# TEMPORARY PATCH for broken compilation - https://github.com/vllm-project/vllm/pull/38919
-RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/38919.diff -o pr38919.diff \
-    && if git apply --reverse --check pr38919.diff 2>/dev/null; then \
-         echo "PR 38919 already applied, skipping."; \
+# TEMPORARY PATCH to re-enable FlashInfer 0.6.8 - https://github.com/vllm-project/vllm/pull/39959
+RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/39959.diff -o pr39959.diff \
+    && if git apply --reverse --check pr39959.diff 2>/dev/null; then \
+         echo "PR 39959 already applied, skipping."; \
        else \
-         echo "Applying PR 38919..."; \
-         git apply -v pr38919.diff; \
+         echo "Applying PR 39959..."; \
+         git apply -v pr39959.diff; \
        fi \
-    && rm pr38919.diff
-
-# TEMPORARY PATCH for broken MiniMax M2.5 parser - https://github.com/vllm-project/vllm/pull/39861
-RUN curl -fsL https://patch-diff.githubusercontent.com/raw/vllm-project/vllm/pull/39861.diff -o pr39861.diff \
-    && if git apply --reverse --check pr39861.diff 2>/dev/null; then \
-         echo "PR 39861 already applied, skipping."; \
-       else \
-         echo "Applying PR 39861..."; \
-         git apply -v pr39861.diff; \
-       fi \
-    && rm pr39861.diff
+    && rm pr39959.diff
 
 # Prepare build requirements
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
diff --git a/recipes/qwen3.5-397b-int4-autoround.yaml b/recipes/qwen3.5-397b-int4-autoround.yaml
index 750accd..884c453 100644
--- a/recipes/qwen3.5-397b-int4-autoround.yaml
+++ b/recipes/qwen3.5-397b-int4-autoround.yaml
@@ -55,6 +55,7 @@ command: |
     --max-num-batched-tokens {max_num_batched_tokens} \
     --trust-remote-code \
     --chat-template unsloth.jinja \
+    --load-format instanttensor \
     -tp {tensor_parallel} \
     --distributed-executor-backend ray