diff --git a/Dockerfile b/Dockerfile
index 4b22f61..66921b0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,6 +14,8 @@ ENV MAX_JOBS=${BUILD_JOBS}
 ENV CMAKE_BUILD_PARALLEL_LEVEL=${BUILD_JOBS}
 ENV NINJAFLAGS="-j${BUILD_JOBS}"
 ENV MAKEFLAGS="-j${BUILD_JOBS}"
+ENV DG_JIT_USE_NVRTC=1
+ENV USE_CUDNN=1
 
 # Set non-interactive frontend to prevent apt prompts
 ENV DEBIAN_FRONTEND=noninteractive
@@ -120,6 +122,16 @@ RUN if [ -n "$FLASHINFER_PRS" ]; then \
     done; \
   fi
 
+# TEMPORARY patch for NVFP4 crash (PR 2913)
+RUN curl -fsL https://github.com/flashinfer-ai/flashinfer/pull/2913.diff -o pr2913.diff \
+  && if git apply --reverse --check pr2913.diff 2>/dev/null; then \
+    echo "PR #2913 already applied, skipping."; \
+  else \
+    echo "Applying FI PR #2913..."; \
+    git apply -v pr2913.diff; \
+  fi \
+  && rm pr2913.diff
+
 # Apply patch to avoid re-downloading existing cubins
 COPY flashinfer_cache.patch .
 RUN --mount=type=cache,id=uv-cache,target=/root/.cache/uv \
@@ -247,6 +259,8 @@ ENV MAX_JOBS=${BUILD_JOBS}
 ENV CMAKE_BUILD_PARALLEL_LEVEL=${BUILD_JOBS}
 ENV NINJAFLAGS="-j${BUILD_JOBS}"
 ENV MAKEFLAGS="-j${BUILD_JOBS}"
+ENV DG_JIT_USE_NVRTC=1
+ENV USE_CUDNN=1
 
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PIP_BREAK_SYSTEM_PACKAGES=1
diff --git a/README.md b/README.md
index 125f0b6..9d45eb1 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ An initial build speed depends on your Internet connection speed and whether the
 
 **On a single node**:
 
-**NEW** - `launch-cluster.sh` now supports solo mode, which is now a recommended way to run the container on a single Spark:
+`launch-cluster.sh` supports solo mode, which is now a recommended way to run the container on a single Spark:
 
 ```bash
 ./launch-cluster.sh --solo exec \
@@ -80,23 +80,6 @@ An initial build speed depends on your Internet connection speed and whether the
   --gpu-memory-utilization 0.7 \
   --load-format fastsafetensors
 ```
-
-**To launch using regular `docker run`**
-
-```bash
- docker run \
-   --privileged \
-   --gpus all \
-   -it --rm \
-   --network host --ipc=host \
-   -v ~/.cache/huggingface:/root/.cache/huggingface \
-   vllm-node \
-   bash -c -i "vllm serve \
-     QuantTrio/Qwen3-VL-30B-A3B-Instruct-AWQ \
-     --port 8000 --host 0.0.0.0 \
-     --gpu-memory-utilization 0.7 \
-     --load-format fastsafetensors"
-```
 
 **On a cluster**
 
 It's recommended to download the model on one node and distribute across the cluster using ConnectX interconnect prior to launching. This is to avoid re-downloading the model from the Internet on every node in the cluster.
@@ -151,7 +134,7 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi
 
 ## CHANGELOG
 
-### 2026-03-29
+### 2026-03-30
 
 #### Flags to specify Flashinfer ref and apply PRs
 
@@ -162,8 +145,6 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi
 
 Both flags are incompatible with `--exp-mxfp4`.
 
-### 2026-03-27
-
 #### Default image tag in `build-and-copy.sh`
 
 `build-and-copy.sh` now automatically sets a sensible default image tag when `-t` is not specified: