diff --git a/README.md b/README.md
index 0cdc99e..bfa3dd5 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,22 @@ For periodic maintenance, I recommend using a filter: `docker builder prune --fi
 
 ## CHANGELOG
 
+### 2026-04-02
+
+A new recipe for Gemma4-26B-A4B with "on-the-fly" (online) FP8 quantization:
+
+Single Spark:
+
+```bash
+./run-recipe.sh gemma4-26b-a4b --solo
+```
+
+Dual Sparks:
+
+```bash
+./run-recipe.sh gemma4-26b-a4b --no-ray
+```
+
 ### 2026-03-31
 
 #### Flags to specify Flashinfer ref and apply PRs
diff --git a/recipes/gemma4-26b-a4b.yaml b/recipes/gemma4-26b-a4b.yaml
new file mode 100644
index 0000000..26e5094
--- /dev/null
+++ b/recipes/gemma4-26b-a4b.yaml
@@ -0,0 +1,49 @@
+# Recipe: Gemma4-26B-A4B
+# Gemma4-26B-A4B model in online FP8 quantization
+
+recipe_version: "1"
+name: Gemma4-26B-A4B
+description: vLLM serving Gemma4-26B-A4B
+
+# HuggingFace model to download (optional, for --download-model)
+model: google/gemma-4-26B-A4B-it
+
+# Both solo and cluster modes are supported
+cluster_only: false
+solo_only: false
+
+# Container image to use
+container: vllm-node-tf5
+
+# No mods required
+mods: []
+
+# Default settings (can be overridden via CLI)
+defaults:
+  port: 8000
+  host: 0.0.0.0
+  tensor_parallel: 2
+  gpu_memory_utilization: 0.7
+  max_model_len: 262144
+  max_num_batched_tokens: 8192
+
+# Environment variables
+env: {}
+
+# The vLLM serve command template
+command: |
+  vllm serve google/gemma-4-26B-A4B-it \
+    --max-model-len {max_model_len} \
+    --gpu-memory-utilization {gpu_memory_utilization} \
+    --port {port} \
+    --host {host} \
+    --load-format fastsafetensors \
+    --enable-prefix-caching \
+    --enable-auto-tool-choice \
+    --tool-call-parser gemma4 \
+    --reasoning-parser gemma4 \
+    --quantization fp8 \
+    --kv-cache-dtype fp8 \
+    --max-num-batched-tokens {max_num_batched_tokens} \
+    -tp {tensor_parallel} --distributed-executor-backend ray
+
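Once either variant is up, a quick smoke test can be run against vLLM's OpenAI-compatible chat endpoint. This is a minimal sketch assuming the recipe's defaults (port 8000, queried from the serving machine); the hostname and prompt are placeholders to adjust for your setup:

```bash
# Minimal smoke test against the OpenAI-compatible API served by vLLM.
# Assumes the recipe's default port (8000) and a local query; the model
# name must match the one in the recipe's serve command.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "google/gemma-4-26B-A4B-it",
        "messages": [{"role": "user", "content": "Reply with one short sentence."}],
        "max_tokens": 64
      }'
```

A JSON response containing a `choices` array confirms the server is accepting requests; a refused connection usually means the model is still loading or the port was overridden.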