From 43a00ed90f736e0137757df5b2a4c9fed218cb5e Mon Sep 17 00:00:00 2001
From: Eugene Rakhmatulin <eugr@eugr.com>
Date: Sat, 25 Apr 2026 18:39:46 -0700
Subject: [PATCH] Fixed #205: use instanttensor load format for gemma4, disable GLM-4.7-flash AWQ mod

---
 recipes/gemma4-26b-a4b.yaml    | 2 +-
 recipes/glm-4.7-flash-awq.yaml | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/recipes/gemma4-26b-a4b.yaml b/recipes/gemma4-26b-a4b.yaml
index 6135ba8..681cdf8 100644
--- a/recipes/gemma4-26b-a4b.yaml
+++ b/recipes/gemma4-26b-a4b.yaml
@@ -41,7 +41,7 @@ command: |
     --gpu-memory-utilization {gpu_memory_utilization} \
     --port {port} \
     --host {host} \
-    --load-format fastsafetensors \
+    --load-format instanttensor \
     --enable-prefix-caching \
     --enable-auto-tool-choice \
     --tool-call-parser gemma4 \
diff --git a/recipes/glm-4.7-flash-awq.yaml b/recipes/glm-4.7-flash-awq.yaml
index 361e665..0e7bca5 100644
--- a/recipes/glm-4.7-flash-awq.yaml
+++ b/recipes/glm-4.7-flash-awq.yaml
@@ -30,8 +30,8 @@ build_args:
 
 # Mods to apply before running (paths relative to repo root)
 # This mod prevents severe inference speed degradation
-mods:
-  - mods/fix-glm-4.7-flash-AWQ
+# mods:
+#   - mods/fix-glm-4.7-flash-AWQ
 
 # Default settings (can be overridden via CLI)
 defaults: