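Fixed #205

- recipes/gemma4-26b-a4b.yaml: switch --load-format from fastsafetensors to instanttensor
- recipes/glm-4.7-flash-awq.yaml: comment out the mods/fix-glm-4.7-flash-AWQ mod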

2026-04-25 18:39:46 -07:00
parent ef9b0e50f4
commit 43a00ed90f
2 changed files with 3 additions and 3 deletions
--- a/recipes/gemma4-26b-a4b.yaml
+++ b/recipes/gemma4-26b-a4b.yaml
@@ -41,7 +41,7 @@ command: |
    --gpu-memory-utilization {gpu_memory_utilization} \
    --port {port} \
    --host {host} \
-    --load-format fastsafetensors \
+    --load-format instanttensor \
    --enable-prefix-caching \
    --enable-auto-tool-choice \
    --tool-call-parser gemma4 \
--- a/recipes/glm-4.7-flash-awq.yaml
+++ b/recipes/glm-4.7-flash-awq.yaml
@@ -30,8 +30,8 @@ build_args:

 # Mods to apply before running (paths relative to repo root)
 # This mod prevents severe inference speed degradation
-mods:
-  - mods/fix-glm-4.7-flash-AWQ
+# mods:
+#   - mods/fix-glm-4.7-flash-AWQ

 # Default settings (can be overridden via CLI)
 defaults: