From 43a00ed90f736e0137757df5b2a4c9fed218cb5e Mon Sep 17 00:00:00 2001 From: Eugene Rakhmatulin Date: Sat, 25 Apr 2026 18:39:46 -0700 Subject: [PATCH] Fixed #205 --- recipes/gemma4-26b-a4b.yaml | 2 +- recipes/glm-4.7-flash-awq.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/recipes/gemma4-26b-a4b.yaml b/recipes/gemma4-26b-a4b.yaml index 6135ba8..681cdf8 100644 --- a/recipes/gemma4-26b-a4b.yaml +++ b/recipes/gemma4-26b-a4b.yaml @@ -41,7 +41,7 @@ command: | --gpu-memory-utilization {gpu_memory_utilization} \ --port {port} \ --host {host} \ - --load-format fastsafetensors \ + --load-format instanttensor \ --enable-prefix-caching \ --enable-auto-tool-choice \ --tool-call-parser gemma4 \ diff --git a/recipes/glm-4.7-flash-awq.yaml b/recipes/glm-4.7-flash-awq.yaml index 361e665..0e7bca5 100644 --- a/recipes/glm-4.7-flash-awq.yaml +++ b/recipes/glm-4.7-flash-awq.yaml @@ -30,8 +30,8 @@ build_args: # Mods to apply before running (paths relative to repo root) # This mod prevents severe inference speed degradation -mods: - - mods/fix-glm-4.7-flash-AWQ +# mods: +# - mods/fix-glm-4.7-flash-AWQ # Default settings (can be overridden via CLI) defaults: