diff --git a/recipes/gemma4-26b-a4b.yaml b/recipes/gemma4-26b-a4b.yaml index 6135ba8..681cdf8 100644 --- a/recipes/gemma4-26b-a4b.yaml +++ b/recipes/gemma4-26b-a4b.yaml @@ -41,7 +41,7 @@ command: | --gpu-memory-utilization {gpu_memory_utilization} \ --port {port} \ --host {host} \ - --load-format fastsafetensors \ + --load-format instanttensor \ --enable-prefix-caching \ --enable-auto-tool-choice \ --tool-call-parser gemma4 \ diff --git a/recipes/glm-4.7-flash-awq.yaml b/recipes/glm-4.7-flash-awq.yaml index 361e665..0e7bca5 100644 --- a/recipes/glm-4.7-flash-awq.yaml +++ b/recipes/glm-4.7-flash-awq.yaml @@ -30,8 +30,8 @@ build_args: # Mods to apply before running (paths relative to repo root) # This mod prevents severe inference speed degradation -mods: - - mods/fix-glm-4.7-flash-AWQ +# mods: +# - mods/fix-glm-4.7-flash-AWQ # Default settings (can be overridden via CLI) defaults: