Recipe updates: disable two mods, change the load format and tool-call parsers.

* gemma4-26b-a4b: comment out mods/fix-gemma4-tool-parser and switch
  --load-format from instanttensor to safetensors. `mods:` is written as an
  explicit empty list so the key does not parse as null now that the only
  entry visible in the hunk is commented out.
* qwen3.5-122b-int4-autoround and qwen3.5-397b-int4-autoround: comment out
  mods/fix-qwen3.5-autoround and switch --tool-call-parser from qwen3_coder
  to qwen3_xml.

NOTE(review): leading whitespace inside the hunks was reconstructed (2-space
list items, 4-space command continuation lines); confirm against the target
files before applying.
NOTE(review): the post-image blob id on the gemma4 index line is carried over
unchanged and will not match the amended result.
NOTE(review): "# Mod required to fix ROPE syntax error" now sits above a list
whose first entry is disabled; confirm the comment is still accurate.

diff --git a/recipes/gemma4-26b-a4b.yaml b/recipes/gemma4-26b-a4b.yaml
index 681cdf8..7d2c359 100644
--- a/recipes/gemma4-26b-a4b.yaml
+++ b/recipes/gemma4-26b-a4b.yaml
@@ -20,7 +20,7 @@ build_args:
 
 # Mods
-mods:
-  - mods/fix-gemma4-tool-parser
+mods: []
+  # - mods/fix-gemma4-tool-parser
 
 # Default settings (can be overridden via CLI)
 defaults:
@@ -41,7 +41,7 @@ command: |
     --gpu-memory-utilization {gpu_memory_utilization} \
     --port {port} \
     --host {host} \
-    --load-format instanttensor \
+    --load-format safetensors \
     --enable-prefix-caching \
     --enable-auto-tool-choice \
     --tool-call-parser gemma4 \
diff --git a/recipes/qwen3.5-122b-int4-autoround.yaml b/recipes/qwen3.5-122b-int4-autoround.yaml
index 03e65be..92c77b1 100644
--- a/recipes/qwen3.5-122b-int4-autoround.yaml
+++ b/recipes/qwen3.5-122b-int4-autoround.yaml
@@ -18,7 +18,7 @@ build_args:
 
 # Mod required to fix ROPE syntax error
 mods:
-  - mods/fix-qwen3.5-autoround
+  # - mods/fix-qwen3.5-autoround
   - mods/fix-qwen3.5-chat-template
 
 # Default settings (can be overridden via CLI)
@@ -44,7 +44,7 @@ command: |
     --load-format fastsafetensors \
     --enable-prefix-caching \
     --enable-auto-tool-choice \
-    --tool-call-parser qwen3_coder \
+    --tool-call-parser qwen3_xml \
     --reasoning-parser qwen3 \
     --max-num-batched-tokens {max_num_batched_tokens} \
     --trust-remote-code \
diff --git a/recipes/qwen3.5-397b-int4-autoround.yaml b/recipes/qwen3.5-397b-int4-autoround.yaml
index 884c453..676a54d 100644
--- a/recipes/qwen3.5-397b-int4-autoround.yaml
+++ b/recipes/qwen3.5-397b-int4-autoround.yaml
@@ -20,7 +20,7 @@ build_args:
 
 # Mod required to fix ROPE syntax error
 mods:
-  - mods/fix-qwen3.5-autoround
+  # - mods/fix-qwen3.5-autoround
   - mods/fix-qwen3.5-chat-template
   - mods/gpu-mem-util-gb
   - mods/drop-caches
@@ -50,7 +50,7 @@ command: |
     --host {host} \
     --enable-prefix-caching \
     --enable-auto-tool-choice \
-    --tool-call-parser qwen3_coder \
+    --tool-call-parser qwen3_xml \
     --reasoning-parser qwen3 \
     --max-num-batched-tokens {max_num_batched_tokens} \
     --trust-remote-code \