Reverted gemma4 `--load-format` from instanttensor to safetensors. Fixes #214 and #217.

This commit is contained in:
Eugene Rakhmatulin
2026-04-29 10:56:40 -07:00
parent e3243bf555
commit 87cb9f6e1e
3 changed files with 6 additions and 6 deletions

View File

@@ -20,7 +20,7 @@ build_args:
# Mods # Mods
mods: mods:
- mods/fix-gemma4-tool-parser # - mods/fix-gemma4-tool-parser
# Default settings (can be overridden via CLI) # Default settings (can be overridden via CLI)
defaults: defaults:
@@ -41,7 +41,7 @@ command: |
--gpu-memory-utilization {gpu_memory_utilization} \ --gpu-memory-utilization {gpu_memory_utilization} \
--port {port} \ --port {port} \
--host {host} \ --host {host} \
--load-format instanttensor \ --load-format safetensors \
--enable-prefix-caching \ --enable-prefix-caching \
--enable-auto-tool-choice \ --enable-auto-tool-choice \
--tool-call-parser gemma4 \ --tool-call-parser gemma4 \

View File

@@ -18,7 +18,7 @@ build_args:
# Mod required to fix ROPE syntax error # Mod required to fix ROPE syntax error
mods: mods:
- mods/fix-qwen3.5-autoround # - mods/fix-qwen3.5-autoround
- mods/fix-qwen3.5-chat-template - mods/fix-qwen3.5-chat-template
# Default settings (can be overridden via CLI) # Default settings (can be overridden via CLI)
@@ -44,7 +44,7 @@ command: |
--load-format fastsafetensors \ --load-format fastsafetensors \
--enable-prefix-caching \ --enable-prefix-caching \
--enable-auto-tool-choice \ --enable-auto-tool-choice \
--tool-call-parser qwen3_coder \ --tool-call-parser qwen3_xml \
--reasoning-parser qwen3 \ --reasoning-parser qwen3 \
--max-num-batched-tokens {max_num_batched_tokens} \ --max-num-batched-tokens {max_num_batched_tokens} \
--trust-remote-code \ --trust-remote-code \

View File

@@ -20,7 +20,7 @@ build_args:
# Mod required to fix ROPE syntax error # Mod required to fix ROPE syntax error
mods: mods:
- mods/fix-qwen3.5-autoround # - mods/fix-qwen3.5-autoround
- mods/fix-qwen3.5-chat-template - mods/fix-qwen3.5-chat-template
- mods/gpu-mem-util-gb - mods/gpu-mem-util-gb
- mods/drop-caches - mods/drop-caches
@@ -50,7 +50,7 @@ command: |
--host {host} \ --host {host} \
--enable-prefix-caching \ --enable-prefix-caching \
--enable-auto-tool-choice \ --enable-auto-tool-choice \
--tool-call-parser qwen3_coder \ --tool-call-parser qwen3_xml \
--reasoning-parser qwen3 \ --reasoning-parser qwen3 \
--max-num-batched-tokens {max_num_batched_tokens} \ --max-num-batched-tokens {max_num_batched_tokens} \
--trust-remote-code \ --trust-remote-code \