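Reverted gemma4 to the safetensors load format (was instanttensor). Also commented out the fix-gemma4-tool-parser and fix-qwen3.5-autoround mods and switched the tool-call parser from qwen3_coder to qwen3_xml in the two recipes that use the qwen3.5 mods. Fixes #214 and #217.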

This commit is contained in:
Eugene Rakhmatulin
2026-04-29 10:56:40 -07:00
parent e3243bf555
commit 87cb9f6e1e
3 changed files with 6 additions and 6 deletions

View File

@@ -20,7 +20,7 @@ build_args:
# Mods
mods:
- mods/fix-gemma4-tool-parser
# - mods/fix-gemma4-tool-parser
# Default settings (can be overridden via CLI)
defaults:
@@ -41,7 +41,7 @@ command: |
--gpu-memory-utilization {gpu_memory_utilization} \
--port {port} \
--host {host} \
--load-format instanttensor \
--load-format safetensors \
--enable-prefix-caching \
--enable-auto-tool-choice \
--tool-call-parser gemma4 \

View File

@@ -18,7 +18,7 @@ build_args:
# Mod required to fix ROPE syntax error
mods:
- mods/fix-qwen3.5-autoround
# - mods/fix-qwen3.5-autoround
- mods/fix-qwen3.5-chat-template
# Default settings (can be overridden via CLI)
@@ -44,7 +44,7 @@ command: |
--load-format fastsafetensors \
--enable-prefix-caching \
--enable-auto-tool-choice \
--tool-call-parser qwen3_coder \
--tool-call-parser qwen3_xml \
--reasoning-parser qwen3 \
--max-num-batched-tokens {max_num_batched_tokens} \
--trust-remote-code \

View File

@@ -20,7 +20,7 @@ build_args:
# Mod required to fix ROPE syntax error
mods:
- mods/fix-qwen3.5-autoround
# - mods/fix-qwen3.5-autoround
- mods/fix-qwen3.5-chat-template
- mods/gpu-mem-util-gb
- mods/drop-caches
@@ -50,7 +50,7 @@ command: |
--host {host} \
--enable-prefix-caching \
--enable-auto-tool-choice \
--tool-call-parser qwen3_coder \
--tool-call-parser qwen3_xml \
--reasoning-parser qwen3 \
--max-num-batched-tokens {max_num_batched_tokens} \
--trust-remote-code \