This commit is contained in:
Eugene Rakhmatulin
2026-04-25 18:39:46 -07:00
parent ef9b0e50f4
commit 43a00ed90f
2 changed files with 3 additions and 3 deletions

View File

@@ -41,7 +41,7 @@ command: |
--gpu-memory-utilization {gpu_memory_utilization} \ --gpu-memory-utilization {gpu_memory_utilization} \
--port {port} \ --port {port} \
--host {host} \ --host {host} \
--load-format fastsafetensors \ --load-format instanttensor \
--enable-prefix-caching \ --enable-prefix-caching \
--enable-auto-tool-choice \ --enable-auto-tool-choice \
--tool-call-parser gemma4 \ --tool-call-parser gemma4 \

View File

@@ -30,8 +30,8 @@ build_args:
# Mods to apply before running (paths relative to repo root) # Mods to apply before running (paths relative to repo root)
# This mod prevents severe inference speed degradation # This mod prevents severe inference speed degradation
mods: # mods:
- mods/fix-glm-4.7-flash-AWQ # - mods/fix-glm-4.7-flash-AWQ
# Default settings (can be overridden via CLI) # Default settings (can be overridden via CLI)
defaults: defaults: