Fixed #205
This commit is contained in:
@@ -41,7 +41,7 @@ command: |
|
|||||||
--gpu-memory-utilization {gpu_memory_utilization} \
|
--gpu-memory-utilization {gpu_memory_utilization} \
|
||||||
--port {port} \
|
--port {port} \
|
||||||
--host {host} \
|
--host {host} \
|
||||||
--load-format fastsafetensors \
|
--load-format instanttensor \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
--tool-call-parser gemma4 \
|
--tool-call-parser gemma4 \
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ build_args:
|
|||||||
|
|
||||||
# Mods to apply before running (paths relative to repo root)
|
# Mods to apply before running (paths relative to repo root)
|
||||||
# This mod prevents severe inference speed degradation
|
# This mod prevents severe inference speed degradation
|
||||||
mods:
|
# mods:
|
||||||
- mods/fix-glm-4.7-flash-AWQ
|
# - mods/fix-glm-4.7-flash-AWQ
|
||||||
|
|
||||||
# Default settings (can be overridden via CLI)
|
# Default settings (can be overridden via CLI)
|
||||||
defaults:
|
defaults:
|
||||||
|
|||||||
Reference in New Issue
Block a user