@@ -20,7 +20,7 @@ build_args:
|
|||||||
|
|
||||||
# Mods
|
# Mods
|
||||||
mods:
|
mods:
|
||||||
- mods/fix-gemma4-tool-parser
|
# - mods/fix-gemma4-tool-parser
|
||||||
|
|
||||||
# Default settings (can be overridden via CLI)
|
# Default settings (can be overridden via CLI)
|
||||||
defaults:
|
defaults:
|
||||||
@@ -41,7 +41,7 @@ command: |
|
|||||||
--gpu-memory-utilization {gpu_memory_utilization} \
|
--gpu-memory-utilization {gpu_memory_utilization} \
|
||||||
--port {port} \
|
--port {port} \
|
||||||
--host {host} \
|
--host {host} \
|
||||||
--load-format instanttensor \
|
--load-format safetensors \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
--tool-call-parser gemma4 \
|
--tool-call-parser gemma4 \
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ build_args:
|
|||||||
|
|
||||||
# Mod required to fix ROPE syntax error
|
# Mod required to fix ROPE syntax error
|
||||||
mods:
|
mods:
|
||||||
- mods/fix-qwen3.5-autoround
|
# - mods/fix-qwen3.5-autoround
|
||||||
- mods/fix-qwen3.5-chat-template
|
- mods/fix-qwen3.5-chat-template
|
||||||
|
|
||||||
# Default settings (can be overridden via CLI)
|
# Default settings (can be overridden via CLI)
|
||||||
@@ -44,7 +44,7 @@ command: |
|
|||||||
--load-format fastsafetensors \
|
--load-format fastsafetensors \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
--tool-call-parser qwen3_coder \
|
--tool-call-parser qwen3_xml \
|
||||||
--reasoning-parser qwen3 \
|
--reasoning-parser qwen3 \
|
||||||
--max-num-batched-tokens {max_num_batched_tokens} \
|
--max-num-batched-tokens {max_num_batched_tokens} \
|
||||||
--trust-remote-code \
|
--trust-remote-code \
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ build_args:
|
|||||||
|
|
||||||
# Mod required to fix ROPE syntax error
|
# Mod required to fix ROPE syntax error
|
||||||
mods:
|
mods:
|
||||||
- mods/fix-qwen3.5-autoround
|
# - mods/fix-qwen3.5-autoround
|
||||||
- mods/fix-qwen3.5-chat-template
|
- mods/fix-qwen3.5-chat-template
|
||||||
- mods/gpu-mem-util-gb
|
- mods/gpu-mem-util-gb
|
||||||
- mods/drop-caches
|
- mods/drop-caches
|
||||||
@@ -50,7 +50,7 @@ command: |
|
|||||||
--host {host} \
|
--host {host} \
|
||||||
--enable-prefix-caching \
|
--enable-prefix-caching \
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
--tool-call-parser qwen3_coder \
|
--tool-call-parser qwen3_xml \
|
||||||
--reasoning-parser qwen3 \
|
--reasoning-parser qwen3 \
|
||||||
--max-num-batched-tokens {max_num_batched_tokens} \
|
--max-num-batched-tokens {max_num_batched_tokens} \
|
||||||
--trust-remote-code \
|
--trust-remote-code \
|
||||||
|
|||||||
Reference in New Issue
Block a user