Unsloth chat template for qwen3.5
This commit is contained in:
@@ -15,7 +15,8 @@ cluster_only: true
|
||||
container: vllm-node
|
||||
|
||||
# Mod required for the Qwen3.5 chat-template fix (Unsloth template)
|
||||
mods: []
|
||||
mods:
|
||||
- mods/fix-qwen3.5-chat-template
|
||||
|
||||
# Default settings (can be overridden via CLI)
|
||||
defaults:
|
||||
@@ -41,5 +42,6 @@ command: |
|
||||
--enable-auto-tool-choice \
|
||||
--tool-call-parser qwen3_coder \
|
||||
--reasoning-parser qwen3 \
|
||||
--chat-template unsloth.jinja \
|
||||
-tp {tensor_parallel} --distributed-executor-backend ray \
|
||||
--max-num-batched-tokens {max_num_batched_tokens}
|
||||
|
||||
@@ -19,6 +19,7 @@ build_args:
|
||||
# Mods required to fix the ROPE syntax error and the Qwen3.5 chat template
|
||||
mods:
|
||||
- mods/fix-qwen3.5-autoround
|
||||
- mods/fix-qwen3.5-chat-template
|
||||
|
||||
# Default settings (can be overridden via CLI)
|
||||
defaults:
|
||||
@@ -47,6 +48,7 @@ command: |
|
||||
--reasoning-parser qwen3 \
|
||||
--max-num-batched-tokens {max_num_batched_tokens} \
|
||||
--trust-remote-code \
|
||||
--chat-template unsloth.jinja \
|
||||
-tp {tensor_parallel} \
|
||||
--distributed-executor-backend ray
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ container: vllm-node
|
||||
# Mods required to fix slowness and crashes in the cluster (tracking https://github.com/vllm-project/vllm/issues/33857) and to fix the Qwen3.5 chat template
|
||||
mods:
|
||||
- mods/fix-qwen3-coder-next
|
||||
- mods/fix-qwen3.5-chat-template
|
||||
|
||||
# Default settings (can be overridden via CLI)
|
||||
defaults:
|
||||
@@ -45,5 +46,6 @@ command: |
|
||||
--load-format fastsafetensors \
|
||||
--attention-backend flashinfer \
|
||||
--enable-prefix-caching \
|
||||
--chat-template unsloth.jinja \
|
||||
-tp {tensor_parallel} \
|
||||
--distributed-executor-backend ray
|
||||
|
||||
Reference in New Issue
Block a user