Unsloth chat template for qwen3.5

2026-03-06 23:35:18 -08:00
parent 9dc09bd04b
commit d42c4199fa
5 changed files with 166 additions and 1 deletion
--- a/recipes/qwen3.5-122b-fp8.yaml
+++ b/recipes/qwen3.5-122b-fp8.yaml
@@ -15,7 +15,8 @@ cluster_only: true
 container: vllm-node

-# No mods required
-mods: []
+# Mod required to use the Unsloth chat template (see --chat-template below)
+mods:
+  - mods/fix-qwen3.5-chat-template

 # Default settings (can be overridden via CLI)
 defaults:
@@ -41,5 +42,6 @@ command: |
    --enable-auto-tool-choice \
    --tool-call-parser qwen3_coder \
    --reasoning-parser qwen3 \
+    --chat-template unsloth.jinja \
    -tp {tensor_parallel} --distributed-executor-backend ray \
    --max-num-batched-tokens {max_num_batched_tokens}
--- a/recipes/qwen3.5-122b-int4-autoround.yaml
+++ b/recipes/qwen3.5-122b-int4-autoround.yaml
@@ -19,6 +19,7 @@ build_args:
 # Mod required to fix ROPE syntax error
 mods:
  - mods/fix-qwen3.5-autoround
+  - mods/fix-qwen3.5-chat-template

 # Default settings (can be overridden via CLI)
 defaults:
@@ -47,6 +48,7 @@ command: |
    --reasoning-parser qwen3 \
    --max-num-batched-tokens {max_num_batched_tokens} \
    --trust-remote-code \
+    --chat-template unsloth.jinja \
    -tp {tensor_parallel} \
    --distributed-executor-backend ray

--- a/recipes/qwen3.5-35b-a3b-fp8.yaml
+++ b/recipes/qwen3.5-35b-a3b-fp8.yaml
@@ -17,6 +17,7 @@ container: vllm-node
 # Mod required to fix slowness and crash in the cluster (tracking https://github.com/vllm-project/vllm/issues/33857)
 mods:
  - mods/fix-qwen3-coder-next
+  - mods/fix-qwen3.5-chat-template

 # Default settings (can be overridden via CLI)
 defaults:
@@ -45,5 +46,6 @@ command: |
    --load-format fastsafetensors \
    --attention-backend flashinfer \
    --enable-prefix-caching \
+    --chat-template unsloth.jinja \
    -tp {tensor_parallel} \
    --distributed-executor-backend ray