diff --git a/mods/fix-qwen3.5-chat-template/chat_template.jinja b/mods/fix-qwen3.5-chat-template/chat_template.jinja
new file mode 100644
index 0000000..af88972
--- /dev/null
+++ b/mods/fix-qwen3.5-chat-template/chat_template.jinja
@@ -0,0 +1,155 @@
+{%- set image_count = namespace(value=0) %}{#- Running image counter; held in a namespace so render_content can update it from inside the macro. -#}
+{%- set video_count = namespace(value=0) %}{#- Running video counter, same mechanism as image_count. -#}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}{#- Renders message content (a string, a list of parts, or none/undefined) to text; raises for any other type. -#}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}{#- List of parts: each part is an image, a video or a text item. -#}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}{#- Callers pass false on look-ahead renders and true on the final per-message render, so each image is counted once. -#}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}{#- add_vision_id is not set in this template; presumably supplied by the caller at render time (TODO confirm). -#}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}{#- Missing content renders as the empty string instead of raising. -#}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}{#- Main body: validate input, emit the system turn, render every message, then append the generation prompt. -#}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}{#- With tools: the system turn carries the tool schemas and the call format, followed by any system text from messages[0]. -#}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}{#- Walk backwards to find the newest user turn that is a real query rather than a wrapped tool response. -#}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}{#- The leading system message was already emitted above; a system message anywhere else is an error. -#}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}{#- Prefer an explicit reasoning_content field; otherwise split an inline think block out of the content. -#}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %}{#- Only assistant turns after the last user query keep their think block; earlier turns are emitted without reasoning. -#}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}{#- Accept both the wrapped form (with a function field) and the flat form (name/arguments directly). -#}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is mapping %}
+                    {%- for args_name in tool_call.arguments %}
+                        {%- set args_value = tool_call.arguments[args_name] %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}{#- Mappings and non-string sequences are JSON-encoded; everything else is stringified. -#}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}{#- Consecutive tool messages share one user turn: opened before the first, closed after the last. -#}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}{#- Thinking disabled: pre-fill an empty think block; otherwise open one for the model to continue. -#}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- else %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/mods/fix-qwen3.5-chat-template/run.sh b/mods/fix-qwen3.5-chat-template/run.sh
new file mode 100644
index 0000000..bb06787
--- /dev/null
+++ b/mods/fix-qwen3.5-chat-template/run.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e  # Installs the fixed Qwen3.5 chat template into the workspace; abort on the first failing command.
+cp chat_template.jinja "${WORKSPACE_DIR:?WORKSPACE_DIR must be set}/unsloth.jinja"  # Source path is relative: assumes the mod runner's cwd is this mod directory (TODO confirm).
+echo "=======> to apply chat template, use --chat-template unsloth.jinja"
\ No newline at end of file
diff --git a/recipes/qwen3.5-122b-fp8.yaml b/recipes/qwen3.5-122b-fp8.yaml
index 9a33505..21582c6 100644
--- a/recipes/qwen3.5-122b-fp8.yaml
+++ b/recipes/qwen3.5-122b-fp8.yaml
@@ -15,7 +15,8 @@ cluster_only: true
container: vllm-node
-# No mods required
-mods: []
+# Mod required to install the fixed Qwen3.5 chat template (unsloth.jinja)
+mods:
+ - mods/fix-qwen3.5-chat-template
# Default settings (can be overridden via CLI)
defaults:
@@ -41,5 +42,6 @@ command: |
--enable-auto-tool-choice \
--tool-call-parser qwen3_coder \
--reasoning-parser qwen3 \
+ --chat-template unsloth.jinja \
-tp {tensor_parallel} --distributed-executor-backend ray \
--max-num-batched-tokens {max_num_batched_tokens}
diff --git a/recipes/qwen3.5-122b-int4-autoround.yaml b/recipes/qwen3.5-122b-int4-autoround.yaml
index 7cc10fb..03e65be 100644
--- a/recipes/qwen3.5-122b-int4-autoround.yaml
+++ b/recipes/qwen3.5-122b-int4-autoround.yaml
@@ -19,6 +19,7 @@ build_args:
# Mod required to fix ROPE syntax error
mods:
- mods/fix-qwen3.5-autoround
+ - mods/fix-qwen3.5-chat-template
# Default settings (can be overridden via CLI)
defaults:
@@ -47,6 +48,7 @@ command: |
--reasoning-parser qwen3 \
--max-num-batched-tokens {max_num_batched_tokens} \
--trust-remote-code \
+ --chat-template unsloth.jinja \
-tp {tensor_parallel} \
--distributed-executor-backend ray
diff --git a/recipes/qwen3.5-35b-a3b-fp8.yaml b/recipes/qwen3.5-35b-a3b-fp8.yaml
index e2bc05b..343ed5f 100644
--- a/recipes/qwen3.5-35b-a3b-fp8.yaml
+++ b/recipes/qwen3.5-35b-a3b-fp8.yaml
@@ -17,6 +17,7 @@ container: vllm-node
# Mod required to fix slowness and crash in the cluster (tracking https://github.com/vllm-project/vllm/issues/33857)
mods:
- mods/fix-qwen3-coder-next
+ - mods/fix-qwen3.5-chat-template
# Default settings (can be overridden via CLI)
defaults:
@@ -45,5 +46,6 @@ command: |
--load-format fastsafetensors \
--attention-backend flashinfer \
--enable-prefix-caching \
+ --chat-template unsloth.jinja \
-tp {tensor_parallel} \
--distributed-executor-backend ray