diff --git a/mods/fix-qwen3.5-chat-template/chat_template.jinja b/mods/fix-qwen3.5-chat-template/chat_template.jinja
new file mode 100644
index 0000000..af88972
--- /dev/null
+++ b/mods/fix-qwen3.5-chat-template/chat_template.jinja
@@ -0,0 +1,163 @@
+{#- Qwen3.5 chat template; run.sh in this mod installs it as unsloth.jinja for use with --chat-template.
+    Emits an optional tool-definition system block, then every conversation turn, then the generation prompt.
+    Every tag uses leading whitespace control, so indentation and these comments add nothing to the rendered prompt. #}
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{#- render_content: flattens a string or a list of parts to text; image and video parts become vision placeholder tokens. #}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+    {%- if content is string %}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping %}
+        {%- for item in content %}
+            {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain images.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set image_count.value = image_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif 'video' in item or item.type == 'video' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain videos.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set video_count.value = video_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in item %}
+                {{- item.text }}
+            {%- else %}
+                {{- raise_exception('Unexpected item type in content.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif content is none or content is undefined %}
+        {{- '' }}
+    {%- else %}
+        {{- raise_exception('Unexpected content type.') }}
+    {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{#- Scan backwards for the last user message that is not a wrapped tool response; only assistant turns after it keep their think block. #}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {#- Prefer an explicit reasoning_content string; otherwise split the reasoning out of inline think tags in the content. #}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is mapping %}
+                    {%- for args_name in tool_call.arguments %}
+                        {%- set args_value = tool_call.arguments[args_name] %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {#- Consecutive tool results share one user turn: it is opened before the first result and closed after the last. #}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{#- Open the assistant turn; when enable_thinking is explicitly false an empty think block is pre-filled. #}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- else %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/mods/fix-qwen3.5-chat-template/run.sh b/mods/fix-qwen3.5-chat-template/run.sh
new file mode 100644
index 0000000..bb06787
--- /dev/null
+++ b/mods/fix-qwen3.5-chat-template/run.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Installs chat_template.jinja (read from the current directory) as $WORKSPACE_DIR/unsloth.jinja; aborts if WORKSPACE_DIR is unset or empty.
+set -e
+cp chat_template.jinja "${WORKSPACE_DIR:?WORKSPACE_DIR must be set}/unsloth.jinja"
+echo "=======> to apply chat template, use --chat-template unsloth.jinja"
\ No newline at end of file
diff --git a/recipes/qwen3.5-122b-fp8.yaml b/recipes/qwen3.5-122b-fp8.yaml
index 9a33505..21582c6 100644
--- a/recipes/qwen3.5-122b-fp8.yaml
+++ b/recipes/qwen3.5-122b-fp8.yaml
@@ -15,7 +15,8 @@ cluster_only: true
 container: vllm-node
 
-# No mods required
-mods: []
+# Mod required to install the fixed Qwen3.5 chat template
+mods:
+  - mods/fix-qwen3.5-chat-template
 
 # Default settings (can be overridden via CLI)
 defaults:
@@ -41,5 +42,6 @@ command: |
     --enable-auto-tool-choice \
     --tool-call-parser qwen3_coder \
     --reasoning-parser qwen3 \
+    --chat-template unsloth.jinja \
     -tp {tensor_parallel} --distributed-executor-backend ray \
     --max-num-batched-tokens {max_num_batched_tokens}
diff --git a/recipes/qwen3.5-122b-int4-autoround.yaml b/recipes/qwen3.5-122b-int4-autoround.yaml
index 7cc10fb..03e65be 100644
--- a/recipes/qwen3.5-122b-int4-autoround.yaml
+++ b/recipes/qwen3.5-122b-int4-autoround.yaml
@@ -19,6 +19,7 @@ build_args:
 # Mod required to fix ROPE syntax error
 mods:
   - mods/fix-qwen3.5-autoround
+  - mods/fix-qwen3.5-chat-template
 
 # Default settings (can be overridden via CLI)
 defaults:
@@ -47,6 +48,7 @@ command: |
     --reasoning-parser qwen3 \
     --max-num-batched-tokens {max_num_batched_tokens} \
     --trust-remote-code \
+    --chat-template unsloth.jinja \
     -tp {tensor_parallel} \
     --distributed-executor-backend ray
 
diff --git a/recipes/qwen3.5-35b-a3b-fp8.yaml b/recipes/qwen3.5-35b-a3b-fp8.yaml
index e2bc05b..343ed5f 100644
--- a/recipes/qwen3.5-35b-a3b-fp8.yaml
+++ b/recipes/qwen3.5-35b-a3b-fp8.yaml
@@ -17,6 +17,7 @@ container: vllm-node
 # Mod required to fix slowness and crash in the cluster (tracking https://github.com/vllm-project/vllm/issues/33857)
 mods:
   - mods/fix-qwen3-coder-next
+  - mods/fix-qwen3.5-chat-template
 
 # Default settings (can be overridden via CLI)
 defaults:
@@ -45,5 +46,6 @@ command: |
     --load-format fastsafetensors \
     --attention-backend flashinfer \
     --enable-prefix-caching \
+    --chat-template unsloth.jinja \
     -tp {tensor_parallel} \
     --distributed-executor-backend ray