Qwen3-Coder-Next fixes and updated recipe
This commit is contained in:
72
mods/fix-qwen3-coder-next/fix_slowness.diff
Normal file
72
mods/fix-qwen3-coder-next/fix_slowness.diff
Normal file
@@ -0,0 +1,72 @@
|
||||
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
|
||||
index 63aae43c3ddf..6ca3213fbd8d 100644
|
||||
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
|
||||
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
|
||||
@@ -95,19 +95,19 @@ def fused_moe_kernel_gptq_awq(
|
||||
# moving by 1 element in a particular dimension. E.g. `stride_am` is
|
||||
# how much to increase `a_ptr` by to get the element one row down
|
||||
# (A has M rows).
|
||||
- stride_am,
|
||||
- stride_ak,
|
||||
- stride_be,
|
||||
- stride_bk,
|
||||
- stride_bn,
|
||||
- stride_cm,
|
||||
- stride_cn,
|
||||
- stride_bse,
|
||||
- stride_bsk,
|
||||
- stride_bsn,
|
||||
- stride_bze,
|
||||
- stride_bzk,
|
||||
- stride_bzn,
|
||||
+ stride_am: tl.int64,
|
||||
+ stride_ak: tl.int64,
|
||||
+ stride_be: tl.int64,
|
||||
+ stride_bk: tl.int64,
|
||||
+ stride_bn: tl.int64,
|
||||
+ stride_cm: tl.int64,
|
||||
+ stride_cn: tl.int64,
|
||||
+ stride_bse: tl.int64,
|
||||
+ stride_bsk: tl.int64,
|
||||
+ stride_bsn: tl.int64,
|
||||
+ stride_bze: tl.int64,
|
||||
+ stride_bzk: tl.int64,
|
||||
+ stride_bzn: tl.int64,
|
||||
block_k_diviable: tl.constexpr,
|
||||
group_size: tl.constexpr,
|
||||
# Meta-parameters
|
||||
@@ -329,20 +329,20 @@ def fused_moe_kernel(
|
||||
# moving by 1 element in a particular dimension. E.g. `stride_am` is
|
||||
# how much to increase `a_ptr` by to get the element one row down
|
||||
# (A has M rows).
|
||||
- stride_am,
|
||||
- stride_ak,
|
||||
- stride_be,
|
||||
- stride_bk,
|
||||
- stride_bn,
|
||||
- stride_cm,
|
||||
- stride_cn,
|
||||
- stride_asm,
|
||||
- stride_ask,
|
||||
- stride_bse,
|
||||
- stride_bsk,
|
||||
- stride_bsn,
|
||||
- stride_bbe, # bias expert stride
|
||||
- stride_bbn, # bias N stride
|
||||
+ stride_am: tl.int64,
|
||||
+ stride_ak: tl.int64,
|
||||
+ stride_be: tl.int64,
|
||||
+ stride_bk: tl.int64,
|
||||
+ stride_bn: tl.int64,
|
||||
+ stride_cm: tl.int64,
|
||||
+ stride_cn: tl.int64,
|
||||
+ stride_asm: tl.int64,
|
||||
+ stride_ask: tl.int64,
|
||||
+ stride_bse: tl.int64,
|
||||
+ stride_bsk: tl.int64,
|
||||
+ stride_bsn: tl.int64,
|
||||
+ stride_bbe: tl.int64, # bias expert stride
|
||||
+ stride_bbn: tl.int64, # bias N stride
|
||||
# Block size for block-wise quantization
|
||||
group_n: tl.constexpr,
|
||||
group_k: tl.constexpr,
|
||||
Reference in New Issue
Block a user