Files
2026-02-12 15:56:32 -08:00

73 lines
2.1 KiB
Diff

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 63aae43c3ddf..6ca3213fbd8d 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -95,19 +95,19 @@ def fused_moe_kernel_gptq_awq(
# moving by 1 element in a particular dimension. E.g. `stride_am` is
# how much to increase `a_ptr` by to get the element one row down
# (A has M rows).
- stride_am,
- stride_ak,
- stride_be,
- stride_bk,
- stride_bn,
- stride_cm,
- stride_cn,
- stride_bse,
- stride_bsk,
- stride_bsn,
- stride_bze,
- stride_bzk,
- stride_bzn,
+ stride_am: tl.int64,
+ stride_ak: tl.int64,
+ stride_be: tl.int64,
+ stride_bk: tl.int64,
+ stride_bn: tl.int64,
+ stride_cm: tl.int64,
+ stride_cn: tl.int64,
+ stride_bse: tl.int64,
+ stride_bsk: tl.int64,
+ stride_bsn: tl.int64,
+ stride_bze: tl.int64,
+ stride_bzk: tl.int64,
+ stride_bzn: tl.int64,
block_k_diviable: tl.constexpr,
group_size: tl.constexpr,
# Meta-parameters
@@ -329,20 +329,20 @@ def fused_moe_kernel(
# moving by 1 element in a particular dimension. E.g. `stride_am` is
# how much to increase `a_ptr` by to get the element one row down
# (A has M rows).
- stride_am,
- stride_ak,
- stride_be,
- stride_bk,
- stride_bn,
- stride_cm,
- stride_cn,
- stride_asm,
- stride_ask,
- stride_bse,
- stride_bsk,
- stride_bsn,
- stride_bbe, # bias expert stride
- stride_bbn, # bias N stride
+ stride_am: tl.int64,
+ stride_ak: tl.int64,
+ stride_be: tl.int64,
+ stride_bk: tl.int64,
+ stride_bn: tl.int64,
+ stride_cm: tl.int64,
+ stride_cn: tl.int64,
+ stride_asm: tl.int64,
+ stride_ask: tl.int64,
+ stride_bse: tl.int64,
+ stride_bsk: tl.int64,
+ stride_bsn: tl.int64,
+ stride_bbe: tl.int64, # bias expert stride
+ stride_bbn: tl.int64, # bias N stride
# Block size for block-wise quantization
group_n: tl.constexpr,
group_k: tl.constexpr,