[Bugfix][Kernel] Fix moe align block issue for mixtral (#12413)

2026-06-27 17:32:55 +08:00 · 2025-01-24 20:49:28 -05:00
parent 3132a933b6
commit 221d388cc5
1 changed files with 3 additions and 1 deletions
@@ -33,7 +33,9 @@ __global__ void moe_align_block_size_kernel(scalar_t* __restrict__ topk_ids,

  extern __shared__ int32_t shared_mem[];
  int32_t* cumsum = shared_mem;  // 1d tensor with shape (num_experts + 1)
-  token_cnts_t* tokens_cnts = (token_cnts_t*)(shared_mem + blockDim.x + 1);
+  token_cnts_t* tokens_cnts =
+      (token_cnts_t*)(shared_mem + num_experts +
+                      1);  // 2d tensor with shape (blockDim.x + 1, num_experts)

  for (int i = 0; i < num_experts; ++i) {
    tokens_cnts[index(num_experts, threadIdx.x + 1, i)] = 0;