[Kernel] Refactor CUTLASS kernels to always take scales that reside on the GPU (#5137)

2026-06-27 17:32:55 +08:00 · 2024-06-01 02:45:32 -04:00
parent a360ff80bb
commit 260d119e86
7 changed files with 445 additions and 76 deletions
@@ -59,7 +59,7 @@ exclude = [
 ]

 [tool.codespell]
-ignore-words-list = "dout, te, indicies"
+ignore-words-list = "dout, te, indicies, subtile"
 skip = "./tests/prompts,./benchmarks/sonnet.txt,./tests/lora/data,./build"

 [tool.isort]