mirror of
https://github.com/wassname/vllm.git
synced 2026-06-27 17:32:55 +08:00
[Kernel] Refactor CUTLASS kernels to always take scales that reside on the GPU (#5137)
This commit is contained in:
committed by
GitHub
parent
a360ff80bb
commit
260d119e86
+1
-1
@@ -59,7 +59,7 @@ exclude = [
|
||||
]
|
||||
|
||||
[tool.codespell]
|
||||
ignore-words-list = "dout, te, indicies"
|
||||
ignore-words-list = "dout, te, indicies, subtile"
|
||||
skip = "./tests/prompts,./benchmarks/sonnet.txt,./tests/lora/data,./build"
|
||||
|
||||
[tool.isort]
|
||||
|
||||
Reference in New Issue
Block a user