[Kernel] Triton Configs for Fp8 Block Quantization (#11589)

Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Signed-off-by: mgoin <michael@neuralmagic.com>
Co-authored-by: mgoin <michael@neuralmagic.com>
Co-authored-by: simon-mo <xmo@berkeley.edu>
This commit is contained in:
Robert Shaw
2025-01-30 14:53:22 -05:00
committed by GitHub
parent 41bf5612f5
commit 9b0c4bab36
43 changed files with 5972 additions and 42 deletions
+5 -1
View File
@@ -608,7 +608,11 @@ if _build_custom_ops():
ext_modules.append(CMakeExtension(name="vllm._C"))
package_data = {
- "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
+ "vllm": [
+ "py.typed",
+ "model_executor/layers/fused_moe/configs/*.json",
+ "model_executor/layers/quantization/utils/configs/*.json",
+ ]
}
if _no_device():