mirror of
https://github.com/wassname/vllm.git
synced 2026-06-27 18:27:02 +08:00
[Kernel] Triton Configs for Fp8 Block Quantization (#11589)
Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Signed-off-by: mgoin <michael@neuralmagic.com>
Co-authored-by: mgoin <michael@neuralmagic.com>
Co-authored-by: simon-mo <xmo@berkeley.edu>
This commit is contained in:
@@ -608,7 +608,11 @@ if _build_custom_ops():
|
||||
ext_modules.append(CMakeExtension(name="vllm._C"))
|
||||
|
||||
package_data = {
|
||||
"vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
|
||||
"vllm": [
|
||||
"py.typed",
|
||||
"model_executor/layers/fused_moe/configs/*.json",
|
||||
"model_executor/layers/quantization/utils/configs/*.json",
|
||||
]
|
||||
}
|
||||
|
||||
if _no_device():
|
||||
|
||||
Reference in New Issue
Block a user