[Kernel] Triton Configs for Fp8 Block Quantization (#11589)

Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Signed-off-by: mgoin <michael@neuralmagic.com>
Co-authored-by: mgoin <michael@neuralmagic.com>
Co-authored-by: simon-mo <xmo@berkeley.edu>
2026-06-27 18:27:02 +08:00 · 2025-01-30 14:53:22 -05:00
parent 41bf5612f5
commit 9b0c4bab36
43 changed files with 5972 additions and 42 deletions
@@ -608,7 +608,11 @@ if _build_custom_ops():
    ext_modules.append(CMakeExtension(name="vllm._C"))

 package_data = {
-    "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
+    "vllm": [
+        "py.typed",
+        "model_executor/layers/fused_moe/configs/*.json",
+        "model_executor/layers/quantization/utils/configs/*.json",
+    ]
 }

 if _no_device():