[Kernel] Flash Attention 3 Support (#12093)

Signed-off-by: Lucas Wilkinson <lwilkinson@neuralmagic.com>
2026-06-27 18:27:02 +08:00 · 2025-01-23 09:45:48 -05:00
parent c5b4b11d7f
commit 978b45f399
8 changed files with 151 additions and 83 deletions
@@ -228,8 +228,11 @@ class cmake_build_ext(build_ext):

            # CMake appends the extension prefix to the install path,
            # and outdir already contains that prefix, so we need to remove it.
+            # We assume only the final component of the extension prefix is
+            # added by CMake; this holds for the current extensions but may not
+            # always be the case.
            prefix = outdir
-            for i in range(ext.name.count('.')):
+            if '.' in ext.name:
                prefix = prefix.parent

            # prefix here should actually be the same for all components
@@ -298,7 +301,8 @@ class repackage_wheel(build_ext):
            files_to_copy = [
                "vllm/_C.abi3.so",
                "vllm/_moe_C.abi3.so",
-                "vllm/vllm_flash_attn/vllm_flash_attn_c.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
+                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
                "vllm/vllm_flash_attn/flash_attn_interface.py",
                "vllm/vllm_flash_attn/__init__.py",
                "vllm/cumem_allocator.abi3.so",
@@ -593,8 +597,8 @@ if _is_hip():
    ext_modules.append(CMakeExtension(name="vllm._rocm_C"))

 if _is_cuda():
-    ext_modules.append(
-        CMakeExtension(name="vllm.vllm_flash_attn.vllm_flash_attn_c"))
+    ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa2_C"))
+    ext_modules.append(CMakeExtension(name="vllm.vllm_flash_attn._vllm_fa3_C"))
    ext_modules.append(CMakeExtension(name="vllm.cumem_allocator"))

 if _build_custom_ops():