[Misc] Support attention logits soft-capping with flash-attn (#7022)

2026-06-27 17:32:55 +08:00 · 2024-08-01 13:14:37 -07:00
parent 562e580abc
commit 805a8a75f2
14 changed files with 71 additions and 47 deletions
@@ -8,4 +8,4 @@ torch == 2.4.0
 # These must be updated alongside torch
 torchvision == 0.19   # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 xformers == 0.0.27.post2  # Requires PyTorch 2.4.0
-vllm-flash-attn == 2.6.0  # Requires PyTorch 2.4.0
+vllm-flash-attn == 2.6.1  # Requires PyTorch 2.4.0