[Misc] Support attention logits soft-capping with flash-attn (#7022)

This commit is contained in:
Woosuk Kwon
2024-08-01 13:14:37 -07:00
committed by GitHub
parent 562e580abc
commit 805a8a75f2
14 changed files with 71 additions and 47 deletions
+1 -1
View File
@@ -8,4 +8,4 @@ torch == 2.4.0
# These must be updated alongside torch
torchvision == 0.19 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
xformers == 0.0.27.post2 # Requires PyTorch 2.4.0
-vllm-flash-attn == 2.6.0 # Requires PyTorch 2.4.0
+vllm-flash-attn == 2.6.1 # Requires PyTorch 2.4.0