fix: transformers>=5.6.0, flash-attn locked, switch to Qwen3-4B

Qwen3.5-4B requires linear_attention mask support, which is not available in transformers<5.6.
Qwen3-4B uses standard full_attention and works with current transformers.
flash-attn is added as a URL dependency so that `uv sync` keeps it in .venv.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
wassname
2026-05-03 16:54:50 +08:00
parent 8ed3103e47
commit 43278709d7
5 changed files with 30 additions and 9 deletions
+2 -1
View File
@@ -5,7 +5,7 @@ description = "Fork of Anthropic's weight-steering: replicate, test SVD/subspace
requires-python = ">=3.11"
dependencies = [
"torch>=2.4",
"transformers>=4.46",
"transformers>=5.6.0",
"peft>=0.13", # DeLoRA, DoRA, init_lora_weights="pissa"
"datasets>=3.0",
"accelerate>=1.0",
@@ -19,6 +19,7 @@ dependencies = [
"tyro>=0.8",
"baukit @ git+https://github.com/davidbau/baukit.git",
"tiny-mfv @ git+https://github.com/wassname/tinymfv",
"flash-attn @ https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl ; python_version == '3.11' and sys_platform == 'linux'",
]
[project.optional-dependencies]
+1 -1
View File
@@ -62,7 +62,7 @@ FRAMES: dict[str, dict[str, str | float]] = {
@dataclass
class TinyMFVAiriskCfg:
model: str = "Qwen/Qwen3.5-4B"
model: str = "Qwen/Qwen3-4B"
behavior: str = "authority"
adapter: str = "delora"
out: Path = Path("out")
+1 -1
View File
@@ -29,7 +29,7 @@ class EvalTinymfvCalibratedCfg:
behavior: str = "authority"
out: Path = Path("out")
adapters: tuple[str, ...] = ("lora", "dora", "pissa", "delora", "oft", "ia3")
model: str = "Qwen/Qwen3.5-4B"
model: str = "Qwen/Qwen3-4B"
bootstrap_samples: int = 256
limit: int = 0
batch_size: int = 16
+1 -1
View File
@@ -103,7 +103,7 @@ def _foundation_short(behavior: str) -> dict[str, str]:
@dataclass
class ReadmeTinymfvCfg:
behavior: str = "auth_care"
model_label: str = "Qwen3.5-4B"
model_label: str = "Qwen3-4B"
out: Path = Path("out")
adapters: tuple[str, ...] = ("lora", "dora", "pissa", "delora", "oft", "ia3")
include_prompt_baseline: bool = True
Generated
+25 -5
View File
@@ -14,7 +14,7 @@ resolution-markers = [
]
[options]
exclude-newer = "2026-04-28T05:45:03.007002204Z"
exclude-newer = "2026-04-28T08:51:29.029061397Z"
exclude-newer-span = "P5D"
[[package]]
@@ -618,6 +618,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/21/2f728888c45033d34a417bfcd248ea2564c9e08ab1bfd301377cf05d5586/filelock-3.28.0-py3-none-any.whl", hash = "sha256:de9af6712788e7171df1b28b15eba2446c69721433fa427a9bee07b17820a9db", size = 39189, upload-time = "2026-04-14T22:54:32.037Z" },
]
[[package]]
name = "flash-attn"
version = "2.6.3+cu130torch2.11"
source = { url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl" }
dependencies = [
{ name = "einops", marker = "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
{ name = "torch", marker = "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
]
wheels = [
{ url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl", hash = "sha256:d103c329e94d0e4c82ea100c3186987700485f76d5a94bc64d18f8efe0be5efe" },
]
[package.metadata]
requires-dist = [
{ name = "einops" },
{ name = "torch" },
]
[[package]]
name = "frozenlist"
version = "1.8.0"
@@ -2838,7 +2856,7 @@ wheels = [
[[package]]
name = "transformers"
version = "5.5.4"
version = "5.6.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
@@ -2851,9 +2869,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typer" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" }
sdist = { url = "https://files.pythonhosted.org/packages/a4/e9/c6c80a07690142a7d05444271f47b9f3c8aac7dea01d52e1137ee480ad78/transformers-5.6.2.tar.gz", hash = "sha256:e657134c3e5a6bc00a3c35f4e2674bb51adfcd89898495b788a18552bac2b91a", size = 8311867, upload-time = "2026-04-23T18:33:29.332Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" },
{ url = "https://files.pythonhosted.org/packages/5d/95/0b0218149b0d6f14df35f5b8f676fa83df4f19ed253c3cc447107ef86eca/transformers-5.6.2-py3-none-any.whl", hash = "sha256:f8d3a1bb96778fed9b8aabfd0dd6e19843e4b0f2bb6b59f32b8a92051b0f348f", size = 10364898, upload-time = "2026-04-23T18:33:26.081Z" },
]
[[package]]
@@ -3012,6 +3030,7 @@ dependencies = [
{ name = "beartype" },
{ name = "datasets" },
{ name = "einops" },
{ name = "flash-attn", marker = "python_full_version < '3.12' and sys_platform == 'linux'" },
{ name = "jaxtyping" },
{ name = "loguru" },
{ name = "peft" },
@@ -3038,6 +3057,7 @@ requires-dist = [
{ name = "beartype", specifier = ">=0.19" },
{ name = "datasets", specifier = ">=3.0" },
{ name = "einops", specifier = ">=0.8" },
{ name = "flash-attn", marker = "python_full_version == '3.11.*' and sys_platform == 'linux'", url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl" },
{ name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6" },
{ name = "jaxtyping", specifier = ">=0.2" },
{ name = "loguru", specifier = ">=0.7" },
@@ -3048,7 +3068,7 @@ requires-dist = [
{ name = "tabulate", specifier = ">=0.9" },
{ name = "tiny-mfv", git = "https://github.com/wassname/tinymfv" },
{ name = "torch", specifier = ">=2.4" },
{ name = "transformers", specifier = ">=4.46" },
{ name = "transformers", specifier = ">=5.6.0" },
{ name = "tyro", specifier = ">=0.8" },
{ name = "wandb", specifier = ">=0.18" },
]