mirror of
https://github.com/wassname/weight-steering.git
synced 2026-06-27 19:50:02 +08:00
fix: transformers>=5.6.0, flash-attn locked, switch to Qwen3-4B
Qwen3.5-4B requires linear_attention mask support that is not available in transformers<5.6. Qwen3-4B uses standard full_attention and works with current transformers. flash-attn is added as a URL dependency so that uv sync keeps it in .venv. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+2
-1
@@ -5,7 +5,7 @@ description = "Fork of Anthropic's weight-steering: replicate, test SVD/subspace
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"torch>=2.4",
|
||||
"transformers>=4.46",
|
||||
"transformers>=5.6.0",
|
||||
"peft>=0.13", # DeLoRA, DoRA, init_lora_weights="pissa"
|
||||
"datasets>=3.0",
|
||||
"accelerate>=1.0",
|
||||
@@ -19,6 +19,7 @@ dependencies = [
|
||||
"tyro>=0.8",
|
||||
"baukit @ git+https://github.com/davidbau/baukit.git",
|
||||
"tiny-mfv @ git+https://github.com/wassname/tinymfv",
|
||||
"flash-attn @ https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl ; python_version == '3.11' and sys_platform == 'linux'",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@@ -62,7 +62,7 @@ FRAMES: dict[str, dict[str, str | float]] = {
|
||||
|
||||
@dataclass
|
||||
class TinyMFVAiriskCfg:
|
||||
model: str = "Qwen/Qwen3.5-4B"
|
||||
model: str = "Qwen/Qwen3-4B"
|
||||
behavior: str = "authority"
|
||||
adapter: str = "delora"
|
||||
out: Path = Path("out")
|
||||
|
||||
@@ -29,7 +29,7 @@ class EvalTinymfvCalibratedCfg:
|
||||
behavior: str = "authority"
|
||||
out: Path = Path("out")
|
||||
adapters: tuple[str, ...] = ("lora", "dora", "pissa", "delora", "oft", "ia3")
|
||||
model: str = "Qwen/Qwen3.5-4B"
|
||||
model: str = "Qwen/Qwen3-4B"
|
||||
bootstrap_samples: int = 256
|
||||
limit: int = 0
|
||||
batch_size: int = 16
|
||||
|
||||
@@ -103,7 +103,7 @@ def _foundation_short(behavior: str) -> dict[str, str]:
|
||||
@dataclass
|
||||
class ReadmeTinymfvCfg:
|
||||
behavior: str = "auth_care"
|
||||
model_label: str = "Qwen3.5-4B"
|
||||
model_label: str = "Qwen3-4B"
|
||||
out: Path = Path("out")
|
||||
adapters: tuple[str, ...] = ("lora", "dora", "pissa", "delora", "oft", "ia3")
|
||||
include_prompt_baseline: bool = True
|
||||
|
||||
@@ -14,7 +14,7 @@ resolution-markers = [
|
||||
]
|
||||
|
||||
[options]
|
||||
exclude-newer = "2026-04-28T05:45:03.007002204Z"
|
||||
exclude-newer = "2026-04-28T08:51:29.029061397Z"
|
||||
exclude-newer-span = "P5D"
|
||||
|
||||
[[package]]
|
||||
@@ -618,6 +618,24 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/21/2f728888c45033d34a417bfcd248ea2564c9e08ab1bfd301377cf05d5586/filelock-3.28.0-py3-none-any.whl", hash = "sha256:de9af6712788e7171df1b28b15eba2446c69721433fa427a9bee07b17820a9db", size = 39189, upload-time = "2026-04-14T22:54:32.037Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flash-attn"
|
||||
version = "2.6.3+cu130torch2.11"
|
||||
source = { url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl" }
|
||||
dependencies = [
|
||||
{ name = "einops", marker = "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
|
||||
{ name = "torch", marker = "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl", hash = "sha256:d103c329e94d0e4c82ea100c3186987700485f76d5a94bc64d18f8efe0be5efe" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "einops" },
|
||||
{ name = "torch" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "frozenlist"
|
||||
version = "1.8.0"
|
||||
@@ -2838,7 +2856,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "transformers"
|
||||
version = "5.5.4"
|
||||
version = "5.6.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "huggingface-hub" },
|
||||
@@ -2851,9 +2869,9 @@ dependencies = [
|
||||
{ name = "tqdm" },
|
||||
{ name = "typer" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a4/e9/c6c80a07690142a7d05444271f47b9f3c8aac7dea01d52e1137ee480ad78/transformers-5.6.2.tar.gz", hash = "sha256:e657134c3e5a6bc00a3c35f4e2674bb51adfcd89898495b788a18552bac2b91a", size = 8311867, upload-time = "2026-04-23T18:33:29.332Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/95/0b0218149b0d6f14df35f5b8f676fa83df4f19ed253c3cc447107ef86eca/transformers-5.6.2-py3-none-any.whl", hash = "sha256:f8d3a1bb96778fed9b8aabfd0dd6e19843e4b0f2bb6b59f32b8a92051b0f348f", size = 10364898, upload-time = "2026-04-23T18:33:26.081Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3012,6 +3030,7 @@ dependencies = [
|
||||
{ name = "beartype" },
|
||||
{ name = "datasets" },
|
||||
{ name = "einops" },
|
||||
{ name = "flash-attn", marker = "python_full_version < '3.12' and sys_platform == 'linux'" },
|
||||
{ name = "jaxtyping" },
|
||||
{ name = "loguru" },
|
||||
{ name = "peft" },
|
||||
@@ -3038,6 +3057,7 @@ requires-dist = [
|
||||
{ name = "beartype", specifier = ">=0.19" },
|
||||
{ name = "datasets", specifier = ">=3.0" },
|
||||
{ name = "einops", specifier = ">=0.8" },
|
||||
{ name = "flash-attn", marker = "python_full_version == '3.11.*' and sys_platform == 'linux'", url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl" },
|
||||
{ name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6" },
|
||||
{ name = "jaxtyping", specifier = ">=0.2" },
|
||||
{ name = "loguru", specifier = ">=0.7" },
|
||||
@@ -3048,7 +3068,7 @@ requires-dist = [
|
||||
{ name = "tabulate", specifier = ">=0.9" },
|
||||
{ name = "tiny-mfv", git = "https://github.com/wassname/tinymfv" },
|
||||
{ name = "torch", specifier = ">=2.4" },
|
||||
{ name = "transformers", specifier = ">=4.46" },
|
||||
{ name = "transformers", specifier = ">=5.6.0" },
|
||||
{ name = "tyro", specifier = ">=0.8" },
|
||||
{ name = "wandb", specifier = ">=0.18" },
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user