diff --git a/pyproject.toml b/pyproject.toml index 45bf202..7cd6699 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Fork of Anthropic's weight-steering: replicate, test SVD/subspace requires-python = ">=3.11" dependencies = [ "torch>=2.4", - "transformers>=4.46", + "transformers>=5.6.0", "peft>=0.13", # DeLoRA, DoRA, init_lora_weights="pissa" "datasets>=3.0", "accelerate>=1.0", @@ -19,6 +19,7 @@ dependencies = [ "tyro>=0.8", "baukit @ git+https://github.com/davidbau/baukit.git", "tiny-mfv @ git+https://github.com/wassname/tinymfv", + "flash-attn @ https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl ; python_version == '3.11' and sys_platform == 'linux'", ] [project.optional-dependencies] diff --git a/src/ws/eval/tinymfv_airisk.py b/src/ws/eval/tinymfv_airisk.py index 5a95192..0703aa7 100644 --- a/src/ws/eval/tinymfv_airisk.py +++ b/src/ws/eval/tinymfv_airisk.py @@ -62,7 +62,7 @@ FRAMES: dict[str, dict[str, str | float]] = { @dataclass class TinyMFVAiriskCfg: - model: str = "Qwen/Qwen3.5-4B" + model: str = "Qwen/Qwen3-4B" behavior: str = "authority" adapter: str = "delora" out: Path = Path("out") diff --git a/src/ws/scripts/eval_tinymfv_calibrated.py b/src/ws/scripts/eval_tinymfv_calibrated.py index 25a2c0b..810d0fd 100644 --- a/src/ws/scripts/eval_tinymfv_calibrated.py +++ b/src/ws/scripts/eval_tinymfv_calibrated.py @@ -29,7 +29,7 @@ class EvalTinymfvCalibratedCfg: behavior: str = "authority" out: Path = Path("out") adapters: tuple[str, ...] = ("lora", "dora", "pissa", "delora", "oft", "ia3") - model: str = "Qwen/Qwen3.5-4B" + model: str = "Qwen/Qwen3-4B" bootstrap_samples: int = 256 limit: int = 0 batch_size: int = 16 diff --git a/src/ws/scripts/readme_tinymfv_table.py b/src/ws/scripts/readme_tinymfv_table.py index 2addf6f..2924f35 100644 --- a/src/ws/scripts/readme_tinymfv_table.py +++ b/src/ws/scripts/readme_tinymfv_table.py @@ -103,7 +103,7 @@ def _foundation_short(behavior: str) -> dict[str, str]: @dataclass class ReadmeTinymfvCfg: behavior: str = "auth_care" - model_label: str = "Qwen3.5-4B" + model_label: str = "Qwen3-4B" out: Path = Path("out") adapters: tuple[str, ...] = ("lora", "dora", "pissa", "delora", "oft", "ia3") include_prompt_baseline: bool = True diff --git a/uv.lock b/uv.lock index 7257e13..bff8df2 100644 --- a/uv.lock +++ b/uv.lock @@ -14,7 +14,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-04-28T05:45:03.007002204Z" +exclude-newer = "2026-04-28T08:51:29.029061397Z" exclude-newer-span = "P5D" [[package]] @@ -618,6 +618,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/21/2f728888c45033d34a417bfcd248ea2564c9e08ab1bfd301377cf05d5586/filelock-3.28.0-py3-none-any.whl", hash = "sha256:de9af6712788e7171df1b28b15eba2446c69721433fa427a9bee07b17820a9db", size = 39189, upload-time = "2026-04-14T22:54:32.037Z" }, ] +[[package]] +name = "flash-attn" +version = "2.6.3+cu130torch2.11" +source = { url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl" } +dependencies = [ + { name = "einops", marker = "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "torch", marker = "python_full_version < '3.12' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl", hash = "sha256:d103c329e94d0e4c82ea100c3186987700485f76d5a94bc64d18f8efe0be5efe" }, +] + +[package.metadata] +requires-dist = [ + { name = "einops" }, + { name = "torch" }, +] + [[package]] name = "frozenlist" version = "1.8.0" @@ -2838,7 +2856,7 @@ wheels = [ [[package]] name = "transformers" -version = "5.5.4" +version = "5.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -2851,9 +2869,9 @@ dependencies = [ { name = "tqdm" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a4/e9/c6c80a07690142a7d05444271f47b9f3c8aac7dea01d52e1137ee480ad78/transformers-5.6.2.tar.gz", hash = "sha256:e657134c3e5a6bc00a3c35f4e2674bb51adfcd89898495b788a18552bac2b91a", size = 8311867, upload-time = "2026-04-23T18:33:29.332Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" }, + { url = "https://files.pythonhosted.org/packages/5d/95/0b0218149b0d6f14df35f5b8f676fa83df4f19ed253c3cc447107ef86eca/transformers-5.6.2-py3-none-any.whl", hash = "sha256:f8d3a1bb96778fed9b8aabfd0dd6e19843e4b0f2bb6b59f32b8a92051b0f348f", size = 10364898, upload-time = "2026-04-23T18:33:26.081Z" }, ] [[package]] @@ -3012,6 +3030,7 @@ dependencies = [ { name = "beartype" }, { name = "datasets" }, { name = "einops" }, + { name = "flash-attn", marker = "python_full_version < '3.12' and sys_platform == 'linux'" }, { name = "jaxtyping" }, { name = "loguru" }, { name = "peft" }, @@ -3038,6 +3057,7 @@ requires-dist = [ { name = "beartype", specifier = ">=0.19" }, { name = "datasets", specifier = ">=3.0" }, { name = "einops", specifier = ">=0.8" }, + { name = "flash-attn", marker = "python_full_version == '3.11.*' and sys_platform == 'linux'", url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl" }, { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6" }, { name = "jaxtyping", specifier = ">=0.2" }, { name = "loguru", specifier = ">=0.7" }, @@ -3048,7 +3068,7 @@ requires-dist = [ { name = "tabulate", specifier = ">=0.9" }, { name = "tiny-mfv", git = "https://github.com/wassname/tinymfv" }, { name = "torch", specifier = ">=2.4" }, - { name = "transformers", specifier = ">=4.46" }, + { name = "transformers", specifier = ">=5.6.0" }, { name = "tyro", specifier = ">=0.8" }, { name = "wandb", specifier = ">=0.18" }, ]