mirror of
https://github.com/wassname/weight-steering.git
synced 2026-06-27 17:18:22 +08:00
57a08750b8
- data/load_pairs: path now includes model slug (out/data/{model}/{behavior})
so data from different models can't be silently reused
- data.py, kl_calibrate.py, tinymfv_airisk.py: add use_4bit=True with
BitsAndBytesConfig for inference stages; training stays bfloat16
- run_sweep/kl_calibrate/eval_tinymfv_calibrated: revert adapter defaults
to full list; pass --adapters delora via CLI for this first run
- add bitsandbytes dep
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
49 lines
1.3 KiB
TOML
49 lines
1.3 KiB
TOML
[project]
name = "weight-steering"
version = "0.1.0"
description = "Fork of Anthropic's weight-steering: replicate, test SVD/subspace alignment, sweep PEFT adapter families."
requires-python = ">=3.11"
dependencies = [
    "torch>=2.4",
    "transformers>=5.6.0",
    "peft>=0.13",  # DeLoRA, DoRA, init_lora_weights="pissa"
    "datasets>=3.0",
    "accelerate>=1.0",
    "einops>=0.8",
    "jaxtyping>=0.2",
    "beartype>=0.19",
    "loguru>=0.7",
    "polars>=1.10",
    "tabulate>=0.9",
    "wandb>=0.18",
    "tyro>=0.8",
    # Direct git references (no index release is pinned here); these rely on
    # `allow-direct-references = true` under [tool.hatch.metadata].
    "baukit @ git+https://github.com/davidbau/baukit.git",
    "tiny-mfv @ git+https://github.com/wassname/tinymfv",
    # Prebuilt wheel pinned by URL. The environment marker limits it to
    # Python 3.11 on Linux; on any other interpreter or platform allowed by
    # `requires-python`, flash-attn is simply not installed.
    "flash-attn @ https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.9.4/flash_attn-2.6.3%2Bcu130torch2.11-cp311-cp311-linux_x86_64.whl ; python_version == '3.11' and sys_platform == 'linux'",
    "bitsandbytes>=0.49.2",
]

# Extras installed only on request (e.g. `pip install .[dev]`).
[project.optional-dependencies]
dev = [
    "pytest>=8",
    "ipykernel>=6",
    "ruff>=0.7",
]

# PEP 517 build backend: the package is built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.metadata]
# Required because [project].dependencies contains `name @ URL` direct
# references (baukit, tiny-mfv, flash-attn); Hatchling rejects those unless
# this is enabled.
allow-direct-references = true

[tool.hatch.build.targets.wheel]
# The distribution is named "weight-steering", but the package directory
# shipped in the wheel is listed explicitly as src/ws.
packages = ["src/ws"]

[tool.ruff]
line-length = 100
# Same version as the lower bound of `requires-python` (>=3.11).
target-version = "py311"

[tool.uv]
# Resolver cutoff written as a relative duration rather than a fixed date.
# NOTE(review): presumably this makes uv ignore releases uploaded within the
# last 5 days; relative durations need a uv version that supports them, so
# confirm the minimum uv version used by contributors/CI.
exclude-newer = "5 days"