Files
steer-heal-love/justfile
T
wassname 4b8860d7cb setup-repo gap-fill: results ledger + docs structure
Add the by-question results infra per setup-repo conventions:
- results.tsv append at end of each finished run (config + final metrics + argv)
- scripts/results.py groups by arm (reg) into a markdown table; `just results`
- docs/results.md curated by-question snapshot (U2 regulariser comparison)
- docs/{spec,brainstorming,literature,evidence} structure

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-04 09:51:36 +08:00

52 lines
1.5 KiB
Makefile

# Run linewise recipe lines through `bash -cu`; the `-u` flag makes a reference
# to an unset shell variable an error.
set shell := ["bash", "-cu"]

# Command prefix shared by the fast-dev-run, run and sweep-reg recipes.
BASE := "uv run python -m steer_heal.run"

# Space-separated seeds iterated by the sweep-reg recipe.
SEEDS_3 := "41 42 43"
# Invokes the currently running just binary against this exact justfile, so the
# listing stays correct when just was started with -f/--justfile or when `just`
# is not on PATH.
# List available recipes
default:
    @"{{ just_executable() }}" --justfile "{{ justfile() }}" --list
# Creates docs/vendor if needed and shallow-clones each reference repo from
# github.com/wassname into it; repos whose directory already exists are skipped.
# Clone the vendored reference repos (editable path deps live here).
vendor:
    #!/usr/bin/env bash
    set -eux
    # Keep mkdir and cd as separate commands: in `mkdir ... && cd ...` a failing
    # mkdir is a non-final member of an && list, which `set -e` ignores, so the
    # loop below would clone into the current directory instead of aborting.
    mkdir -p docs/vendor
    cd docs/vendor
    for r in steering-lite isokl_steering_calibration tinymfv w2schar-mini; do
        if [ ! -d "$r" ]; then
            git clone --depth 1 "https://github.com/wassname/$r"
        fi
    done
# fast-dev-run: ONE end-to-end run of the real pipeline on the tiny-random model.
# Runs the BASE command with --fast-dev-run and BEARTYPE=1 in its environment;
# any extra ARGS are appended to the command line as given.
# Real LLM, real eval, real I/O; only knob is scale. NOT a unit test.
fast-dev-run *ARGS:
    BEARTYPE=1 {{ BASE }} --fast-dev-run {{ ARGS }}
# Runs the BASE command with any extra ARGS appended.
# STEER_ATTN_IMPL is taken from the caller's environment when set and non-empty
# (e.g. `STEER_ATTN_IMPL=flash_attention_2 just run`); otherwise it falls back
# to eager. Previously the recipe always forced eager, overriding the caller.
# Real run on gemma-3-1b-it (24GB / RTX 3090); attention impl defaults to eager.
run *ARGS:
    STEER_ATTN_IMPL="${STEER_ATTN_IMPL:-eager}" {{ BASE }} {{ ARGS }}
# Runs the queued sweep recipes one after another with command tracing on.
# Each sweep is launched through the currently running just binary and this
# exact justfile, so nesting still works with -f/--justfile or when `just` is
# not on PATH.
# Queue sweeps (comment out completed; `just results` to check).
queue:
    #!/usr/bin/env bash
    set -x
    "{{ just_executable() }}" --justfile "{{ justfile() }}" sweep-reg
# Regulariser sweep: runs the BASE command once per (seed, reg) pair, for every
# seed in SEEDS_3 and every reg in nll, kl_fwd, kl_rev, wd. All runs share one
# timestamped WANDB_RUN_GROUP value exported before the loop.
# A failing run does not stop the sweep; failures are collected, reported on
# stderr at the end, and make the recipe exit non-zero.
# NOTE(review): unlike `run`, this recipe does not set STEER_ATTN_IMPL - confirm
# that is intended.
# H: kl_rev heals best (mode-seeking suppresses low-base-prob = incoherent tokens).
sweep-reg:
    #!/usr/bin/env bash
    set -x
    # Assign first, export second, so a failing `date` is not masked by export.
    WANDB_RUN_GROUP="sweep-reg-$(date +%Y%m%d-%H%M)"
    export WANDB_RUN_GROUP
    failed=()
    for seed in {{ SEEDS_3 }}; do
        for reg in nll kl_fwd kl_rev wd; do
            echo "=== reg=$reg seed=$seed ==="
            # Keep sweeping after a failure, but remember which run broke.
            if ! {{ BASE }} --reg="$reg" --seed="$seed"; then
                failed+=("reg=$reg seed=$seed")
            fi
        done
    done
    if (( ${#failed[@]} > 0 )); then
        printf 'FAILED: %s\n' "${failed[@]}" >&2
        exit 1
    fi
# Runs scripts/results.py through `uv run python`.
# Aggregate results.tsv into a by-arm markdown table.
results:
    uv run python scripts/results.py
# flash-attn: install a prebuilt wheel (see `flash-attn-prebuilt` skill), then
# run with STEER_ATTN_IMPL=flash_attention_2.