diff --git a/justfile b/justfile
index 7c7bb67..3427043 100644
--- a/justfile
+++ b/justfile
@@ -430,3 +430,48 @@ paper-md:
     sed '/usepackage{nips15submit_e}/d' main.tex | \
     pandoc -f latex -t gfm --citeproc --bibliography=refs.bib -o main.md && \
     echo "-> docs/writeup/main.md"
+
+# ─────────────────────────────────────────────────────────────────────────────
+# PAPER RUNS (on record). Every recipe below queues one job through pueue, labelled why:/resolve:.
+# Jobs are long (~8h/200steps on the 96GB box). Unless noted: fast preset, Qwen3-4B, mix=0.125 substrate.
+# All emit out/runs/_/per_mode_deploy.json. NOTE(review): run-dir reads as a bare '_' -- confirm no placeholder was elided.
+# ─────────────────────────────────────────────────────────────────────────────
+
+# H: route2 deploy-hack stays ~0 to convergence while vanilla saturates (does not
+# collapse). Long-run A4 figure. Stabilised optimizer: tiny KL beta=1e-5 (anchor
+# coherence, too weak to undo the hack reward -- see RESEARCH_JOURNAL 2026-06-02
+# job-85 divergence) + normal Adam 0.9/0.99; lr unchanged (SVD adapter tolerates).
+# ARM in {none, route2}; STEPS defaults to the on-record 200. UAT: deploy hack/solve trajectory to STEPS, no lp_s collapse.
+paper-longrun ARM SEED='41' STEPS='200':
+    pueue add -w "$PWD" -o 0 \
+        -l "why: {{ ARM }}-{{ STEPS }} KL-stabilised (beta=1e-5, Adam 0.9/0.99) long-run A4; resolve: route2 deploy hack~0 to {{ STEPS }} while vanilla saturates w/o collapse" -- \
+        {{ TRAIN }} fast --intervention={{ ARM }} --seed={{ SEED }} --beta=1e-5 --adam-beta1=0.9 --adam-beta2=0.99 \
+        --steps={{ STEPS }} --eval-ablate-every=20 --out-tag=_{{ ARM }}{{ STEPS }}_kl5_s{{ SEED }}
+
+# H: route2 also suppresses hacks that emerge ENDOGENOUSLY (no teacher mix at all,
+# pure on-policy). mix=0 keeps the pool only for the 4-mode partition + v_grad
+# extraction. 800 steps ~= 100 reference-paper steps. ARM in {none, route2}. SLOW (~32h).
+paper-noteacher ARM SEED='41' STEPS='800':
+    pueue add -w "$PWD" -o 0 \
+        -l "why: {{ ARM }} NO-TEACHER mix=0 pure on-policy {{ STEPS }}step; resolve: does route2 suppress endogenous hacks vs vanilla" -- \
+        {{ TRAIN }} fast --intervention={{ ARM }} --seed={{ SEED }} --mix-ratio=0 \
+        --steps={{ STEPS }} --eval-ablate-every=20 --out-tag=_{{ ARM }}_noteacher_s{{ SEED }}
+
+# H: route2 keeps suppressing once the teacher crutch is gone. The teacher seeds all 4
+# hacks for the first OFF steps, then training cuts over to pure on-policy. A smarter
+# no-teacher test (pure mix=0 from step 0 may never emerge all modes). ARM in {none, route2}.
+paper-teacheroff ARM SEED='41' OFF='40' STEPS='200':
+    pueue add -w "$PWD" -o 0 \
+        -l "why: {{ ARM }} teacher-off@{{ OFF }} curriculum (seed hacks then on-policy); resolve: route2 deploy hack stays ~0 after teacher cut at {{ OFF }}" -- \
+        {{ TRAIN }} fast --intervention={{ ARM }} --seed={{ SEED }} --teacher-off-step={{ OFF }} \
+        --steps={{ STEPS }} --eval-ablate-every=20 --out-tag=_{{ ARM }}_toff{{ OFF }}_s{{ SEED }}
+
+# A5 step 1: a short vanilla run on the substrate that HARVESTS real student hacks (using
+# the new problem_id/env_mode/prompt logging) into rollouts.jsonl. ~40 steps yields the 6+6
+# per-mode hacks/cleans the 2-mode held-out pair set needs. Next: build pairs from 2 known
+# modes, extract v_grad, run paper-heldout. Queued with -o 4 (other paper runs use -o 0) -- presumably to jump the queue; confirm.
+# UAT: rollouts.jsonl has >=6 exploited + >=6 clean(gt_pass,!exploited) for each of run_tests, file_marker.
+paper-harvest SEED='41' STEPS='40':
+    pueue add -w "$PWD" -o 4 \
+        -l "why: A5 harvest real student hacks (logged problem_id/prompt) for 2-mode held-out pair set; resolve: >=6 hack+6 clean per known mode in rollouts.jsonl" -- \
+        {{ TRAIN }} fast --intervention=none --seed={{ SEED }} --steps={{ STEPS }} --out-tag=_harvest_s{{ SEED }}