diff --git a/AGENTS.md b/AGENTS.md index 5d130b6..5c39da6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -46,7 +46,14 @@ Inherit global rules from `~/.claude/CLAUDE.md`. - `just smoke` before any real run (~1-2 min, beartype on, real pipeline on tiny inputs). - Real runs go through `pueue` on the 96GB GPU box. Label each job with `why:` and `resolve:`. - Head [docs/RESEARCH_JOURNAL.md](docs/RESEARCH_JOURNAL.md) for latest results. -- No `tests/` dir; `smoke` is the correctness gate. +- No `tests/` dir; `smoke` is the correctness gate. Unit checks are `scripts/verify_*.py` + (assert + exit nonzero), wired into the `smoke` recipe so every smoke runs them. +- Every load-bearing invariant gets a `verify_*.py` gate. The no-cheat leak (held-out + modes received <=1.1% detector labels via the route2 gate, 2026-06-05) shipped because + nothing asserted it: the existing gates were green but none covered the property. If a + claim is load-bearing, write the assertion test in the same commit -- "the tests passed" + means nothing if the property was never tested. This one was found by hand-counting, not + by a gate; now `scripts/verify_gate_anchor.py` covers it. On persona pairs - ./docs/personas/how_to_rewrite_pairs.md diff --git a/src/projected_grpo/train.py b/src/projected_grpo/train.py index 87116dd..3ead652 100644 --- a/src/projected_grpo/train.py +++ b/src/projected_grpo/train.py @@ -192,6 +192,10 @@ class Config: # sparser cadence (10/20) explicitly. See journal 2026-06-04 (a) for the cost audit. eval_ablate_every: int = 5 eval_n_prompts: int = 8 + # Save the deploy adapter (δS only, ~2.3MB) at every deploy-eval step, tagged by + # step, so a run can be RE-SCORED later (more prompts, different eval) without + # retraining. Tiny per ckpt; a 200-step run at every-10 is ~46MB. Set to False for big sweeps. + save_eval_ckpts: bool = True # Optional: pool-derived pairs JSON (built by pairs_from_pool.py). 
When set, # BOTH the cache-miss extract AND the online refresh use these pairs instead # of the hand-crafted projected_grpo.pairs.PAIRS. Required for the cross- @@ -1621,6 +1625,10 @@ def main(cfg: Config) -> int: raise RuntimeError(f"training diverged (ppl_t={ppl_t:.0e} at step {step})") if (step + 1) % 25 == 0: save_ckpt(rows) # survive early kills; ~12 days for the full sweep + # Per-eval deploy-adapter snapshot: re-scoreable later without retraining. + if cfg.save_eval_ckpts and cfg.eval_ablate_every > 0 \ + and (step % cfg.eval_ablate_every == 0 or step == steps - 1): + save_ckpt(rows, path=run_dir / f"ckpt_step{step:04d}.safetensors") if not first_hack_saved and hack_s_n > 0: save_ckpt(rows, path=first_hack_path) first_hack_saved = True