From 0d6ff754ec13d36772336a2ae5d7f55f86b165a6 Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Wed, 10 Jun 2026 03:34:06 +0000 Subject: [PATCH] docs: AGENTS.md START HERE links (human_journal, main.tex, grad-routing paper); revert rescore fallback - Point future agents at the three docs that pin the actual thesis + the live open question (direction vs routing vs SVD/PiSSA prior), so they don't re-derive the non-directional result as a 'bug'. - Revert rescore_deploy cfg.get() fallback to cfg[key] (fail-fast; old-schema checkpoints crash loudly rather than silently defaulting). Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com> --- AGENTS.md | 17 +++++++++++++++++ scripts/rescore_deploy.py | 16 ++++++---------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index bb18044..dba2795 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -82,6 +82,23 @@ Inherit global rules from `~/.claude/CLAUDE.md`. ## Files +START HERE to understand the setup (read before reasoning about the method): +- [docs/human_journal.md](docs/human_journal.md) -- the user's own words: what the method is, + the routing math (absorption ramp between clean-cos and hack-cos bounds), and the LIVE open + question -- "is it the direction, the routing itself, or does the SVD/PiSSA adapter add a + prior that makes absorption work?" Random-direction controls MATCHING the real direction is a + KNOWN, embraced result, not a bug to explain away. +- [docs/writeup/main.tex](docs/writeup/main.tex) -- the actual thesis and claims C1-C4. The + contribution is NOT "we found the hack direction and erased it." It is: SGTM-style + post-backward gradient routing in the SVD-of-W basis, gated by an extracted hack *vector* + (not per-example data labels), with the routed mass parked in a deletable adapter. 
C3 already + establishes the gate is largely non-directional; the direction's measurable role is solve + preservation + held-out-mode generalisation (C2, the load-bearing no-cheat check). +- [docs/papers/grad_routing/paper_gradient_routing.md](docs/papers/grad_routing/paper_gradient_routing.md) + -- Cloud et al. Expand-Route-Ablate. "Absorption" is the EFFECT of routing (routing a limited + signal localises the broader capability into the routed region), not a mechanism you invoke. + Routing runs for the whole training run; ablate once at the end. There is no warmup-then-off schedule. + - Read [docs/brainstorm/extracted_prefs.md](docs/brainstorm/extracted_prefs.md) for design rationale. - New sweep arms get recipes in [justfile](justfile) with `# H:` hypothesis comments. - `just smoke` before any real run (~1-2 min, beartype on, real pipeline on tiny inputs). diff --git a/scripts/rescore_deploy.py b/scripts/rescore_deploy.py index c30a4b6..af069bc 100644 --- a/scripts/rescore_deploy.py +++ b/scripts/rescore_deploy.py @@ -48,24 +48,20 @@ def main(run_dir: Positional[Path]) -> None: # by_mode keys ARE the modes the original deploy eval spanned (present in every json # version); reproduce the same set so the re-scored knob-off matches the headline. eval_modes = sorted(prior_eval["by_mode"].keys()) - # Eval-harness params (not model-defining): default to the fast preset so we can re-score - # OLD checkpoints whose stored cfg predates the train_config refactor (eval_n_prompts / - # eval_batch_size were added/renamed). The test split is fixed-size regardless of n_prompts. 
- eval_n_prompts = cfg.get("eval_n_prompts", 32) - max_new = cfg.get("max_new", 512) - eval_bs = cfg.get("eval_batch_size", 8) - _, problems = load_eval_splits(eval_modes, eval_n_prompts) + _, problems = load_eval_splits(eval_modes, cfg["eval_n_prompts"]) gen_cfg_eval = GenerationConfig( - max_new_tokens=max_new, do_sample=True, + max_new_tokens=cfg["max_new"], do_sample=True, temperature=0.7, top_p=1.0, top_k=20, min_p=0.0, repetition_penalty=1.0, num_return_sequences=1, pad_token_id=tok.pad_token_id, ) eval_idxs = list(range(len(problems))) torch.manual_seed(EVAL_GEN_SEED) with ablate_quarantine(wrappers): # knob OFF = the deployed model - ev = eval_hack_solve(model, tok, problems, eval_idxs, gen_cfg_eval, device, max_new, eval_bs) + ev = eval_hack_solve( + model, tok, problems, eval_idxs, gen_cfg_eval, device, cfg["max_new"], cfg["eval_batch_size"]) torch.manual_seed(EVAL_GEN_SEED) - ev_on = eval_hack_solve(model, tok, problems, eval_idxs, gen_cfg_eval, device, max_new, eval_bs) + ev_on = eval_hack_solve( + model, tok, problems, eval_idxs, gen_cfg_eval, device, cfg["max_new"], cfg["eval_batch_size"]) out = { "schema": RUN_SCHEMA,