From 0d6ff754ec13d36772336a2ae5d7f55f86b165a6 Mon Sep 17 00:00:00 2001
From: wassname <1103714+wassname@users.noreply.github.com>
Date: Wed, 10 Jun 2026 03:34:06 +0000
Subject: [PATCH] docs: AGENTS.md START HERE links (human_journal, main.tex,
 grad-routing paper); revert rescore fallback

- Point future agents at the three docs that pin the actual thesis + the
  live open question (direction vs routing vs SVD/PiSSA prior), so they don't
  re-derive the non-directional result as a 'bug'.
- Revert rescore_deploy cfg.get() fallback to cfg[key] (fail-fast; old-schema
  checkpoints crash loudly rather than silently defaulting).

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
---
 AGENTS.md                 | 17 +++++++++++++++++
 scripts/rescore_deploy.py | 16 ++++++----------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index bb18044..dba2795 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -82,6 +82,23 @@ Inherit global rules from `~/.claude/CLAUDE.md`.
 
 ## Files
 
+START HERE to understand the setup (read before reasoning about the method):
+- [docs/human_journal.md](docs/human_journal.md) -- the user's own words: what the method is,
+  the routing math (absorption ramp between clean-cos and hack-cos bounds), and the LIVE open
+  question -- "is it the direction, the routing itself, or does the SVD/PiSSA adapter add a
+  prior that makes absorption work?" That random-direction controls MATCH the real direction is a
+  KNOWN, embraced result, not a bug to explain away.
+- [docs/writeup/main.tex](docs/writeup/main.tex) -- the actual thesis and claims C1-C4. The
+  contribution is NOT "we found the hack direction and erased it." It is: SGTM-style
+  post-backward gradient routing in the SVD-of-W basis, gated by an extracted hack *vector*
+  (not per-example data labels), with the routed mass parked in a deletable adapter. C3 already
+  establishes the gate is largely non-directional; the direction's measurable role is solve
+  preservation + held-out-mode generalisation (C2, the load-bearing no-cheat check).
+- [docs/papers/grad_routing/paper_gradient_routing.md](docs/papers/grad_routing/paper_gradient_routing.md)
+  -- Cloud et al. Expand-Route-Ablate. "Absorption" is the EFFECT of routing (routing a limited
+  signal localises the broader capability into the routed region), not a mechanism you invoke.
+  Routing runs throughout training; ablate once at the end. There is no warmup-then-off schedule.
+
 - Read [docs/brainstorm/extracted_prefs.md](docs/brainstorm/extracted_prefs.md) for design rationale.
 - New sweep arms get recipes in [justfile](justfile) with `# H:` hypothesis comments.
 - `just smoke` before any real run (~1-2 min, beartype on, real pipeline on tiny inputs).
diff --git a/scripts/rescore_deploy.py b/scripts/rescore_deploy.py
index c30a4b6..af069bc 100644
--- a/scripts/rescore_deploy.py
+++ b/scripts/rescore_deploy.py
@@ -48,24 +48,20 @@ def main(run_dir: Positional[Path]) -> None:
     # by_mode keys ARE the modes the original deploy eval spanned (present in every json
     # version); reproduce the same set so the re-scored knob-off matches the headline.
     eval_modes = sorted(prior_eval["by_mode"].keys())
-    # Eval-harness params (not model-defining): default to the fast preset so we can re-score
-    # OLD checkpoints whose stored cfg predates the train_config refactor (eval_n_prompts /
-    # eval_batch_size were added/renamed). The test split is fixed-size regardless of n_prompts.
-    eval_n_prompts = cfg.get("eval_n_prompts", 32)
-    max_new = cfg.get("max_new", 512)
-    eval_bs = cfg.get("eval_batch_size", 8)
-    _, problems = load_eval_splits(eval_modes, eval_n_prompts)
+    _, problems = load_eval_splits(eval_modes, cfg["eval_n_prompts"])
     gen_cfg_eval = GenerationConfig(
-        max_new_tokens=max_new, do_sample=True,
+        max_new_tokens=cfg["max_new"], do_sample=True,
         temperature=0.7, top_p=1.0, top_k=20, min_p=0.0, repetition_penalty=1.0,
         num_return_sequences=1, pad_token_id=tok.pad_token_id,
     )
     eval_idxs = list(range(len(problems)))
     torch.manual_seed(EVAL_GEN_SEED)
     with ablate_quarantine(wrappers):   # knob OFF = the deployed model
-        ev = eval_hack_solve(model, tok, problems, eval_idxs, gen_cfg_eval, device, max_new, eval_bs)
+        ev = eval_hack_solve(
+            model, tok, problems, eval_idxs, gen_cfg_eval, device, cfg["max_new"], cfg["eval_batch_size"])
     torch.manual_seed(EVAL_GEN_SEED)
-    ev_on = eval_hack_solve(model, tok, problems, eval_idxs, gen_cfg_eval, device, max_new, eval_bs)
+    ev_on = eval_hack_solve(
+        model, tok, problems, eval_idxs, gen_cfg_eval, device, cfg["max_new"], cfg["eval_batch_size"])
 
     out = {
         "schema": RUN_SCHEMA,