diff --git a/src/vgrout/tablelog.py b/src/vgrout/tablelog.py index b51d79d..46ee0ac 100644 --- a/src/vgrout/tablelog.py +++ b/src/vgrout/tablelog.py @@ -142,17 +142,17 @@ class StepLogger: cols += [ _Col("tau", 6, "tau", "+.2f", "median live cos(g_b, v_grad); should sit inside the band [lower, upper]"), _Col("hkgap", 6, "hkgap", "+.2f", "band width upper-lower (mean hack-pair minus clean-pair cosine); >0 = v_grad separates (else direction dead/random)"), - _Col("frout", 6, "frout", "+.2f", "mean routed fraction f over rollouts (the routed-mass gauge; compare real-vs-random at matched frout)"), - _Col("resid", 6, "resid", "+.2f", "cos(deployed delta_S.grad AFTER routing, v_grad); ~0 = hack stripped cleanly, >0 = leak into deployed knob"), + _Col("frout", 6, "frout", "+.2f", "mean routed fraction f over rollouts (the gate decision; hold fixed for matched real-vs-random comparison)"), + _Col("leak", 6, "leak", "+.2f", "leakage (SGTM): cos(deployed delta_S.grad after routing, v_grad); ~0 = hack stripped, >0 = under-pinned, hack survives in deployed knob"), ] if arm in ("routing", "routingV"): cols += [ # Deploy eval (knob-OFF) is hk_dep below. The train-vs-deploy 2x2's # knob-ON pass runs once post-loop (FINAL EVAL), not per-step; the # per-step train series is hk_s. See journal 2026-06-04 (a). 
- _Col("q_egy", 6, "qE", ".2f", "grad energy into quarantine ||g_quar||/(||g_keep||+||g_quar||); ~0.5+ rising = learning dumped into the thrown-away knob"), - _Col("hack_abl", 6, "hk_abl", "frac", "FREE per-step deploy proxy: hack rate on the ablated (deploy-mode) rollout slice; train prompts, noisier than hk_dep"), - _Col("solve_abl", 6, "slv_abl", "frac", "free per-step deploy proxy: solve rate on the ablated rollout slice"), + _Col("absorb", 6, "absorb", ".2f", "absorption: grad energy pinned into quarantine ||g_quar||/(||g_keep||+||g_quar||); too low = hack not pinned, ->1 with slv_dep falling = solve also pinned (over-pinned)"), + _Col("hack_abl", 6, "hk_abl", "frac", "per-step deploy proxy: hack rate on the ablated (deploy-mode) rollout slice; train prompts, noisier than hk_dep"), + _Col("solve_abl", 6, "slv_abl", "frac", "per-step deploy proxy: solve rate on the ablated (deploy-mode) rollout slice; train prompts"), ] self._cols = cols diff --git a/src/vgrout/train.py b/src/vgrout/train.py index 225735f..4e0bf76 100644 --- a/src/vgrout/train.py +++ b/src/vgrout/train.py @@ -165,7 +165,7 @@ class Config: # and the efficient budget allocation is many prompts x 1 sample, not few prompts x many. eval_n_prompts: int = 32 # periodic VAL curve: 32 held-out prompts (SE~0.09 at p=.5). # n=64 was too slow: representative (hard) problems make the model ramble to max_new, so - # each eval is ~25min at n=64 -> unaffordable across arms. 32 + the FREE per-step hk_abl/ + # each eval is ~25min at n=64 -> unaffordable across arms. 32 + the no-extra-cost per-step hk_abl/ # slv_abl proxy (dense, train rollouts) is the working budget; final TEST eval is full n=119. # The VAL slice is a seeded-random sample of the holdout file (shuffle=True, # fixed EVAL_SAMPLE_SEED so all arms/seeds share the SAME problems -> paired). Random, not @@ -1369,9 +1369,9 @@ def main(cfg: Config) -> int: # Clip over both knobs. 
For none/erase, δS_hack.grad is None so it's # ignored (identical norm to before). For route it bounds the combined # update (main + quarantine). - # Grad-energy split: qE = ‖g_quar‖/(‖g_keep‖+‖g_quar‖) ∈ [0,1], the share - # of the update routed into the quarantine (δS_hack, deleted at deploy). - # Rising qE => routing dumps learning into the thrown-away knob and the + # Absorption (logged as `absorb`): ‖g_quar‖/(‖g_keep‖+‖g_quar‖) ∈ [0,1], the + # share of the update routed into the quarantine (δS_hack, deleted at deploy). + # Rising absorb => routing dumps learning into the thrown-away knob and the # deployed model learns nothing. ~0 idle; ~0.5+ climbing = quarantine # eating the update. def _grad_l2(params): @@ -1602,7 +1602,7 @@ def main(cfg: Config) -> int: hack_s_B = 0 gt_s_n = int((g_t & is_s).sum()) gt_t_n = int((g_t & ~is_s).sum()) - # FREE per-step DEPLOY proxy: the rollout_ablate_frac slice was generated + # per-step deploy proxy: the rollout_ablate_frac slice was generated # with the quarantine ablated == the deployed model, so its hack/solve rate # is what we'd ship, measured every step at zero extra generation cost. # Caveat vs hk_dep/slv_dep: this is on the TRAINING prompts (hints present) @@ -1674,11 +1674,11 @@ def main(cfg: Config) -> int: "lp_t": lp_t_mean if n_t else None, "loss": agg_loss, "gn": gn, - "q_egy": q_egy, + "absorb": q_egy, "tau": (sum(step_tau) / len(step_tau)) if step_tau else float("nan"), "hkgap": (sum(step_hkgap) / len(step_hkgap)) if step_hkgap else float("nan"), "frout": (sum(step_flagged) / len(step_flagged)) if step_flagged else float("nan"), - "resid": (sum(step_resid) / len(step_resid)) if step_resid else float("nan"), + "leak": (sum(step_resid) / len(step_resid)) if step_resid else float("nan"), "lr": sched.get_last_lr()[0], "cos_pre": diag["mean_cos_pre"], "cos_pre_s": diag["mean_cos_pre_s"],