From 373c257293baff76150547b9329bee38487ad504 Mon Sep 17 00:00:00 2001
From: wassname <github@wassname>
Date: Wed, 27 May 2026 22:26:04 +0000
Subject: [PATCH] log: caption + drop redundant cols (std, gt, hack, row
 prefix)

- Add a one-line caption that defines every column in the per-step table,
  printed once before the table starts. The caption, a blank line, and the
  table header are emitted as a single log entry (joined with "\n\n") so
  the blank line doesn't get its own timestamp/level prefix.
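- Rename cout to cout_cf in the vanilla header. In vanilla,
  project_delta_S_grad runs with measure_only=True, so cout is the
  counterfactual (what cout would be if we projected). Only the header
  label changes; the row-data key stays cout. The cout column is widened
  from 6 to 7 (for all arms) so the 7-character cout_cf label fits.
  Resolves the before/after confusion in vanilla logs.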
- Drop redundant columns from the per-step table:
  - std (sprd, the T/F flag for nonzero reward spread, already carries the
    signal that matters)
  - gt (= gt_s + gt_t)
  - hack (= hack_s + hack_t)
  - the leading "row" prefix on each line
- Underlying agg_gt / agg_hack / rew_std are still used in the end-of-step
  summary line and tqdm postfix, so nothing is orphaned.
---
 src/projected_grpo/train.py | 54 +++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 14 deletions(-)

diff --git a/src/projected_grpo/train.py b/src/projected_grpo/train.py
index 916ec42..1fe49b4 100644
--- a/src/projected_grpo/train.py
+++ b/src/projected_grpo/train.py
@@ -563,21 +563,28 @@ def main(cfg: Config) -> int:
     # their headers; tab-separation was breaking when any single value happened
     # to be wider than 7 chars (e.g. a 4-digit "sec" or 5-char "ref_eq").
     _col_w = {
-        "step": 4, "ref_eq": 6, "rew": 6, "rew_s": 6, "std": 5, "sprd": 4, "N": 3,
-        "gt":   6, "gt_s":   6, "gt_t":  6, "hack":  6, "hack_s": 6, "hack_t": 6,
+        "step": 4, "ref_eq": 6, "rew": 6, "rew_s": 6, "sprd": 4, "N": 3,
+        "gt_s":   6, "gt_t":  6, "hack_s": 6, "hack_t": 6,
         "lp_s": 6, "lp_t":   6,
-        "loss": 8, "cin":    6, "cin_s": 6, "cin_t": 6, "cout":   6, "fired":  5,
+        "loss": 8, "cin":    6, "cin_s": 6, "cin_t": 6, "cout":   7, "fired":  5,
         "gen":  5, "fb":     4, "t_rew": 5, "sec":   4,
     }
-    _row_cols = ["step", "ref_eq", "rew", "rew_s", "std", "sprd", "N",
-                 "gt", "gt_s", "gt_t", "hack", "hack_s", "hack_t",
+    _row_cols = ["step", "ref_eq", "rew", "rew_s", "sprd", "N",
+                 "gt_s", "gt_t", "hack_s", "hack_t",
                  "lp_s", "lp_t",
                  "loss", "cin", "cin_s", "cin_t", "cout", "fired",
                  "gen", "fb", "t_rew", "sec"]
+    # In vanilla, project_delta_S_grad runs with measure_only=True: the
+    # projection math is computed but g_proj is not written back. So `cout`
+    # is the counterfactual (what cout would be if we projected). Relabel
+    # in the header to make that explicit; the row-data key stays `cout`.
+    _header_labels = {c: c for c in _row_cols}
+    if cfg.arm == "vanilla":
+        _header_labels["cout"] = "cout_cf"
     def _fmt_row(cells: dict) -> str:
         return "  ".join(f"{str(cells[c]):>{_col_w[c]}}" for c in _row_cols)
     def _fmt_header() -> str:
-        return "  ".join(f"{c:>{_col_w[c]}}" for c in _row_cols)
+        return "  ".join(f"{_header_labels[c]:>{_col_w[c]}}" for c in _row_cols)
     REF_GENS_PER_STEP = 16 * 16  # ariahw/rl-rewardhacking config.py:num_prompts * num_generations
     est_gens_per_step = cfg.prompts_per_step * cfg.group  # before mixed-pool split
     logger.info(
@@ -585,9 +592,31 @@ def main(cfg: Config) -> int:
         f"-> {est_gens_per_step / REF_GENS_PER_STEP:.2f}x per step; "
         f"this run's {steps} steps ~= {steps * est_gens_per_step / REF_GENS_PER_STEP:.1f} reference steps."
     )
-    logger.info("")
-    logger.info("row  " + _fmt_header())
-    logger.info("")
+    # Caption + blank line + header in one log entry so the blank line
+    # does not get its own timestamp/level prefix.
+    cout_def = (
+        "cout_cf=counterfactual cout (vanilla doesn't actually project; this is what cout would be if it did)"
+        if cfg.arm == "vanilla"
+        else "cout=subspace energy fraction in grad after projection"
+    )
+    caption = (
+        "table columns: "
+        "step=GRPO step; "
+        "ref_eq=vanilla-equivalent step (cum_gens / 256); "
+        "rew=mean combined reward; rew_s=student mean reward; "
+        "sprd=reward spread>0 (T/F; F means zero-variance bail fired and step was skipped); "
+        "N=rollouts; "
+        "gt_s/gt_t=ground-truth passes (student/teacher); "
+        "hack_s/hack_t=hack-flagged rollouts (student/teacher); "
+        "lp_s/lp_t=mean per-token student/teacher gen_logp under current student (diagnostic, no IS correction); "
+        "loss=mean GRPO loss; "
+        "cin=v_hack subspace energy fraction in grad before projection; "
+        "cin_s/cin_t=cin on student-only/teacher-only gradient; "
+        f"{cout_def}; "
+        "fired=fraction of modules where projection fired; "
+        "gen/fb/t_rew=generation/forward+backward/reward-grading wall-time (s); sec=total step wall-time (s)."
+    )
+    logger.info(caption + "\n\n" + _fmt_header())
 
     OUT_DIR.mkdir(exist_ok=True)
     tag = cfg.out_tag or f"_{cfg.preset.value}_{cfg.arm}_seed{cfg.seed}"
@@ -980,13 +1009,10 @@ def main(cfg: Config) -> int:
             "ref_eq": f"{cum_gens / REF_GENS_PER_STEP:.2f}",
             "rew": f"{rew_mean:+.2f}",
             "rew_s": f"{rew_s_mean:+.2f}" if n_s else "nan",
-            "std": f"{rew_std:.2f}",
             "sprd": "T" if spread else "F",
             "N": n_rollouts,
-            "gt": f"{sum(agg_gt)}/{n_rollouts}",
             "gt_s": f"{gt_s_n}/{n_s}" if n_s else "0/0",
             "gt_t": f"{gt_t_n}/{n_t}" if n_t else "0/0",
-            "hack": f"{sum(agg_hack)}/{n_rollouts}",
             "hack_s": f"{hack_s_n}/{n_s}" if n_s else "0/0",
             "hack_t": f"{hack_t_n}/{n_t}" if n_t else "0/0",
             "lp_s": f"{lp_s_mean:+.2f}" if n_s else "nan",
@@ -1003,8 +1029,8 @@ def main(cfg: Config) -> int:
             "sec": f"{time.time()-t0:.0f}",
         }
         rows.append(row)
-        # Stream this step as TSV row (header was printed before the loop).
-        logger.info("row  " + _fmt_row(row))
+        # Stream this step as a row (header was printed before the loop).
+        logger.info(_fmt_row(row))
         if (step + 1) % 25 == 0:
             save_ckpt(rows)  # survive early kills; ~12 days for the full sweep
         if not first_hack_saved and hack_s_n > 0: