From 373c257293baff76150547b9329bee38487ad504 Mon Sep 17 00:00:00 2001 From: wassname Date: Wed, 27 May 2026 22:26:04 +0000 Subject: [PATCH] log: caption + drop redundant cols (std, gt, hack, row prefix) - Add a one-line caption that defines every column in the per-step table, printed once before the table starts. Blank line embedded as \n in the caption log entry so it doesn't print as its own log line. - Rename cout to cout_cf in the vanilla header. In vanilla, project_delta_S_grad runs with measure_only=True so cout is the counterfactual (what cout would be if we projected). Resolves the before/after confusion in vanilla logs. - Drop redundant columns from the per-step table: - std (sprd is the load-bearing binary) - gt (= gt_s + gt_t) - hack (= hack_s + hack_t) - the leading "row" prefix on each line - Underlying agg_gt / agg_hack / rew_std are still used in the end-of-step summary line and tqdm postfix, so nothing is orphaned. --- src/projected_grpo/train.py | 54 +++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/src/projected_grpo/train.py b/src/projected_grpo/train.py index 916ec42..1fe49b4 100644 --- a/src/projected_grpo/train.py +++ b/src/projected_grpo/train.py @@ -563,21 +563,28 @@ def main(cfg: Config) -> int: # their headers; tab-separation was breaking when any single value happened # to be wider than 7 chars (e.g. a 4-digit "sec" or 5-char "ref_eq"). _col_w = { - "step": 4, "ref_eq": 6, "rew": 6, "rew_s": 6, "std": 5, "sprd": 4, "N": 3, - "gt": 6, "gt_s": 6, "gt_t": 6, "hack": 6, "hack_s": 6, "hack_t": 6, + "step": 4, "ref_eq": 6, "rew": 6, "rew_s": 6, "sprd": 4, "N": 3, + "gt_s": 6, "gt_t": 6, "hack_s": 6, "hack_t": 6, "lp_s": 6, "lp_t": 6, - "loss": 8, "cin": 6, "cin_s": 6, "cin_t": 6, "cout": 6, "fired": 5, + "loss": 8, "cin": 6, "cin_s": 6, "cin_t": 6, "cout": 7, "fired": 5, "gen": 5, "fb": 4, "t_rew": 5, "sec": 4, } - _row_cols = ["step", "ref_eq", "rew", "rew_s", "std", "sprd", "N", - "gt", "gt_s", "gt_t", "hack", "hack_s", "hack_t", + _row_cols = ["step", "ref_eq", "rew", "rew_s", "sprd", "N", + "gt_s", "gt_t", "hack_s", "hack_t", "lp_s", "lp_t", "loss", "cin", "cin_s", "cin_t", "cout", "fired", "gen", "fb", "t_rew", "sec"] + # In vanilla, project_delta_S_grad runs with measure_only=True: the + # projection math is computed but g_proj is not written back. So `cout` + # is the counterfactual (what cout would be if we projected). Relabel + # in the header to make that explicit; the row-data key stays `cout`. + _header_labels = {c: c for c in _row_cols} + if cfg.arm == "vanilla": + _header_labels["cout"] = "cout_cf" def _fmt_row(cells: dict) -> str: return " ".join(f"{str(cells[c]):>{_col_w[c]}}" for c in _row_cols) def _fmt_header() -> str: - return " ".join(f"{c:>{_col_w[c]}}" for c in _row_cols) + return " ".join(f"{_header_labels[c]:>{_col_w[c]}}" for c in _row_cols) REF_GENS_PER_STEP = 16 * 16 # ariahw/rl-rewardhacking config.py:num_prompts * num_generations est_gens_per_step = cfg.prompts_per_step * cfg.group # before mixed-pool split logger.info( @@ -585,9 +592,31 @@ def main(cfg: Config) -> int: f"-> {est_gens_per_step / REF_GENS_PER_STEP:.2f}x per step; " f"this run's {steps} steps ~= {steps * est_gens_per_step / REF_GENS_PER_STEP:.1f} reference steps." ) - logger.info("") - logger.info("row " + _fmt_header()) - logger.info("") + # Caption + blank line + header in one log entry so the blank line + # does not get its own timestamp/level prefix. + cout_def = ( + "cout_cf=counterfactual cout (vanilla doesn't actually project; this is what cout would be if it did)" + if cfg.arm == "vanilla" + else "cout=subspace energy fraction in grad after projection" + ) + caption = ( + "table columns: " + "step=GRPO step; " + "ref_eq=vanilla-equivalent step (cum_gens / 256); " + "rew=mean combined reward; rew_s=student mean reward; " + "sprd=reward spread>0 (T/F; F means zero-variance bail fired and step was skipped); " + "N=rollouts; " + "gt_s/gt_t=ground-truth passes (student/teacher); " + "hack_s/hack_t=hack-flagged rollouts (student/teacher); " + "lp_s/lp_t=mean per-token student/teacher gen_logp under current student (diagnostic, no IS correction); " + "loss=mean GRPO loss; " + "cin=v_hack subspace energy fraction in grad before projection; " + "cin_s/cin_t=cin on student-only/teacher-only gradient; " + f"{cout_def}; " + "fired=fraction of modules where projection fired; " + "gen/fb/t_rew=generation/forward+backward/reward-grading wall-time (s); sec=total step wall-time (s)." + ) + logger.info(caption + "\n\n" + _fmt_header()) OUT_DIR.mkdir(exist_ok=True) tag = cfg.out_tag or f"_{cfg.preset.value}_{cfg.arm}_seed{cfg.seed}" @@ -980,13 +1009,10 @@ def main(cfg: Config) -> int: "ref_eq": f"{cum_gens / REF_GENS_PER_STEP:.2f}", "rew": f"{rew_mean:+.2f}", "rew_s": f"{rew_s_mean:+.2f}" if n_s else "nan", - "std": f"{rew_std:.2f}", "sprd": "T" if spread else "F", "N": n_rollouts, - "gt": f"{sum(agg_gt)}/{n_rollouts}", "gt_s": f"{gt_s_n}/{n_s}" if n_s else "0/0", "gt_t": f"{gt_t_n}/{n_t}" if n_t else "0/0", - "hack": f"{sum(agg_hack)}/{n_rollouts}", "hack_s": f"{hack_s_n}/{n_s}" if n_s else "0/0", "hack_t": f"{hack_t_n}/{n_t}" if n_t else "0/0", "lp_s": f"{lp_s_mean:+.2f}" if n_s else "nan", @@ -1003,8 +1029,8 @@ def main(cfg: Config) -> int: "sec": f"{time.time()-t0:.0f}", } rows.append(row) - # Stream this step as TSV row (header was printed before the loop). - logger.info("row " + _fmt_row(row)) + # Stream this step as a row (header was printed before the loop). + logger.info(_fmt_row(row)) if (step + 1) % 25 == 0: save_ckpt(rows) # survive early kills; ~12 days for the full sweep if not first_hack_saved and hack_s_n > 0: