mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 18:59:35 +08:00
log: caption + drop redundant cols (std, gt, hack, row prefix)
- Add a one-line caption that defines every column in the per-step table, printed once before the table starts. The blank line is embedded as \n in the caption log entry so it doesn't print as its own log line.
- Rename cout to cout_cf in the vanilla header. In vanilla, project_delta_S_grad runs with measure_only=True, so cout is the counterfactual (what cout would be if we projected). This resolves the before/after confusion in vanilla logs.
- Drop redundant columns from the per-step table:
  - std (sprd is the load-bearing binary)
  - gt (= gt_s + gt_t)
  - hack (= hack_s + hack_t)
  - the leading "row" prefix on each line
- Underlying agg_gt / agg_hack / rew_std are still used in the end-of-step summary line and tqdm postfix, so nothing is orphaned.
This commit is contained in:
+40
-14
@@ -563,21 +563,28 @@ def main(cfg: Config) -> int:
|
||||
# their headers; tab-separation was breaking when any single value happened
|
||||
# to be wider than 7 chars (e.g. a 4-digit "sec" or 5-char "ref_eq").
|
||||
_col_w = {
|
||||
"step": 4, "ref_eq": 6, "rew": 6, "rew_s": 6, "std": 5, "sprd": 4, "N": 3,
|
||||
"gt": 6, "gt_s": 6, "gt_t": 6, "hack": 6, "hack_s": 6, "hack_t": 6,
|
||||
"step": 4, "ref_eq": 6, "rew": 6, "rew_s": 6, "sprd": 4, "N": 3,
|
||||
"gt_s": 6, "gt_t": 6, "hack_s": 6, "hack_t": 6,
|
||||
"lp_s": 6, "lp_t": 6,
|
||||
"loss": 8, "cin": 6, "cin_s": 6, "cin_t": 6, "cout": 6, "fired": 5,
|
||||
"loss": 8, "cin": 6, "cin_s": 6, "cin_t": 6, "cout": 7, "fired": 5,
|
||||
"gen": 5, "fb": 4, "t_rew": 5, "sec": 4,
|
||||
}
|
||||
_row_cols = ["step", "ref_eq", "rew", "rew_s", "std", "sprd", "N",
|
||||
"gt", "gt_s", "gt_t", "hack", "hack_s", "hack_t",
|
||||
_row_cols = ["step", "ref_eq", "rew", "rew_s", "sprd", "N",
|
||||
"gt_s", "gt_t", "hack_s", "hack_t",
|
||||
"lp_s", "lp_t",
|
||||
"loss", "cin", "cin_s", "cin_t", "cout", "fired",
|
||||
"gen", "fb", "t_rew", "sec"]
|
||||
# In vanilla, project_delta_S_grad runs with measure_only=True: the
|
||||
# projection math is computed but g_proj is not written back. So `cout`
|
||||
# is the counterfactual (what cout would be if we projected). Relabel
|
||||
# in the header to make that explicit; the row-data key stays `cout`.
|
||||
_header_labels = {c: c for c in _row_cols}
|
||||
if cfg.arm == "vanilla":
|
||||
_header_labels["cout"] = "cout_cf"
|
||||
def _fmt_row(cells: dict) -> str:
|
||||
return " ".join(f"{str(cells[c]):>{_col_w[c]}}" for c in _row_cols)
|
||||
def _fmt_header() -> str:
|
||||
return " ".join(f"{c:>{_col_w[c]}}" for c in _row_cols)
|
||||
return " ".join(f"{_header_labels[c]:>{_col_w[c]}}" for c in _row_cols)
|
||||
REF_GENS_PER_STEP = 16 * 16 # ariahw/rl-rewardhacking config.py:num_prompts * num_generations
|
||||
est_gens_per_step = cfg.prompts_per_step * cfg.group # before mixed-pool split
|
||||
logger.info(
|
||||
@@ -585,9 +592,31 @@ def main(cfg: Config) -> int:
|
||||
f"-> {est_gens_per_step / REF_GENS_PER_STEP:.2f}x per step; "
|
||||
f"this run's {steps} steps ~= {steps * est_gens_per_step / REF_GENS_PER_STEP:.1f} reference steps."
|
||||
)
|
||||
logger.info("")
|
||||
logger.info("row " + _fmt_header())
|
||||
logger.info("")
|
||||
# Caption + blank line + header in one log entry so the blank line
|
||||
# does not get its own timestamp/level prefix.
|
||||
cout_def = (
|
||||
"cout_cf=counterfactual cout (vanilla doesn't actually project; this is what cout would be if it did)"
|
||||
if cfg.arm == "vanilla"
|
||||
else "cout=subspace energy fraction in grad after projection"
|
||||
)
|
||||
caption = (
|
||||
"table columns: "
|
||||
"step=GRPO step; "
|
||||
"ref_eq=vanilla-equivalent step (cum_gens / 256); "
|
||||
"rew=mean combined reward; rew_s=student mean reward; "
|
||||
"sprd=reward spread>0 (T/F; F means zero-variance bail fired and step was skipped); "
|
||||
"N=rollouts; "
|
||||
"gt_s/gt_t=ground-truth passes (student/teacher); "
|
||||
"hack_s/hack_t=hack-flagged rollouts (student/teacher); "
|
||||
"lp_s/lp_t=mean per-token student/teacher gen_logp under current student (diagnostic, no IS correction); "
|
||||
"loss=mean GRPO loss; "
|
||||
"cin=v_hack subspace energy fraction in grad before projection; "
|
||||
"cin_s/cin_t=cin on student-only/teacher-only gradient; "
|
||||
f"{cout_def}; "
|
||||
"fired=fraction of modules where projection fired; "
|
||||
"gen/fb/t_rew=generation/forward+backward/reward-grading wall-time (s); sec=total step wall-time (s)."
|
||||
)
|
||||
logger.info(caption + "\n\n" + _fmt_header())
|
||||
|
||||
OUT_DIR.mkdir(exist_ok=True)
|
||||
tag = cfg.out_tag or f"_{cfg.preset.value}_{cfg.arm}_seed{cfg.seed}"
|
||||
@@ -980,13 +1009,10 @@ def main(cfg: Config) -> int:
|
||||
"ref_eq": f"{cum_gens / REF_GENS_PER_STEP:.2f}",
|
||||
"rew": f"{rew_mean:+.2f}",
|
||||
"rew_s": f"{rew_s_mean:+.2f}" if n_s else "nan",
|
||||
"std": f"{rew_std:.2f}",
|
||||
"sprd": "T" if spread else "F",
|
||||
"N": n_rollouts,
|
||||
"gt": f"{sum(agg_gt)}/{n_rollouts}",
|
||||
"gt_s": f"{gt_s_n}/{n_s}" if n_s else "0/0",
|
||||
"gt_t": f"{gt_t_n}/{n_t}" if n_t else "0/0",
|
||||
"hack": f"{sum(agg_hack)}/{n_rollouts}",
|
||||
"hack_s": f"{hack_s_n}/{n_s}" if n_s else "0/0",
|
||||
"hack_t": f"{hack_t_n}/{n_t}" if n_t else "0/0",
|
||||
"lp_s": f"{lp_s_mean:+.2f}" if n_s else "nan",
|
||||
@@ -1003,8 +1029,8 @@ def main(cfg: Config) -> int:
|
||||
"sec": f"{time.time()-t0:.0f}",
|
||||
}
|
||||
rows.append(row)
|
||||
# Stream this step as TSV row (header was printed before the loop).
|
||||
logger.info("row " + _fmt_row(row))
|
||||
# Stream this step as a row (header was printed before the loop).
|
||||
logger.info(_fmt_row(row))
|
||||
if (step + 1) % 25 == 0:
|
||||
save_ckpt(rows) # survive early kills; ~12 days for the full sweep
|
||||
if not first_hack_saved and hack_s_n > 0:
|
||||
|
||||
Reference in New Issue
Block a user