From e45767effbeed5b41009d4e38f02b9ea30c37709 Mon Sep 17 00:00:00 2001
From: wassname
Date: Sat, 30 May 2026 21:23:57 +0000
Subject: [PATCH] plot: multi-seed overlay for substrate emergence (thin per-seed + bold mean)

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
---
 scripts/plot_substrate.py | 46 ++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/scripts/plot_substrate.py b/scripts/plot_substrate.py
index 9968a7f..3fc782f 100644
--- a/scripts/plot_substrate.py
+++ b/scripts/plot_substrate.py
@@ -68,33 +68,41 @@ def _onset(steps: np.ndarray, rate: np.ndarray) -> int | None:
     return int(steps[nz[0]]) if len(nz) else None
 
 
-def plot(run: dict, seed: str, out: Path) -> None:
+def plot(runs: list[dict], seeds: list[str], out: Path) -> None:
+    """One curve per mode. Multi-seed: thin per-seed lines + bold mean; a mode counts
+    as 'learned' if it lifts off 0 in the MEAN. Single-seed: bold line + onset dot."""
     fig, ax = plt.subplots(figsize=(6.4, 3.8))
-    x = run["steps"]
-    n_learned = 0
+    L = min(len(r["steps"]) for r in runs)
+    x = runs[0]["steps"][:L]
+    n_learned = ymax = 0
     for k, (mode, color) in HK.items():
-        y = run[k]
-        ax.plot(x, y, color=color, lw=2.0, solid_capstyle="round")
-        on = _onset(x, y)
-        final = np.nan_to_num(y)[-1]
+        stacked = np.stack([np.nan_to_num(r[k][:L]) for r in runs])  # (seeds, steps)
+        ymean = stacked.mean(axis=0)
+        ymax = max(ymax, ymean.max())
+        if len(runs) > 1:
+            for ys in stacked:
+                ax.plot(x, ys, color=color, lw=0.7, alpha=0.30, solid_capstyle="round")
+        ax.plot(x, ymean, color=color, lw=2.0, solid_capstyle="round")
+        on = _onset(x, ymean)
         if on is not None:
             n_learned += 1
-            ax.plot(on, np.nan_to_num(y)[x == on][0], "o", color=color, ms=5, zorder=5)
-        # direct end-label (no legend): mode + final rate, or "never" for the holdout
-        tag = f"{mode} {final*100:.0f}%" + ("" if on is not None else " (never)")
-        ax.annotate(tag, (x[-1], final), color=color, fontsize=8, va="center",
+            if len(runs) == 1:
+                ax.plot(on, ymean[x == on][0], "o", color=color, ms=5, zorder=5)
+        tag = f"{mode} {ymean[-1]*100:.0f}%" + ("" if on is not None else " (never)")
+        ax.annotate(tag, (x[-1], ymean[-1]), color=color, fontsize=8, va="center",
                     xytext=(6, 0), textcoords="offset points")
+    seed_lbl = f"seed {seeds[0]}" if len(seeds) == 1 else f"{len(seeds)} seeds: {','.join(seeds)}"
     ax.set_xlabel("GRPO step")
     ax.set_ylabel("cumulative hack rate (per mode)")
-    ax.set_title(f"vanilla GRPO learns {n_learned} of {len(HK)} loopholes (seed {seed})")
-    ax.set_ylim(-0.02, max(0.5, np.nanmax([np.nanmax(run[k]) for k in HK]) * 1.1))
+    ax.set_title(f"vanilla GRPO learns {n_learned} of {len(HK)} loopholes ({seed_lbl})")
+    ax.set_ylim(-0.02, max(0.5, ymax * 1.15))
     ax.set_xlim(0, x[-1] * 1.28)  # headroom for end-labels
     ax.spines[["top", "right"]].set_visible(False)
     ax.grid(axis="y", lw=0.4, alpha=0.4)
     fig.tight_layout()
     out.parent.mkdir(parents=True, exist_ok=True)
     fig.savefig(out, dpi=140)
-    logger.info(f"wrote {out} ({n_learned}/{len(HK)} learned)")
+    logger.info(f"wrote {out} ({n_learned}/{len(HK)} learned, {len(seeds)} seed(s))")
 
 
 def main() -> None:
@@ -102,10 +110,12 @@ def main() -> None:
     ap.add_argument("logs", nargs="+", type=Path)
     ap.add_argument("--out", type=Path, default=Path("out/figs/substrate_emergence.png"))
     args = ap.parse_args()
-    # single-seed for now; first log wins (seed overlay is a later extension)
-    path = args.logs[0]
-    seed = (re.search(r"seed(\d+)", path.name) or re.search(r"s(\d+)", path.name))
-    plot(parse_hk(path), seed.group(1) if seed else "?", args.out)
+    runs, seeds = [], []
+    for path in args.logs:
+        runs.append(parse_hk(path))
+        m = re.search(r"seed(\d+)", path.name) or re.search(r"_s(\d+)", path.name)
+        seeds.append(m.group(1) if m else "?")
+    plot(runs, seeds, args.out)
 
 
 if __name__ == "__main__":