plot: multi-seed overlay for substrate emergence (thin per-seed + bold mean)

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-27 19:47:33 +08:00 · 2026-05-30 21:23:57 +00:00
parent 0ea751c5bc
commit e45767effb
1 changed file with 28 additions and 18 deletions
@@ -68,33 +68,41 @@ def _onset(steps: np.ndarray, rate: np.ndarray) -> int | None:
    return int(steps[nz[0]]) if len(nz) else None


-def plot(run: dict, seed: str, out: Path) -> None:
+def plot(runs: list[dict], seeds: list[str], out: Path) -> None:
+    """One curve per mode. Multi-seed: thin per-seed lines + bold mean; a mode counts
+    as 'learned' if it lifts off 0 in the MEAN. Single-seed: bold line + onset dot."""
    fig, ax = plt.subplots(figsize=(6.4, 3.8))
-    x = run["steps"]
-    n_learned = 0
+    L = min(len(r["steps"]) for r in runs)
+    x = runs[0]["steps"][:L]
+    n_learned = ymax = 0
    for k, (mode, color) in HK.items():
-        y = run[k]
-        ax.plot(x, y, color=color, lw=2.0, solid_capstyle="round")
-        on = _onset(x, y)
-        final = np.nan_to_num(y)[-1]
+        stacked = np.stack([np.nan_to_num(r[k][:L]) for r in runs])  # (seeds, steps)
+        ymean = stacked.mean(axis=0)
+        ymax = max(ymax, ymean.max())
+        if len(runs) > 1:
+            for ys in stacked:
+                ax.plot(x, ys, color=color, lw=0.7, alpha=0.30, solid_capstyle="round")
+        ax.plot(x, ymean, color=color, lw=2.0, solid_capstyle="round")
+        on = _onset(x, ymean)
        if on is not None:
            n_learned += 1
-            ax.plot(on, np.nan_to_num(y)[x == on][0], "o", color=color, ms=5, zorder=5)
-        # direct end-label (no legend): mode + final rate, or "never" for the holdout
-        tag = f"{mode}  {final*100:.0f}%" + ("" if on is not None else "  (never)")
-        ax.annotate(tag, (x[-1], final), color=color, fontsize=8, va="center",
+            if len(runs) == 1:
+                ax.plot(on, ymean[x == on][0], "o", color=color, ms=5, zorder=5)
+        tag = f"{mode}  {ymean[-1]*100:.0f}%" + ("" if on is not None else "  (never)")
+        ax.annotate(tag, (x[-1], ymean[-1]), color=color, fontsize=8, va="center",
                    xytext=(6, 0), textcoords="offset points")
+    seed_lbl = f"seed {seeds[0]}" if len(seeds) == 1 else f"{len(seeds)} seeds: {','.join(seeds)}"
    ax.set_xlabel("GRPO step")
    ax.set_ylabel("cumulative hack rate (per mode)")
-    ax.set_title(f"vanilla GRPO learns {n_learned} of {len(HK)} loopholes  (seed {seed})")
-    ax.set_ylim(-0.02, max(0.5, np.nanmax([np.nanmax(run[k]) for k in HK]) * 1.1))
+    ax.set_title(f"vanilla GRPO learns {n_learned} of {len(HK)} loopholes  ({seed_lbl})")
+    ax.set_ylim(-0.02, max(0.5, ymax * 1.15))
    ax.set_xlim(0, x[-1] * 1.28)  # headroom for end-labels
    ax.spines[["top", "right"]].set_visible(False)
    ax.grid(axis="y", lw=0.4, alpha=0.4)
    fig.tight_layout()
    out.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out, dpi=140)
-    logger.info(f"wrote {out}  ({n_learned}/{len(HK)} learned)")
+    logger.info(f"wrote {out}  ({n_learned}/{len(HK)} learned, {len(seeds)} seed(s))")


 def main() -> None:
@@ -102,10 +110,12 @@ def main() -> None:
    ap.add_argument("logs", nargs="+", type=Path)
    ap.add_argument("--out", type=Path, default=Path("out/figs/substrate_emergence.png"))
    args = ap.parse_args()
-    # single-seed for now; first log wins (seed overlay is a later extension)
-    path = args.logs[0]
-    seed = (re.search(r"seed(\d+)", path.name) or re.search(r"s(\d+)", path.name))
-    plot(parse_hk(path), seed.group(1) if seed else "?", args.out)
+    runs, seeds = [], []
+    for path in args.logs:
+        runs.append(parse_hk(path))
+        m = re.search(r"seed(\d+)", path.name) or re.search(r"_s(\d+)", path.name)
+        seeds.append(m.group(1) if m else "?")
+    plot(runs, seeds, args.out)


 if __name__ == "__main__":