mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 19:47:33 +08:00
plots: dejargon, drop redundant titles, emit png+svg+pdf, CSV re-render
Audit of all 4 plot scripts (plot_dynamics/substrate/emergence/deploy_overlay):
- One save_fig(fig, path) helper in figs.py writes png+svg+pdf (vector for the paper, png for the blog). All scripts call it.
- arm_label() map: reader-facing names only -- route2->route; 'knob'/'the cheat' are dropped from titles and from the train-vs-deploy story in favour of 'adapter on/off' and 'reward hack'.
- Titles are off by default (the paper/blog caption carries that text); --title re-enables them for standalone research use.
- The dump_data CSV now carries every plotted series; plot_dynamics --from-csv re-renders the three figures from the committed CSV without any logs (logs/ and out/runs/ are gitignored; out/figs/*.csv is tracked). Round-trip verified.
- Commit the regenerated dyn_sub4 figures in all 3 formats, plus the CSV.
Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
+97
-43
@@ -42,7 +42,11 @@ import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
||||
from projected_grpo.figs import link_latest
|
||||
from projected_grpo.figs import link_latest, save_fig, arm_label
|
||||
|
||||
# Figures are captioned in the paper/blog, so the suptitle just restates the
|
||||
# caption. Off by default; --title re-enables it for standalone research use.
|
||||
SHOW_TITLE = False
|
||||
|
||||
# --- parse -----------------------------------------------------------------
|
||||
|
||||
@@ -148,6 +152,8 @@ def parse_log(path: Path) -> dict | None:
|
||||
|
||||
|
||||
def classify(run: dict) -> str:
|
||||
if "arm_csv" in run: # reconstructed from a CSV: name is already classified
|
||||
return run["arm_csv"]
|
||||
if run["arm"] == "vanilla":
|
||||
return "vanilla"
|
||||
if run["arm"] == "routing":
|
||||
@@ -236,24 +242,51 @@ def _series_panel(ax, runs, cols, colors, ylim, label_series=False):
|
||||
ax.set_ylim(*ylim)
|
||||
|
||||
|
||||
# Every series any of the three figures plots. Carried in the CSV so the figure
|
||||
# regenerates from the committed CSV alone (logs/ and out/runs/ are gitignored,
|
||||
# out/figs/*.csv is tracked). `arm` is the CLASSIFIED display name -- load_csv
|
||||
# short-circuits classify() on it so the round-trip is exact.
|
||||
CSV_SERIES = ["hack_s", "gt_s", "hack_train", "solve_train", "hk_dep", "slv_dep"]
|
||||
|
||||
|
||||
def dump_data(runs: list[dict], out: Path) -> Path:
    """Write every plotted series to a tidy CSV next to the figure.

    The CSV makes the figure reproducible from a committed artifact: logs/ and
    out/runs/ are gitignored, this CSV is not (it lands in out/figs/, which is
    tracked).

    Args:
        runs: run dicts; each needs "steps" and "seed", and may carry any of
            the series named in CSV_SERIES.
        out: path of the figure; the CSV is written to the same path with a
            ".csv" suffix.

    Returns:
        The path of the CSV that was written.
    """
    # Not named `csv`, so the stdlib module name is never shadowed.
    csv_path = out.with_suffix(".csv")
    lines = ["arm,seed,step," + ",".join(CSV_SERIES)]
    for r in runs:
        arm = classify(r)
        # Look each series up once per run; a missing or None series becomes
        # None here and is written as NaN for every step below.
        series = [r.get(k) for k in CSV_SERIES]
        for i, step in enumerate(r["steps"]):
            # A series shorter than `steps` is padded with NaN so every row has
            # the same number of columns.
            cells = [s[i] if (s is not None and i < len(s)) else float("nan")
                     for s in series]
            lines.append(f"{arm},{r['seed']},{int(step)}," + ",".join(str(c) for c in cells))
    csv_path.write_text("\n".join(lines) + "\n")
    logger.info(f"wrote {csv_path} ({len(runs)} runs, reproducibility source)")
    return csv_path
|
||||
|
||||
|
||||
def load_csv(path: Path) -> list[dict]:
    """Reconstruct the runs list from a dump_data CSV.

    Lets the figures regenerate without the raw logs. Rows are grouped by
    (arm, seed); each run stores the arm under `arm_csv`, which classify()
    returns verbatim, so the stored name survives the round-trip unchanged.

    Args:
        path: CSV whose header holds "arm", "seed", "step" and every name in
            CSV_SERIES (column order is free; extra columns are ignored).

    Returns:
        One run dict per (arm, seed) pair, in order of first appearance.
        "steps" and each CSV_SERIES entry are numpy arrays; "seed" stays the
        string read from the file. "refr", "vhack" and "teacher_off" are filled
        with fixed placeholders because the CSV does not store them.

    Raises:
        ValueError: if the file has no header row, if a required column is
            missing, or if a step/series cell is not numeric.
    """
    import csv  # function-scope import keeps this block self-contained

    # Drop blank lines first, then let the stdlib reader split the cells so
    # quoted fields are handled correctly.
    text = path.read_text()
    rows = list(csv.reader(line for line in text.splitlines() if line.strip()))
    if not rows:
        raise ValueError(f"{path}: empty CSV, expected a dump_data header row")

    header, body = rows[0], rows[1:]
    col = {name: i for i, name in enumerate(header)}
    missing = [name for name in ("arm", "seed", "step", *CSV_SERIES) if name not in col]
    if missing:
        # Typically an older CSV written before all series were dumped.
        raise ValueError(f"{path}: CSV is missing column(s) {missing}; header was {header}")

    by_key: dict[tuple, dict] = {}
    for row in body:
        arm, seed = row[col["arm"]], row[col["seed"]]
        run = by_key.get((arm, seed))
        if run is None:
            run = by_key[(arm, seed)] = {
                "arm_csv": arm, "seed": seed,
                "refr": 0, "vhack": "-", "teacher_off": None,
                "steps": [], **{k: [] for k in CSV_SERIES},
            }
        run["steps"].append(int(row[col["step"]]))
        for k in CSV_SERIES:
            run[k].append(float(row[col[k]]))

    runs = list(by_key.values())
    for run in runs:  # match parse_log: numeric series are ndarrays, not lists
        run["steps"] = np.array(run["steps"])
        for k in CSV_SERIES:
            run[k] = np.array(run[k], dtype=float)
    return runs
|
||||
|
||||
|
||||
def plot(runs: list[dict], out: Path) -> None:
|
||||
by_arm: dict[str, list[dict]] = defaultdict(list)
|
||||
for r in runs:
|
||||
@@ -269,7 +302,7 @@ def plot(runs: list[dict], out: Path) -> None:
|
||||
ax = axes[0][col]
|
||||
rs = by_arm[arm]
|
||||
n_seed = len({r["seed"] for r in rs})
|
||||
ax.set_title(f"{arm}\n(n={n_seed} seed{'s' if n_seed > 1 else ''})", fontsize=9)
|
||||
ax.set_title(f"{arm_label(arm)}\n(n={n_seed} seed{'s' if n_seed > 1 else ''})", fontsize=9)
|
||||
# ylim floor slightly below 0 so a pinned-at-zero series (route2 hack) draws
|
||||
# ABOVE the axis line instead of hiding under it -- the whole result is that
|
||||
# red sits on zero, so it must be visible, not absent.
|
||||
@@ -296,12 +329,14 @@ def plot(runs: list[dict], out: Path) -> None:
|
||||
ax.spines["right"].set_visible(False)
|
||||
ax.tick_params(labelsize=8)
|
||||
|
||||
fig.suptitle("Training dynamics: deployed hack vs solve by arm "
|
||||
"(deploy-eval n=64 T=0.7; EMA-5; dashed = mean hack onset)", fontsize=10)
|
||||
fig.tight_layout(rect=(0, 0, 1, 0.96))
|
||||
out.parent.mkdir(parents=True, exist_ok=True)
|
||||
fig.savefig(out, dpi=150, bbox_inches="tight")
|
||||
logger.info(f"wrote {out} ({len(runs)} runs, arms={arms})")
|
||||
if SHOW_TITLE:
|
||||
fig.suptitle("Training dynamics: deployed hack vs solve by arm "
|
||||
"(deploy-eval n=64 T=0.7; EMA-5; dashed = mean hack onset)", fontsize=10)
|
||||
fig.tight_layout(rect=(0, 0, 1, 0.96))
|
||||
else:
|
||||
fig.tight_layout()
|
||||
save_fig(fig, out)
|
||||
logger.info(f"wrote {out} ({len(runs)} runs, arms={[arm_label(a) for a in arms]})")
|
||||
|
||||
|
||||
def _overlay_panel(ax, by_arm, arms, key, *, label, with_onset):
|
||||
@@ -344,7 +379,7 @@ def _overlay_panel(ax, by_arm, arms, key, *, label, with_onset):
|
||||
y_lab = y if not placed else max(y, placed[-1] + gap)
|
||||
placed.append(y_lab)
|
||||
arrow = dict(arrowstyle="-", color=color, lw=0.5, shrinkA=0, shrinkB=0)
|
||||
ax.annotate(arm, xy=(x, y), xytext=(x + 1.0, y_lab), textcoords="data",
|
||||
ax.annotate(arm_label(arm), xy=(x, y), xytext=(x + 1.0, y_lab), textcoords="data",
|
||||
color=color, fontsize=8, va="center",
|
||||
arrowprops=arrow if abs(y_lab - y) > 1e-3 else None)
|
||||
|
||||
@@ -362,33 +397,34 @@ def plot_hack_overlay(runs: list[dict], out: Path) -> None:
|
||||
_overlay_panel(ax_h, by_arm, arms, "hack_s", label="hack rate", with_onset=True)
|
||||
_overlay_panel(ax_s, by_arm, arms, "gt_s", label="solve rate", with_onset=False)
|
||||
ax_s.set_xlabel("optimizer step")
|
||||
ax_h.set_title("Hack vs solve rate by arm (EMA-5; dot = mean hack onset)", fontsize=10)
|
||||
if SHOW_TITLE:
|
||||
ax_h.set_title("Hack vs solve rate by arm (EMA-5; dot = mean hack onset)", fontsize=10)
|
||||
fig.tight_layout()
|
||||
out.parent.mkdir(parents=True, exist_ok=True)
|
||||
fig.savefig(out, dpi=150, bbox_inches="tight")
|
||||
save_fig(fig, out)
|
||||
logger.info(f"wrote {out}")
|
||||
|
||||
|
||||
def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
|
||||
"""2x2 small multiple: rows = train (knob ON) / deploy (knob OFF), cols = arm.
|
||||
The story in one figure: vanilla train == deploy (no quarantine, the cheat is
|
||||
in the deployed weights); route2 train HACKS while deploy is clean -- the cheat
|
||||
is held in the deletable knob. Same red=hack/green=solve as the other figures."""
|
||||
"""2x2 small multiple: rows = train (adapter ON) / deploy (adapter OFF), cols = arm.
|
||||
The story in one figure: vanilla train == deploy (no quarantine, the reward
|
||||
hack is in the deployed weights); route trains while hacking but deploys clean,
|
||||
the hack is held in the deletable quarantine adapter. Same red=hack/green=solve
|
||||
as the other figures."""
|
||||
by_arm: dict[str, list[dict]] = defaultdict(list)
|
||||
for r in runs:
|
||||
by_arm[classify(r)].append(r)
|
||||
arms = [a for a in ARM_ORDER if a in by_arm]
|
||||
red, green = RATE_COLORS["hack_s"], RATE_COLORS["gt_s"]
|
||||
rows = [
|
||||
("train (knob on)", {"hack_train": "hack", "solve_train": "solve"},
|
||||
{"hack_train": red, "solve_train": green}),
|
||||
("deploy (knob off)", {"hk_dep": "hack", "slv_dep": "solve"},
|
||||
{"hk_dep": red, "slv_dep": green}),
|
||||
("train (adapter on)", {"hack_train": "hack", "solve_train": "solve"},
|
||||
{"hack_train": red, "solve_train": green}),
|
||||
("deploy (adapter off)", {"hk_dep": "hack", "slv_dep": "solve"},
|
||||
{"hk_dep": red, "slv_dep": green}),
|
||||
]
|
||||
fig, axes = plt.subplots(2, len(arms), figsize=(3.0 * len(arms), 4.8),
|
||||
sharex=True, sharey=True, squeeze=False)
|
||||
for ci, arm in enumerate(arms):
|
||||
axes[0][ci].set_title(arm, fontsize=10)
|
||||
axes[0][ci].set_title(arm_label(arm), fontsize=10)
|
||||
for ri, (rlabel, cols, colors) in enumerate(rows):
|
||||
ax = axes[ri][ci]
|
||||
_series_panel(ax, by_arm[arm], cols, colors, ylim=(-0.035, 1.0),
|
||||
@@ -415,11 +451,14 @@ def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
|
||||
ax.tick_params(labelsize=8)
|
||||
for ax in axes[-1]:
|
||||
ax.set_xlabel("optimizer step")
|
||||
fig.suptitle("Train (knob on) vs deploy (knob off): vanilla puts the cheat in "
|
||||
"the weights, route2 in the deletable knob (EMA-5)", fontsize=10)
|
||||
fig.tight_layout(rect=(0, 0, 1, 0.95))
|
||||
out.parent.mkdir(parents=True, exist_ok=True)
|
||||
fig.savefig(out, dpi=150, bbox_inches="tight")
|
||||
if SHOW_TITLE:
|
||||
fig.suptitle("Train (adapter on) vs deploy (adapter off): vanilla puts the "
|
||||
"reward hack in the weights, route in the deletable adapter (EMA-5)",
|
||||
fontsize=10)
|
||||
fig.tight_layout(rect=(0, 0, 1, 0.95))
|
||||
else:
|
||||
fig.tight_layout()
|
||||
save_fig(fig, out)
|
||||
logger.info(f"wrote {out}")
|
||||
|
||||
|
||||
@@ -454,13 +493,24 @@ def _latest_per_arm(files: list[Path], min_steps: int) -> list[Path]:
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser(description=__doc__)
|
||||
ap.add_argument("logs", nargs="+", help="log files, globs, or dirs")
|
||||
ap.add_argument("logs", nargs="*", help="log files, globs, or dirs (omit with --from-csv)")
|
||||
ap.add_argument("--out", type=Path, default=Path("out/figs/dynamics.png"))
|
||||
ap.add_argument("--latest-per-arm", action="store_true",
|
||||
help="keep only the newest log per arm (with >= --min-steps rows)")
|
||||
ap.add_argument("--min-steps", type=int, default=0,
|
||||
help="drop runs shorter than this many logged steps")
|
||||
ap.add_argument("--title", action="store_true",
|
||||
help="draw the suptitle (off by default: the paper/blog caption carries it)")
|
||||
ap.add_argument("--from-csv", type=Path, default=None,
|
||||
help="re-render from a committed dump_data CSV instead of parsing logs")
|
||||
args = ap.parse_args()
|
||||
global SHOW_TITLE
|
||||
SHOW_TITLE = args.title
|
||||
if args.from_csv:
|
||||
runs = load_csv(args.from_csv)
|
||||
logger.info(f"loaded {len(runs)} runs from {args.from_csv} (CSV re-render, no logs)")
|
||||
_render_all(runs, args.out)
|
||||
return
|
||||
files = _gather(args.logs)
|
||||
if args.latest_per_arm:
|
||||
files = _latest_per_arm(files, args.min_steps)
|
||||
@@ -470,14 +520,18 @@ def main() -> None:
|
||||
for r in runs:
|
||||
logger.info(f"{classify(r):16s} seed={r['seed']} steps={len(r['steps'])} {r['vhack']}")
|
||||
args.out.parent.mkdir(parents=True, exist_ok=True)
|
||||
plot(runs, args.out)
|
||||
# second figure: single-panel arm-vs-arm overlay of the headline metric
|
||||
overlay = args.out.with_name(args.out.stem + "_hack_overlay.png")
|
||||
plot_hack_overlay(runs, overlay)
|
||||
# third figure: 2x2 train(knob-on) vs deploy(knob-off) x arm
|
||||
tvd = args.out.with_name(args.out.stem + "_train_deploy.png")
|
||||
plot_train_vs_deploy(runs, tvd)
|
||||
for p in (args.out, overlay, tvd):
|
||||
_render_all(runs, args.out)
|
||||
|
||||
|
||||
def _render_all(runs: list[dict], out: Path) -> None:
    """Render the three dynamics figures for `runs`, then log their latest links.

    Shared by both paths through main(): runs parsed from logs and runs
    reloaded with --from-csv.

    Args:
        runs: run dicts to plot.
        out: path of the main figure; the two companion figures are written
            beside it as "<stem>_hack_overlay.png" and "<stem>_train_deploy.png".
    """
    out.parent.mkdir(parents=True, exist_ok=True)
    stem = out.stem
    jobs = (
        # small multiples (the original inline note says this also dumps the CSV)
        (plot, out),
        # arm-vs-arm headline overlay
        (plot_hack_overlay, out.with_name(stem + "_hack_overlay.png")),
        # 2x2 train (on) vs deploy (off)
        (plot_train_vs_deploy, out.with_name(stem + "_train_deploy.png")),
    )
    # Render everything first, then link, preserving the original ordering.
    for render, target in jobs:
        render(runs, target)
    for _, target in jobs:
        logger.info(f"docs/figs latest -> {link_latest(target)}")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user