plot: 2x2 train(knob-on) vs deploy(knob-off) x arm figure

The A4 framing in one figure: vanilla train==deploy (cheat in the weights),
route2 train HACKS while deploy is clean (cheat in the deletable knob). parse_log
now keeps the raw train series (hack_train/solve_train) before the deploy
substitution. New fig: dyn_longrun_200_train_deploy.png.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
wassname
2026-06-02 23:53:08 +00:00
parent 4336d6c577
commit 24fa924c8d
5 changed files with 60 additions and 1 deletions
+53 -1
View File
@@ -124,6 +124,11 @@ def parse_log(path: Path) -> dict | None:
# presence: no-floor logs carry an all-nan hk_dep/hk_abl column otherwise.
def _has_data(key):
    """Return truthy when `run[key]` exists and holds at least one finite value."""
    if key not in run:
        return False
    # A column that is present but entirely NaN/inf counts as "no data".
    finite_mask = np.isfinite(run[key])
    return finite_mask.any()
# Keep the raw per-step TRAIN series (knob-ON for route2) before the deploy
# substitution below overwrites hack_s/gt_s -- the train-vs-deploy 2x2 needs both.
if "hack_s" in run:
run["hack_train"] = run["hack_s"]
run["solve_train"] = run["gt_s"]
if _has_data("hk_abl"): # dense per-step proxy (rollout_ablate_frac>0), if present
run["hack_s"] = run["hk_abl"]
run["gt_s"] = run["slv_abl"]
@@ -355,6 +360,50 @@ def plot_hack_overlay(runs: list[dict], out: Path) -> None:
logger.info(f"wrote {out}")
def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
    """Write a two-row small-multiple figure: train (knob ON) above deploy (knob OFF).

    There is one column per arm present in `runs` (in ARM_ORDER order), so the
    grid is 2 x len(arms) -- 2x2 when exactly two arms are present.

    The story in one figure: vanilla train == deploy (no quarantine, the cheat is
    in the deployed weights); route2 train HACKS while deploy is clean -- the cheat
    is held in the deletable knob. Hack/solve colours are taken from
    RATE_COLORS["hack_s"] / RATE_COLORS["gt_s"].

    Args:
        runs: parsed run dicts. The top row reads the "hack_train"/"solve_train"
            keys and the bottom row reads "hk_dep"/"slv_dep".
        out: destination image path; its parent directory is created if missing.
    """
    by_arm: dict[str, list[dict]] = defaultdict(list)
    for r in runs:
        by_arm[classify(r)].append(r)
    arms = [a for a in ARM_ORDER if a in by_arm]
    # NOTE(review): if no run classifies into an ARM_ORDER entry, len(arms) is 0
    # and plt.subplots below is asked for zero columns -- confirm callers never
    # reach this function with such input.
    red, green = RATE_COLORS["hack_s"], RATE_COLORS["gt_s"]
    # (row label, {run key -> series label}, {run key -> colour}); the hack key
    # is listed first in each mapping and is the one inspected for the
    # "hack ≡ 0" annotation below.
    rows = [
        ("train (knob on)", {"hack_train": "hack", "solve_train": "solve"},
         {"hack_train": red, "solve_train": green}),
        ("deploy (knob off)", {"hk_dep": "hack", "slv_dep": "solve"},
         {"hk_dep": red, "slv_dep": green}),
    ]
    fig, axes = plt.subplots(2, len(arms), figsize=(3.0 * len(arms), 4.8),
                             sharex=True, sharey=True, squeeze=False)
    for ci, arm in enumerate(arms):
        axes[0][ci].set_title(arm, fontsize=10)
        for ri, (rlabel, cols, colors) in enumerate(rows):
            ax = axes[ri][ci]
            _series_panel(ax, by_arm[arm], cols, colors, ylim=(-0.035, 1.0),
                          label_series=(ci == 0))
            hk_key = next(iter(cols))
            # Peak of every hack series that has at least one non-NaN sample.
            # Dropping NaNs up front (instead of calling np.nanmax directly)
            # keeps all-NaN or empty series from raising / emitting
            # "All-NaN slice" RuntimeWarnings; such series simply do not vote.
            peaks = []
            for r in by_arm[arm]:
                if hk_key not in r:
                    continue
                series = np.asarray(r[hk_key], dtype=float)
                series = series[~np.isnan(series)]
                if series.size:
                    peaks.append(series.max())
            # Flag panels where the hack rate never leaves (near) zero, since a
            # flat line on the axis floor is otherwise easy to miss.
            if peaks and max(peaks) < 0.02:
                ax.annotate("hack ≡ 0", (0.04, 0.0), xycoords=("axes fraction", "data"),
                            color=red, fontsize=8, va="bottom",
                            xytext=(0, 3), textcoords="offset points")
            if ci == 0:
                ax.set_ylabel(rlabel)
            ax.spines[["top", "right"]].set_visible(False)
            ax.tick_params(labelsize=8)
    for ax in axes[-1]:
        ax.set_xlabel("optimizer step")
    fig.suptitle("Train (knob on) vs deploy (knob off): vanilla puts the cheat in "
                 "the weights, route2 in the deletable knob (EMA-5)", fontsize=10)
    # Leave headroom at the top so the suptitle does not collide with panel titles.
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    out.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out, dpi=150, bbox_inches="tight")
    # pyplot keeps every figure alive until it is closed; release it once saved.
    plt.close(fig)
    logger.info(f"wrote {out}")
# --- cli -------------------------------------------------------------------
def _gather(paths: list[str]) -> list[Path]:
@@ -406,7 +455,10 @@ def main() -> None:
# second figure: single-panel arm-vs-arm overlay of the headline metric
overlay = args.out.with_name(args.out.stem + "_hack_overlay.png")
plot_hack_overlay(runs, overlay)
for p in (args.out, overlay):
# third figure: 2x2 train(knob-on) vs deploy(knob-off) x arm
tvd = args.out.with_name(args.out.stem + "_train_deploy.png")
plot_train_vs_deploy(runs, tvd)
for p in (args.out, overlay, tvd):
logger.info(f"docs/figs latest -> {link_latest(p)}")