plot: 2x2 train(knob-on) vs deploy(knob-off) x arm figure

The A4 framing in one figure: vanilla train==deploy (cheat in the weights),
route2 train HACKS while deploy is clean (cheat in the deletable knob). parse_log
now keeps the raw train series (hack_train/solve_train) before the deploy
substitution. New fig: dyn_longrun_200_train_deploy.png.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
wassname
2026-06-02 23:53:08 +00:00
parent 4336d6c577
commit 24fa924c8d
5 changed files with 60 additions and 1 deletions
+7
View File
@@ -332,3 +332,10 @@ vanilla,41,129,nan,nan
vanilla,41,130,0.03,0.0
vanilla,41,131,nan,nan
vanilla,41,132,nan,nan
vanilla,41,133,nan,nan
vanilla,41,134,nan,nan
vanilla,41,135,nan,nan
vanilla,41,136,nan,nan
vanilla,41,137,nan,nan
vanilla,41,138,nan,nan
vanilla,41,139,nan,nan
1 arm seed step hack solve
332 vanilla 41 130 0.03 0.0
333 vanilla 41 131 nan nan
334 vanilla 41 132 nan nan
335 vanilla 41 133 nan nan
336 vanilla 41 134 nan nan
337 vanilla 41 135 nan nan
338 vanilla 41 136 nan nan
339 vanilla 41 137 nan nan
340 vanilla 41 138 nan nan
341 vanilla 41 139 nan nan
Binary file not shown.

Before

Width:  |  Height:  |  Size: 48 KiB

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

+53 -1
View File
@@ -124,6 +124,11 @@ def parse_log(path: Path) -> dict | None:
# presence: no-floor logs carry an all-nan hk_dep/hk_abl column otherwise.
def _has_data(key):
return key in run and np.isfinite(run[key]).any()
# Keep the raw per-step TRAIN series (knob-ON for route2) before the deploy
# substitution below overwrites hack_s/gt_s -- the train-vs-deploy 2x2 needs both.
if "hack_s" in run:
run["hack_train"] = run["hack_s"]
run["solve_train"] = run["gt_s"]
if _has_data("hk_abl"): # dense per-step proxy (rollout_ablate_frac>0), if present
run["hack_s"] = run["hk_abl"]
run["gt_s"] = run["slv_abl"]
@@ -355,6 +360,50 @@ def plot_hack_overlay(runs: list[dict], out: Path) -> None:
logger.info(f"wrote {out}")
def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
    """Save a train-vs-deploy small multiple: 2 rows, one column per arm.

    Rows are train (knob ON) and deploy (knob OFF); columns are the arms of
    ``ARM_ORDER`` that actually occur in ``runs``. The story in one figure:
    vanilla train == deploy (no quarantine, the cheat is in the deployed
    weights); route2 train HACKS while deploy is clean -- the cheat is held
    in the deletable knob. Same red=hack/green=solve as the other figures.

    Args:
        runs: Parsed run dicts, grouped here by ``classify(run)``. The train
            row reads ``hack_train``/``solve_train`` and the deploy row reads
            ``hk_dep``/``slv_dep``; drawing is delegated to ``_series_panel``.
        out: Destination image path; missing parent directories are created.
    """
    by_arm: dict[str, list[dict]] = defaultdict(list)
    for r in runs:
        by_arm[classify(r)].append(r)
    arms = [a for a in ARM_ORDER if a in by_arm]
    red, green = RATE_COLORS["hack_s"], RATE_COLORS["gt_s"]
    # (row label, series key -> legend label, series key -> colour).
    # The hack key is listed first in each mapping; the "hack ≡ 0" check
    # below relies on that ordering.
    rows = [
        ("train (knob on)", {"hack_train": "hack", "solve_train": "solve"},
         {"hack_train": red, "solve_train": green}),
        ("deploy (knob off)", {"hk_dep": "hack", "slv_dep": "solve"},
         {"hk_dep": red, "slv_dep": green}),
    ]
    fig, axes = plt.subplots(2, len(arms), figsize=(3.0 * len(arms), 4.8),
                             sharex=True, sharey=True, squeeze=False)
    for ci, arm in enumerate(arms):
        axes[0][ci].set_title(arm, fontsize=10)
        for ri, (rlabel, cols, colors) in enumerate(rows):
            ax = axes[ri][ci]
            _series_panel(ax, by_arm[arm], cols, colors, ylim=(-0.035, 1.0),
                          label_series=(ci == 0))
            hk_key = next(iter(cols))
            # Peak hack rate per run, ignoring NaNs. Series that are all-NaN
            # (or empty) are skipped rather than fed to np.nanmax, which
            # would emit "All-NaN slice" RuntimeWarnings or raise ValueError
            # on a zero-length input.
            peaks = []
            for r in by_arm[arm]:
                if hk_key not in r:
                    continue
                vals = np.asarray(r[hk_key], dtype=float)
                vals = vals[~np.isnan(vals)]
                if vals.size:
                    peaks.append(vals.max())
            # A flat-zero line hugs the axis and is easy to miss; call it out.
            if peaks and max(peaks) < 0.02:
                ax.annotate("hack ≡ 0", (0.04, 0.0), xycoords=("axes fraction", "data"),
                            color=red, fontsize=8, va="bottom",
                            xytext=(0, 3), textcoords="offset points")
            if ci == 0:
                ax.set_ylabel(rlabel)
            ax.spines[["top", "right"]].set_visible(False)
            ax.tick_params(labelsize=8)
    for ax in axes[-1]:
        ax.set_xlabel("optimizer step")
    fig.suptitle("Train (knob on) vs deploy (knob off): vanilla puts the cheat in "
                 "the weights, route2 in the deletable knob (EMA-5)", fontsize=10)
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    out.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out, dpi=150, bbox_inches="tight")
    # pyplot keeps every figure alive until it is closed explicitly; release
    # it now that the image is on disk.
    plt.close(fig)
    logger.info(f"wrote {out}")
# --- cli -------------------------------------------------------------------
def _gather(paths: list[str]) -> list[Path]:
@@ -406,7 +455,10 @@ def main() -> None:
# second figure: single-panel arm-vs-arm overlay of the headline metric
overlay = args.out.with_name(args.out.stem + "_hack_overlay.png")
plot_hack_overlay(runs, overlay)
for p in (args.out, overlay):
# third figure: 2x2 train(knob-on) vs deploy(knob-off) x arm
tvd = args.out.with_name(args.out.stem + "_train_deploy.png")
plot_train_vs_deploy(runs, tvd)
for p in (args.out, overlay, tvd):
logger.info(f"docs/figs latest -> {link_latest(p)}")