plot: 2x2 train(knob-on) vs deploy(knob-off) x arm figure

The A4 framing in one figure: for vanilla, train == deploy (the cheat is in the weights); for route2, train HACKS while deploy is clean (the cheat is in the deletable knob). parse_log now keeps the raw train series (hack_train/solve_train) before the deploy substitution overwrites hack_s/gt_s. New fig: dyn_longrun_200_train_deploy.png.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-27 17:30:41 +08:00 · 2026-06-02 23:53:08 +00:00
parent 4336d6c577
commit 24fa924c8d
5 changed files with 60 additions and 1 deletions
@@ -332,3 +332,10 @@ vanilla,41,129,nan,nan
 vanilla,41,130,0.03,0.0
 vanilla,41,131,nan,nan
 vanilla,41,132,nan,nan
+vanilla,41,133,nan,nan
+vanilla,41,134,nan,nan
+vanilla,41,135,nan,nan
+vanilla,41,136,nan,nan
+vanilla,41,137,nan,nan
+vanilla,41,138,nan,nan
+vanilla,41,139,nan,nan
@@ -124,6 +124,11 @@ def parse_log(path: Path) -> dict | None:
    # presence: no-floor logs carry an all-nan hk_dep/hk_abl column otherwise.
    def _has_data(key):
        return key in run and np.isfinite(run[key]).any()
+    # Keep the raw per-step TRAIN series (knob-ON for route2) before the deploy
+    # substitution below overwrites hack_s/gt_s -- the train-vs-deploy 2x2 needs both.
+    if "hack_s" in run:
+        run["hack_train"] = run["hack_s"]
+        run["solve_train"] = run["gt_s"]
    if _has_data("hk_abl"):           # dense per-step proxy (rollout_ablate_frac>0), if present
        run["hack_s"] = run["hk_abl"]
        run["gt_s"] = run["slv_abl"]
@@ -355,6 +360,50 @@ def plot_hack_overlay(runs: list[dict], out: Path) -> None:
    logger.info(f"wrote {out}")


+def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
+    """2x2 small multiple: rows = train (knob ON) / deploy (knob OFF), cols = arm.
+
+    The story in one figure: vanilla train == deploy (no quarantine, the cheat is
+    in the deployed weights); route2 train HACKS while deploy is clean -- the cheat
+    is held in the deletable knob. Same red=hack/green=solve as the other figures.
+
+    Args:
+        runs: parsed run dicts; each is grouped into an arm via ``classify``. The
+            train row uses the ``hack_train``/``solve_train`` keys and the deploy
+            row uses ``hk_dep``/``slv_dep``.
+        out: destination image path; missing parent directories are created.
+
+    A panel is labelled "hack ≡ 0" when the peak of its hack series, ignoring NaN
+    samples, stays below 0.02 across every run of that arm. Runs whose hack series
+    is missing, empty or entirely NaN do not take part in that test.
+    """
+    by_arm: dict[str, list[dict]] = defaultdict(list)
+    for r in runs:
+        by_arm[classify(r)].append(r)
+    # Only arms that actually have runs, in the canonical ARM_ORDER.
+    # NOTE(review): with no recognised arm this passes 0 columns to plt.subplots,
+    # which is expected to raise -- confirm callers never pass an empty run list.
+    arms = [a for a in ARM_ORDER if a in by_arm]
+    red, green = RATE_COLORS["hack_s"], RATE_COLORS["gt_s"]
+    # (row label, series key -> label, series key -> colour). The hack key must stay
+    # FIRST in each mapping: the "hack ≡ 0" test below picks it with next(iter(...)).
+    rows = [
+        ("train (knob on)",   {"hack_train": "hack", "solve_train": "solve"},
+                              {"hack_train": red, "solve_train": green}),
+        ("deploy (knob off)", {"hk_dep": "hack", "slv_dep": "solve"},
+                              {"hk_dep": red, "slv_dep": green}),
+    ]
+    # squeeze=False keeps axes 2-D even when there is a single arm column.
+    fig, axes = plt.subplots(2, len(arms), figsize=(3.0 * len(arms), 4.8),
+                             sharex=True, sharey=True, squeeze=False)
+    for ci, arm in enumerate(arms):
+        axes[0][ci].set_title(arm, fontsize=10)
+        for ri, (rlabel, cols, colors) in enumerate(rows):
+            ax = axes[ri][ci]
+            _series_panel(ax, by_arm[arm], cols, colors, ylim=(-0.035, 1.0),
+                          label_series=(ci == 0))
+            hk_key = next(iter(cols))
+            # Per-run peak over non-NaN samples only. Dropping NaNs up front (rather
+            # than calling np.nanmax on the raw series) avoids the all-NaN-slice
+            # RuntimeWarning and the ValueError raised for an empty series.
+            peaks = []
+            for r in by_arm[arm]:
+                if hk_key not in r:
+                    continue
+                vals = np.asarray(r[hk_key], dtype=float)
+                vals = vals[~np.isnan(vals)]
+                if vals.size:
+                    peaks.append(vals.max())
+            if peaks and max(peaks) < 0.02:
+                # x in axes fraction, y in data units: pins the label to the y=0 line.
+                ax.annotate("hack ≡ 0", (0.04, 0.0), xycoords=("axes fraction", "data"),
+                            color=red, fontsize=8, va="bottom",
+                            xytext=(0, 3), textcoords="offset points")
+            if ci == 0:
+                ax.set_ylabel(rlabel)
+            ax.spines[["top", "right"]].set_visible(False)
+            ax.tick_params(labelsize=8)
+    for ax in axes[-1]:
+        ax.set_xlabel("optimizer step")
+    fig.suptitle("Train (knob on) vs deploy (knob off): vanilla puts the cheat in "
+                 "the weights, route2 in the deletable knob  (EMA-5)", fontsize=10)
+    # Leave the top 5% of the canvas free for the suptitle.
+    fig.tight_layout(rect=(0, 0, 1, 0.95))
+    out.parent.mkdir(parents=True, exist_ok=True)
+    fig.savefig(out, dpi=150, bbox_inches="tight")
+    # pyplot keeps every figure registered until it is closed; release it now that
+    # the image is on disk.
+    plt.close(fig)
+    logger.info(f"wrote {out}")
+
+
 # --- cli -------------------------------------------------------------------

 def _gather(paths: list[str]) -> list[Path]:
@@ -406,7 +455,10 @@ def main() -> None:
    # second figure: single-panel arm-vs-arm overlay of the headline metric
    overlay = args.out.with_name(args.out.stem + "_hack_overlay.png")
    plot_hack_overlay(runs, overlay)
-    for p in (args.out, overlay):
+    # third figure: 2x2 train(knob-on) vs deploy(knob-off) x arm
+    tvd = args.out.with_name(args.out.stem + "_train_deploy.png")
+    plot_train_vs_deploy(runs, tvd)
+    for p in (args.out, overlay, tvd):
        logger.info(f"docs/figs latest -> {link_latest(p)}")