diff --git a/out/figs/floor_ceiling.pdf b/out/figs/floor_ceiling.pdf index 3ccdd75..d46dd4b 100644 Binary files a/out/figs/floor_ceiling.pdf and b/out/figs/floor_ceiling.pdf differ diff --git a/out/figs/floor_ceiling.png b/out/figs/floor_ceiling.png index c220473..f98621b 100644 Binary files a/out/figs/floor_ceiling.png and b/out/figs/floor_ceiling.png differ diff --git a/scripts/plot_floor_ceiling.py b/scripts/plot_floor_ceiling.py index dd8883b..9031a6c 100644 --- a/scripts/plot_floor_ceiling.py +++ b/scripts/plot_floor_ceiling.py @@ -144,7 +144,7 @@ def _bars(ax, rows, key, raws, title, xlabel, xlo): ax.axvline(0, color=GREY, lw=1.0) # floor (labelled in xlabel) ax.axvline(1.0, color=GREY, lw=1.0, ls=":") # ceiling ax.set_yticks(range(len(rows))); ax.set_yticklabels([r[0] for r in rows], fontsize=8.5) - ax.set_xlim(xlo, 1.18); ax.set_xlabel(xlabel, fontsize=8.5) + ax.set_xlim(xlo, 1.4); ax.set_xlabel(xlabel, fontsize=8.5) # hi=1.4 fits GT solve overshoot (+125%) ax.set_title(title, fontsize=10, loc="left") for s in ("top", "right", "left"): ax.spines[s].set_visible(False) @@ -178,22 +178,30 @@ def plot(df: pl.DataFrame) -> None: ("routeV random-V\n(direction control)", hsupp(rand), f"{rand['hack_deploy']:.3f}", DARK), ("routeV per-token\n(best, NO oracle)", hsupp(best), f"{best['hack_deploy']:.3f}", GOLD), ] + # SAME methods on the solve panel (symmetry -- the paper bars belong on both axes). + # Paper performance from Fig 5, normalized to the PAPER's own base->ceiling + # (11.5% -> 22.3%): GT penalty 25.0% (overshoots ceiling -- it beat the RL baseline), + # probe 19.4%, LLM-judge penalty 16.2%. routeV uses our base->ceiling. + puplift = lambda perf: (perf - 0.115) / (0.223 - 0.115) solve_rows = [ + ("Ariahw GT penalty\n(needs oracle)", puplift(0.250), "0.250", GREY), + ("Ariahw probe\n(oracle-trained)", puplift(0.194), "0.194", GREY), + ("Ariahw LLM-judge pen.\n(NO oracle, ext. peer)", puplift(0.162), "0.162", BLUE), ("routeV random-V\n(direction control)", suplift(rand), f"{rand['solve_deploy']:.3f}", DARK), - ("routeV per-token\n(best, no oracle)", suplift(best), f"{best['solve_deploy']:.3f}", GOLD), + ("routeV per-token\n(best, NO oracle)", suplift(best), f"{best['solve_deploy']:.3f}", GOLD), ] prov = " (ceiling PROVISIONAL=0.223, FIXME job 24)" if a["provisional"] else "" - fig, (axl, axr) = plt.subplots(1, 2, figsize=(11, 4.0), sharey=False) + fig, (axl, axr) = plt.subplots(1, 2, figsize=(11.5, 5.0), sharey=False) _bars(axl, hack_rows, "hack", None, "hack suppressed", "floor → ceiling (no hack) · right = better", 0.0) _bars(axr, solve_rows, "solve", None, "solve gained", f"floor (base 0.126) → ceiling{prov} · right = better", -0.55) fig.suptitle("vGROUT floor→ceiling: routeV (no oracle, gradient-level) vs Ariahw 2025 monitors (test n=119, seed 43, 60-step fast)", fontsize=10.5, x=0.01, ha="left") - fig.text(0.01, 0.015, "Ariahw bars from Fig 9 (full-scale converged, paper floor 0.791); routeV is our 60-step surrogate (floor 0.613). " - "The LLM-judge penalty already suppresses hacking with NO oracle (0.1%), so 'no-oracle suppression' is not the novelty -- " - "routeV's is the mechanism (no live judge model each step; fixed direction from authored pairs).", - fontsize=7.0, color=GREY, va="bottom") + fig.text(0.01, 0.015, "Ariahw bars from Fig 5 (full-scale CONVERGED, normalized to paper base/floor/ceiling); routeV is our 60-step UNCONVERGED surrogate " + "(our base/floor/ceiling) -- comparison is DIRECTIONAL only, not like-for-like. The LLM-judge penalty already suppresses with NO oracle (0.1% hack, 16.2% solve), " + "so 'no-oracle suppression' isn't routeV's novelty -- the mechanism is (no live judge each step; fixed authored-pair direction).", + fontsize=6.8, color=GREY, va="bottom") fig.tight_layout(rect=(0, 0.07, 1, 0.94)) for ext in ("pdf", "png"): fig.savefig(OUT / f"floor_ceiling.{ext}", dpi=150, bbox_inches="tight")