fix floor_ceiling asymmetry: paper methods on BOTH panels

The Ariahw bars were on the hack panel only, which was misleading. Mirror them
onto the solve panel (Fig 5 performance: GT 25.0%, probe 19.4%, LLM-judge 16.2%,
base 11.5%, ceiling 22.3%).
Honest picture: the paper methods (including the no-oracle LLM judge) beat routeV
on both axes because they are converged, full-scale runs, whereas ours is a
60-step surrogate; the caption marks the comparison as directional-only. The
cross-scale/maturity caveat (task #18) still stands.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
wassname
2026-06-09 12:10:55 +00:00
parent 0538dbf2f1
commit bcfcee0d06
3 changed files with 15 additions and 7 deletions
+15 -7
View File
@@ -144,7 +144,7 @@ def _bars(ax, rows, key, raws, title, xlabel, xlo):
ax.axvline(0, color=GREY, lw=1.0) # floor (labelled in xlabel)
ax.axvline(1.0, color=GREY, lw=1.0, ls=":") # ceiling
ax.set_yticks(range(len(rows))); ax.set_yticklabels([r[0] for r in rows], fontsize=8.5)
ax.set_xlim(xlo, 1.18); ax.set_xlabel(xlabel, fontsize=8.5)
ax.set_xlim(xlo, 1.4); ax.set_xlabel(xlabel, fontsize=8.5) # hi=1.4 fits GT solve overshoot (+125%)
ax.set_title(title, fontsize=10, loc="left")
for s in ("top", "right", "left"):
ax.spines[s].set_visible(False)
@@ -178,22 +178,30 @@ def plot(df: pl.DataFrame) -> None:
("routeV random-V\n(direction control)", hsupp(rand), f"{rand['hack_deploy']:.3f}", DARK),
("routeV per-token\n(best, NO oracle)", hsupp(best), f"{best['hack_deploy']:.3f}", GOLD),
]
# SAME methods on the solve panel (symmetry -- the paper bars belong on both axes).
# Paper performance from Fig 5, normalized to the PAPER's own base->ceiling
# (11.5% -> 22.3%): GT penalty 25.0% (overshoots ceiling -- it beat the RL baseline),
# probe 19.4%, LLM-judge penalty 16.2%. routeV uses our base->ceiling.
puplift = lambda perf: (perf - 0.115) / (0.223 - 0.115)
solve_rows = [
("Ariahw GT penalty\n(needs oracle)", puplift(0.250), "0.250", GREY),
("Ariahw probe\n(oracle-trained)", puplift(0.194), "0.194", GREY),
("Ariahw LLM-judge pen.\n(NO oracle, ext. peer)", puplift(0.162), "0.162", BLUE),
("routeV random-V\n(direction control)", suplift(rand), f"{rand['solve_deploy']:.3f}", DARK),
("routeV per-token\n(best, no oracle)", suplift(best), f"{best['solve_deploy']:.3f}", GOLD),
("routeV per-token\n(best, NO oracle)", suplift(best), f"{best['solve_deploy']:.3f}", GOLD),
]
prov = " (ceiling PROVISIONAL=0.223, FIXME job 24)" if a["provisional"] else ""
fig, (axl, axr) = plt.subplots(1, 2, figsize=(11, 4.0), sharey=False)
fig, (axl, axr) = plt.subplots(1, 2, figsize=(11.5, 5.0), sharey=False)
_bars(axl, hack_rows, "hack", None,
"hack suppressed", "floor → ceiling (no hack) · right = better", 0.0)
_bars(axr, solve_rows, "solve", None,
"solve gained", f"floor (base 0.126) → ceiling{prov} · right = better", -0.55)
fig.suptitle("vGROUT floor→ceiling: routeV (no oracle, gradient-level) vs Ariahw 2025 monitors (test n=119, seed 43, 60-step fast)",
fontsize=10.5, x=0.01, ha="left")
fig.text(0.01, 0.015, "Ariahw bars from Fig 9 (full-scale converged, paper floor 0.791); routeV is our 60-step surrogate (floor 0.613). "
"The LLM-judge penalty already suppresses hacking with NO oracle (0.1%), so 'no-oracle suppression' is not the novelty -- "
"routeV's is the mechanism (no live judge model each step; fixed direction from authored pairs).",
fontsize=7.0, color=GREY, va="bottom")
fig.text(0.01, 0.015, "Ariahw bars from Fig 5 (full-scale CONVERGED, normalized to paper base/floor/ceiling); routeV is our 60-step UNCONVERGED surrogate "
"(our base/floor/ceiling) -- comparison is DIRECTIONAL only, not like-for-like. The LLM-judge penalty already suppresses with NO oracle (0.1% hack, 16.2% solve), "
"so 'no-oracle suppression' isn't routeV's novelty -- the mechanism is (no live judge each step; fixed authored-pair direction).",
fontsize=6.8, color=GREY, va="bottom")
fig.tight_layout(rect=(0, 0.07, 1, 0.94))
for ext in ("pdf", "png"):
fig.savefig(OUT / f"floor_ceiling.{ext}", dpi=150, bbox_inches="tight")