diff --git a/scripts/plot_floor_ceiling.py b/scripts/plot_floor_ceiling.py
index 6084022..95c64b4 100644
--- a/scripts/plot_floor_ceiling.py
+++ b/scripts/plot_floor_ceiling.py
@@ -192,70 +192,69 @@ def plot(df: pl.DataFrame) -> None:
         fig.savefig(OUT / f"floor_ceiling.{ext}", dpi=150, bbox_inches="tight")
 
 
-# ── stage 2b: absolute-scale variant (arrows + shaded floor/ceiling) ─────────
-# Same three arms, but plotted on the RAW metric axis (not normalized to [0,1]) so the
-# actual rates are legible. Both panels oriented "right = better": the solve axis is the
-# raw solve rate; the hack axis is REVERSED (right = less hacking). Grey "bedrock" shades
-# the worse-than-floor zone, blue "sky" shades the better-than-ceiling zone, and each arm
-# is an arrow from the floor anchor to its value (length = distance climbed).
-SKY, BEDROCK = "#cfe3ff", "#d9dadb"
+# ── stage 2b: the two metrics as ONE scatter (Tufte: don't split a 2-var story) ──
+# hack (x, reversed) vs solve (y). Good corner = TOP-RIGHT (less hacking, more solving).
+# Each routeV arm gets a green effect-arrow FROM the vanilla baseline -> shows what the
+# intervention DID (mechanism), not just where it landed. The achievable solve band
+# (base..ceiling) is a faint range-frame; ticks sit only at the meaningful values
+# (no hack / vanilla / base / ceiling) so the axes teach the scale instead of generic grid.
+GREEN_ARROW = "#1e8449"
 
 
-def _arrow_panel(ax, anchor, ceiling, rows, *, reversed_x, xlim, floor_lab, ceil_lab, xlabel, title):
-    lo, hi = xlim  # lo=left edge, hi=right edge (lo>hi when reversed_x)
-    # bedrock = worse-than-floor; sky = better-than-ceiling (data coords, orientation-agnostic)
-    if reversed_x:  # hack: worse = higher rate, better = lower; better is to the RIGHT
-        ax.axvspan(lo, anchor, color=BEDROCK, alpha=0.7, lw=0)  # >= floor hack = bedrock
-        ax.axvspan(ceiling, hi, color=SKY, alpha=0.7, lw=0)  # <= ceiling (0) = sky
-    else:  # solve: worse = lower, better = higher; better is to the RIGHT
-        ax.axvspan(lo, anchor, color=BEDROCK, alpha=0.7, lw=0)  # <= floor solve = bedrock
-        ax.axvspan(ceiling, hi, color=SKY, alpha=0.7, lw=0)  # >= ceiling = sky
-    ax.axvline(anchor, color=GREY, lw=1.2)
-    ax.axvline(ceiling, color="#3b5bdb", lw=1.2, ls=":")
-    span = abs(hi - lo)
-    for yi, (lab, val, col) in enumerate(rows):
-        ax.annotate("", xy=(val, yi), xytext=(anchor, yi),
-                    arrowprops=dict(arrowstyle="-|>", color=col, lw=2.6, shrinkA=0, shrinkB=0))
-        ax.plot([anchor], [yi], "o", color=GREY, ms=4, zorder=3)
-        better_right = (val > anchor) if not reversed_x else (val < anchor)  # is the arm in the 'better' (right) dir
-        ha = "left" if better_right else "right"
-        ax.text(val + (span * 0.02 if ha == "left" else -span * 0.02), yi, f"{val:.3f}",
-                va="center", ha=ha, fontsize=9, color=col, fontweight="bold")
-    ax.set_xlim(lo, hi)
-    ax.set_yticks(range(len(rows))); ax.set_yticklabels([r[0] for r in rows], fontsize=8.5)
-    ax.set_ylim(-0.6, len(rows) - 0.4)
-    ax.set_xlabel(xlabel, fontsize=8.5)
-    ax.set_title(title, fontsize=10, loc="left")
-    ax.text(anchor, -0.55, floor_lab, fontsize=7.5, color=GREY, ha="center", va="bottom")
-    ax.text(ceiling, -0.55, ceil_lab, fontsize=7.5, color="#3b5bdb", ha="center", va="bottom")
-    for s in ("top", "right", "left"):
-        ax.spines[s].set_visible(False)
-    ax.tick_params(left=False)
-
-
-def plot_abs(df: pl.DataFrame) -> None:
+def plot_scatter(df: pl.DataFrame) -> None:
+    """Plot hack rate (x, reversed) against solve rate (y) for the three arms in one scatter.
+
+    Uses the "vanilla GRPO", "routeV random-V" and "routeV per-token" rows of ``df``
+    (``hack_deploy`` / ``solve_deploy`` columns) and the anchors from ``_anchors(df)``;
+    saves the figure as ``floor_ceiling_abs.pdf`` and ``.png`` under ``OUT``.
+    """
     a = _anchors(df)
-    base, vh, ceil = a["base_solve"], a["vanilla_hack"], a["ceiling"]
+    base, ceil = a["base_solve"], a["ceiling"]
     pick = lambda lab: df.filter(pl.col("label") == lab).to_dicts()[0]
     best, rand, van = pick("routeV per-token"), pick("routeV random-V"), pick("vanilla GRPO")
-    # bottom -> top: vanilla, random-V, per-token
-    hack_rows = [("vanilla GRPO", van["hack_deploy"], RED),
-                 ("routeV random-V", rand["hack_deploy"], DARK),
-                 ("routeV per-token", best["hack_deploy"], GOLD)]
-    solve_rows = [("vanilla GRPO", van["solve_deploy"], RED),
-                  ("routeV random-V", rand["solve_deploy"], DARK),
-                  ("routeV per-token", best["solve_deploy"], GOLD)]
-    prov = " PROVISIONAL" if a["provisional"] else ""
-    fig, (axl, axr) = plt.subplots(1, 2, figsize=(11.5, 4.2), sharey=True)
-    _arrow_panel(axl, anchor=vh, ceiling=0.0, rows=hack_rows, reversed_x=True,
-                 xlim=(vh + 0.05, -0.03), floor_lab=f"floor\n(vanilla {vh:.2f})", ceil_lab="ceiling\n(no hack)",
-                 xlabel="hack rate · axis reversed: right = less hacking = better", title="hacking (raw rate)")
-    _arrow_panel(axr, anchor=base, ceiling=ceil, rows=solve_rows, reversed_x=False,
-                 xlim=(base - 0.03, ceil + 0.03), floor_lab=f"floor\n(base {base:.2f})", ceil_lab=f"ceiling\n({ceil:.2f}{prov})",
-                 xlabel="solve rate · right = more solving = better", title="solving (raw rate)")
-    fig.suptitle("vGROUT raw rates: arrow = climb from floor; grey = bedrock (worse than floor), blue = sky (past ceiling) (test n=119, seed 43, 60-step fast)",
-                 fontsize=10, x=0.01, ha="left")
-    fig.tight_layout(rect=(0, 0, 1, 0.93))
+    H = lambda r: r["hack_deploy"]; S = lambda r: r["solve_deploy"]
+
+    BLUE = "#3b5bdb"
+    fig, ax = plt.subplots(figsize=(7.2, 5.4))
+    # achievable solve band (base -> ceiling): faint, recedes behind the data
+    ax.axhspan(base, ceil, color="#eef3ff", zorder=0)
+    ax.axhline(base, color=GREY, lw=0.8); ax.axhline(ceil, color=BLUE, lw=0.8, ls=":")
+    ax.axvline(0.0, color=GREY, lw=0.8)
+    # effect arrows: vanilla baseline -> each routeV arm (green = moves toward the good corner)
+    for arm in (rand, best):
+        ax.annotate("", xy=(H(arm), S(arm)), xytext=(H(van), S(van)),
+                    arrowprops=dict(arrowstyle="-|>", color=GREEN_ARROW, lw=2.0, alpha=0.85,
+                                    shrinkA=7, shrinkB=9))
+    # points + direct labels (name only -- the position already shows the rates; labelling
+    # the amounts too would double-encode. offsets keep each clear of the arrows/each other)
+    pts = [("vanilla GRPO", van, RED, (10, -13), "left"),
+           ("routeV random-V", rand, DARK, (12, -2), "left"),
+           ("routeV per-token", best, GOLD, (12, 6), "left")]
+    for name, r, col, (dx, dy), ha in pts:
+        ax.plot(H(r), S(r), "o", color=col, ms=11, zorder=5, mec="white", mew=1.2)
+        ax.annotate(name, (H(r), S(r)), textcoords="offset points", xytext=(dx, dy),
+                    ha=ha, va="center", fontsize=9, color=col, fontweight="bold")
+    # "better" shown, not told: a small diagonal in the empty top-left, pointing at the good corner
+    ax.annotate("", xy=(0.46, ceil - 0.004), xytext=(0.62, ceil - 0.030),
+                arrowprops=dict(arrowstyle="-|>", color=GREEN_ARROW, lw=1.4, alpha=0.55))
+    ax.text(0.63, ceil - 0.034, "better", fontsize=9, color=GREEN_ARROW, style="italic", ha="left", va="top")
+    # range-frame: ticks only at meaningful values
+    # limits: keep the hand-tuned frame, but widen it to the data (same margins) so an arm
+    # that lands beyond the frame is still drawn instead of being clipped away
+    arms = (van, rand, best)
+    x_left = max(0.66, max(H(r) for r in arms) + 0.05)
+    y_lo = min(base, min(S(r) for r in arms)) - 0.035
+    y_hi = max(ceil, max(S(r) for r in arms)) + 0.02
+    ax.set_xlim(x_left, -0.03)  # reversed: high hack left, 0 right
+    ax.set_ylim(y_lo, y_hi)
+    prov = "*" if a["provisional"] else ""
+    ax.set_xticks([0.0, H(van)]); ax.set_xticklabels(["no hack", f"vanilla\n{H(van):.2f}"], fontsize=8.5)
+    ax.set_yticks([base, ceil]); ax.set_yticklabels([f"base\n{base:.2f}", f"ceiling{prov}\n{ceil:.2f}"], fontsize=8.5)
+    ax.set_xlabel("reward-hack rate", fontsize=9.5)
+    ax.set_ylabel("solve rate", fontsize=9.5)
+    for s in ("top", "right"):
+        ax.spines[s].set_visible(False)
+    fig.tight_layout()
     for ext in ("pdf", "png"):
         fig.savefig(OUT / f"floor_ceiling_abs.{ext}", dpi=150, bbox_inches="tight")
 
@@ -269,8 +268,8 @@ def main() -> None:
     for r in flags.to_dicts():
         print(f" [{r['label']}] {r['status']}")
     plot(df)
-    plot_abs(df)
-    print(f"\nwrote {OUT}/floor_ceiling.pdf and .png (+ floor_ceiling_abs.pdf/.png)")
+    plot_scatter(df)
+    print(f"\nwrote {OUT}/floor_ceiling.pdf and .png (+ floor_ceiling_abs.pdf/.png scatter)")
 
 
 if __name__ == "__main__":