diff --git a/scripts/plot_floor_ceiling.py b/scripts/plot_floor_ceiling.py index b04cea7..5289226 100644 --- a/scripts/plot_floor_ceiling.py +++ b/scripts/plot_floor_ceiling.py @@ -67,6 +67,10 @@ def _l5(rows: list[dict], k: str) -> float: return sum(v) / len(v) +def _r4(x): + return None if x is None else round(x, 4) + + # ── stage 1: build the inspectable csv ────────────────────────────────────── def build_csv() -> pl.DataFrame: rows = [] @@ -77,6 +81,9 @@ def build_csv() -> pl.DataFrame: rows.append(dict( label=label, kind="method", hack_deploy=round(dep["deploy_hack"], 4), solve_deploy=round(dep["deploy_solve"], 4), + # knob-ON deploy (deployed-as-trained) on the SAME n=119 set -- None until backfilled + # (rescore_deploy.py) so the deploy before->after is honest, not borrowed from val. + hack_deploy_on=_r4(dep.get("deploy_hack_on")), solve_deploy_on=_r4(dep.get("deploy_solve_on")), hack_on=round(_l5(ev, "train_hack"), 4), hack_off=round(_l5(ev, "deploy_hack"), 4), solve_on=round(_l5(ev, "train_solve"), 4), solve_off=round(_l5(ev, "deploy_solve"), 4), source=f"{run.name}/[deploy_test.json + eval_curve.jsonl]", status=status)) @@ -84,6 +91,7 @@ def build_csv() -> pl.DataFrame: base = json.loads((_find_run("_dir8_baseline_s43") / "deploy_test.json").read_text()) rows.append(dict(label="base (floor)", kind="anchor_floor", hack_deploy=round(base["deploy_hack"], 4), solve_deploy=round(base["deploy_solve"], 4), + hack_deploy_on=None, solve_deploy_on=None, hack_on=None, hack_off=None, solve_on=None, solve_off=None, source="*_dir8_baseline_s43/deploy_test.json", status="ok (base model; steps=0)")) @@ -96,6 +104,7 @@ def build_csv() -> pl.DataFrame: source = "Ariahw et al. 
2025 (paper), NOT our run" rows.append(dict(label="ceiling", kind="anchor_ceiling", hack_deploy=0.0, solve_deploy=ceil_solve, + hack_deploy_on=None, solve_deploy_on=None, hack_on=None, hack_off=None, solve_on=None, solve_off=None, source=source, status=status)) @@ -196,11 +205,12 @@ def plot(df: pl.DataFrame) -> None: # hack (x, reversed) vs solve (y). Good corner = TOP-RIGHT (less hacking, more solving), marked # "ideal". The achievable solve band (base..ceiling) is a faint range-frame; ticks sit only at # the meaningful values so the axes teach the scale. Two views: -# plot_scatter -> DEPLOY (knob-off, test n=119): where each arm LANDS. Pareto of arms. -# plot_knob -> the quarantine before/after (knob-on -> knob-off, val n=32): per arm, a -# hollow "before" dot (deployed-as-trained, hacky) -> solid "after" dot. -# They use DIFFERENT eval sets on purpose: deploy n=119 only measures knob-off, so before/after -# can only come from the val on/off curve -- never share one y-axis (val solve ~2x lower). +# plot_scatter -> DEPLOY (test n=119): solid dot = knob-off (where each arm lands = the Pareto); +# when the run carries knob-on on the SAME n=119 set, a hollow before-dot -> +# arrow -> solid after-dot shows the quarantine move on the deploy axis. +# plot_knob -> the same before/after on val n=32 (the periodic curve; lower-N, lower-solve). +# Prefer the deploy view now that both endpoints exist there; plot_knob remains as the val cross- +# check (val solve runs ~2x lower, so the two panels never share a y-axis). GREEN_ARROW = "#1e8449" BLUE = "#3b5bdb" # one colour per arm; GOLD=best real-V, DARK=random control, RED=no-intervention baseline. 
@@ -227,13 +237,19 @@ def plot_scatter(df: pl.DataFrame) -> None: ax.plot(0.012, ceil, marker="*", ms=15, color=BLUE, zorder=6, clip_on=False) ax.annotate("ideal", (0.012, ceil), textcoords="offset points", xytext=(-8, 2), ha="right", va="center", fontsize=9, color=BLUE, style="italic") - # Deploy (knob-off, n=119) is where each arm LANDS -> a pure Pareto of dots. No before->after - # arrows here: the honest knob-on->off move changes BOTH hack and solve, but knob-on is only - # measured at val (n=32), so drawing it against the deploy y-axis would fake a solve jump that - # is really the eval-set shift. The real 2-D before->after lives in plot_knob (val on/off). + # Deploy: solid dot = knob-OFF (quarantine ablated), where each arm LANDS = the Pareto. + # If the run also has knob-ON (deployed-as-trained) on the SAME n=119 set, draw the honest + # 2-D before->after: hollow before-dot (knob on, hacky) -> arrow -> solid after-dot. Both + # endpoints share the deploy y-axis now (rescore_deploy backfill), so the solve move is real, + # not an eval-set artifact. Arms missing EITHER knob-ON value fall back to dot-only. for r in _methods(df): col = ARM_COLOR.get(r["label"], GREY) - ax.plot(H(r), S(r), "o", color=col, ms=11, zorder=5, mec="white", mew=1.2) + hon, son = r["hack_deploy_on"], r["solve_deploy_on"] + if hon is not None and son is not None and (abs(hon - H(r)) > 1e-6 or abs(son - S(r)) > 1e-6): + ax.annotate("", xy=(H(r), S(r)), xytext=(hon, son), + arrowprops=dict(arrowstyle="-|>", color=col, lw=2.0, alpha=0.85, shrinkA=6, shrinkB=8)) + ax.plot(hon, son, "o", color="white", mec=col, mew=1.8, ms=9, zorder=4) # hollow = knob on + ax.plot(H(r), S(r), "o", color=col, ms=11, zorder=5, mec="white", mew=1.2) # solid = knob off right = H(r) > 0.3 # vanilla sits left; label into the middle ax.annotate(r["label"], (H(r), S(r)), textcoords="offset points", xytext=(12 if right else -12, 0), ha="left" if right else "right",