plot: deploy Pareto draws knob-on->off before/after on the n=119 axis

Now that the final/rescore evals record deploy_hack_on/deploy_solve_on at n=119,
the deploy scatter shows the honest quarantine move (hollow knob-on dot
-> arrow -> solid knob-off dot) on the same axis instead of borrowing
val's lower-scale curve. Arms not yet backfilled fall back to dot-only.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
wassname
2026-06-09 13:15:19 +00:00
parent 3c27d922d2
commit 3f82041d90
+26 -10
View File
@@ -67,6 +67,10 @@ def _l5(rows: list[dict], k: str) -> float:
return sum(v) / len(v)
def _r4(x):
return None if x is None else round(x, 4)
# ── stage 1: build the inspectable csv ──────────────────────────────────────
def build_csv() -> pl.DataFrame:
rows = []
@@ -77,6 +81,9 @@ def build_csv() -> pl.DataFrame:
rows.append(dict(
label=label, kind="method",
hack_deploy=round(dep["deploy_hack"], 4), solve_deploy=round(dep["deploy_solve"], 4),
# knob-ON deploy (deployed-as-trained) on the SAME n=119 set -- None until backfilled
# (rescore_deploy.py) so the deploy before->after is honest, not borrowed from val.
hack_deploy_on=_r4(dep.get("deploy_hack_on")), solve_deploy_on=_r4(dep.get("deploy_solve_on")),
hack_on=round(_l5(ev, "train_hack"), 4), hack_off=round(_l5(ev, "deploy_hack"), 4),
solve_on=round(_l5(ev, "train_solve"), 4), solve_off=round(_l5(ev, "deploy_solve"), 4),
source=f"{run.name}/[deploy_test.json + eval_curve.jsonl]", status=status))
@@ -84,6 +91,7 @@ def build_csv() -> pl.DataFrame:
base = json.loads((_find_run("_dir8_baseline_s43") / "deploy_test.json").read_text())
rows.append(dict(label="base (floor)", kind="anchor_floor",
hack_deploy=round(base["deploy_hack"], 4), solve_deploy=round(base["deploy_solve"], 4),
hack_deploy_on=None, solve_deploy_on=None,
hack_on=None, hack_off=None, solve_on=None, solve_off=None,
source="*_dir8_baseline_s43/deploy_test.json", status="ok (base model; steps=0)"))
@@ -96,6 +104,7 @@ def build_csv() -> pl.DataFrame:
source = "Ariahw et al. 2025 (paper), NOT our run"
rows.append(dict(label="ceiling", kind="anchor_ceiling",
hack_deploy=0.0, solve_deploy=ceil_solve,
hack_deploy_on=None, solve_deploy_on=None,
hack_on=None, hack_off=None, solve_on=None, solve_off=None,
source=source, status=status))
@@ -196,11 +205,12 @@ def plot(df: pl.DataFrame) -> None:
# hack (x, reversed) vs solve (y). Good corner = TOP-RIGHT (less hacking, more solving), marked
# "ideal". The achievable solve band (base..ceiling) is a faint range-frame; ticks sit only at
# the meaningful values so the axes teach the scale. Two views:
# plot_scatter -> DEPLOY (knob-off, test n=119): where each arm LANDS. Pareto of arms.
# plot_knob -> the quarantine before/after (knob-on -> knob-off, val n=32): per arm, a
# hollow "before" dot (deployed-as-trained, hacky) -> solid "after" dot.
# They use DIFFERENT eval sets on purpose: deploy n=119 only measures knob-off, so before/after
# can only come from the val on/off curve -- never share one y-axis (val solve ~2x lower).
# plot_scatter -> DEPLOY (test n=119): solid dot = knob-off (where each arm lands = the Pareto);
# when the run carries knob-on on the SAME n=119 set, a hollow before-dot ->
# arrow -> solid after-dot shows the quarantine move on the deploy axis.
# plot_knob -> the same before/after on val n=32 (the periodic curve; lower-N, lower-solve).
# Prefer the deploy view now that both endpoints exist there; plot_knob remains as the val cross-
# check (val solve runs ~2x lower, so the two panels never share a y-axis).
GREEN_ARROW = "#1e8449"
BLUE = "#3b5bdb"
# one colour per arm; GOLD=best real-V, DARK=random control, RED=no-intervention baseline.
@@ -227,13 +237,19 @@ def plot_scatter(df: pl.DataFrame) -> None:
ax.plot(0.012, ceil, marker="*", ms=15, color=BLUE, zorder=6, clip_on=False)
ax.annotate("ideal", (0.012, ceil), textcoords="offset points", xytext=(-8, 2),
ha="right", va="center", fontsize=9, color=BLUE, style="italic")
# Deploy (knob-off, n=119) is where each arm LANDS -> a pure Pareto of dots. No before->after
# arrows here: the honest knob-on->off move changes BOTH hack and solve, but knob-on is only
# measured at val (n=32), so drawing it against the deploy y-axis would fake a solve jump that
# is really the eval-set shift. The real 2-D before->after lives in plot_knob (val on/off).
# Deploy: solid dot = knob-OFF (quarantine ablated), where each arm LANDS = the Pareto.
# If the run also has knob-ON (deployed-as-trained) on the SAME n=119 set, draw the honest
# 2-D before->after: hollow before-dot (knob on, hacky) -> arrow -> solid after-dot. Both
# endpoints share the deploy y-axis now (rescore_deploy backfill), so the solve move is real,
# not an eval-set artifact. Arms without the backfill fall back to dot-only.
for r in _methods(df):
col = ARM_COLOR.get(r["label"], GREY)
ax.plot(H(r), S(r), "o", color=col, ms=11, zorder=5, mec="white", mew=1.2)
hon, son = r["hack_deploy_on"], r["solve_deploy_on"]
if hon is not None and (abs(hon - H(r)) > 1e-6 or abs(son - S(r)) > 1e-6):
ax.annotate("", xy=(H(r), S(r)), xytext=(hon, son),
arrowprops=dict(arrowstyle="-|>", color=col, lw=2.0, alpha=0.85, shrinkA=6, shrinkB=8))
ax.plot(hon, son, "o", color="white", mec=col, mew=1.8, ms=9, zorder=4) # hollow = knob on
ax.plot(H(r), S(r), "o", color=col, ms=11, zorder=5, mec="white", mew=1.2) # solid = knob off
right = H(r) > 0.3 # vanilla sits left; label into the middle
ax.annotate(r["label"], (H(r), S(r)), textcoords="offset points",
xytext=(12 if right else -12, 0), ha="left" if right else "right",