This commit is contained in:
wassname
2026-06-12 04:46:01 +00:00
parent af420ec855
commit 41d225a5ec
8 changed files with 357 additions and 188 deletions
+16 -3
View File
@@ -276,8 +276,21 @@ def plot_q2(df: pl.DataFrame, subtitle: str, out_png: Path) -> dict:
n_rout = int(routed.sum())
prec = float(y[routed].mean()) if routed.any() else float("nan")
rec = float((s[y] >= t_hi).mean()) if y.any() else float("nan")
# F_beta at the rout cut, beta=0.5 (PRECISION-weighted). The routing cost is
# asymmetric in the OPPOSITE direction to naive intuition: a missed hack (false negative)
# is absorbed -- SGTM is robust to 40-50% undiscovered forget data because the
# routed subset localizes the capability regardless (paper_sgtm.md L64,160,362).
# A false positive (clean routed to rout) has NO such safety net: that solve
# update goes only to the quarantine and is ablated away -> lost capability. So
# the rout cut should be high-PRECISION (pin only confident hacks; let the wide
# absorb band catch the uncertain ones). AUROC ignores the threshold and the
# imbalance; this scores the gate at its operating point. Measurement only -- it
# needs hack labels, so it can never feed the live gate.
b2 = 0.25 # beta=0.5 -> beta^2
fbeta = float((1 + b2) * prec * rec / (b2 * prec + rec)) if (prec + rec) > 0 else 0.0
stats[col] = {"auroc_pos": auroc_pos, "auroc_all": auroc, "prec_rout": prec,
"rec_rout": rec, "n_rout": n_rout, "t_hi": t_hi, "oracle": oracle}
"rec_rout": rec, "fhalf_rout": fbeta, "n_rout": n_rout, "t_hi": t_hi,
"oracle": oracle}
zvals = np.concatenate([s, (syn_join - mu_s) / sd_s]) if len(syn_join) else s
lo = float(np.quantile(zvals, 0.005))
@@ -328,7 +341,7 @@ def plot_q2(df: pl.DataFrame, subtitle: str, out_png: Path) -> dict:
for sp in ("top", "right", "left"):
ax.spines[sp].set_visible(False)
ax.set_title(f"{rep} · {kind} AUROC={auroc_pos:.2f} (A>0 contrast; vs-all {auroc:.2f}) "
f"P@rout={prec:.2f} (n={n_rout}) R={rec:.2f}", fontsize=9)
f"P@rout={prec:.2f} (n={n_rout}) R={rec:.2f} F0.5={fbeta:.2f}", fontsize=9)
ax.set_xlabel({"cos": "cosine to v (concat modules), z within family",
"dot": "dot ⟨x, v⟩, z within family"}[kind], fontsize=8.5)
@@ -566,7 +579,7 @@ def _downstream(cfg: Cfg, fe: dict, src: str) -> int:
print(f"\nmain metric: best case on the A>0 contrast = {best} "
f"AUROC={stats[best]['auroc_pos']:.3f} (vs-all {stats[best]['auroc_all']:.3f}) "
f"P@rout={stats[best]['prec_rout']:.2f} (n={stats[best]['n_rout']}) "
f"R@rout={stats[best]['rec_rout']:.2f}")
f"R@rout={stats[best]['rec_rout']:.2f} F0.5@rout={stats[best]['fhalf_rout']:.2f}")
print(f"out: {q2_png}")
return 0