This commit is contained in:
wassname
2026-06-11 11:07:28 +00:00
parent 7871aa66b8
commit 270c4f5a27
30 changed files with 456 additions and 443 deletions
+10 -10
View File
@@ -5,9 +5,9 @@ erasure / online G_hack erasure / routing2); the panel shows the DEPLOYED
model's hack_s (red) and solve/gt_s (green) over training. Per-seed thin lines
+ bold mean; the mean hack-onset step (first hack_s > 0) is a dashed vertical.
APPLES-TO-APPLES. We plot the DEPLOY-eval (hk_dep/slv_dep) for every arm when
COMPARABLE ESTIMATOR. We plot the DEPLOY-eval (hk_dep/slv_dep) for every arm when
present: the same estimator across arms (n=64, T=0.7, every --eval-ablate-every
steps). For route/route2 the deployed model = quarantine knob zeroed; for
steps). For route/route2 the deployed model has the quarantine ablated; for
vanilla/erase deploy == the trained model. Sparse deploy-eval steps are EMA-held
between samples, drawn as a plain line (same as the dense curves).
Older logs that gated the eval to route only fall back to per-step training
@@ -136,7 +136,7 @@ def parse_log(path: Path) -> dict | None:
# train-series assignment. A nan column drops the seed out of the mean cleanly.
for k in ("hk_dep", "slv_dep", "hk_on", "slv_on", "hk_abl", "slv_abl"):
run.setdefault(k, np.full(len(steps), np.nan))
# APPLES-TO-APPLES: plot the DEPLOY-eval (hk_dep/slv_dep) for EVERY arm when it
# Use the DEPLOY-eval (hk_dep/slv_dep) for every arm when it
# has data -- same estimator (n=64, T=0.7, eval_ablate_every cadence) across arms.
# For route/route2 this is the quarantine-off model; for vanilla/erase deploy ==
# trained model. Older logs (eval gated to route only) lack it for vanilla/erase
@@ -145,18 +145,18 @@ def parse_log(path: Path) -> dict | None:
def _has_data(key):
    """Report whether `run` holds `key` with at least one finite value in its series."""
    # A missing column counts as "no data", same as an all-NaN column.
    if key not in run:
        return False
    return np.isfinite(run[key]).any()
# TRAIN series for the train-vs-deploy 2x2. The two rows must share ONE estimator:
# route2 -> knob-ON held-out eval (hk_on): quarantine active, the policy as trained.
# vanilla/erase -> reuse the knob-OFF eval (hk_dep): no quarantine, so train==deploy;
# route2 -> quarantine-enabled held-out eval (hk_on): the policy as trained.
# vanilla/erase -> reuse the quarantine-ablated eval (hk_dep): no quarantine, so train==deploy;
# the deploy eval IS the train-time behaviour, same n=64 prompts/T.
# Both differ from the deploy row ONLY in the knob, so noise matches. NO per-step
# Both differ from the deploy row only in quarantine state, so sampling noise matches. No per-step
# hack_s fallback: substituting the noisy n=28 train batch for a seed that lacks the
# held-out eval corrupts the seed-mean (one such seed fabricated a vanilla train-vs-
# deploy gap, 2026-06-05). A seed without the eval drops out as NaN instead.
if _has_data("hk_on"): # route2: knob-ON held-out eval (quarantine active)
if _has_data("hk_on"): # route2: quarantine-enabled held-out eval
run["hack_train"] = run["hk_on"]
run["solve_train"] = run["slv_on"]
else: # no quarantine (vanilla/erase): train==deploy, reuse the
run["hack_train"] = run["hk_dep"] # knob-off eval (nan if absent -> seed drops out)
run["hack_train"] = run["hk_dep"] # quarantine-ablated eval (nan if absent -> seed drops out)
run["solve_train"] = run["slv_dep"] # so all seeds share ONE estimator (n=64, no n=28)
if _has_data("hk_abl"): # dense per-step proxy (rollout_ablate_frac>0), if present
run["hack_s"] = run["hk_abl"]
@@ -441,7 +441,7 @@ def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
in the shipped weights, nothing to delete). Matched n=64 eval on every series."""
# Skip when train==deploy for EVERY run: the dashed "train" series then just hides
# under the solid "deploy" line -- a misleading legend with no visible train line.
# Only a route2 knob-ON eval makes hack_train (=hk_on) differ from hk_dep. Checked on
# Only a route2 quarantine-enabled eval makes hack_train (=hk_on) differ from hk_dep. Checked on
# the derived series so it works on both the log and --from-csv paths (hk_on is not
# round-tripped in the CSV, hack_train is).
def _has_train_gap(r):
@@ -452,7 +452,7 @@ def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
return bool(np.isfinite(d).any() and np.nanmax(d) > 0.02)
if not any(_has_train_gap(r) for r in runs):
out.unlink(missing_ok=True)
logger.info(f"skip {out.name}: train==deploy in every run -> no knob-ON contrast to show")
logger.info(f"skip {out.name}: train==deploy in every run -> no quarantine-state contrast to show")
return
by_arm: dict[str, list[dict]] = defaultdict(list)
for r in runs: