mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 16:30:30 +08:00
plot_dynamics: train-vs-deploy 2x2 uses matched n=64 eval on both rows
Previously the train row fell back to per-step hack_s (a noisy n=28 train batch) for arms without a knob-on eval, so vanilla's train and deploy rows looked like they came from different estimators.

Fix: vanilla/erase have no quarantine, so train == deploy; reuse hk_dep (the n=64 knob-off eval) for their train row. route2 still uses hk_on (the knob-on eval). Every panel now shows the same held-out eval, and the train and deploy rows differ at most in the quarantine knob.

Regenerated source data: train_vs_deploy_60.csv (route2 nofloor_rf2 + vanilla sweep, seed 41, 60 steps).

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,121 @@
|
||||
arm,seed,step,hack_s,gt_s,hack_train,solve_train,hk_dep,slv_dep
|
||||
routing2,41,0,0.0,0.34,0.0,0.38,0.0,0.34
|
||||
routing2,41,1,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,2,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,3,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,4,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,5,0.0,0.5,0.0,0.5,0.0,0.5
|
||||
routing2,41,6,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,7,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,8,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,9,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,10,0.0,0.58,0.09,0.55,0.0,0.58
|
||||
routing2,41,11,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,12,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,13,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,14,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,15,0.0,0.62,0.17,0.48,0.0,0.62
|
||||
routing2,41,16,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,17,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,18,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,19,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,20,0.0,0.59,0.19,0.48,0.0,0.59
|
||||
routing2,41,21,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,22,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,23,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,24,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,25,0.0,0.61,0.22,0.59,0.0,0.61
|
||||
routing2,41,26,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,27,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,28,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,29,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,30,0.0,0.62,0.25,0.45,0.0,0.62
|
||||
routing2,41,31,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,32,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,33,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,34,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,35,0.0,0.62,0.23,0.5,0.0,0.62
|
||||
routing2,41,36,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,37,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,38,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,39,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,40,0.0,0.61,0.25,0.56,0.0,0.61
|
||||
routing2,41,41,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,42,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,43,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,44,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,45,0.0,0.62,0.25,0.47,0.0,0.62
|
||||
routing2,41,46,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,47,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,48,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,49,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,50,0.0,0.62,0.19,0.48,0.0,0.62
|
||||
routing2,41,51,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,52,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,53,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,54,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,55,0.0,0.62,0.2,0.52,0.0,0.62
|
||||
routing2,41,56,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,57,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,58,nan,nan,nan,nan,nan,nan
|
||||
routing2,41,59,0.0,0.61,0.25,0.53,0.0,0.61
|
||||
vanilla,41,0,0.0,0.36,0.0,0.36,0.0,0.36
|
||||
vanilla,41,1,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,2,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,3,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,4,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,5,0.0,0.44,0.0,0.44,0.0,0.44
|
||||
vanilla,41,6,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,7,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,8,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,9,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,10,0.14,0.56,0.14,0.56,0.14,0.56
|
||||
vanilla,41,11,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,12,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,13,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,14,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,15,0.23,0.52,0.23,0.52,0.23,0.52
|
||||
vanilla,41,16,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,17,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,18,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,19,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,20,0.28,0.48,0.28,0.48,0.28,0.48
|
||||
vanilla,41,21,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,22,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,23,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,24,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,25,0.25,0.53,0.25,0.53,0.25,0.53
|
||||
vanilla,41,26,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,27,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,28,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,29,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,30,0.3,0.52,0.3,0.52,0.3,0.52
|
||||
vanilla,41,31,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,32,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,33,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,34,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,35,0.27,0.5,0.27,0.5,0.27,0.5
|
||||
vanilla,41,36,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,37,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,38,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,39,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,40,0.38,0.45,0.38,0.45,0.38,0.45
|
||||
vanilla,41,41,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,42,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,43,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,44,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,45,0.42,0.44,0.42,0.44,0.42,0.44
|
||||
vanilla,41,46,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,47,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,48,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,49,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,50,0.38,0.38,0.38,0.38,0.38,0.38
|
||||
vanilla,41,51,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,52,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,53,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,54,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,55,0.42,0.47,0.42,0.47,0.42,0.47
|
||||
vanilla,41,56,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,57,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,58,nan,nan,nan,nan,nan,nan
|
||||
vanilla,41,59,0.33,0.44,0.33,0.44,0.33,0.44
|
||||
|
@@ -132,14 +132,19 @@ def parse_log(path: Path) -> dict | None:
|
||||
# presence: no-floor logs carry an all-nan hk_dep/hk_abl column otherwise.
|
||||
def _has_data(key):
|
||||
return key in run and np.isfinite(run[key]).any()
|
||||
# TRAIN series for the train-vs-deploy 2x2. Prefer the knob-ON eval (hk_on/slv_on):
|
||||
# SAME n/prompts/T as the knob-off deploy eval, so the two rows differ ONLY in the
|
||||
# knob -- the per-step hack_s is a noisy n=28 train batch and looks like a different
|
||||
# estimator. Fall back to per-step hack_s for logs without the knob-on eval.
|
||||
if _has_data("hk_on"):
|
||||
# TRAIN series for the train-vs-deploy 2x2. The two rows must share ONE estimator:
|
||||
# route2 -> knob-ON held-out eval (hk_on): quarantine active, the policy as trained.
|
||||
# vanilla/erase -> reuse the knob-OFF eval (hk_dep): no quarantine, so train==deploy;
|
||||
# the deploy eval IS the train-time behaviour, same n=64 prompts/T.
|
||||
# Each train row differs from its deploy row at most in the knob (vanilla/erase: not at all), so noise matches. Per-step hack_s
|
||||
# (noisy n=28 train batch) is the last resort for old logs with no held-out eval.
|
||||
if _has_data("hk_on"): # route2: knob-ON held-out eval (quarantine active)
|
||||
run["hack_train"] = run["hk_on"]
|
||||
run["solve_train"] = run["slv_on"]
|
||||
elif "hack_s" in run:
|
||||
elif _has_data("hk_dep"): # no quarantine (vanilla/erase): train==deploy, so the
|
||||
run["hack_train"] = run["hk_dep"] # train row IS the knob-off eval -- reuse it so
|
||||
run["solve_train"] = run["slv_dep"] # both rows share the n=64 estimator (no n=28 noise)
|
||||
elif "hack_s" in run: # last resort (old logs, no held-out eval): per-step n=28
|
||||
run["hack_train"] = run["hack_s"]
|
||||
run["solve_train"] = run["gt_s"]
|
||||
if _has_data("hk_abl"): # dense per-step proxy (rollout_ablate_frac>0), if present
|
||||
|
||||
Reference in New Issue
Block a user