mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 17:30:41 +08:00
feat: knob-ON eval (route arms) for like-for-like train-vs-deploy + teacher-off marker
The 2x2 train row used the per-step hack_s (a noisy n=28 train batch, knob-on), while the deploy row used the smooth n=64 eval (knob-off) -- different estimators, so the comparison was confounded. Now, at each eval step, route arms ALSO run the SAME n=64 eval with the quarantine ACTIVE (knob-on = training policy), logged as hk_on/slv_on. vanilla/erase reuse the deploy eval (no quarantine -> knob-on == knob-off). plot_dynamics prefers hk_on for the train series, so the two rows of the 2x2 differ ONLY in knob state. Also: the plot parses --teacher-off-step from argv, shades the teacher-ON region [0, toff], and draws a dashed line at the cut in the 2x2. The stashed long-run route2 jobs (92 KL, 94 teacher-off) inherit the knob-on eval automatically at runtime. Smoke test (route2 hk_on present + logged, both plot parse paths) is green. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -79,6 +79,10 @@ def parse_log(path: Path) -> dict | None:
|
||||
refr = int(grab(r"--vhack-refresh-every=(\d+)", argv, "0"))
|
||||
seed = grab(r"seed=(\d+)", preset, "?")
|
||||
vhack = grab(r"v-hack-path=out/(?:vhack/)?(\S+?)\.safetensors", argv, "-")
|
||||
# teacher-off curriculum: step the teacher mix was cut (None if never). Drawn as
|
||||
# a vertical line / end of the teacher-on shaded region in the 2x2.
|
||||
_toff = grab(r"--teacher-off-step=(\d+)", argv, None)
|
||||
teacher_off = int(_toff) if _toff is not None else None
|
||||
|
||||
# header line: the one containing both "step" and "hack_s"
|
||||
hdr = next((l for l in txt.splitlines()
|
||||
@@ -98,7 +102,7 @@ def parse_log(path: Path) -> dict | None:
|
||||
# hk_abl/slv_abl = the FREE per-step deploy proxy (ablated rollout slice,
|
||||
# rollout_ablate_frac>0); hk_dep/slv_dep = the held-out greedy eval, only on
|
||||
# eval_ablate_every steps. Prefer the dense proxy for the curve (see below).
|
||||
deploy = {"hk_dep", "slv_dep", "hk_abl", "slv_abl"} & set(idx)
|
||||
deploy = {"hk_dep", "slv_dep", "hk_abl", "slv_abl", "hk_on", "slv_on"} & set(idx)
|
||||
# Only parse columns this log actually has: non-projecting arms (vanilla,
|
||||
# routing2) lack cin_t/cin_s, so gate by presence rather than KeyError.
|
||||
wanted = {k: v for k, v in RATE_COLS.items() if k in idx}
|
||||
@@ -114,7 +118,7 @@ def parse_log(path: Path) -> dict | None:
|
||||
series[col].append(_val(row[idx[col]]))
|
||||
if not steps:
|
||||
return None
|
||||
run = dict(arm=arm, refr=refr, seed=seed, vhack=vhack,
|
||||
run = dict(arm=arm, refr=refr, seed=seed, vhack=vhack, teacher_off=teacher_off,
|
||||
steps=np.array(steps), **{k: np.array(v, dtype=float) for k, v in series.items()})
|
||||
# APPLES-TO-APPLES: plot the DEPLOY-eval (hk_dep/slv_dep) for EVERY arm when it
|
||||
# has data -- same estimator (n=64, T=0.7, eval_ablate_every cadence) across arms.
|
||||
@@ -124,9 +128,14 @@ def parse_log(path: Path) -> dict | None:
|
||||
# presence: no-floor logs carry an all-nan hk_dep/hk_abl column otherwise.
|
||||
def _has_data(key):
|
||||
return key in run and np.isfinite(run[key]).any()
|
||||
# Keep the raw per-step TRAIN series (knob-ON for route2) before the deploy
|
||||
# substitution below overwrites hack_s/gt_s -- the train-vs-deploy 2x2 needs both.
|
||||
if "hack_s" in run:
|
||||
# TRAIN series for the train-vs-deploy 2x2. Prefer the knob-ON eval (hk_on/slv_on):
|
||||
# SAME n/prompts/T as the knob-off deploy eval, so the two rows differ ONLY in the
|
||||
# knob -- the per-step hack_s is a noisy n=28 train batch and looks like a different
|
||||
# estimator. Fall back to per-step hack_s for logs without the knob-on eval.
|
||||
if _has_data("hk_on"):
|
||||
run["hack_train"] = run["hk_on"]
|
||||
run["solve_train"] = run["slv_on"]
|
||||
elif "hack_s" in run:
|
||||
run["hack_train"] = run["hack_s"]
|
||||
run["solve_train"] = run["gt_s"]
|
||||
if _has_data("hk_abl"): # dense per-step proxy (rollout_ablate_frac>0), if present
|
||||
@@ -390,6 +399,16 @@ def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
|
||||
ax.annotate("hack ≡ 0", (0.04, 0.0), xycoords=("axes fraction", "data"),
|
||||
color=red, fontsize=8, va="bottom",
|
||||
xytext=(0, 3), textcoords="offset points")
|
||||
# teacher-off curriculum: shade the teacher-ON region [0, toff] + a line at
|
||||
# the cut, so "hacks were teacher-seeded here, on-policy after" is visible.
|
||||
toffs = {r.get("teacher_off") for r in by_arm[arm] if r.get("teacher_off")}
|
||||
if toffs:
|
||||
toff = max(toffs)
|
||||
ax.axvspan(0, toff, color="0.85", alpha=0.5, zorder=0)
|
||||
ax.axvline(toff, color="0.55", lw=0.8, ls=(0, (4, 3)), zorder=1)
|
||||
if ri == 0:
|
||||
ax.annotate("teacher off", (toff, 1.0), color="0.4", fontsize=7,
|
||||
xytext=(2, -2), textcoords="offset points", va="top")
|
||||
if ci == 0:
|
||||
ax.set_ylabel(rlabel)
|
||||
ax.spines[["top", "right"]].set_visible(False)
|
||||
|
||||
Reference in New Issue
Block a user