mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 18:59:35 +08:00
docs: refresh blog+README for route2/deploy-eval; embed key dynamics plot; drop sparse-only dots
- blog: mark as erase-n=2 draft, note route2/exploration-floor/deploy-eval are the current direction; embed dyn_sub4_hack_overlay.png (force-added); ASCII em-dashes; de-bold the arm list (#15 tell)
- README: add route2 arm + apples-to-apples deploy-eval to 'What we compare'; stale banner on the n=1 mix=0.5 findings
- plot_dynamics: remove _mark_if_sparse (asymmetric sparse-only dots); EMA-held line for all arms
- train.py: fix 'held-out greedy' -> 'held-out eval subset, T=0.7' (deploy eval is sampled, not greedy)

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -8,8 +8,8 @@ model's hack_s (red) and solve/gt_s (green) over training. Per-seed thin lines
|
||||
APPLES-TO-APPLES. We plot the DEPLOY-eval (hk_dep/slv_dep) for every arm when
|
||||
present: the same estimator across arms (n=64, T=0.7, every --eval-ablate-every
|
||||
steps). For route/route2 the deployed model = quarantine knob zeroed; for
|
||||
vanilla/erase deploy == the trained model. Sparse deploy-eval points are dotted
|
||||
(see _mark_if_sparse) so the EMA-held line doesn't oversell per-step density.
|
||||
vanilla/erase deploy == the trained model. Sparse deploy-eval steps are EMA-held
|
||||
between samples, drawn as a plain line (same as the dense curves).
|
||||
Older logs that gated the eval to route only fall back to per-step training
|
||||
hack_s for vanilla/erase (noisier, n=28, but estimates the same deployed rate
|
||||
since those arms have no quarantine).
|
||||
@@ -173,19 +173,6 @@ def _onset(steps: np.ndarray, hack: np.ndarray) -> int | None:
|
||||
return int(steps[nz[0]]) if len(nz) else None
|
||||
|
||||
|
||||
def _mark_if_sparse(ax, x: np.ndarray, y_raw: np.ndarray, color, alpha=1.0) -> None:
|
||||
"""Dot the REAL measured points when a series is mostly-NaN. route2's plotted
|
||||
hack/solve is the DEPLOY eval (hk_dep), sampled every eval_ablate_every steps and
|
||||
EMA-held flat between -- without the dots the held line looks per-step-dense and
|
||||
oversells route2's smoothness vs the per-step-sampled training curves (Tufte:
|
||||
the rendering must not imply more data than was measured). Dense series (every
|
||||
step finite, e.g. training hack_s, cos sep/leak) stay unmarked to avoid clutter."""
|
||||
finite = np.isfinite(y_raw)
|
||||
if finite.sum() and finite.mean() < 0.5:
|
||||
ax.plot(x[finite], y_raw[finite], ls="", marker="o", ms=2.5,
|
||||
color=color, alpha=alpha, zorder=4)
|
||||
|
||||
|
||||
def _ema(y: np.ndarray, span: int = 5) -> np.ndarray:
|
||||
"""Causal EMA, span=5. Less lag than a trailing SMA(5) since it weights
|
||||
recent steps more. NaNs hold the previous smoothed value (don't reset it)."""
|
||||
@@ -213,7 +200,6 @@ def _series_panel(ax, runs, cols, colors, ylim, label_series=False):
|
||||
for r in present:
|
||||
ys = _ema(r[col])
|
||||
ax.plot(r["steps"], ys, color=color, lw=0.7, alpha=0.35, solid_capstyle="round")
|
||||
_mark_if_sparse(ax, r["steps"], r[col], color, alpha=0.5)
|
||||
stacked.append(ys)
|
||||
# mean over seeds of the smoothed series (runs share the step grid within an arm)
|
||||
L = min(len(y) for y in stacked)
|
||||
@@ -289,16 +275,11 @@ def _overlay_panel(ax, by_arm, arms, key, *, label, with_onset):
|
||||
for r in rs:
|
||||
ys = _ema(r[key])
|
||||
ax.plot(r["steps"], ys, color=color, lw=0.6, alpha=0.25, solid_capstyle="round")
|
||||
_mark_if_sparse(ax, r["steps"], r[key], color, alpha=0.4)
|
||||
stacked.append(ys)
|
||||
L = min(len(y) for y in stacked)
|
||||
ym = np.nanmean(np.stack([y[:L] for y in stacked]), axis=0)
|
||||
xm = rs[0]["steps"][:L]
|
||||
ax.plot(xm, ym, color=color, lw=2.0, solid_capstyle="round")
|
||||
# dot the bold mean at its REAL (pre-EMA) measured steps -- ym is EMA-held so
|
||||
# it would otherwise read as per-step-dense (see _mark_if_sparse)
|
||||
raw_mean = np.nanmean(np.stack([r[key][:L] for r in rs]), axis=0)
|
||||
_mark_if_sparse(ax, xm, raw_mean, color)
|
||||
if with_onset:
|
||||
onsets = [s for r in rs if (s := _onset(r["steps"], r["hack_s"])) is not None]
|
||||
if onsets:
|
||||
|
||||
Reference in New Issue
Block a user