# evil_MoE/scripts/plot_dynamics.py

"""Training-dynamics small multiples: deployed hack vs solve, one column per arm.

Tufte small multiples, single row. Columns = arm (vanilla / static G_hack
erasure / online G_hack erasure / routing2); the panel shows the DEPLOYED
model's hack_s (red) and solve/gt_s (green) over training. Per-seed thin lines
+ bold mean; the mean hack-onset step (first hack_s > 0) is a dashed vertical.

APPLES-TO-APPLES. We plot the DEPLOY-eval (hk_dep/slv_dep) for every arm when
present: the same estimator across arms (n=64, T=0.7, every --eval-ablate-every
steps). When a log also carries finite values in the dense per-step
ablated-rollout proxy (hk_abl/slv_abl), that proxy is plotted in preference to
hk_dep/slv_dep. For route/route2 the deployed model = quarantine knob zeroed; for
vanilla/erase deploy == the trained model. Sparse deploy-eval steps are EMA-held
between samples, drawn as a plain line (same as the dense curves).
Older logs that gated the eval to route only fall back to per-step training
hack_s for vanilla/erase (noisier, n=28, but estimates the same deployed rate
since those arms have no quarantine).

Data source: logs/*.log per-step rows (the durable source results.py also uses).
We parse by HEADER NAME, not fixed index, because newer runs add columns (refr).

Arm classification (from the preset line `arm=`, covering old --arm and new
--intervention logs):
  vanilla            arm=vanilla    (intervention=none)
  static erasure     arm=projected, no --vhack-refresh-every (frozen v_hack)
  online erasure     arm=projected, --vhack-refresh-every=N>0 (re-extracted)
  routing2           arm=routing2   (intervention=route2)

Usage:
  uv run python scripts/plot_dynamics.py logs/*converge*.log
  uv run python scripts/plot_dynamics.py logs/                      # whole dir
  uv run python scripts/plot_dynamics.py A.log B.log --out out/dynamics.png

Scales to 3 seeds x 3 arms: pass all 9 logs, they auto-group by (arm, seed).
"""
from __future__ import annotations

import argparse
import re
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D
from loguru import logger

from projected_grpo.figs import link_latest, save_fig, arm_label

# Figures are captioned in the paper/blog, so the suptitle just restates the
# caption. Off by default; --title re-enables it for standalone research use.
# main() overwrites this module-level flag from the parsed --title argument, and
# the plot functions read it when deciding whether to draw a title.
SHOW_TITLE = False

# --- parse -----------------------------------------------------------------

# Series we plot, keyed by cleaned header name; the value is the short label used
# for the direct line labels. Cell formats: frac "7/28" -> 0.25; float "+0.264".
RATE_COLS = {"hack_s": "hack", "gt_s": "solve"}
# Leading run of letters/underscores of a header token. Matching a token with it
# strips trailing ↑↓? decorations: "hack_s?" -> "hack_s". Digits are not part of
# the pattern, so a name is cut at its first digit.
_HDR_TOK = re.compile(r"[A-Za-z_]+")


def _val(tok: str) -> float | None:
    """Convert one per-step log cell to a float, or None for a missing value.

    Accepted forms: a fraction "n/d" (None when d == 0), one of the placeholder
    tokens T / F / - / nan (None), or anything `float()` accepts.
    """
    if "/" not in tok:
        # Placeholder cells carry no number; everything else must parse as a float.
        return None if tok in {"T", "F", "-", "nan"} else float(tok)
    numer, denom = (int(part) for part in tok.split("/"))
    if not denom:  # "0/0": no samples, so the rate is undefined
        return None
    return numer / denom


def parse_log(path: Path) -> dict | None:
    """Parse one training log into a run dict, or return None if it is unusable.

    Returns None when the log has no "argv:" line, no per-step header line, or no
    per-step rows. Otherwise returns a dict with:
      arm, refr, seed, vhack, teacher_off  -- run metadata read from the argv/preset lines
      steps                                -- int array of logged steps
      hack_s / gt_s                        -- the series the figures plot as "deployed"
                                              (see the override at the end of this function)
      hack_train / solve_train             -- the "train" series for the train-vs-deploy figure
      hk_dep, slv_dep, hk_on, slv_on, hk_abl, slv_abl -- eval columns, all-nan when absent
    All series are float arrays; unparseable/placeholder cells become nan.
    """
    txt = path.read_text(errors="replace")
    argv = next((l for l in txt.splitlines() if "argv:" in l), None)
    preset = next((l for l in txt.splitlines() if "preset=" in l and "arm=" in l), "")
    if argv is None:
        return None

    # Last regex match of `pat` in `s` (the single capture group), else `default`.
    def grab(pat, s, default=None):
        ms = re.findall(pat, s)
        return ms[-1] if ms else default

    # arm = derived display name in the preset line (vanilla/projected/routing/routing2),
    # the one source that covers both old (--arm) and new (--intervention) logs.
    # Falls back to "vanilla" when the preset line (or its arm= field) is missing.
    arm = grab(r"\barm=(\w+)", preset, "vanilla")
    refr = int(grab(r"--vhack-refresh-every=(\d+)", argv, "0"))
    seed = grab(r"seed=(\d+)", preset, "?")
    vhack = grab(r"v-hack-path=out/(?:vhack/)?(\S+?)\.safetensors", argv, "-")
    # teacher-off curriculum: step the teacher mix was cut (None if never). Drawn as
    # a vertical line / end of the teacher-on shaded region in the train-vs-deploy figure.
    _toff = grab(r"--teacher-off-step=(\d+)", argv, None)
    teacher_off = int(_toff) if _toff is not None else None

    # header line: the first "| INFO |" line containing both "ref_eq" and "hack_s"
    hdr = next((l for l in txt.splitlines()
                if "| INFO |" in l and "ref_eq" in l and "hack_s" in l), None)
    if hdr is None:
        return None
    # real column headers always start with a letter/underscore; drop pure-symbol
    # tokens (decoration) so a stray glyph in an old log's header doesn't crash parse
    names = [m.group(0) for t in hdr.split("| INFO |", 1)[1].split() if (m := _HDR_TOK.match(t))]
    idx = {n: i for i, n in enumerate(names)}

    series: dict[str, list[float]] = defaultdict(list)
    steps: list[int] = []
    # Also parse the route DEPLOY-eval columns when present (non-route logs lack
    # them -> skip). For routing we plot THESE (deployed model = quarantine deleted),
    # not the training-time hack_s.
    # hk_abl/slv_abl = the FREE per-step deploy proxy (ablated rollout slice,
    # rollout_ablate_frac>0); hk_dep/slv_dep = the held-out greedy eval, only on
    # eval_ablate_every steps. Prefer the dense proxy for the curve (see below).
    deploy = {"hk_dep", "slv_dep", "hk_abl", "slv_abl", "hk_on", "slv_on"} & set(idx)
    # Only parse columns this log actually has: gate the RATE_COLS entries by presence
    # in the header rather than hitting a KeyError on a log that lacks one.
    wanted = {k: v for k, v in RATE_COLS.items() if k in idx}
    wanted.update({c: c for c in deploy})
    for line in txt.splitlines():
        if "| INFO |" not in line:
            continue
        row = line.split("| INFO |", 1)[1].split()
        # A data row starts with an all-digit token and has at least as many cells
        # as the header has names; everything else (header, messages) is skipped.
        if not row or not row[0].isdigit() or len(row) < len(names):
            continue
        # NOTE(review): assumes the header has a "step" column -- a header without
        # one raises KeyError here.
        steps.append(int(row[idx["step"]]))
        for col in wanted:
            series[col].append(_val(row[idx[col]]))
    if not steps:
        return None
    # dtype=float turns the None cells returned by _val into nan.
    run = dict(arm=arm, refr=refr, seed=seed, vhack=vhack, teacher_off=teacher_off,
               steps=np.array(steps), **{k: np.array(v, dtype=float) for k, v in series.items()})
    # Normalise missing eval columns to all-nan (absent == all-nan downstream): old logs
    # that never printed a held-out eval lack the key entirely, which would KeyError the
    # train-series assignment. A nan column drops the seed out of the mean cleanly.
    for k in ("hk_dep", "slv_dep", "hk_on", "slv_on", "hk_abl", "slv_abl"):
        run.setdefault(k, np.full(len(steps), np.nan))
    # APPLES-TO-APPLES: plot the DEPLOY-eval (hk_dep/slv_dep) for EVERY arm when it
    # has data -- same estimator (n=64, T=0.7, eval_ablate_every cadence) across arms.
    # For route/route2 this is the quarantine-off model; for vanilla/erase deploy ==
    # trained model. Older logs (eval gated to route only) lack it for vanilla/erase
    # -> fall back to per-step training hack_s. Test FINITE values, not column
    # presence: no-floor logs carry an all-nan hk_dep/hk_abl column otherwise.
    def _has_data(key):
        return key in run and np.isfinite(run[key]).any()
    # TRAIN series for the train-vs-deploy figure. Train and deploy must share ONE estimator:
    #   route2  -> knob-ON held-out eval (hk_on): quarantine active, the policy as trained.
    #   vanilla/erase -> reuse the knob-OFF eval (hk_dep): no quarantine, so train==deploy;
    #            the deploy eval IS the train-time behaviour, same n=64 prompts/T.
    # Both differ from the deploy series ONLY in the knob, so noise matches. NO per-step
    # hack_s fallback: substituting the noisy n=28 train batch for a seed that lacks the
    # held-out eval corrupts the seed-mean (one such seed fabricated a vanilla train-vs-
    # deploy gap, 2026-06-05). A seed without the eval drops out as NaN instead.
    if _has_data("hk_on"):            # route2: knob-ON held-out eval (quarantine active)
        run["hack_train"] = run["hk_on"]
        run["solve_train"] = run["slv_on"]
    else:                             # no quarantine (vanilla/erase): train==deploy, reuse the
        run["hack_train"] = run["hk_dep"]    # knob-off eval (nan if absent -> seed drops out)
        run["solve_train"] = run["slv_dep"]  # so all seeds share ONE estimator (n=64, no n=28)
    # DEPLOYED series: overwrite hack_s/gt_s in priority order hk_abl -> hk_dep; when
    # neither has finite data the parsed per-step hack_s/gt_s columns are left as they are.
    if _has_data("hk_abl"):           # dense per-step proxy (rollout_ablate_frac>0), if present
        run["hack_s"] = run["hk_abl"]
        run["gt_s"] = run["slv_abl"]
    elif _has_data("hk_dep"):         # the n=64 every-eval_ablate_every deploy eval
        run["hack_s"] = run["hk_dep"]
        run["gt_s"] = run["slv_dep"]
    return run


def classify(run: dict) -> str:
    """Map a run dict to the arm name used for grouping and plotting.

    A run rebuilt from a CSV carries its already-classified name in "arm_csv" and
    is returned verbatim. Otherwise vanilla / routing / routing2 map to themselves
    and every other arm is treated as erasure, split on whether v_hack is refreshed.
    """
    if "arm_csv" in run:
        return run["arm_csv"]
    arm = run["arm"]
    for passthrough in ("vanilla", "routing", "routing2"):
        if arm == passthrough:
            return passthrough
    # anything else (arm == projected) is erasure: online iff a refresh cadence was set
    if run["refr"] > 0:
        return "online erasure"
    return "static erasure"


# --- plot ------------------------------------------------------------------

# routing (route v1, single quarantine) is deprecated -- superseded by routing2
# (scale-matched quarantine). classify() still tags v1 logs as "routing" so they
# don't get misread as erasure, but it's left out of ARM_ORDER so it isn't plotted.
# ARM_ORDER also fixes the left-to-right panel order in every figure.
ARM_ORDER = ["vanilla", "static erasure", "online erasure", "routing2"]
# Distinct colour per series: red hack vs green solve, keyed by series name.
# NOTE(review): an earlier version of this comment described a second row (blue
# teacher-cos vs amber student-cos); nothing visible in this file plots those
# series, so only the hack/solve pair is defined here.
RATE_COLORS = {"hack_s": "#c1432b", "gt_s": "#2f7d4f"}
# Arm colours for the hack/solve overlay figure (arms, not series): grey vanilla
# baseline -> amber static -> blue online, ordered by increasing intervention.
# TODO(color): make this a quality-ordered red->green ramp instead of fixed
# per-arm hues -- red = vanilla (worst, most hacking), green = best method
# (anticipated gradient routing). As arms grow (static/online/grad-routing/
# confessions), assign colour by method rank along a perceptual RdYlGn ramp so
# the reader sees "redder = hacks more" at a glance.
ARM_COLORS = {"vanilla": "#7a7a7a", "static erasure": "#c98a2b",
              "online erasure": "#33508c", "routing": "#2f7d4f",
              "routing2": "#7d2f6f"}


def _onset(steps: np.ndarray, hack: np.ndarray) -> int | None:
    """Return the step of the first strictly-positive entry of `hack`, else None.

    This is the hack-onset marker. It is meant to be fed the RAW (unsmoothed)
    series: an EMA would smear the very step being marked. NaN entries compare
    as not-positive and are skipped.
    """
    positive = np.asarray(hack > 0).ravel()
    if not positive.any():
        return None
    first = int(np.argmax(positive))  # argmax of a boolean mask = index of the first True
    return int(steps[first])


def _ema(y: np.ndarray, span: int = 5) -> np.ndarray:
    """Causal exponential moving average of `y` (default span 5).

    Less lag than a trailing SMA of the same span since it weights recent steps
    more. A NaN sample does not reset the filter: the output holds the previous
    smoothed value (and stays NaN until the first finite sample arrives).

    Args:
        y: 1-D series. Floating arrays keep their dtype; any other input
            (integer arrays, lists) is promoted to float first, because the output
            buffer must be able to hold fractional values and NaN.
        span: EMA span; the smoothing factor is 2 / (span + 1).

    Returns:
        Array of the same length as `y` holding the smoothed series.
    """
    y = np.asarray(y)
    if not np.issubdtype(y.dtype, np.floating):
        # empty_like would otherwise inherit an integer dtype and truncate the
        # smoothed values (or fail outright when asked to store NaN).
        y = y.astype(float)
    a = 2.0 / (span + 1)
    out = np.empty_like(y)
    prev = np.nan
    for i, v in enumerate(y):
        if np.isnan(v):
            out[i] = prev  # hold the last smoothed value across gaps
        else:
            # seed the filter with the first finite sample, then blend
            prev = v if np.isnan(prev) else a * v + (1 - a) * prev
            out[i] = prev
    return out


def _series_panel(ax, runs, cols, colors, ylim, label_series=False):
    """Overlay per-seed thin EMA lines + bold mean-of-EMA for each series.

    Args:
        ax: axes to draw on.
        runs: run dicts of ONE arm; each has "steps" and (optionally) the series
            named by `cols`. A series missing from every run is skipped.
        cols: {series key -> display label}, drawn in iteration order.
        colors: {series key -> colour}.
        ylim: (lo, hi) passed to set_ylim, or a falsy value to keep autoscaling.
        label_series: when True, label each mean line at its endpoint.
    """
    ends = []  # (endpoint_y, endpoint_x, label, color) for direct labels
    for col, label in cols.items():
        color = colors[col]
        stacked = []
        present = [r for r in runs if col in r]
        if not present:        # arm lacks this series (e.g. no cos cols for routing2/vanilla)
            continue
        for r in present:
            ys = _ema(r[col])
            ax.plot(r["steps"], ys, color=color, lw=0.7, alpha=0.35, solid_capstyle="round")
            stacked.append(ys)
        # mean over seeds of the smoothed series (runs share the step grid within an arm);
        # ragged run lengths are truncated to the shortest one
        L = min(len(y) for y in stacked)
        ym = np.nanmean(np.stack([y[:L] for y in stacked]), axis=0)
        # Take the x grid from a run that actually HAS this series: runs[0] may lack
        # it and be shorter than L, which would hand plot() mismatched x/y lengths.
        xm = present[0]["steps"][:L]
        ax.plot(xm, ym, color=color, lw=1.8, solid_capstyle="round")
        ends.append((ym[-1], xm[-1], label, color))
    # Direct labels in the leftmost column only -- colour carries the series
    # across the row, so per-panel repeats are redundant ink. Nudge by the
    # ACTUAL endpoint ordering (higher line -> label up, lower -> down): lines
    # can cross, so a fixed up/down stagger would land each label on the
    # wrong line.
    if label_series:
        ends.sort(key=lambda e: e[0])  # lowest endpoint first
        # only the lowest and highest labels are nudged; any in between stay centred
        dy = {0: -6, len(ends) - 1: 6} if len(ends) > 1 else {0: 0}
        for rank, (y, x, label, color) in enumerate(ends):
            ax.annotate(label, (x, y), color=color, fontsize=8,
                        xytext=(3, dy.get(rank, 0)), textcoords="offset points", va="center")
    if ylim:
        ax.set_ylim(*ylim)


# Every series any of the three figures plots. Carried in the CSV so the figure
# regenerates from the committed CSV alone (logs/ and out/runs/ are gitignored,
# out/figs/*.csv is tracked). `arm` is the CLASSIFIED display name -- load_csv
# short-circuits classify() on it so the round-trip is exact.
# The raw hk_on/slv_on and hk_abl/slv_abl columns are NOT carried: only the
# derived hack_s/gt_s and hack_train/solve_train series plus hk_dep/slv_dep are.
CSV_SERIES = ["hack_s", "gt_s", "hack_train", "solve_train", "hk_dep", "slv_dep"]


def dump_data(runs: list[dict], out: Path) -> Path:
    """Write every run's plotted series to a CSV next to the figure and return its path.

    The CSV path is `out` with its suffix replaced by ".csv". One row per
    (run, logged step); columns are arm (classified name), seed, step, then
    CSV_SERIES. A series that is missing, None, or shorter than the step list is
    written as nan for the affected rows.
    """
    def cell(run: dict, key: str, row: int):
        column = run.get(key)
        if column is None or row >= len(column):
            return float("nan")
        return column[row]

    target = out.with_suffix(".csv")
    rows = ["arm,seed,step," + ",".join(CSV_SERIES)]
    for run in runs:
        prefix = f"{classify(run)},{run['seed']}"
        for row, step in enumerate(run["steps"]):
            values = ",".join(str(cell(run, key, row)) for key in CSV_SERIES)
            rows.append(f"{prefix},{int(step)}," + values)
    target.write_text("".join(line + "\n" for line in rows))
    logger.info(f"wrote {target} ({len(runs)} runs, reproducibility source)")
    return target


def load_csv(path: Path) -> list[dict]:
    """Rebuild the runs list from a dump_data CSV so figures render without logs.

    Rows are grouped by (arm, seed) in first-seen order. Each run gets the stored
    arm name under "arm_csv" (classify() returns it verbatim), placeholder
    metadata (refr=0, vhack="-", teacher_off=None), an int "steps" array and one
    float array per CSV_SERIES column.
    """
    table = [ln.split(",") for ln in path.read_text().splitlines() if ln.strip()]
    header = table[0]
    records = table[1:]
    col_of = {name: pos for pos, name in enumerate(header)}

    grouped: dict[tuple, dict] = {}
    for rec in records:
        arm, seed = rec[col_of["arm"]], rec[col_of["seed"]]
        run = grouped.get((arm, seed))
        if run is None:
            run = {"arm_csv": arm, "seed": seed,
                   "refr": 0, "vhack": "-", "teacher_off": None,
                   "steps": [], **{k: [] for k in CSV_SERIES}}
            grouped[(arm, seed)] = run
        run["steps"].append(int(rec[col_of["step"]]))
        for k in CSV_SERIES:
            run[k].append(float(rec[col_of[k]]))

    # match parse_log: numeric series are ndarrays, not lists
    for run in grouped.values():
        run["steps"] = np.array(run["steps"])
        for k in CSV_SERIES:
            run[k] = np.array(run[k], dtype=float)
    return list(grouped.values())


def plot(runs: list[dict], out: Path) -> None:
    """Draw the single-row small multiples (one panel per arm) and dump the CSV.

    Each panel shows the hack (red) and solve (green) series of one arm: faint
    per-seed EMA lines, a bold seed-mean, and a dashed vertical at the mean
    hack-onset step. The data behind the figure is written via dump_data before
    drawing.

    Args:
        runs: run dicts from parse_log / load_csv.
        out: figure path handed to save_fig; the CSV goes next to it.

    Raises:
        SystemExit: if no run classifies into an arm listed in ARM_ORDER.
    """
    by_arm: dict[str, list[dict]] = defaultdict(list)
    for r in runs:
        by_arm[classify(r)].append(r)
    arms = [a for a in ARM_ORDER if a in by_arm]
    if not arms:
        raise SystemExit("no runs classified into arms")
    dump_data(runs, out)

    fig, axes = plt.subplots(1, len(arms), figsize=(3.0 * len(arms), 2.6),
                             sharex=True, sharey=True, squeeze=False)
    for col, arm in enumerate(arms):
        ax = axes[0][col]
        rs = by_arm[arm]
        n_seed = len({r["seed"] for r in rs})
        ax.set_title(f"{arm_label(arm)}\n(n={n_seed} seed{'s' if n_seed > 1 else ''})", fontsize=9)
        # ylim floor slightly below 0 so a pinned-at-zero series (route2 hack) draws
        # ABOVE the axis line instead of hiding under it -- the whole result is that
        # red sits on zero, so it must be visible, not absent.
        _series_panel(ax, rs, RATE_COLS, RATE_COLORS, ylim=(-0.035, 1.0), label_series=(col == 0))
        # Runs that actually carry a hack series; used for BOTH the "hack ≈ 0" tag
        # and the onset marker so a run without the column is skipped, not a KeyError.
        hk_runs = [r for r in rs if "hack_s" in r]
        # If hack is pinned at zero all panel, say so -- else "no red line" reads as
        # a plotting bug rather than the finding.
        hk = [r["hack_s"] for r in hk_runs]
        if hk and np.nanmax([np.nanmax(h) for h in hk]) < 0.02:
            ax.annotate("hack ≈ 0", (0.04, 0.0), xycoords=("axes fraction", "data"),
                        color=RATE_COLORS["hack_s"], fontsize=8, va="bottom",
                        xytext=(0, 3), textcoords="offset points")
        ax.set_xlabel("optimizer step")
        # mean over seeds of each seed's first step with hack > 0 (seeds that never hack are left out)
        onsets = [s for r in hk_runs if (s := _onset(r["steps"], r["hack_s"])) is not None]
        if onsets:
            s0 = float(np.mean(onsets))
            ax.axvline(s0, color="0.55", lw=0.8, ls=(0, (4, 3)), zorder=0)
            ax.annotate("first hack", (s0, 1.0), color="0.4", fontsize=7,
                        xytext=(2, -2), textcoords="offset points", va="top")

    axes[0][0].set_ylabel("deployed rate")
    # range-frame: drop top/right spines, keep ink on data
    for ax in axes.flat:
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.tick_params(labelsize=8)

    if SHOW_TITLE:
        fig.suptitle("Training dynamics: deployed hack vs solve by arm  "
                     "(deploy-eval n=64 T=0.7; EMA-5; dashed = mean hack onset)", fontsize=10)
        fig.tight_layout(rect=(0, 0, 1, 0.96))  # leave headroom for the suptitle
    else:
        fig.tight_layout()
    save_fig(fig, out)
    logger.info(f"wrote {out}  ({len(runs)} runs, arms={[arm_label(a) for a in arms]})")


def _overlay_panel(ax, by_arm, arms, key, *, label, with_onset, label_arms, ylim=(0, 1)):
    """Overlay one metric (key) per arm on ax: faint per-seed EMA lines + bold EMA mean.

    Args:
        ax: axes to draw on.
        by_arm: {arm name -> list of run dicts}.
        arms: arm names to draw, in order; arms with no run carrying `key` are skipped.
        key: series to plot (e.g. "hack_s" or "gt_s").
        label: y-axis label.
        with_onset: when True, mark each arm's mean hack-onset step (always computed
            from the runs' "hack_s" series) with a dot on its mean line.
        label_arms: when True, direct-label each arm at its endpoint (de-collided in
            y). An arm whose mean series stays below 0.02 gets a "$\\approx 0$" tag
            so a pinned-at-zero line reads as a finding, not a missing line.
        ylim: y-axis limits.
    """
    ends = []  # (y_endpoint, x_endpoint, arm, color, is_zero) for direct labels
    for arm in arms:
        rs = [r for r in by_arm[arm] if key in r]
        if not rs:
            continue
        color = ARM_COLORS[arm]
        stacked = []
        for r in rs:
            ys = _ema(r[key])
            ax.plot(r["steps"], ys, color=color, lw=0.6, alpha=0.25, solid_capstyle="round")
            stacked.append(ys)
        # seed-mean over the common prefix (ragged run lengths are truncated)
        L = min(len(y) for y in stacked)
        ym = np.nanmean(np.stack([y[:L] for y in stacked]), axis=0)
        xm = rs[0]["steps"][:L]
        ax.plot(xm, ym, color=color, lw=2.0, solid_capstyle="round")
        if with_onset:
            # onset is defined on the hack series regardless of `key`; skip runs
            # that do not carry it instead of raising KeyError
            onsets = [s for r in rs if "hack_s" in r
                      and (s := _onset(r["steps"], r["hack_s"])) is not None]
            if onsets:
                s0 = float(np.mean(onsets))
                ax.plot(s0, np.interp(s0, xm, ym), marker="o", ms=4, color=color, zorder=3)
        ends.append((float(ym[-1]), float(xm[-1]), arm, color, float(np.nanmax(ym)) < 0.02))
    ax.set_ylim(*ylim)
    ax.set_ylabel(label)
    ax.spines[["top", "right"]].set_visible(False)
    ax.tick_params(labelsize=8)
    # Nothing to label: either the other panel carries the (shared-colour) labels, or
    # no arm had this series at all -- max() below would raise on an empty sequence.
    if not label_arms or not ends:
        return
    ends.sort(key=lambda e: e[0])           # bottom-to-top by endpoint
    gap = 0.06 * (ylim[1] - ylim[0])        # min y-separation, scaled to the range
    xmax = max(e[1] for e in ends)
    dx = 0.035 * (xmax - ax.get_xlim()[0])  # horizontal clearance off the line end
    x_lab = xmax + dx                       # ALL labels share one gutter x, leaders fan back
    ax.set_xlim(right=xmax + dx * 3.4)      # right margin so labels sit clear in the gutter
    placed = []
    for y, x, arm, color, is_zero in ends:
        # push each label up just enough to clear the one placed below it
        y_lab = y if not placed else max(y, placed[-1] + gap)
        placed.append(y_lab)
        text = arm_label(arm) + (r" $\approx 0$" if is_zero else "")
        # Common gutter x + leader back to each line's actual end: ragged run lengths
        # otherwise scatter labels mid-plot onto other arms' lines (collision test).
        arrow = dict(arrowstyle="-", color=color, lw=0.5, shrinkA=0, shrinkB=2)
        ax.annotate(text, xy=(x, y), xytext=(x_lab, y_lab), textcoords="data",
                    color=color, fontsize=8, va="center", annotation_clip=False,
                    arrowprops=arrow)


def plot_hack_overlay(runs: list[dict], out: Path) -> None:
    """Arm-vs-arm overlay: hack rate (top panel) and solve rate (bottom), shared x.

    Both panels draw faint per-seed EMA lines plus a bold EMA mean per arm. Only
    the top (hack) panel gets the onset dots and the direct arm labels -- readers
    scan top-to-bottom and the hack panel carries the headline (an arm pinned at 0
    is tagged $\\approx 0$); the bottom panel reuses the same colours.
    """
    grouped: dict[str, list[dict]] = defaultdict(list)
    for run in runs:
        grouped[classify(run)].append(run)
    arms = [name for name in ARM_ORDER if name in grouped]

    fig, (ax_h, ax_s) = plt.subplots(2, 1, figsize=(5.2, 5.2), sharex=True)
    panels = (
        # hack panel is floored below 0 so a route line pinned at 0 draws above the axis
        (ax_h, "hack_s", dict(label="hack rate", with_onset=True,
                              label_arms=True, ylim=(-0.035, 1.0))),
        (ax_s, "gt_s", dict(label="solve rate", with_onset=False,
                            label_arms=False, ylim=(0, 1.0))),
    )
    for axis, key, opts in panels:
        _overlay_panel(axis, grouped, arms, key, **opts)
    ax_s.set_xlabel("optimizer step")
    if SHOW_TITLE:
        ax_h.set_title("Hack vs solve rate by arm  (EMA-5; dot = mean hack onset)", fontsize=10)
    fig.tight_layout()
    save_fig(fig, out)
    logger.info(f"wrote {out}")


def plot_train_vs_deploy(runs: list[dict], out: Path) -> None:
    """One panel per arm with four mean lines: {hack, solve} x {train, deploy}.

    Colour encodes the metric (red hack / green solve) and linestyle the condition
    (dashed = train, adapter on; solid = deploy, adapter off). The route gap is the
    result: dashed-red rises while solid-red stays at 0 because the hack lives in
    the deletable quarantine. For vanilla the two coincide (train == deploy).

    The figure is skipped (logged, nothing saved) when no run shows a train/deploy
    difference above 0.02, since the dashed lines would just hide under the solid ones.
    """
    # Gap test runs on the derived series so it works for both the log and the
    # --from-csv paths (hk_on is not round-tripped in the CSV, hack_train is).
    def _has_train_gap(r):
        train, deploy = r.get("hack_train"), r.get("hk_dep")
        if train is None or deploy is None:
            return False
        diff = np.abs(train - deploy)
        if not np.isfinite(diff).any():
            return False
        return bool(np.nanmax(diff) > 0.02)

    if not any(_has_train_gap(r) for r in runs):
        logger.info(f"skip {out.name}: train==deploy in every run -> no knob-ON contrast to show")
        return

    by_arm: dict[str, list[dict]] = defaultdict(list)
    for run in runs:
        by_arm[classify(run)].append(run)
    arms = [name for name in ARM_ORDER if name in by_arm]

    red = RATE_COLORS["hack_s"]
    green = RATE_COLORS["gt_s"]
    TRAIN_LS = (0, (4, 2))
    DEPLOY_LS = "-"
    # (series key, colour, linestyle), in draw order
    series = (
        ("hack_train", red, TRAIN_LS),
        ("hk_dep", red, DEPLOY_LS),
        ("solve_train", green, TRAIN_LS),
        ("slv_dep", green, DEPLOY_LS),
    )

    fig, axes = plt.subplots(1, len(arms), figsize=(3.4 * len(arms), 3.2),
                             sharex=True, sharey=True, squeeze=False)
    for panel, arm in enumerate(arms):
        ax = axes[0][panel]
        arm_runs = by_arm[arm]
        ax.set_title(arm_label(arm), fontsize=10)

        deploy_hack_zero = False
        for key, color, ls in series:
            have = [r for r in arm_runs if key in r]
            if not have:
                continue
            smoothed = [_ema(r[key]) for r in have]
            n = min(len(s) for s in smoothed)  # common prefix across ragged seeds
            mean = np.nanmean(np.stack([s[:n] for s in smoothed]), axis=0)
            ax.plot(have[0]["steps"][:n], mean, color=color, ls=ls, lw=1.8,
                    solid_capstyle="round")
            if key == "hk_dep" and np.nanmax(mean) < 0.02:
                deploy_hack_zero = True

        if deploy_hack_zero:
            # The route headline: solid-red pinned at 0. The label sits in the empty
            # band above the flat line (at y=0 the line would run through the text).
            ax.annotate(r"deploy hack $\approx 0$", (0.04, 0.12),
                        xycoords="axes fraction", color=red, fontsize=8, va="bottom")

        # teacher-off curriculum: shade the teacher-ON region so "seeded here,
        # on-policy after" stays visible in the C4 bootstrap variant (jobs 93/94).
        toffs = {t for r in arm_runs if (t := r.get("teacher_off"))}
        if toffs:
            toff = max(toffs)
            ax.axvspan(0, toff, color="0.85", alpha=0.5, zorder=0)
            ax.axvline(toff, color="0.55", lw=0.8, ls=(0, (4, 3)), zorder=1)
            ax.annotate("teacher off", (toff, 1.0), color="0.4", fontsize=7,
                        xytext=(2, -2), textcoords="offset points", va="top")

        ax.set_ylim(-0.035, 1.0)
        ax.set_xlabel("optimizer step")
        ax.spines[["top", "right"]].set_visible(False)
        ax.tick_params(labelsize=8)

    axes[0][0].set_ylabel("rate")
    # two-axis legend: colour = metric, linestyle = train vs deploy
    legend_spec = (
        (red, "-", "hack"),
        (green, "-", "solve"),
        ("0.3", TRAIN_LS, "train (adapter on)"),
        ("0.3", DEPLOY_LS, "deploy (adapter off)"),
    )
    handles = [Line2D([], [], color=c, lw=1.8, ls=style, label=text)
               for c, style, text in legend_spec]
    axes[0][-1].legend(handles=handles, fontsize=7, frameon=False, loc="upper left")

    if SHOW_TITLE:
        fig.suptitle("Train (adapter on) vs deploy (adapter off): vanilla bakes the "
                     "hack into the weights, route holds it in the deletable adapter",
                     fontsize=10)
        fig.tight_layout(rect=(0, 0, 1, 0.93))
    else:
        fig.tight_layout()
    save_fig(fig, out)
    logger.info(f"wrote {out}")


# --- cli -------------------------------------------------------------------

def _gather(paths: list[str]) -> list[Path]:
    """Expand CLI path arguments into a flat list of log files.

    Each argument is handled in order:
      * a directory contributes its `*.log` files, sorted;
      * a string containing a glob character (`*`, `?`, `[`) is expanded, sorted --
        relative patterns against the current directory, absolute patterns against
        their own root;
      * anything else is kept as a literal path (existence is not checked here).
    """
    out: list[Path] = []
    for p in paths:
        pp = Path(p)
        if pp.is_dir():
            out += sorted(pp.glob("*.log"))
        elif any(c in p for c in "*?["):
            if pp.is_absolute():
                # Path.glob rejects non-relative patterns (NotImplementedError), so
                # glob from the pattern's anchor with the anchor stripped off.
                root = Path(pp.anchor)
                out += sorted(root.glob(str(pp.relative_to(root))))
            else:
                out += sorted(Path().glob(p))
        else:
            out.append(pp)
    return out


def _latest_per_arm(files: list[Path], min_steps: int) -> list[Path]:
    """Keep one log per arm: the last one, in sorted path order, with >= min_steps rows.

    Lets `just dyn` auto-pick the freshest full-length run for each arm instead of
    hand-globbing. Ordering comes from sorting the paths themselves, so with
    timestamp-prefixed names in one directory the newest file wins -- no mtime
    races. Logs that fail to parse or are too short are ignored.
    """
    newest: dict[str, Path] = {}
    for path in sorted(files):  # ascending; a later path replaces an earlier one
        run = parse_log(path)
        if run is not None and len(run["steps"]) >= min_steps:
            newest[classify(run)] = path
    return list(newest.values())


def main() -> None:
    """CLI entry point: parse arguments, load runs (from logs or a CSV), render figures.

    Sets the module-level SHOW_TITLE from --title. With --from-csv the runs come
    from a dump_data CSV and the log arguments are not used. Otherwise the given
    files/globs/dirs are gathered, optionally reduced to the newest log per arm,
    parsed, and filtered by --min-steps.

    Raises:
        SystemExit: if no log yields a usable run.
    """
    global SHOW_TITLE
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("logs", nargs="*", help="log files, globs, or dirs (omit with --from-csv)")
    parser.add_argument("--out", type=Path, default=Path("out/figs/dynamics.png"))
    parser.add_argument("--latest-per-arm", action="store_true",
                        help="keep only the newest log per arm (with >= --min-steps rows)")
    parser.add_argument("--min-steps", type=int, default=0,
                        help="drop runs shorter than this many logged steps")
    parser.add_argument("--title", action="store_true",
                        help="draw the suptitle (off by default: the paper/blog caption carries it)")
    parser.add_argument("--from-csv", type=Path, default=None,
                        help="re-render from a committed dump_data CSV instead of parsing logs")
    args = parser.parse_args()
    SHOW_TITLE = args.title

    if args.from_csv:
        runs = load_csv(args.from_csv)
        logger.info(f"loaded {len(runs)} runs from {args.from_csv} (CSV re-render, no logs)")
        _render_all(runs, args.out)
        return

    files = _gather(args.logs)
    if args.latest_per_arm:
        files = _latest_per_arm(files, args.min_steps)

    runs = []
    for f in files:
        run = parse_log(f)
        if run and len(run["steps"]) >= args.min_steps:
            runs.append(run)
    if not runs:
        raise SystemExit(f"no parseable runs in {len(files)} files")

    for r in runs:  # one summary line per run that made it into the figures
        logger.info(f"{classify(r):16s} seed={r['seed']} steps={len(r['steps'])} {r['vhack']}")
    args.out.parent.mkdir(parents=True, exist_ok=True)
    _render_all(runs, args.out)


def _render_all(runs: list[dict], out: Path) -> None:
    """Render the three dynamics figures; shared by the log-parse and --from-csv paths.

    `out` names the small-multiples figure; the overlay and train-vs-deploy figures
    are written next to it as `<stem>_hack_overlay.png` and `<stem>_train_deploy.png`.
    All three are rendered first, then each path is passed to link_latest.
    """
    out.parent.mkdir(parents=True, exist_ok=True)
    stem = out.stem
    jobs = (
        (plot, out),                                                         # small-multiples + CSV dump
        (plot_hack_overlay, out.with_name(stem + "_hack_overlay.png")),      # arm-vs-arm headline overlay
        (plot_train_vs_deploy, out.with_name(stem + "_train_deploy.png")),   # train(on) vs deploy(off)
    )
    for render, target in jobs:
        render(runs, target)
    # NOTE(review): plot_train_vs_deploy may skip without saving anything, yet its
    # path is still linked below -- confirm link_latest copes with a missing/stale file.
    for _, target in jobs:
        logger.info(f"docs/figs latest -> {link_latest(target)}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()