plot: single 'just plot' entrypoint emits per-mode + aggregate (reuse plot_dynamics)

- plot_substrate.main now also calls plot_dynamics.plot/plot_hack_overlay so one command produces all 4 figs (by_method, by_hack, aggregate, hack_overlay); the aggregate 'total hacks per arm' core plot is kept, not reimplemented.
- plot_dynamics: point the parser at the CURRENT streaming headers (cin_t/cin_s, hk_dep/slv_dep); it was built for the old cos_pre_t/hack_deploy spelling and silently failed on sub4 logs. No backward-compat for the superseded headers.
- justfile: 'plot GLOB STEM' canonical entrypoint over logs/*_sub4_*.log.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-27 19:47:33 +08:00 · 2026-05-31 04:37:31 +00:00
parent 83235b6cfe
commit 07acadb43f
3 changed files with 212 additions and 73 deletions
@@ -51,7 +51,9 @@ from projected_grpo.figs import link_latest

 # Series we plot, by cleaned header name. frac "7/28" -> 0.25; float "+0.264".
 RATE_COLS = {"hack_s": "hack", "gt_s": "solve"}
-COS_COLS = {"cos_pre_t": "teacher", "cos_pre_s": "student"}
+# Current streaming-table display headers (StepLogger _Col.header): the live-grad
+# v_hack alignment prints as cin_t/cin_s, the route deploy-eval as hk_dep/slv_dep.
+COS_COLS = {"cin_t": "teacher", "cin_s": "student"}
 _HDR_TOK = re.compile(r"[A-Za-z_]+")  # strip ↑↓? decorations: "hack_s?" -> "hack_s"


@@ -96,7 +98,7 @@ def parse_log(path: Path) -> dict | None:
    # Also parse the route DEPLOY-eval columns when present (non-route logs lack
    # them -> skip). For routing we plot THESE (deployed model = quarantine deleted),
    # not the training-time hack_s.
-    deploy = {"hack_deploy", "solve_deploy"} & set(idx)
+    deploy = {"hk_dep", "slv_dep"} & set(idx)
    wanted = {**RATE_COLS, **COS_COLS, **{c: c for c in deploy}}
    for line in txt.splitlines():
        if "| INFO |" not in line:
@@ -115,9 +117,9 @@ def parse_log(path: Path) -> dict | None:
    # forward still hacks); routing's benefit only shows on the DEPLOYED model
    # (quarantine knob deleted). So for routing, plot the deploy series under the
    # hack_s/gt_s keys -> all downstream (panels, onset, overlay) reads it.
-    if arm == "routing" and "hack_deploy" in run:
-        run["hack_s"] = run["hack_deploy"]
-        run["gt_s"] = run["solve_deploy"]
+    if arm == "routing" and "hk_dep" in run:
+        run["hack_s"] = run["hk_dep"]
+        run["gt_s"] = run["slv_dep"]
    return run


@@ -137,7 +139,7 @@ ARM_ORDER = ["vanilla", "static erasure", "online erasure", "routing"]
 # must not share a palette (hack != teacher-cos). Row 0: red hack vs green
 # solve. Row 1: blue teacher-cos vs amber student-cos.
 RATE_COLORS = {"hack_s": "#c1432b", "gt_s": "#2f7d4f"}
-COS_COLORS = {"cos_pre_t": "#33508c", "cos_pre_s": "#c98a2b"}
+COS_COLORS = {"cin_t": "#33508c", "cin_s": "#c98a2b"}
 # Arm colours for the single-panel hack overlay (arms, not series): grey vanilla
 # baseline -> amber static -> blue online, ordered by increasing intervention.
 # TODO(color): make this a quality-ordered red->green ramp instead of fixed