# evil_MoE/src/vgrout/tablelog.py

"""Per-step training-table rendering and run logging.

Two concerns, both pure presentation (no model, no RNG): set up the token-efficient
loguru sinks for a run, and render the per-step metrics table. The renderer is the
single source of truth for column order, width, header, and number format; the
training loop hands it a row dict of raw values and gets back a formatted line.
"""
from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

from loguru import logger
from tqdm import tqdm

LOGS_DIR = Path("logs")


def setup_logging(run_id: str) -> Path:
    """Install the loguru sinks for one run and return the verbose log path.

    Console sink: INFO and above, rendered as a 1-char level icon plus the
    message, emitted through ``tqdm.write``. File sink: DEBUG and above, with
    time and level name, written to a timestamped file under ``LOGS_DIR``.
    All previously registered loguru sinks are removed first.

    See /root/.claude/skills/token-efficient-logging/SKILL.md.
    """
    LOGS_DIR.mkdir(exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    verbose_log = LOGS_DIR / f"{stamp}_{run_id}.log"

    def _console_sink(msg) -> None:
        # end="" keeps tqdm from appending a second line ending to the message.
        tqdm.write(msg, end="")

    logger.remove()
    logger.add(
        _console_sink,
        colorize=True,
        format="<level>{level.icon}</level> {message}",
        level="INFO",
    )
    logger.add(
        verbose_log,
        format="{time:HH:mm:ss} | {level} | {message}",
        level="DEBUG",
    )

    # One-character icons consumed by the console format above.
    for level_name, icon in (("INFO", "I"), ("WARNING", "W"),
                             ("ERROR", "E"), ("DEBUG", "D")):
        logger.level(level_name, icon=icon)
    return verbose_log


@dataclass(frozen=True)
class _Col:
    """Declarative column definition for the streamed step table."""
    key: str
    width: int
    header: str
    fmt: str | None = None
    desc: str = ""        # one-line decode for the legend; "" => omitted from legend


def _format_cell(value, fmt: str | None) -> str:
    """Format one cell. NaN renders as 'nan' regardless of spec."""
    if value is None:
        return "nan"
    if fmt == "frac":
        n, d = value
        return f"{n}/{d}"
    if fmt is None:
        return str(value)
    if isinstance(value, float) and value != value:  # NaN
        return "nan"
    return format(value, fmt)


class StepLogger:
    """Render raw per-step metrics using one canonical column definition.

    The column list built in ``__init__`` fixes order, width, header text and
    number format. ``header``, ``row`` and ``legend`` all iterate over that same
    list, so the three renderings cannot drift apart.
    """

    def __init__(self, arm: str, modes: list[str], mode_code: dict[str, str],
                 show_ablate: bool = False) -> None:
        """Build the column layout for one run.

        Args:
            arm: Accepted for caller compatibility; not consulted here, since the
                layout is intentionally the same for every arm.
            modes: Environment modes of the run. Per-mode hack columns are added
                only when more than one mode is given.
            mode_code: Maps each mode name to the short code used in its
                ``hk_<code>`` column key and header.
            show_ablate: When true, append the per-step ablated-slice columns.

        Raises:
            KeyError: If a mode of a multi-mode run has no entry in ``mode_code``.
        """
        cols: list[_Col] = [
            _Col("step",   4, "step",    "d",    "GRPO step"),
            _Col("ref_eq", 6, "ref_eq",  ".2f",  "vanilla-equiv step (cum_gens/256)"),
            _Col("rew",    6, "rew",     "+.2f", "mean combined reward"),
            _Col("rew_s",  6, "rew_s↑",  "+.2f", "student mean reward"),
            _Col("gt_s",   6, "gt_s↑",   "frac", "student ground-truth passes"),
            _Col("gt_t",   6, "gt_t",    "frac", "teacher ground-truth passes (sanity)"),
            _Col("hack_s", 7, "hack_s?", "frac", "student hack-flagged rollouts (the headline)"),
            _Col("hack_t", 7, "hack_t",  "frac", "teacher hack-flagged rollouts (sanity: pool hacks)"),
            # Held-out deployed evaluation with quarantine ablated; NaN between evaluation steps.
            _Col("hack_deployed",  7, "hk_dep",  "+.2f", "DEPLOY-eval hack (routeA/absorb: quarantine OFF; vanilla: trained model); held-out subset, T=0.7, every eval_ablate_every steps; nan between"),
            _Col("solve_deployed", 7, "slv_dep", "+.2f", "DEPLOY-eval solve (same cadence; nan between)"),
        ]
        # Multi-mode runs show current-step hacks per environment; single-mode would duplicate hack_s.
        self._modes = modes if len(modes) > 1 else []
        for m in self._modes:
            cols.append(_Col(f"hk_{mode_code[m]}", 5, f"hk_{mode_code[m]}", "d",
                             f"student hacks of {m} THIS step (current batch, not cumulative)"))
        cols += [
            _Col("lp_s", 6, "lp_s↓", "+.2f", "mean student gen_logp (diagnostic)"),
            _Col("lp_t", 6, "lp_t↑", "+.2f", "mean teacher gen_logp; off-policy gap = lp_s-lp_t"),
            _Col("loss", 7, "loss",  "+.2f", "mean GRPO loss"),
            _Col("gn",   7, "gn",    ".1e",  "pre-clip L2 norm of A/B grads (vs grad_clip)"),
            _Col("lr",   7, "lr",    ".1e",  "scheduled learning rate"),
        ]
        # Gate/routing diagnostics are appended unconditionally (no arm check) so the
        # column layout is identical across arms and the tables line up; arms whose
        # gate never runs are expected to supply nan for these cells.
        cols += [
            _Col("auroc",  6, "auroc",  ".2f", "AUROC of dot(act, v_act) vs hack labels on the A>0 contrast (positively-reinforced rollouts, where the reward alone is blind); measurement only, never routes. ~0.5 = chance-level separation; high AUROC but rout~0 = threshold problem; a drop at refresh = reduced separation"),
            _Col("cos",    6, "cos",    "+.2f", "mean per-rollout cos(act, v_act) (dot-vs-cos diagnostic)"),
            _Col("qmass",  6, "qmass",  ".2f", "quarantine energy share ||g_quar||/(||g_keep||+||g_quar||): fraction of update energy assigned to quarantine"),
            _Col("keep",   6, "keep",   ".2f", "rollout share below t_lo -> deployed-only, quarantine off"),
            _Col("resid",  6, "resid",  ".2f", "rollout share between thresholds (and ALL rollouts during warmup) -> both blocks train; absorption is possible but not measured"),
            _Col("rout",   6, "rout",   ".2f", "rollout share at/above t_hi -> quarantine-only, deployed detached"),
            _Col("tlo",    6, "tlo",    "+.2f", "Otsu lower threshold (z units of the rolling score buffer); nan during warmup"),
            _Col("thi",    6, "thi",    "+.2f", "Otsu upper (rout) threshold (z units); nan during warmup"),
            _Col("stale",  5, "stale",  "d",    "steps since v_act was last re-extracted (0 = refreshed this step, every vhack_refresh_every); placebo/vanilla never refresh so it grows unbounded"),
        ]
        # Show the training-prompt deploy proxy only when an ablated slice exists.
        if show_ablate:
            cols += [
                _Col("hack_abl",  6, "hk_abl",  "frac", "per-step deploy proxy: hack rate on the ablated (deploy-mode) rollout slice; train prompts, noisier than hk_dep"),
                _Col("solve_abl", 6, "slv_abl", "frac", "per-step deploy proxy: solve rate on the ablated (deploy-mode) rollout slice; train prompts"),
            ]
        # A header longer than its declared width would stick out past the
        # right-aligned cells beneath it (the 7-char "slv_abl" in a 6-wide column,
        # or an hk_<code> header with a long mode code). Widen such columns so the
        # header and its cells share the same right edge.
        self._cols = [
            c if len(c.header) <= c.width
            else _Col(c.key, len(c.header), c.header, c.fmt, c.desc)
            for c in cols
        ]

    def header(self) -> str:
        """Return the header line: each column header right-aligned to its width."""
        return "  ".join(f"{c.header:>{c.width}}" for c in self._cols)

    def row(self, cells: dict) -> str:
        """Return one table line for a dict of raw values keyed by column key.

        Raises:
            KeyError: If ``cells`` lacks an entry for any column of this run.
        """
        return "  ".join(
            f"{_format_cell(cells[c.key], c.fmt):>{c.width}}" for c in self._cols
        )

    def legend(self) -> str:
        """Decode the (arm-/mode-conditional) columns actually present this run."""
        lines = "\n".join(f"    {c.header:>8} = {c.desc}" for c in self._cols if c.desc)
        return ("table columns (timing gen/fb/t_rew/sec dropped from streaming, kept "
                "in the end-of-run dump):\n" + lines)