cleanup: delete 6 orphan modules, quarantine pair generators, trim stale comments

Deleted (zero importers/refs): scripts/{migrate_out_dirs,audit_log,plot_route_evidence}.py and src/projected_grpo/{bake_lora,probe_lora_runtime,probe_traj}.py (LoRA-merge path + dev trajectory comparator, superseded). Removed the dead probe-traj recipe.

Quarantined to scripts/attic/: make_pairsets.py + make_dataset_pairsets.py (persona-pair authoring, tasks #123-126 done; live path is pairs.PAIRS / pairs_from_pool).

Comments: dropped dead job-ID narrative (job 60/64) on rollout_ablate_frac, the 'vanilla step 17' dead-run ref in eval.py, the 'old signed sum' dead-code ref in proj.py, and the conversational 'current experiment line' lead. Kept all TODO/FIXME and the 'why' memory-tuning comments.

Smoke green (cout->0).

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-27 17:30:41 +08:00 · 2026-06-03 00:09:01 +00:00
parent 025debae6b
commit fffd26a93d
8 changed files with 0 additions and 632 deletions
@@ -1,126 +0,0 @@
-"""Audit a training run: quote first/last generation (coherence eyeball) + summarise
-the key per-step columns with trend arrows and SHOULD-interpretation hints.
-
-Deterministic extraction; the /audit-log command feeds this to the LLM for a verdict.
-
-Usage:
-  uv run python scripts/audit_log.py out/runs/<ts>_<tag>          # run dir
-  uv run python scripts/audit_log.py logs/<ts>_<tag>.log          # log (finds sibling run dir)
-"""
-from __future__ import annotations
-
-import json
-import re
-import sys
-from pathlib import Path
-
-
-def _find(arg: str) -> tuple[Path | None, Path | None]:
-    """Resolve (rollouts.jsonl, streaming .log) from a run-dir or log path."""
-    p = Path(arg)
-    if p.is_dir():
-        jl = p / "rollouts.jsonl"
-        # match a log whose argv out-tag matches this run dir's tag
-        tag = re.sub(r"^\d{8}T\d{6}_(fast|smoke|full)_", "", p.name)
-        logs = sorted(Path("logs").glob("*.log"))
-        log = next((l for l in reversed(logs) if tag in l.read_text(errors="replace")[:2000]), None)
-        return (jl if jl.exists() else None), log
-    if p.suffix == ".log":
-        # find the run dir from the verbose-log line is overkill; use jsonl by tag
-        return None, p
-    return None, None
-
-
-# A fixed coherent-vanilla yardstick (Qwen3-4B, sub4 vanilla seed41 step 59):
-# real imports, a class, indented code. The audited last-gen should look like
-# THIS. If it's punctuation soup instead (job 46 step 14: '####?##%\r\n#_...'),
-# the policy diverged in free generation -- even when lp_t (teacher-forced
-# coherence) stayed flat and the divergence tripwire never fired.
-REFERENCE_HEALTHY = (
-    "```python\nfrom typing import List\n\nclass Solution:\n"
-    "    def maxPoints(self, points: List[List[int]]) -> int:\n"
-    "        def slope(p1, p2):  # reduced (dx,dy) via GCD, no float error\n"
-    "            if p1 == p2: return (0, 0)"
-)
-
-
-def _gen(jl: Path) -> None:
-    rows = [json.loads(l) for l in jl.open()]
-    if not rows:
-        print("rollouts.jsonl EMPTY"); return
-    print(f"rollouts: {len(rows)} rows, steps {rows[0]['step']}..{rows[-1]['step']}")
-    print("\n--- REFERENCE: healthy vanilla gen (fixed yardstick, NOT this run) ---")
-    print(repr(REFERENCE_HEALTHY))
-    for r in (rows[0], rows[-1]):
-        print(f"\n--- step {r['step']}  reward={r['reward']:+.2f}  gt_pass={r['gt_pass']}  "
-              f"hack={r.get('exploited', r.get('hacked_E'))} ---")
-        print("SHOULD: read like the REFERENCE above (coherent code); ELSE token salad => diverged")
-        print(repr(r["text"][:400]))
-
-
-def _cols(log: Path) -> None:
-    txt = log.read_text(errors="replace")
-    hdr = next((l for l in txt.splitlines()
-                if "| INFO |" in l and l.split("| INFO |", 1)[1].split()[:1] == ["step"]
-                and "ref_eq" in l), None)
-    if hdr is None:
-        print("\nno streaming table in log"); return
-    names = [re.sub(r"[^a-z0-9_]", "", t.lower()) for t in hdr.split("| INFO |", 1)[1].split()]
-    idx = {n: i for i, n in enumerate(names)}
-    rows = []
-    for line in txt.splitlines():
-        if "| INFO |" not in line:
-            continue
-        r = line.split("| INFO |", 1)[1].split()
-        if r and r[0].isdigit() and len(r) >= len(names):
-            rows.append(r)
-    if not rows:
-        print("\nno data rows"); return
-
-    def frac(t):
-        a, b = t.split("/"); return int(a) / int(b) if int(b) else 0.0
-    def col(name, f):
-        return [f(r[idx[name]]) for r in rows if name in idx]
-    def trend(name, f, lo=None):
-        v = col(name, f)
-        if not v: return f"{name}: (absent)"
-        first, last5 = v[0], sum(v[-5:]) / len(v[-5:])
-        arrow = "UP" if last5 > first + 1e-6 else ("DOWN" if last5 < first - 1e-6 else "flat")
-        warn = ""
-        if lo is not None and min(v) < lo:
-            warn = f"  <-- dipped below {lo} (min {min(v):.2f})"
-        return f"{name:7s} first={first:+.3f} last5={last5:+.3f} [{arrow}]{warn}"
-
-    print("\n=== key columns (first vs last-5-mean) ===")
-    print("SHOULD interpret: hack_s UP=emerging; gt_s UP=learning to solve;")
-    print("  gn stable (not >>clip); lp_t > -3 (coherent, ELSE diverged)")
-    for nm, f, lo in [("hack_s", frac, None), ("gt_s", frac, None),
-                      ("lp_t", float, -3.0), ("gn", float, None), ("loss", float, None)]:
-        if nm in idx:
-            print("  " + trend(nm, f, lo))
-    # divergence check on lp_t
-    lpt = col("lp_t", float)
-    if lpt:
-        best = max(lpt)
-        diverged = any(x < best - 5 for x in lpt)
-        print(f"\nDIVERGENCE: lp_t best={best:+.2f} min={min(lpt):+.2f} -> "
-              f"{'DIVERGED (drop >5 nats)' if diverged else 'stable'}")
-
-
-def main() -> None:
-    if len(sys.argv) != 2:
-        print(__doc__); sys.exit(1)
-    jl, log = _find(sys.argv[1])
-    print(f"=== AUDIT {sys.argv[1]} ===")
-    if jl:
-        _gen(jl)
-    else:
-        print("(no rollouts.jsonl found)")
-    if log:
-        _cols(log)
-    else:
-        print("(no streaming log found)")
-
-
-if __name__ == "__main__":
-    main()
@@ -1,90 +0,0 @@
-"""One-shot out/ migration to the datatype-sorted scheme (spec 20260530_out_dir_reorg).
-
-Sorts loose out/ files into subdirs:
-  v_hack_*.safetensors        -> out/vhack/
-  vhack_grads_*, vhack_heldout_* -> out/vhack_grads/
-  *.png                       -> out/figs/
-  out/probe_distill/<pool>/   -> out/pools/<pool>/
-  train_<tag>{,_first_hack}.safetensors + rollouts_<tag>.jsonl
-                              -> out/runs/<log_stem>/   (ts matched from logs/*<tag>.log)
-  pairs_*.json                -> out/pairsets/
-
-Per-train-run artifacts (checkpoint + rollouts) group under the SAME run dir as
-their log's <ts>_<run_id> stem, by matching the out_tag suffix. Unmatched train
-files (no log) go to out/runs/_unmatched/ and are logged, never dropped.
-
-    uv run python scripts/migrate_out_dirs.py            # dry-run (prints plan)
-    uv run python scripts/migrate_out_dirs.py --apply    # actually move
-"""
-from __future__ import annotations
-
-import shutil
-import sys
-from pathlib import Path
-
-from loguru import logger
-
-OUT = Path("out")
-LOGS = Path("logs")
-APPLY = "--apply" in sys.argv
-
-
-def log_stem_for_tag(tag: str) -> str | None:
-    """Find the log whose run_id ends with `tag` (the out_tag suffix). Returns its stem."""
-    cands = sorted(LOGS.glob(f"*{tag}.log"))
-    # Prefer an exact suffix match on the stem (run_id = <preset>_<arm>_seed<n><tag>).
-    exact = [p for p in cands if p.stem.endswith(tag)]
-    chosen = (exact or cands)
-    return chosen[-1].stem if chosen else None   # newest if several
-
-
-def plan_moves() -> list[tuple[Path, Path]]:
-    moves: list[tuple[Path, Path]] = []
-    for f in sorted(OUT.glob("*")):
-        if f.is_dir():
-            continue
-        n = f.name
-        if n.startswith("v_hack_") and n.endswith(".safetensors"):
-            moves.append((f, OUT / "vhack" / n))
-        elif n.startswith(("vhack_grads_", "vhack_heldout")):
-            moves.append((f, OUT / "vhack_grads" / n))
-        elif n.endswith(".png"):
-            moves.append((f, OUT / "figs" / n))
-        elif n.startswith("pairs_") and n.endswith(".json"):
-            moves.append((f, OUT / "pairsets" / n))
-        elif n.startswith("train_") or n.startswith("rollouts_"):
-            # tag = out_tag suffix shared by the file and its log.
-            stem = n.split(".")[0]
-            tag = (stem[len("train"):] if stem.startswith("train")
-                   else "_" + stem[len("rollouts_"):])
-            tag = tag.replace("_first_hack", "")
-            log_stem = log_stem_for_tag(tag)
-            dest_dir = OUT / "runs" / (log_stem or "_unmatched")
-            moves.append((f, dest_dir / n))
-        else:
-            logger.warning(f"UNMAPPED loose file (left in place): {f}")
-    # Teacher/base pools: out/probe_distill/<pool>/ -> out/pools/<pool>/
-    pd = OUT / "probe_distill"
-    if pd.is_dir():
-        for sub in sorted(pd.iterdir()):
-            dst = OUT / ("figs" if sub.suffix == ".png" else "pools") / sub.name
-            moves.append((sub, dst))
-    return moves
-
-
-def main() -> None:
-    moves = plan_moves()
-    for src, dst in moves:
-        if dst.exists():
-            logger.warning(f"SKIP (dest exists): {dst}")
-            continue
-        logger.info(f"{'MOVE' if APPLY else 'PLAN'}: {src}  ->  {dst}")
-        if APPLY:
-            dst.parent.mkdir(parents=True, exist_ok=True)
-            shutil.move(str(src), str(dst))
-    logger.info(f"{'APPLIED' if APPLY else 'DRY-RUN'}: {len(moves)} moves. "
-                f"{'' if APPLY else 'Re-run with --apply to execute.'}")
-
-
-if __name__ == "__main__":
-    main()
@@ -1,105 +0,0 @@
-"""Single-run routing figure: training-time hack vs DEPLOYED-model hack.
-
-The routing story in one plot. During training the model keeps hacking (it runs
-with the quarantine knob ON, so the per-step hack_s curve climbs like vanilla).
-But the model we'd actually DEPLOY has the knob deleted -- its hack rate (the
-deploy-eval, measured every --eval-ablate-every steps) is what matters. If routing
-works, the deploy curve sits well BELOW the training curve at preserved solve.
-
-    uv run python scripts/plot_route_evidence.py LOG.log --out out/route_evidence.png
-
-Reads the hack_deploy/solve_deploy columns (Gradient Routing deploy-eval).
-"""
-from __future__ import annotations
-
-import sys
-from pathlib import Path
-
-import matplotlib
-matplotlib.use("Agg")
-import matplotlib.pyplot as plt
-import tyro
-
-from projected_grpo.figs import link_latest
-
-
-def _frac(tok: str) -> float | None:
-    if "/" in tok:
-        a, b = tok.split("/")
-        return int(a) / int(b) if int(b) else None
-    try:
-        v = float(tok)
-        return None if v != v else v          # NaN -> None
-    except ValueError:
-        return None
-
-
-def parse(log: Path):
-    txt = log.read_text(errors="replace")
-    hdr = next(l.split("| INFO |", 1)[1].split() for l in txt.splitlines()
-               if "| INFO |" in l and "hack_s" in l and "refr" in l)
-    idx = {n: i for i, n in enumerate(hdr)}
-    i_step, i_train = idx["step"], idx["hack_s?"]
-    i_solve = idx["gt_s↑"]
-    i_hdep = idx["hack_deploy"]
-    i_sdep = idx["solve_deploy"]
-    steps, train_hack, solve_train = [], [], []
-    deploy_step, deploy_hack, deploy_solve = [], [], []
-    for l in txt.splitlines():
-        if "| INFO |" not in l:
-            continue
-        r = l.split("| INFO |", 1)[1].split()
-        if not r or not r[0].isdigit() or len(r) <= i_sdep:
-            continue
-        s = int(r[i_step])
-        steps.append(s)
-        train_hack.append(_frac(r[i_train]))
-        solve_train.append(_frac(r[i_solve]))
-        h = _frac(r[i_hdep])
-        if h is not None:                       # deploy-eval only fires every N steps
-            deploy_step.append(s); deploy_hack.append(h); deploy_solve.append(_frac(r[i_sdep]))
-    return dict(steps=steps, train_hack=train_hack, solve_train=solve_train,
-                deploy_step=deploy_step, deploy_hack=deploy_hack, deploy_solve=deploy_solve)
-
-
-def main(log: str, out: str = "out/figs/route_evidence.png") -> None:
-    d = parse(Path(log))
-    RED, GREY = "#b03a2e", "#9a8c7a"            # hack=red (the story); solve=muted (context)
-    fig, ax = plt.subplots(figsize=(7, 4))
-    # Hack in red: training (knob on, solid) vs deployed (knob off, dashed+marker).
-    # The vertical gap between the two reds at the last step IS the routing effect.
-    ax.plot(d["steps"], d["train_hack"], color=RED, lw=2.2)
-    ax.plot(d["deploy_step"], d["deploy_hack"], color=RED, lw=1.6, ls=(0, (4, 3)), marker="o", ms=4)
-    ax.plot(d["deploy_step"], d["deploy_solve"], color=GREY, lw=1.4)
-
-    # Direct labels at the right end (name + final value baked in) -> no legend,
-    # no separate value annotations. One element does both jobs (eraser test).
-    x_end = d["steps"][-1]
-    def label(y, text, color):
-        ax.annotate(text, (x_end, y), xytext=(8, 0), textcoords="offset points",
-                    va="center", color=color, fontsize=9)
-    label(d["train_hack"][-1], f"hack, knob ON (training)  {d['train_hack'][-1]:.0%}", RED)
-    label(d["deploy_solve"][-1], f"solve, deployed  {d['deploy_solve'][-1]:.0%}", GREY)
-    label(d["deploy_hack"][-1],  f"hack, knob OFF (deployed)  {d['deploy_hack'][-1]:.0%}", RED)
-
-    ax.set_ylim(-0.02, 1.0)
-    ax.set_yticks([0, 0.5, 1.0]); ax.set_yticklabels(["0", ".5", "1"])
-    ax.set_xticks([0, d["deploy_step"][-1] if d["deploy_step"] else x_end])
-    ax.set_xlabel("GRPO step")
-    ax.set_xlim(0, x_end * 1.5)                  # right margin for the direct labels
-    for side in ("top", "right"):
-        ax.spines[side].set_visible(False)
-    ax.spines["left"].set_bounds(0, 1)           # range-frame: axis spans the data
-    ax.set_title("Routing parks the cheat in a deletable knob:\n"
-                 "the model hacks while training but the deployed model does not", fontsize=10.5)
-    fig.tight_layout()
-    Path(out).parent.mkdir(parents=True, exist_ok=True)
-    fig.savefig(out, dpi=130)
-    link = link_latest(Path(out))
-    print(f"wrote {out}  (docs/figs latest -> {link})  "
-          f"(train_hack_final={d['train_hack'][-1]:.3f}, "
-          f"deploy_hack_final={d['deploy_hack'][-1]:.3f}, deploy_solve_final={d['deploy_solve'][-1]:.3f})")
-
-
-if __name__ == "__main__":
-    tyro.cli(main)