cleanup: delete 6 orphan modules, quarantine pair generators, trim stale comments

Deleted (zero importers/refs): scripts/{migrate_out_dirs,audit_log,plot_route_evidence}.py
and src/projected_grpo/{bake_lora,probe_lora_runtime,probe_traj}.py (LoRA-merge path
+ dev trajectory comparator, superseded). Removed the dead probe-traj recipe.

Quarantined to scripts/attic/: make_pairsets.py + make_dataset_pairsets.py (persona-pair
authoring, tasks #123-126 done; live path is pairs.PAIRS / pairs_from_pool).

Comments: dropped dead job-ID narrative (job 60/64) on rollout_ablate_frac, the
'vanilla step 17' dead-run ref in eval.py, the 'old signed sum' dead-code ref in
proj.py, and the conversational 'current experiment line' lead. Kept all TODO/FIXME
and the 'why' memory-tuning comments. Smoke green (cout->0).

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
wassname
2026-06-03 00:09:01 +00:00
parent 025debae6b
commit fffd26a93d
8 changed files with 0 additions and 632 deletions
-126
View File
@@ -1,126 +0,0 @@
"""Audit a training run: quote first/last generation (coherence eyeball) + summarise
the key per-step columns with trend arrows and SHOULD-interpretation hints.
Deterministic extraction; the /audit-log command feeds this to the LLM for a verdict.
Usage:
uv run python scripts/audit_log.py out/runs/<ts>_<tag> # run dir
uv run python scripts/audit_log.py logs/<ts>_<tag>.log # log only (column summary; rollouts are not looked up)
"""
from __future__ import annotations
import json
import re
import sys
from pathlib import Path
def _find(arg: str) -> tuple[Path | None, Path | None]:
    """Resolve (rollouts.jsonl, streaming .log) from a run-dir or log path.

    Args:
        arg: Path to a run directory, or a path whose suffix is ``.log``.

    Returns:
        A ``(jsonl, log)`` pair; either element is None when it cannot be
        resolved.

        * Run directory: ``jsonl`` is ``<dir>/rollouts.jsonl`` if that file
          exists. ``log`` is the last file (by sorted name) under ``logs/``
          whose first 2000 characters contain the run tag, i.e. the directory
          name with its ``<YYYYMMDD>T<HHMMSS>_(fast|smoke|full)_`` prefix
          removed.
        * ``.log`` path: returned as given; no rollouts file is looked up.
        * Anything else: ``(None, None)``.
    """
    p = Path(arg)
    if p.is_dir():
        jl = p / "rollouts.jsonl"
        # Match a log whose argv out-tag matches this run dir's tag.
        tag = re.sub(r"^\d{8}T\d{6}_(fast|smoke|full)_", "", p.name)
        log = None
        for cand in sorted(Path("logs").glob("*.log"), reverse=True):
            # Only the head of the log is inspected, so read just that much
            # instead of loading a potentially large training log in full.
            with cand.open(errors="replace") as fh:
                head = fh.read(2000)
            if tag in head:
                log = cand
                break
        return (jl if jl.exists() else None), log
    if p.suffix == ".log":
        # Finding the run dir from the verbose-log line is overkill here.
        return None, p
    return None, None
# A fixed coherent-vanilla yardstick (Qwen3-4B, sub4 vanilla seed41 step 59):
# real imports, a class, indented code. The audited last-gen should look like
# THIS. If it's punctuation soup instead (job 46 step 14: '####?##%\r\n#_...'),
# the policy diverged in free generation -- even when lp_t (teacher-forced
# coherence) stayed flat and the divergence tripwire never fired.
# This is a truncated excerpt used only for visual comparison: it is printed
# via repr() next to the audited generations and is never parsed or executed.
REFERENCE_HEALTHY = (
    "```python\nfrom typing import List\n\nclass Solution:\n"
    " def maxPoints(self, points: List[List[int]]) -> int:\n"
    " def slope(p1, p2): # reduced (dx,dy) via GCD, no float error\n"
    " if p1 == p2: return (0, 0)"
)
def _gen(jl: Path) -> None:
    """Print the first and last rollout of a run next to the healthy reference.

    Args:
        jl: Path to a JSON-lines file, one rollout object per line. Each
            printed row must provide ``step``, ``reward``, ``gt_pass`` and
            ``text``; the hack flag is read from ``exploited``, falling back
            to ``hacked_E``.

    Blank lines (including a trailing newline-only line) are ignored. When the
    file holds a single rollout it is printed once rather than twice.
    """
    # Close the handle deterministically and tolerate blank lines, which
    # json.loads would otherwise reject.
    with jl.open() as fh:
        rows = [json.loads(line) for line in fh if line.strip()]
    if not rows:
        print("rollouts.jsonl EMPTY")
        return
    print(f"rollouts: {len(rows)} rows, steps {rows[0]['step']}..{rows[-1]['step']}")
    print("\n--- REFERENCE: healthy vanilla gen (fixed yardstick, NOT this run) ---")
    print(repr(REFERENCE_HEALTHY))
    # First and last generation are the coherence eyeball; with one row they
    # are the same object, so show it only once.
    samples = (rows[0],) if len(rows) == 1 else (rows[0], rows[-1])
    for r in samples:
        print(f"\n--- step {r['step']} reward={r['reward']:+.2f} gt_pass={r['gt_pass']} "
              f"hack={r.get('exploited', r.get('hacked_E'))} ---")
        print("SHOULD: read like the REFERENCE above (coherent code); ELSE token salad => diverged")
        print(repr(r["text"][:400]))
def _cols(log: Path) -> None:
    """Summarise key per-step columns of the streaming table found in a log.

    The table header is the first line containing ``| INFO |`` whose first
    cell after that marker is ``step`` and which also contains ``ref_eq``.
    Header cells are lower-cased and stripped to ``[a-z0-9_]`` to form column
    names. Data rows are ``| INFO |`` lines whose first cell is all digits and
    which have at least as many cells as the header.

    For each of ``hack_s``, ``gt_s``, ``lp_t``, ``gn`` and ``loss`` that is
    present, prints the first value, the mean of the last five values and a
    trend arrow. Finally prints a divergence verdict for ``lp_t``.

    Args:
        log: Path to the streaming log file.
    """
    marker = "| INFO |"
    lines = log.read_text(errors="replace").splitlines()

    def cells(line):
        # Whitespace-separated cells following the first INFO marker.
        return line.split(marker, 1)[1].split()

    hdr = next((ln for ln in lines
                if marker in ln and cells(ln)[:1] == ["step"] and "ref_eq" in ln), None)
    if hdr is None:
        print("\nno streaming table in log")
        return
    names = [re.sub(r"[^a-z0-9_]", "", t.lower()) for t in cells(hdr)]
    idx = {n: i for i, n in enumerate(names)}
    rows = []
    for ln in lines:
        if marker not in ln:
            continue
        r = cells(ln)
        if r and r[0].isdigit() and len(r) >= len(names):
            rows.append(r)
    if not rows:
        print("\nno data rows")
        return

    def frac(t):
        # "a/b" -> a/b as float; a zero denominator counts as 0.0.
        a, b = t.split("/")
        return int(a) / int(b) if int(b) else 0.0

    def col(name, f):
        # Column presence does not depend on the row, so test it once.
        if name not in idx:
            return []
        i = idx[name]
        return [f(r[i]) for r in rows]

    def trend(name, f, lo=None):
        v = col(name, f)
        if not v:
            return f"{name}: (absent)"
        first, last5 = v[0], sum(v[-5:]) / len(v[-5:])
        arrow = "UP" if last5 > first + 1e-6 else ("DOWN" if last5 < first - 1e-6 else "flat")
        warn = ""
        if lo is not None and min(v) < lo:
            warn = f" <-- dipped below {lo} (min {min(v):.2f})"
        return f"{name:7s} first={first:+.3f} last5={last5:+.3f} [{arrow}]{warn}"

    print("\n=== key columns (first vs last-5-mean) ===")
    print("SHOULD interpret: hack_s UP=emerging; gt_s UP=learning to solve;")
    print(" gn stable (not >>clip); lp_t > -3 (coherent, ELSE diverged)")
    for nm, f, lo in [("hack_s", frac, None), ("gt_s", frac, None),
                      ("lp_t", float, -3.0), ("gn", float, None), ("loss", float, None)]:
        if nm in idx:
            print(" " + trend(nm, f, lo))

    # Divergence check on lp_t. A "drop" is measured against the best value
    # seen SO FAR: comparing against the global maximum would also flag a run
    # that merely started low and improved by more than 5 nats.
    lpt = col("lp_t", float)
    if lpt:
        best = max(lpt)
        running_best = lpt[0]
        diverged = False
        for x in lpt:
            running_best = max(running_best, x)
            if x < running_best - 5:
                diverged = True
                break
        print(f"\nDIVERGENCE: lp_t best={best:+.2f} min={min(lpt):+.2f} -> "
              f"{'DIVERGED (drop >5 nats)' if diverged else 'stable'}")
def main() -> None:
    """CLI entry point: audit the run dir or log named by the sole argument.

    Prints the module usage text and exits with status 1 unless exactly one
    command-line argument is given. Otherwise resolves the rollouts file and
    the streaming log, then prints the generation excerpt and the column
    summary for whichever of the two was found.
    """
    argv = sys.argv
    if len(argv) != 2:
        print(__doc__)
        sys.exit(1)

    target = argv[1]
    jl, log = _find(target)
    print(f"=== AUDIT {target} ===")

    if not jl:
        print("(no rollouts.jsonl found)")
    else:
        _gen(jl)

    if not log:
        print("(no streaming log found)")
    else:
        _cols(log)


if __name__ == "__main__":
    main()
-90
View File
@@ -1,90 +0,0 @@
"""One-shot out/ migration to the datatype-sorted scheme (spec 20260530_out_dir_reorg).
Sorts loose out/ files into subdirs:
v_hack_*.safetensors -> out/vhack/
vhack_grads_*, vhack_heldout_* -> out/vhack_grads/
*.png -> out/figs/
out/probe_distill/<pool>/ -> out/pools/<pool>/
train_<tag>{,_first_hack}.safetensors + rollouts_<tag>.jsonl
-> out/runs/<log_stem>/ (ts matched from logs/*<tag>.log)
pairs_*.json -> out/pairsets/
Per-train-run artifacts (checkpoint + rollouts) group under the SAME run dir as
their log's <ts>_<run_id> stem, by matching the out_tag suffix. Unmatched train
files (no log) go to out/runs/_unmatched/ and are logged, never dropped.
uv run python scripts/migrate_out_dirs.py # dry-run (prints plan)
uv run python scripts/migrate_out_dirs.py --apply # actually move
"""
from __future__ import annotations
import shutil
import sys
from pathlib import Path
from loguru import logger
# Root of the artifact tree being reorganised (relative to the working dir).
OUT = Path("out")
# Directory searched for run logs when matching a train/rollouts file to a run.
LOGS = Path("logs")
# Dry-run unless --apply appears anywhere on the command line.
APPLY = "--apply" in sys.argv
def log_stem_for_tag(tag: str) -> str | None:
    """Find the log whose run_id ends with `tag` (the out_tag suffix).

    Args:
        tag: Suffix shared by an artifact file name and its log file name.
            It is matched literally; glob metacharacters in it are escaped.

    Returns:
        The stem of the matching ``LOGS/*<tag>.log`` file, or None when there
        is no match. With several matches the last one in sorted-name order
        is returned.
    """
    import glob  # local import: only escape() is needed, and only here

    # Escape the tag so that characters such as '[' or '?' are not treated as
    # wildcards and silently match a different run's log.
    cands = sorted(LOGS.glob(f"*{glob.escape(tag)}.log"))
    # Prefer an exact suffix match on the stem (run_id = <preset>_<arm>_seed<n><tag>).
    exact = [p for p in cands if p.stem.endswith(tag)]
    chosen = exact or cands
    return chosen[-1].stem if chosen else None  # newest if several
def plan_moves() -> list[tuple[Path, Path]]:
    """Build the list of ``(source, destination)`` moves without touching disk.

    Loose files directly under ``OUT`` are routed by name:

    * ``v_hack_*.safetensors``                 -> ``OUT/vhack/``
    * ``vhack_grads_*`` / ``vhack_heldout*``   -> ``OUT/vhack_grads/``
    * ``*.png``                                -> ``OUT/figs/``
    * ``pairs_*.json``                         -> ``OUT/pairsets/``
    * ``train_*`` / ``rollouts_*``             -> ``OUT/runs/<log stem>/``, or
      ``OUT/runs/_unmatched/`` when no log matches the file's tag

    Any other loose file is left in place and reported with a warning.
    Entries of ``OUT/probe_distill/`` are routed to ``OUT/figs/`` when their
    suffix is ``.png`` and to ``OUT/pools/`` otherwise. Directories directly
    under ``OUT`` are skipped by the first pass.

    Returns:
        The planned moves, in sorted source order per pass.
    """
    moves: list[tuple[Path, Path]] = []
    for f in sorted(OUT.glob("*")):
        if f.is_dir():
            continue
        n = f.name
        if n.startswith("v_hack_") and n.endswith(".safetensors"):
            moves.append((f, OUT / "vhack" / n))
        elif n.startswith(("vhack_grads_", "vhack_heldout")):
            moves.append((f, OUT / "vhack_grads" / n))
        elif n.endswith(".png"):
            moves.append((f, OUT / "figs" / n))
        elif n.startswith("pairs_") and n.endswith(".json"):
            moves.append((f, OUT / "pairsets" / n))
        elif n.startswith(("train_", "rollouts_")):
            # tag = out_tag suffix shared by the file and its log.
            # Strip only the final extension: splitting on the first "."
            # would truncate tags that themselves contain a dot (e.g. "lr0.5").
            stem = f.stem
            if stem.startswith("train"):
                tag = stem[len("train"):]
            else:
                tag = "_" + stem[len("rollouts_"):]
            tag = tag.replace("_first_hack", "")
            log_stem = log_stem_for_tag(tag)
            dest_dir = OUT / "runs" / (log_stem or "_unmatched")
            moves.append((f, dest_dir / n))
        else:
            logger.warning(f"UNMAPPED loose file (left in place): {f}")
    # Teacher/base pools: out/probe_distill/<pool>/ -> out/pools/<pool>/
    pd = OUT / "probe_distill"
    if pd.is_dir():
        for sub in sorted(pd.iterdir()):
            dst = OUT / ("figs" if sub.suffix == ".png" else "pools") / sub.name
            moves.append((sub, dst))
    return moves
def main() -> None:
    """Log every planned move and, when ``APPLY`` is set, perform it.

    A move whose destination already exists is skipped with a warning and is
    never overwritten. The closing summary reports how many moves were
    performed (or would be, in a dry run) and how many were skipped, rather
    than the raw size of the plan.
    """
    moves = plan_moves()
    done = skipped = 0
    for src, dst in moves:
        if dst.exists():
            logger.warning(f"SKIP (dest exists): {dst}")
            skipped += 1
            continue
        logger.info(f"{'MOVE' if APPLY else 'PLAN'}: {src} -> {dst}")
        if APPLY:
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(src), str(dst))
        done += 1
    logger.info(f"{'APPLIED' if APPLY else 'DRY-RUN'}: {done} moves ({skipped} skipped). "
                f"{'' if APPLY else 'Re-run with --apply to execute.'}")


if __name__ == "__main__":
    main()
-105
View File
@@ -1,105 +0,0 @@
"""Single-run routing figure: training-time hack vs DEPLOYED-model hack.
The routing story in one plot. During training the model keeps hacking (it runs
with the quarantine knob ON, so the per-step hack_s curve climbs like vanilla).
But the model we'd actually DEPLOY has the knob deleted -- its hack rate (the
deploy-eval, measured every --eval-ablate-every steps) is what matters. If routing
works, the deploy curve sits well BELOW the training curve at preserved solve.
uv run python scripts/plot_route_evidence.py LOG.log --out out/route_evidence.png
Reads the hack_deploy/solve_deploy columns (Gradient Routing deploy-eval).
"""
from __future__ import annotations
import sys
from pathlib import Path
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import tyro
from projected_grpo.figs import link_latest
def _frac(tok: str) -> float | None:
    """Parse one table cell into a float, or None when it carries no value.

    Args:
        tok: Either an ``a/b`` integer fraction or a plain float literal.

    Returns:
        ``a / b`` for a fraction (None when ``b`` is zero), the float value
        for a plain number (None for NaN), and None for anything that cannot
        be parsed -- including malformed fractions such as ``n/a`` or
        ``1/2/3``.
    """
    try:
        if "/" in tok:
            num, den = tok.split("/")  # ValueError unless exactly two parts
            den_i = int(den)
            return int(num) / den_i if den_i else None
        v = float(tok)
    except ValueError:
        return None
    return None if v != v else v  # NaN -> None
def parse(log: Path) -> dict[str, list]:
    """Extract training and deploy-eval series from a streaming log table.

    The header is the first ``| INFO |`` line containing both ``hack_s`` and
    ``refr``; it must provide the columns ``step``, ``hack_s?``, ``gt_s↑``,
    ``hack_deploy`` and ``solve_deploy``. Data rows are ``| INFO |`` lines
    whose first cell is all digits and which are wide enough to hold every
    one of those columns.

    Args:
        log: Path to the log file.

    Returns:
        A dict with ``steps``, ``train_hack`` and ``solve_train`` (one entry
        per data row) and ``deploy_step``, ``deploy_hack`` and
        ``deploy_solve`` (one entry per row whose ``hack_deploy`` cell parses
        to a value).

    Raises:
        ValueError: If no table header is found in the log.
        KeyError: If the header lacks one of the required columns.
    """
    marker = "| INFO |"
    lines = log.read_text(errors="replace").splitlines()
    hdr = next((l.split(marker, 1)[1].split() for l in lines
                if marker in l and "hack_s" in l and "refr" in l), None)
    if hdr is None:
        # A bare StopIteration from next() would say nothing about the cause.
        raise ValueError(
            f"no streaming-table header ('| INFO |' line with 'hack_s' and 'refr') in {log}")
    idx = {n: i for i, n in enumerate(hdr)}
    i_step, i_train = idx["step"], idx["hack_s?"]
    i_solve = idx["gt_s↑"]
    i_hdep = idx["hack_deploy"]
    i_sdep = idx["solve_deploy"]
    # Guard on the largest index actually read, not just solve_deploy, so the
    # column order in the header cannot cause an IndexError on short rows.
    widest = max(i_step, i_train, i_solve, i_hdep, i_sdep)
    steps, train_hack, solve_train = [], [], []
    deploy_step, deploy_hack, deploy_solve = [], [], []
    for l in lines:
        if marker not in l:
            continue
        r = l.split(marker, 1)[1].split()
        if not r or not r[0].isdigit() or len(r) <= widest:
            continue
        s = int(r[i_step])
        steps.append(s)
        train_hack.append(_frac(r[i_train]))
        solve_train.append(_frac(r[i_solve]))
        h = _frac(r[i_hdep])
        if h is not None:  # deploy-eval only fires every N steps
            deploy_step.append(s)
            deploy_hack.append(h)
            deploy_solve.append(_frac(r[i_sdep]))
    return dict(steps=steps, train_hack=train_hack, solve_train=solve_train,
                deploy_step=deploy_step, deploy_hack=deploy_hack, deploy_solve=deploy_solve)
def main(log: str, out: str = "out/figs/route_evidence.png") -> None:
    """Plot training-time hack rate against deploy-eval hack and solve rates.

    Args:
        log: Path to the training log to parse.
        out: Destination image path; parent directories are created.

    Raises:
        ValueError: If the log contains no data rows at all.

    A series with no usable final value (for example when the deploy-eval
    never fired) is drawn without its end label and reported as ``n/a`` in the
    printed summary, instead of aborting the whole figure.
    """
    d = parse(Path(log))
    if not d["steps"]:
        raise ValueError(f"no data rows parsed from {log}")

    def last(series):
        # Final value of a series, or None when the series is empty.
        return series[-1] if series else None

    train_last = last(d["train_hack"])
    dhack_last = last(d["deploy_hack"])
    dsolve_last = last(d["deploy_solve"])

    RED, GREY = "#b03a2e", "#9a8c7a"  # hack=red (the story); solve=muted (context)
    fig, ax = plt.subplots(figsize=(7, 4))
    # Hack in red: training (knob on, solid) vs deployed (knob off, dashed+marker).
    # The vertical gap between the two reds at the last step IS the routing effect.
    ax.plot(d["steps"], d["train_hack"], color=RED, lw=2.2)
    ax.plot(d["deploy_step"], d["deploy_hack"], color=RED, lw=1.6, ls=(0, (4, 3)), marker="o", ms=4)
    ax.plot(d["deploy_step"], d["deploy_solve"], color=GREY, lw=1.4)
    # Direct labels at the right end (name + final value baked in) -> no legend,
    # no separate value annotations. One element does both jobs (eraser test).
    x_end = d["steps"][-1]

    def label(y, text, color):
        ax.annotate(text, (x_end, y), xytext=(8, 0), textcoords="offset points",
                    va="center", color=color, fontsize=9)

    # Label text is only built when there is a value to format with ":.0%".
    if train_last is not None:
        label(train_last, f"hack, knob ON (training) {train_last:.0%}", RED)
    if dsolve_last is not None:
        label(dsolve_last, f"solve, deployed {dsolve_last:.0%}", GREY)
    if dhack_last is not None:
        label(dhack_last, f"hack, knob OFF (deployed) {dhack_last:.0%}", RED)
    ax.set_ylim(-0.02, 1.0)
    ax.set_yticks([0, 0.5, 1.0])
    ax.set_yticklabels(["0", ".5", "1"])
    ax.set_xticks([0, d["deploy_step"][-1] if d["deploy_step"] else x_end])
    ax.set_xlabel("GRPO step")
    # Right margin for the direct labels; max() avoids a zero-width axis when
    # the only logged step is 0.
    ax.set_xlim(0, max(x_end, 1) * 1.5)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.spines["left"].set_bounds(0, 1)  # range-frame: axis spans the data
    ax.set_title("Routing parks the cheat in a deletable knob:\n"
                 "the model hacks while training but the deployed model does not", fontsize=10.5)
    fig.tight_layout()
    Path(out).parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(out, dpi=130)
    link = link_latest(Path(out))

    def f3(v):
        return "n/a" if v is None else f"{v:.3f}"

    print(f"wrote {out} (docs/figs latest -> {link}) "
          f"(train_hack_final={f3(train_last)}, "
          f"deploy_hack_final={f3(dhack_last)}, deploy_solve_final={f3(dsolve_last)})")


if __name__ == "__main__":
    tyro.cli(main)