"""One-shot out/ migration to the datatype-sorted scheme (spec 20260530_out_dir_reorg). Sorts loose out/ files into subdirs: v_hack_*.safetensors -> out/vhack/ vhack_grads_*, vhack_heldout_* -> out/vhack_grads/ *.png -> out/figs/ out/probe_distill// -> out/pools// train_{,_first_hack}.safetensors + rollouts_.jsonl -> out/runs// (ts matched from logs/*.log) pairs_*.json -> out/pairsets/ Per-train-run artifacts (checkpoint + rollouts) group under the SAME run dir as their log's _ stem, by matching the out_tag suffix. Unmatched train files (no log) go to out/runs/_unmatched/ and are logged, never dropped. uv run python scripts/migrate_out_dirs.py # dry-run (prints plan) uv run python scripts/migrate_out_dirs.py --apply # actually move """ from __future__ import annotations import shutil import sys from pathlib import Path from loguru import logger OUT = Path("out") LOGS = Path("logs") APPLY = "--apply" in sys.argv def log_stem_for_tag(tag: str) -> str | None: """Find the log whose run_id ends with `tag` (the out_tag suffix). Returns its stem.""" cands = sorted(LOGS.glob(f"*{tag}.log")) # Prefer an exact suffix match on the stem (run_id = __seed). exact = [p for p in cands if p.stem.endswith(tag)] chosen = (exact or cands) return chosen[-1].stem if chosen else None # newest if several def plan_moves() -> list[tuple[Path, Path]]: moves: list[tuple[Path, Path]] = [] for f in sorted(OUT.glob("*")): if f.is_dir(): continue n = f.name if n.startswith("v_hack_") and n.endswith(".safetensors"): moves.append((f, OUT / "vhack" / n)) elif n.startswith(("vhack_grads_", "vhack_heldout")): moves.append((f, OUT / "vhack_grads" / n)) elif n.endswith(".png"): moves.append((f, OUT / "figs" / n)) elif n.startswith("pairs_") and n.endswith(".json"): moves.append((f, OUT / "pairsets" / n)) elif n.startswith("train_") or n.startswith("rollouts_"): # tag = out_tag suffix shared by the file and its log. stem = n.split(".")[0] tag = (stem[len("train"):] if stem.startswith("train") else "_" + stem[len("rollouts_"):]) tag = tag.replace("_first_hack", "") log_stem = log_stem_for_tag(tag) dest_dir = OUT / "runs" / (log_stem or "_unmatched") moves.append((f, dest_dir / n)) else: logger.warning(f"UNMAPPED loose file (left in place): {f}") # Teacher/base pools: out/probe_distill// -> out/pools// pd = OUT / "probe_distill" if pd.is_dir(): for sub in sorted(pd.iterdir()): dst = OUT / ("figs" if sub.suffix == ".png" else "pools") / sub.name moves.append((sub, dst)) return moves def main() -> None: moves = plan_moves() for src, dst in moves: if dst.exists(): logger.warning(f"SKIP (dest exists): {dst}") continue logger.info(f"{'MOVE' if APPLY else 'PLAN'}: {src} -> {dst}") if APPLY: dst.parent.mkdir(parents=True, exist_ok=True) shutil.move(str(src), str(dst)) logger.info(f"{'APPLIED' if APPLY else 'DRY-RUN'}: {len(moves)} moves. " f"{'' if APPLY else 'Re-run with --apply to execute.'}") if __name__ == "__main__": main()