mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-28 01:15:10 +08:00
4621488cc0
Code writes+reads the new scheme; migrate_out_dirs.py moved 225 loose artifacts (0 left at top level). Per-run checkpoints+rollouts now group under runs/<ts>_<run_id>/ as train.safetensors/rollouts.jsonl. Figures land in out/figs/ with a stable docs/figs/<name>.png symlink (figs.link_latest). justfile also gains run-cell REFRESH param (online-erasure arm). Smoke + smoke-vanilla + results all green on new paths. Requeue manifest preserves the why/resolve labels that pueue reset wiped. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
91 lines
3.5 KiB
Python
91 lines
3.5 KiB
Python
"""One-shot out/ migration to the datatype-sorted scheme (spec 20260530_out_dir_reorg).
|
|
|
|
Sorts loose out/ files into subdirs:
  v_hack_*.safetensors            -> out/vhack/
  vhack_grads_*, vhack_heldout_*  -> out/vhack_grads/
  *.png                           -> out/figs/
  out/probe_distill/<pool>/       -> out/pools/<pool>/
  train_<tag>{,_first_hack}.safetensors + rollouts_<tag>.jsonl
                                  -> out/runs/<log_stem>/ (ts matched from logs/*<tag>.log)
  pairs_*.json                    -> out/pairsets/

Per-train-run artifacts (checkpoint + rollouts) group under the SAME run dir as
their log's <ts>_<run_id> stem, by matching the out_tag suffix. Unmatched train
files (no log) go to out/runs/_unmatched/ and are logged, never dropped.

    uv run python scripts/migrate_out_dirs.py           # dry-run (prints plan)
    uv run python scripts/migrate_out_dirs.py --apply   # actually move
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import shutil
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from loguru import logger
|
|
|
|
# Root of the artifact tree being reorganised; relative, so it is resolved
# against the directory the script is launched from.
OUT: Path = Path("out")

# Directory scanned for run logs when matching train/rollout files to a run.
LOGS: Path = Path("logs")

# Dry-run unless --apply appears anywhere on the command line.
APPLY: bool = "--apply" in sys.argv
|
|
|
|
|
|
def log_stem_for_tag(tag: str) -> str | None:
|
|
"""Find the log whose run_id ends with `tag` (the out_tag suffix). Returns its stem."""
|
|
cands = sorted(LOGS.glob(f"*{tag}.log"))
|
|
# Prefer an exact suffix match on the stem (run_id = <preset>_<arm>_seed<n><tag>).
|
|
exact = [p for p in cands if p.stem.endswith(tag)]
|
|
chosen = (exact or cands)
|
|
return chosen[-1].stem if chosen else None # newest if several
|
|
|
|
|
|
def _run_tag(name: str) -> str:
    """Return the out_tag suffix encoded in a ``train_*`` / ``rollouts_*`` file name."""
    # Strip only the final extension so a tag that itself contains dots
    # (e.g. "_lr0.5") is kept whole.
    stem = name.rsplit(".", 1)[0]
    prefix = "train" if stem.startswith("train") else "rollouts"
    tag = stem[len(prefix):]  # keeps the leading "_" of the tag
    marker = "_first_hack"
    if tag.endswith(marker):
        # The first-hack checkpoint belongs to the same run as the main one;
        # drop the marker only where it is a trailing suffix.
        tag = tag[: -len(marker)]
    return tag


def plan_moves() -> list[tuple[Path, Path]]:
    """Build the list of ``(source, destination)`` moves for the reorganisation.

    Loose files directly under ``OUT`` are routed by name:

    * ``v_hack_*.safetensors``              -> ``OUT/vhack/``
    * ``vhack_grads_*`` / ``vhack_heldout*`` -> ``OUT/vhack_grads/``
    * ``*.png``                             -> ``OUT/figs/``
    * ``pairs_*.json``                      -> ``OUT/pairsets/``
    * ``train_*`` / ``rollouts_*``          -> ``OUT/runs/<log_stem>/``, where
      ``<log_stem>`` comes from ``log_stem_for_tag``; files with no matching
      log go to ``OUT/runs/_unmatched/``.

    Any other loose file is left in place and reported with a warning.
    Directories directly under ``OUT`` are ignored, except that every entry of
    ``OUT/probe_distill`` is planned into ``OUT/pools/`` (or ``OUT/figs/`` for
    ``.png`` entries).

    Returns:
        The planned moves, loose files first (sorted by path) followed by the
        ``probe_distill`` entries (sorted by path). Nothing is moved here.
    """
    moves: list[tuple[Path, Path]] = []
    for f in sorted(OUT.glob("*")):
        if f.is_dir():
            continue
        n = f.name
        if n.startswith("v_hack_") and n.endswith(".safetensors"):
            dest_dir = OUT / "vhack"
        elif n.startswith(("vhack_grads_", "vhack_heldout")):
            dest_dir = OUT / "vhack_grads"
        elif n.endswith(".png"):
            dest_dir = OUT / "figs"
        elif n.startswith("pairs_") and n.endswith(".json"):
            dest_dir = OUT / "pairsets"
        elif n.startswith(("train_", "rollouts_")):
            # Group the checkpoint and rollouts under the run dir named after
            # the log that shares their out_tag suffix.
            log_stem = log_stem_for_tag(_run_tag(n))
            dest_dir = OUT / "runs" / (log_stem or "_unmatched")
        else:
            logger.warning(f"UNMAPPED loose file (left in place): {f}")
            continue
        moves.append((f, dest_dir / n))

    # Teacher/base pools: out/probe_distill/<pool>/ -> out/pools/<pool>/
    pd = OUT / "probe_distill"
    if pd.is_dir():
        for sub in sorted(pd.iterdir()):
            bucket = "figs" if sub.suffix == ".png" else "pools"
            moves.append((sub, OUT / bucket / sub.name))
    return moves
|
|
|
|
|
|
def main() -> None:
    """Log every planned move and, when ``APPLY`` is set, carry it out.

    A move is skipped (with a warning) when its destination already exists or
    when an earlier entry in the same plan already targets that destination.
    Tracking planned destinations makes the dry-run output match what an
    ``--apply`` run would do, since in apply mode the earlier move creates the
    destination and the later one is skipped anyway.

    The closing summary reports how many moves were planned/applied and how
    many were skipped, rather than the raw size of the plan.
    """
    done = 0
    skipped = 0
    claimed: set[Path] = set()
    for src, dst in plan_moves():
        if dst.exists():
            logger.warning(f"SKIP (dest exists): {dst}")
            skipped += 1
            continue
        if dst in claimed:
            logger.warning(f"SKIP (dest already planned): {dst}")
            skipped += 1
            continue
        claimed.add(dst)
        logger.info(f"{'MOVE' if APPLY else 'PLAN'}: {src} -> {dst}")
        if APPLY:
            # Destination subdirs are created lazily, only for real moves.
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.move(str(src), str(dst))
        done += 1

    verb = "APPLIED" if APPLY else "DRY-RUN"
    hint = "" if APPLY else " Re-run with --apply to execute."
    logger.info(f"{verb}: {done} moves, {skipped} skipped.{hint}")


if __name__ == "__main__":
    main()
|