diff --git a/justfile b/justfile
index e7deee8..fceb756 100644
--- a/justfile
+++ b/justfile
@@ -181,6 +181,11 @@ plot-deploy GLOB='out/runs/*sub4*/per_mode_deploy.json' OUT='out/figs/deploy_ove
 regen-dynamics GLOB='logs/*_cell_*.log':
     uv run python scripts/plot_dynamics.py {{ GLOB }} --out out/figs/dynamics.png
 
+# Auto dynamics plot: newest full-length (>=MIN steps) log PER ARM, no hand-globbing.
+# Run after any sweep finishes -> always plots the freshest 60-step run of each arm.
+dyn MIN='60' OUT='out/figs/dyn_sub4.png':
+    uv run python scripts/plot_dynamics.py logs/ --latest-per-arm --min-steps {{ MIN }} --out {{ OUT }}
+
 # Phase-1 emergence overlay: one line per env_mode (hack=exploited, solve=gt_correct).
 regen-emergence GLOB='logs/*_emerge_*.log':
     uv run python scripts/plot_emergence.py {{ GLOB }} --out out/figs/emergence.png
diff --git a/scripts/plot_dynamics.py b/scripts/plot_dynamics.py
index 4c2329f..673b3d5 100644
--- a/scripts/plot_dynamics.py
+++ b/scripts/plot_dynamics.py
@@ -87,10 +87,13 @@ def parse_log(path: Path) -> dict | None:
     vhack = grab(r"v-hack-path=out/(?:vhack/)?(\S+?)\.safetensors", argv, "-")
 
     # header line: the one containing both "step" and "hack_s"
-    hdr = next((l for l in txt.splitlines() if "ref_eq" in l and "hack_s" in l), None)
+    hdr = next((l for l in txt.splitlines()
+                if "| INFO |" in l and "ref_eq" in l and "hack_s" in l), None)
     if hdr is None:
         return None
-    names = [_HDR_TOK.match(t).group(0) for t in hdr.split("| INFO |", 1)[1].split()]
+    # real column headers always start with a letter/underscore; drop pure-symbol
+    # tokens (decoration) so a stray glyph in an old log's header doesn't crash parse
+    names = [m.group(0) for t in hdr.split("| INFO |", 1)[1].split() if (m := _HDR_TOK.match(t))]
     idx = {n: i for i, n in enumerate(names)}
 
     series: dict[str, list[float]] = defaultdict(list)
@@ -330,13 +333,35 @@ def _gather(paths: list[str]) -> list[Path]:
     return out
 
 
+def _latest_per_arm(files: list[Path], min_steps: int) -> list[Path]:
+    """One log per arm: the most recent (by filename timestamp) with >= min_steps
+    rows. Lets `just dyn` auto-pick the freshest full-length run for each arm
+    instead of hand-globbing. Newest filename wins -- timestamp-prefixed names
+    sort lexicographically, no mtime races. Unparseable logs are skipped."""
+    by_arm: dict[str, Path] = {}
+    # Order by basename, not full path: when logs come from several dirs the
+    # directory part would otherwise outrank the timestamp prefix.
+    for f in sorted(files, key=lambda p: p.name):  # later overwrites -> keeps newest
+        r = parse_log(f)
+        if r is None or len(r["steps"]) < min_steps:
+            continue
+        by_arm[classify(r)] = f
+    return list(by_arm.values())
+
+
 def main() -> None:
     ap = argparse.ArgumentParser(description=__doc__)
     ap.add_argument("logs", nargs="+", help="log files, globs, or dirs")
     ap.add_argument("--out", type=Path, default=Path("out/figs/dynamics.png"))
+    ap.add_argument("--latest-per-arm", action="store_true",
+                    help="keep only the newest log per arm (with >= --min-steps rows)")
+    ap.add_argument("--min-steps", type=int, default=0,
+                    help="drop runs shorter than this many logged steps")
     args = ap.parse_args()
     files = _gather(args.logs)
-    runs = [r for f in files if (r := parse_log(f))]
+    if args.latest_per_arm:
+        files = _latest_per_arm(files, args.min_steps)
+    runs = [r for f in files if (r := parse_log(f)) and len(r["steps"]) >= args.min_steps]
     if not runs:
         raise SystemExit(f"no parseable runs in {len(files)} files")
     for r in runs: