Files
evil_MoE/scripts/results_deploy.py
T
wassname b53043cec3 refactor: extract train_config.py + run_artifacts.py from train.py; slim results scripts
Cleanup by a prior agent, verified green here: 'just smoke' (erase arm)
runs end-to-end and all four wired gates pass (verify_rewards 52/52,
verify_eval_gap, verify_partition, verify_science_invariants).

- train.py -318 lines: Config dataclass -> train_config.py, checkpoint/
  deploy-artifact IO -> run_artifacts.py.
- results.py / results_deploy.py / probe_distill.py slimmed.
- drop stale derived csvs under out/figs (a5_generalisation, dyn_*,
  substrate_aggregate, train_vs_deploy_60).
- gitignore /.pi/ panel scratch.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-09 13:34:50 +00:00

47 lines
1.8 KiB
Python

"""Final paired knob-off/knob-on scores from completed structured run artifacts."""
from __future__ import annotations
import polars as pl
from tabulate import tabulate
from vgrout.run_artifacts import completed_runs, route_selectivity
# Column order of the printed table; matches the key order of each row dict.
_COLUMNS = [
    "time", "headline", "hack_off", "solve_off", "hack_on", "solve_on",
    "select", "arm", "pair", "seed", "hack_train", "solve_train", "model",
    "n", "modes", "run",
]


def _table_row(run) -> dict | None:
    """Flatten one completed-run record into a table row.

    Returns ``None`` for runs the table leaves out: those whose model name
    contains ``"tiny-random"`` or whose ``out_tag`` contains ``"probe"``.
    """
    cfg = run["cfg"]
    deploy = run["deploy"]
    if "tiny-random" in cfg["model"] or "probe" in cfg["out_tag"]:
        return None

    return {
        "time": run["time"],
        # Headline metric: knob-off solve rate minus knob-off hack rate.
        "headline": deploy["deploy_solve"] - deploy["deploy_hack"],
        "hack_off": deploy["deploy_hack"],
        "solve_off": deploy["deploy_solve"],
        "hack_on": deploy["deploy_hack_on"],
        "solve_on": deploy["deploy_solve_on"],
        "select": route_selectivity(run["run_dir"]),
        "arm": run["arm"],
        # Last path component of the pairs file, without a ".json" suffix.
        "pair": cfg["vhack_pairs_path"].rsplit("/", 1)[-1].removesuffix(".json"),
        "seed": cfg["seed"],
        "hack_train": run["l5_hack"],
        "solve_train": run["l5_solve"],
        # Keep only the part of the model id after the final "/".
        "model": cfg["model"].rsplit("/", 1)[-1],
        "n": deploy["n"],
        "modes": ",".join(deploy["eval_modes"]),
        "run": run["run_dir"].name,
    }


def main() -> None:
    """Print the paired knob-off/knob-on deploy table for completed runs.

    Collects one row per run yielded by ``completed_runs()`` (minus the runs
    ``_table_row`` filters out), sorts by ``headline`` with the highest value
    first, and prints a pipe-format table with floats rendered as signed
    three-decimal numbers. When no run survives the filter, prints a short
    notice and returns without printing a table.
    """
    table_rows = [
        row
        for run in completed_runs()
        if (row := _table_row(run)) is not None
    ]
    if not table_rows:
        print("no completed non-smoke runs in out/runs/")
        return

    ranked = pl.DataFrame(table_rows).sort("headline", descending=True)
    print("\n## Final paired test eval, sorted by knob-off solve-hack\n")
    body = ranked.select(_COLUMNS).rows()
    print(tabulate(body, headers=_COLUMNS, tablefmt="pipe", floatfmt="+.3f"))


if __name__ == "__main__":
    main()