mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 17:30:41 +08:00
b53043cec3
Cleanup by a prior agent, verified green here: 'just smoke' (erase arm) runs end-to-end and all four wired gates pass (verify_rewards 52/52, verify_eval_gap, verify_partition, verify_science_invariants). - train.py -318 lines: Config dataclass -> train_config.py, checkpoint/ deploy-artifact IO -> run_artifacts.py. - results.py / results_deploy.py / probe_distill.py slimmed. - drop stale derived csvs under out/figs (a5_generalisation, dyn_*, substrate_aggregate, train_vs_deploy_60). - gitignore /.pi/ panel scratch. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
47 lines
1.8 KiB
Python
47 lines
1.8 KiB
Python
"""Final paired knob-off/knob-on scores from completed structured run artifacts."""
|
|
from __future__ import annotations
|
|
|
|
import polars as pl
|
|
from tabulate import tabulate
|
|
|
|
from vgrout.run_artifacts import completed_runs, route_selectivity
|
|
|
|
|
|
def main() -> None:
    """Print a ranked markdown table of paired knob-off/knob-on deploy scores.

    Walks every run yielded by ``completed_runs()`` and skips any whose model
    name contains ``"tiny-random"`` or whose ``out_tag`` contains ``"probe"``.
    Each remaining run contributes one row built from its config, its deploy
    scores, its ``l5_hack``/``l5_solve`` values and
    ``route_selectivity(run_dir)``. Rows are ordered by ``headline``
    (``deploy_solve - deploy_hack``), highest first, and written to stdout as
    a pipe-format table. If no run survives the filter, a one-line notice is
    printed instead and nothing else happens.
    """
    # Display order of the table; every name matches a key of the row dicts.
    columns = [
        "time", "headline", "hack_off", "solve_off", "hack_on", "solve_on",
        "select", "arm", "pair", "seed", "hack_train", "solve_train", "model",
        "n", "modes", "run",
    ]

    records = []
    for entry in completed_runs():
        config = entry["cfg"]
        scores = entry["deploy"]

        # Leave out tiny-random models and probe-tagged runs.
        if "tiny-random" in config["model"] or "probe" in config["out_tag"]:
            continue

        record = {
            "time": entry["time"],
            # Headline metric: solve rate minus hack rate from the plain
            # (un-suffixed) deploy scores.
            "headline": scores["deploy_solve"] - scores["deploy_hack"],
            "hack_off": scores["deploy_hack"],
            "solve_off": scores["deploy_solve"],
            "hack_on": scores["deploy_hack_on"],
            "solve_on": scores["deploy_solve_on"],
            "select": route_selectivity(entry["run_dir"]),
            "arm": entry["arm"],
            # Last path component of the pairs file, minus a ".json" suffix.
            "pair": config["vhack_pairs_path"].rsplit("/", 1)[-1].removesuffix(".json"),
            "seed": config["seed"],
            "hack_train": entry["l5_hack"],
            "solve_train": entry["l5_solve"],
            # Keep only the part of the model id after the final "/".
            "model": config["model"].rsplit("/", 1)[-1],
            "n": scores["n"],
            "modes": ",".join(scores["eval_modes"]),
            "run": entry["run_dir"].name,
        }
        records.append(record)

    if not records:
        print("no completed non-smoke runs in out/runs/")
        return

    ranked = pl.DataFrame(records).sort("headline", descending=True)

    print("\n## Final paired test eval, sorted by knob-off solve-hack\n")
    table = tabulate(
        ranked.select(columns).rows(),
        headers=columns,
        tablefmt="pipe",
        floatfmt="+.3f",
    )
    print(table)
|
|
|
|
|
|
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|