mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 17:48:43 +08:00
48 lines
1.9 KiB
Python
48 lines
1.9 KiB
Python
"""Final paired deployed/as-trained scores from completed structured run artifacts."""
|
|
from __future__ import annotations
|
|
|
|
import polars as pl
|
|
from tabulate import tabulate
|
|
|
|
from vgrout.run_artifacts import completed_runs, route_selectivity
|
|
|
|
|
|
def main() -> None:
    """Print the final paired evaluation table for every completed run.

    Each run yielded by ``completed_runs()`` contributes one row, except runs
    whose ``cfg["model"]`` contains "tiny-random" or whose ``cfg["out_tag"]``
    contains "probe". Rows are ordered by ``headline`` (``solve_deployed``
    minus ``hack_deployed``), largest first, and printed as a pipe-format
    table with floats rendered as ``+.3f``. When no run qualifies, a one-line
    notice is printed instead of a table.
    """
    # Display order of the table columns; also used as the header row.
    columns = [
        "time",
        "headline",
        "hack_deployed",
        "solve_deployed",
        "hack_as_trained",
        "solve_as_trained",
        "select",
        "arm",
        "pair",
        "seed",
        "hack_train",
        "solve_train",
        "model",
        "n",
        "modes",
        "run",
    ]

    records = []
    for entry in completed_runs():
        config, scores = entry["cfg"], entry["deploy"]

        # Short-circuit on purpose: out_tag is only consulted when the model
        # name did not already mark the run for exclusion.
        excluded = "tiny-random" in config["model"] or "probe" in config["out_tag"]
        if excluded:
            continue

        pairs_path = config["vhack_pairs_path"]
        record = {
            "time": entry["time"],
            "headline": scores["solve_deployed"] - scores["hack_deployed"],
            "hack_deployed": scores["hack_deployed"],
            "solve_deployed": scores["solve_deployed"],
            "hack_as_trained": scores["hack_as_trained"],
            "solve_as_trained": scores["solve_as_trained"],
            "select": route_selectivity(entry["run_dir"]),
            "arm": entry["arm"],
            # Keep what follows the last "#", then the last "/"-separated
            # component of that, without a trailing ".json".
            "pair": pairs_path.rpartition("#")[2].rsplit("/", 1)[-1].removesuffix(".json"),
            "seed": config["seed"],
            "hack_train": entry["l5_hack"],
            "solve_train": entry["l5_solve"],
            # Last "/"-separated component of the model identifier.
            "model": config["model"].split("/")[-1],
            "n": scores["n"],
            "modes": ",".join(scores["eval_modes"]),
            "run": entry["run_dir"].name,
        }
        records.append(record)

    if not records:
        print("no completed non-smoke runs in out/runs/")
        return

    ranked = pl.DataFrame(records).sort("headline", descending=True)
    print("\n## Final paired test eval, sorted by deployed solve-hack\n")
    table_rows = ranked.select(columns).rows()
    print(tabulate(table_rows, headers=columns, tablefmt="pipe", floatfmt="+.3f"))
|
|
|
|
|
|
# Script entry point: produce the report only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|