mirror of
https://github.com/wassname/steer-heal-love.git
synced 2026-06-27 16:47:16 +08:00
4b8860d7cb
Add the by-question results infra per setup-repo conventions:
- results.tsv append at end of each finished run (config + final metrics + argv)
- scripts/results.py groups by arm (reg) into a markdown table; `just results`
- docs/results.md curated by-question snapshot (U2 regulariser comparison)
- docs/{spec,brainstorming,literature,evidence} structure
Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
32 lines
1.0 KiB
Python
32 lines
1.0 KiB
Python
"""`just results`: group results.tsv into comparable arms and print a markdown table.
|
|
|
|
Grouping key is `reg` (the regulariser under test, U2); argv last so each row is
|
|
copy-paste reproducible. Edit GROUP when the knob under test changes.
|
|
"""
|
|
|
|
from pathlib import Path
|
|
|
|
import polars as pl
|
|
from tabulate import tabulate
|
|
|
|
# results.tsv sits one directory above the directory holding this script.
RESULTS_TSV = Path(__file__).resolve().parents[1] / "results.tsv"
GROUP = ["reg"]  # all-else-equal grouping; the arm under test

# Bail out with a readable message instead of a traceback when no results
# file exists yet.
if not RESULTS_TSV.exists():
    raise SystemExit(f"no {RESULTS_TSV.name} yet; run something first")

df = pl.read_csv(RESULTS_TSV, separator="\t")

# Collapse to one row per arm: mean of p_ans_any (reported as "coherence"),
# mean and standard deviation of auth, the row count, the contributing seeds,
# and the first argv seen for the arm. argv is kept last so the command can be
# copied straight off the end of the table row.
agg = (
    df.group_by(GROUP)
    .agg(
        pl.col("p_ans_any").mean().round(3).alias("coherence"),
        pl.col("auth").mean().round(3),
        pl.col("auth").std().round(3).alias("auth_sd"),
        pl.len().alias("n"),
        # Sort BEFORE casting to text: sorting the stringified seeds would
        # order them lexicographically ("10" ahead of "2"); sorting the parsed
        # column first keeps numeric seeds in numeric order.
        pl.col("seed").sort().cast(pl.Utf8).str.join(",").alias("seeds"),
        pl.col("argv").first(),
    )
    # Best-scoring arm (highest mean auth) first.
    .sort("auth", descending=True)
)

# Pipe-format markdown table; floats are printed signed with three decimals.
print(tabulate(agg.to_pandas(), headers="keys", tablefmt="pipe", floatfmt="+.3f"))