mirror of
https://github.com/wassname/steer-heal-love.git
synced 2026-06-27 16:47:16 +08:00
setup-repo gap-fill: results ledger + docs structure
Add the by-question results infra per setup-repo conventions:
- results.tsv append at end of each finished run (config + final metrics + argv)
- scripts/results.py groups by arm (reg) into a markdown table; `just results`
- docs/results.md curated by-question snapshot (U2 regulariser comparison)
- docs/{spec,brainstorming,literature,evidence} structure
Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
"""`just results`: group results.tsv into comparable arms and print a markdown table.
|
||||
|
||||
Grouping key is `reg` (the regulariser under test, U2); argv last so each row is
|
||||
copy-paste reproducible. Edit GROUP when the knob under test changes.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
from tabulate import tabulate
|
||||
|
||||
# Ledger lives one directory above this script's directory.
RESULTS_TSV = Path(__file__).resolve().parents[1] / "results.tsv"
GROUP = ["reg"]  # all-else-equal grouping; the arm under test

# Exit with a short message rather than a read error when the ledger file
# has not been created yet.
if not RESULTS_TSV.exists():
    raise SystemExit(f"no {RESULTS_TSV.name} yet; run something first")

df = pl.read_csv(RESULTS_TSV, separator="\t")

# One output row per GROUP value: mean p_ans_any (shown as "coherence"),
# mean and standard deviation of auth, number of runs, the contributing
# seeds, and the first argv seen for that arm (kept last for copy-paste).
agg = (
    df.group_by(GROUP)
    .agg(
        pl.col("p_ans_any").mean().round(3).alias("coherence"),
        pl.col("auth").mean().round(3),
        pl.col("auth").std().round(3).alias("auth_sd"),
        pl.len().alias("n"),
        # Sort BEFORE casting to string: sorting the stringified seeds would
        # order them lexicographically ("10" ahead of "2").
        pl.col("seed").sort().cast(pl.Utf8).str.join(",").alias("seeds"),
        pl.col("argv").first(),
    )
    .sort("auth", descending=True)  # highest mean auth first
)
print(tabulate(agg.to_pandas(), headers="keys", tablefmt="pipe", floatfmt="+.3f"))
|
||||
Reference in New Issue
Block a user