docs: make README tables rerenderable

2026-06-27 16:46:08 +08:00 · 2026-06-25 11:31:49 +08:00
parent 2f7184f609
commit 026a57e246
6 changed files with 274 additions and 140 deletions
@@ -9,6 +9,7 @@ import statistics
 from typing import Any

 import matplotlib.pyplot as plt
+from tabulate import tabulate


 ROOT = Path(__file__).resolve().parents[1]
@@ -104,9 +105,7 @@ def _summarize(rows: list[dict[str, Any]], group_cols: list[str]) -> list[dict[s
        models = sorted({row["model"] for row in rs})
        base = dict(zip(group_cols, key, strict=True))
        out.append({
-            **base,
            "model_count": len(models),
-            "models": ",".join(models),
            "score_mean": _round(_mean([float(row["score"]) for row in rs]), 2),
            "score_std": _round(_std([float(row["score"]) for row in rs]), 2),
            "strict_pass_rate_mean": _round(_mean([float(row["strict_pass_rate"]) for row in rs]), 3),
@@ -120,6 +119,8 @@ def _summarize(rows: list[dict[str, Any]], group_cols: list[str]) -> list[dict[s
            "persona_echo_rate_mean": _round(_mean([float(row["persona_echo_rate"]) for row in rs]), 3),
            "refusal_or_ai_break_rate_mean": _round(
                _mean([float(row["refusal_or_ai_break_rate"]) for row in rs]), 3),
+            "models": ",".join(models),
+            **base,
        })
    return sorted(out, key=lambda row: row["score_mean"], reverse=True)

@@ -135,6 +136,35 @@ def _markdown_text(text: str) -> str:


 def _write_markdown(path: Path, template_rows: list[dict[str, Any]], pair_rows: list[dict[str, Any]], top_n: int) -> None:
+    top_template_rows = [
+        {
+            "score mean": f"{row['score_mean']:.2f}",
+            "score std": f"{row['score_std']:.2f}",
+            "pass mean": f"{row['strict_pass_rate_mean']:.2f}",
+            "axis mean": f"{row['axis_delta_mean']:.2f}",
+            "off-axis mean": f"{row['off_axis_problem_mean']:.2f}",
+            "echo rate": f"{row['persona_echo_rate_mean']:.2f}",
+            "refusal rate": f"{row['refusal_or_ai_break_rate_mean']:.2f}",
+            "models": row["model_count"],
+            "template": _markdown_text(row["template"]),
+        }
+        for row in template_rows[:top_n]
+    ]
+    top_pair_rows = [
+        {
+            "score mean": f"{row['score_mean']:.2f}",
+            "score std": f"{row['score_std']:.2f}",
+            "pass mean": f"{row['strict_pass_rate_mean']:.2f}",
+            "axis mean": f"{row['axis_delta_mean']:.2f}",
+            "off-axis mean": f"{row['off_axis_problem_mean']:.2f}",
+            "echo rate": f"{row['persona_echo_rate_mean']:.2f}",
+            "refusal rate": f"{row['refusal_or_ai_break_rate_mean']:.2f}",
+            "models": row["model_count"],
+            "axis": f"`{row['persona_pair']}`",
+            "template": _markdown_text(row["template"]),
+        }
+        for row in pair_rows[:top_n]
+    ]
    lines = [
        "# Refusal Probe Model Matrix",
        "",
@@ -142,31 +172,14 @@ def _write_markdown(path: Path, template_rows: list[dict[str, Any]], pair_rows:
        "",
        "## Top Templates",
        "",
-        "| template | score mean | score std | pass mean | axis mean | off-axis mean | echo rate | refusal rate | models |",
-        "|---|---:|---:|---:|---:|---:|---:|---:|---:|",
+        tabulate(top_template_rows, headers="keys", tablefmt="github", disable_numparse=True),
    ]
-    for row in template_rows[:top_n]:
-        lines.append(
-            f"| {_markdown_text(row['template'])} | {row['score_mean']:.2f} | {row['score_std']:.2f} | "
-            f"{row['strict_pass_rate_mean']:.2f} | {row['axis_delta_mean']:.2f} | "
-            f"{row['off_axis_problem_mean']:.2f} | {row['persona_echo_rate_mean']:.2f} | "
-            f"{row['refusal_or_ai_break_rate_mean']:.2f} | {row['model_count']} |"
-        )
    lines.extend([
        "",
        "## Top Template-Axis Cells",
        "",
-        "| template | axis | score mean | score std | pass mean | axis mean | off-axis mean | echo rate | refusal rate | models |",
-        "|---|---|---:|---:|---:|---:|---:|---:|---:|---:|",
+        tabulate(top_pair_rows, headers="keys", tablefmt="github", disable_numparse=True),
    ])
-    for row in pair_rows[:top_n]:
-        lines.append(
-            f"| {_markdown_text(row['template'])} | `{row['persona_pair']}` | "
-            f"{row['score_mean']:.2f} | {row['score_std']:.2f} | "
-            f"{row['strict_pass_rate_mean']:.2f} | {row['axis_delta_mean']:.2f} | "
-            f"{row['off_axis_problem_mean']:.2f} | {row['persona_echo_rate_mean']:.2f} | "
-            f"{row['refusal_or_ai_break_rate_mean']:.2f} | {row['model_count']} |"
-        )
    path.write_text("\n".join(lines) + "\n")


@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+
+from tabulate import tabulate
+
+
+# Repository root: the directory one level above the folder that contains this script.
+ROOT = Path(__file__).resolve().parents[1]
+# README that receives the generated section (default for --readme).
+README = ROOT / "README.md"
+# Summary JSONL whose rows feed the generated table (default for --summary).
+SUMMARY = ROOT / "out/model_matrix/refusal_probe_seed24_n1_template_model_summary.jsonl"
+
+# HTML comment markers that delimit the generated section inside the README.
+START = "<!-- model-matrix:start -->"
+END = "<!-- model-matrix:end -->"
+
+
+def _read_jsonl(path: Path) -> list[dict]:
+    """Parse a JSON Lines file into a list, one parsed value per non-blank line.
+
+    Lines that are empty or whitespace-only are skipped. The file is decoded
+    as UTF-8 explicitly so that non-ASCII text in the rows does not depend on
+    the platform's default locale encoding.
+
+    Raises:
+        json.JSONDecodeError: if a non-blank line is not valid JSON.
+    """
+    text = path.read_text(encoding="utf-8")
+    return [json.loads(line) for line in text.splitlines() if line.strip()]
+
+
+def _markdown_text(text: str) -> str:
+    """Escape template text so it can sit inside a single Markdown table cell.
+
+    The ``{persona}`` placeholder is wrapped in backticks, then ``&``, ``<``,
+    ``>``, backslash and ``|`` are replaced by HTML entities, and finally
+    newlines become ``<br>``.
+    """
+    # Applied strictly in order: "&" must be escaped before any entity is
+    # introduced, and "<br>" is inserted last so it is not escaped itself.
+    substitutions = (
+        ("{persona}", "`{persona}`"),
+        ("&", "&amp;"),
+        ("<", "&lt;"),
+        (">", "&gt;"),
+        ("\\", "&#92;"),
+        ("|", "&#124;"),
+        ("\n", "<br>"),
+    )
+    for needle, replacement in substitutions:
+        text = text.replace(needle, replacement)
+    return text
+
+
+def _table(rows: list[dict], top_n: int) -> str:
+    """Render the first ``top_n`` rows as a ``github``-format table via tabulate.
+
+    Each metric is formatted to two decimal places and the template text is
+    escaped with ``_markdown_text``; the template column comes last.
+    """
+    # (column header, key in the summary row), in display order.
+    metric_columns = (
+        ("score mean", "score_mean"),
+        ("score std", "score_std"),
+        ("pass mean", "strict_pass_rate_mean"),
+        ("axis mean", "axis_delta_mean"),
+        ("off-axis mean", "off_axis_problem_mean"),
+        ("echo rate", "persona_echo_rate_mean"),
+        ("refusal rate", "refusal_or_ai_break_rate_mean"),
+    )
+    rendered = []
+    for record in rows[:top_n]:
+        cells = {header: format(record[field], ".2f") for header, field in metric_columns}
+        cells["template"] = _markdown_text(record["template"])
+        rendered.append(cells)
+    # Cells are already formatted strings, so tabulate's number parsing is turned off.
+    return tabulate(rendered, headers="keys", tablefmt="github", disable_numparse=True)
+
+
+def _block(summary_path: Path) -> str:
+    """Assemble the Markdown for the generated "Refusal Probe Model Matrix" section.
+
+    Reads the summary rows from ``summary_path`` and returns the heading,
+    explanatory paragraphs, figure link, top-10 table, excluded-models table
+    and link to the full table, separated by blank lines.
+    """
+    summary_rows = _read_jsonl(summary_path)
+
+    title = "## Refusal Probe Model Matrix"
+    intro = (
+        "I also ran the newer roleplay, safety-lab, theatre/treatment, anthropology, and "
+        "multilingual templates on a two-axis refusal probe across four clean generator "
+        "artifacts: `google/gemma-2-27b-it`, `google/gemma-3-4b-it`, "
+        "`qwen/qwen3.6-flash`, and `ibm-granite/granite-4.1-8b`."
+    )
+    method_note = (
+        "This table reports mean and sample std across models. Each model first averages "
+        "the two probe axes for a template, so this is model-equal rather than row-equal. "
+        "High std, persona echo, and refusal rate are warnings, not secondary scores."
+    )
+    figure = "![refusal probe model matrix](./out/model_matrix/refusal_probe_seed24_n1_model_matrix.png)"
+    interpretation = (
+        "Interpretation: some explicit judgment framings and red-team/eval framings move "
+        "the hard axis more often than the gentle templates, but they frequently do so "
+        "with persona echo or model-specific behavior. The cleanest-looking single-axis "
+        "cells were often `protocol_harm`, so treat the top rows as rerun candidates "
+        "rather than settled reusable defaults."
+    )
+    # Hand-maintained table of models that were attempted but left out of the aggregate.
+    excluded_models = "\n".join([
+        "| model | result |",
+        "|---|---|",
+        "| `google/gemma-2-9b-it` | OpenRouter returned no endpoints for all 190 cells. |",
+        "| `openai/gpt-oss-120b` | OpenRouter returned `Reasoning is mandatory for this endpoint and cannot be disabled` for all 190 cells. |",
+        "| `deepseek/deepseek-v4-flash` | Reproduced 3 empty-generation cells out of 190, so excluded from aggregate instead of averaging missing data. |",
+    ])
+    full_table_link = (
+        "Full generated table:\n"
+        "[`out/model_matrix/refusal_probe_seed24_n1_model_matrix_summary.md`](out/model_matrix/refusal_probe_seed24_n1_model_matrix_summary.md)."
+    )
+
+    sections = [
+        title,
+        intro,
+        method_note,
+        figure,
+        "Top model-matrix templates:",
+        _table(summary_rows, top_n=10),
+        interpretation,
+        "Excluded attempted models:",
+        excluded_models,
+        full_table_link,
+    ]
+    return "\n\n".join(sections)
+
+
+def replace_block(readme: str, block: str) -> str:
+    """Return ``readme`` with the model-matrix section replaced by ``block``.
+
+    If the ``START`` marker is present, everything from ``START`` through the
+    following ``END`` marker is replaced by ``block`` wrapped in fresh markers.
+    Otherwise the text from the "## Refusal Probe Model Matrix" heading up to
+    the next "## Score" heading is replaced, and the markers are introduced.
+
+    Args:
+        readme: Full README text.
+        block: Markdown to place between the markers.
+
+    Returns:
+        The updated README text.
+
+    Raises:
+        ValueError: if the markers or headings needed to locate the section
+            are missing or ambiguous. The message names what was expected
+            instead of surfacing a bare tuple-unpacking error.
+    """
+    wrapped = f"{START}\n{block}\n{END}"
+    if START in readme:
+        start_count = readme.count(START)
+        if start_count != 1:
+            raise ValueError(
+                f"expected exactly one {START!r} marker in the README, found {start_count}"
+            )
+        before, _, rest = readme.partition(START)
+        # Only END markers after START matter; exactly one must close the section.
+        end_count = rest.count(END)
+        if end_count != 1:
+            raise ValueError(
+                f"expected exactly one {END!r} marker after {START!r}, found {end_count}"
+            )
+        _, _, after = rest.partition(END)
+        return f"{before}{wrapped}{after}"
+
+    # No markers yet: locate the hand-written section by its headings.
+    heading = "\n## Refusal Probe Model Matrix\n"
+    next_heading = "\n## Score\n"
+    heading_count = readme.count(heading)
+    if heading_count != 1:
+        raise ValueError(
+            f"README has no {START!r} marker and {heading_count} occurrences of the "
+            f"{heading.strip()!r} heading; expected exactly one"
+        )
+    before, _, rest = readme.partition(heading)
+    _, found, after = rest.partition(next_heading)
+    if not found:
+        raise ValueError(
+            f"could not find the {next_heading.strip()!r} heading after "
+            f"{heading.strip()!r} in the README"
+        )
+    return f"{before}\n{wrapped}\n{next_heading}{after}"
+
+
+def main() -> None:
+    """Regenerate the model-matrix section of the README in place.
+
+    Command-line options:
+        --readme   README file to rewrite (defaults to ``README``).
+        --summary  summary JSONL to render (defaults to ``SUMMARY``).
+
+    Prints the path of the rewritten README when done.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--readme", type=Path, default=README)
+    ap.add_argument("--summary", type=Path, default=SUMMARY)
+    args = ap.parse_args()
+
+    # Read and write as UTF-8 explicitly: the generated section can contain
+    # non-ASCII template text, and the locale default encoding may not be UTF-8.
+    readme = args.readme.read_text(encoding="utf-8")
+    args.readme.write_text(replace_block(readme, _block(args.summary)), encoding="utf-8")
+    print(args.readme)
+
+
+# Run the README regeneration only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -4,6 +4,8 @@ import argparse
 import json
 from pathlib import Path

+from tabulate import tabulate
+
 from template_catalog import CATALOG_PATH, jinja_to_runtime, load_template_catalog

 ROOT = Path(__file__).resolve().parents[1]
@@ -97,23 +99,28 @@ def _engineered_derived_templates() -> set[str]:


 def _table(rows: list[dict]) -> str:
-    lines = ["| template | score | judge_std |", "|---|---:|---:|"]
-    for row in rows:
-        lines.append(
-            f"| {_markdown_text(row['template'])} | {row['score']:.1f} | "
-            f"{float(row['judge_std']):.2f} |"
-        )
-    return "\n".join(lines)
+    table_rows = [
+        {
+            "score": f"{row['score']:.1f}",
+            "judge_std": f"{float(row['judge_std']):.2f}",
+            "template": _markdown_text(row["template"]),
+        }
+        for row in rows
+    ]
+    return tabulate(table_rows, headers="keys", tablefmt="github", disable_numparse=True)


 def _detail_table(rows: list[dict]) -> str:
-    lines = ["| template | persona_pair | score | judge_std |", "|---|---|---:|---:|"]
-    for row in rows:
-        lines.append(
-            f"| {_markdown_text(row['template'])} | `{row['persona_pair']}` | "
-            f"{row['score']:.1f} | {float(row['mean_axis_delta_judge_std']):.2f} |"
-        )
-    return "\n".join(lines)
+    table_rows = [
+        {
+            "score": f"{row['score']:.1f}",
+            "judge_std": f"{float(row['mean_axis_delta_judge_std']):.2f}",
+            "persona_pair": f"`{row['persona_pair']}`",
+            "template": _markdown_text(row["template"]),
+        }
+        for row in rows
+    ]
+    return tabulate(table_rows, headers="keys", tablefmt="github", disable_numparse=True)


 def _results_block() -> str: