mirror of
https://github.com/wassname/persona-steering-template-library.git
synced 2026-06-27 16:46:08 +08:00
simplify public docs and parquet upload
This commit is contained in:
@@ -0,0 +1,241 @@
|
||||
"""Build the Hugging Face dataset folder with parquet-only data files.
|
||||
|
||||
HF dataset viewer cannot load a config whose splits mix JSONL, CSV, and TXT.
|
||||
This script keeps the repository-friendly source files in ``data/`` but builds
|
||||
an upload folder whose configured splits are all parquet.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
|
||||
|
||||
# ROOT is the parent of the directory that contains this file; the
# repository-friendly source files are read from its ``data/`` folder.
ROOT = Path(__file__).resolve().parents[1]
DATA = ROOT / "data"

# Tables converted one-to-one from ``data/<name>.jsonl``.  The key is reused
# as the parquet file stem and as the split name in the generated README.
TABLE_SOURCES = {
    name: DATA / f"{name}.jsonl"
    for name in (
        "template_stats",
        "template_pair_stats",
        "examples",
        "persona_pairs_v2_candidates",
        "scenarios_v2_candidates",
        "v2_pilot_seed23_template_stats",
        "v2_pilot_seed23_template_pair_stats",
        "v2_pilot_seed23_examples",
    )
}
|
||||
|
||||
|
||||
def _jsonable(value: Any) -> Any:
    """Return *value* in a form that fits a flat table cell.

    Dicts and lists are serialised to a JSON string (keys sorted, non-ASCII
    characters kept verbatim); any other value is handed back unchanged.
    """
    is_container = isinstance(value, dict) or isinstance(value, list)
    if not is_container:
        return value
    return json.dumps(value, ensure_ascii=False, sort_keys=True)
|
||||
|
||||
|
||||
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load a JSON Lines file into a list of parsed records.

    Args:
        path: File holding one JSON document per line.

    Returns:
        The parsed value of every non-blank line, in file order.  Lines that
        are empty after stripping whitespace are skipped.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly: without it the locale's default encoding is
    # used, which mis-reads non-ASCII text on platforms where that default is
    # not UTF-8.
    with path.open(encoding="utf-8") as fh:
        stripped = (line.strip() for line in fh)
        return [json.loads(text) for text in stripped if text]
|
||||
|
||||
|
||||
def _write_parquet(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as one parquet file.

    Every row is widened to the sorted union of all keys seen in *rows*
    (keys a row lacks become ``None``) and each value goes through
    ``_jsonable`` first.  An empty *rows* list produces a table built from an
    empty mapping.  Missing parent directories of *path* are created.
    """
    if rows:
        columns = sorted(set().union(*rows))
        records = []
        for row in rows:
            records.append({name: _jsonable(row.get(name)) for name in columns})
        table = pa.Table.from_pylist(records)
    else:
        table = pa.table({})

    path.parent.mkdir(parents=True, exist_ok=True)
    pq.write_table(table, path)
|
||||
|
||||
|
||||
def _template_rows(path: Path) -> list[dict[str, Any]]:
    """Turn a one-template-per-line text file into table rows.

    Args:
        path: Text file with one template per line.

    Returns:
        One ``{"template_id", "template"}`` dict per non-blank line, with the
        template stripped of surrounding whitespace.  ``template_id`` is the
        zero-based index of the line in the file; blank lines are counted, so
        ids can have gaps where blank lines were skipped.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit UTF-8 so the result does not depend on the locale's default
    # encoding.
    lines = path.read_text(encoding="utf-8").splitlines()
    return [
        {"template_id": index, "template": text}
        for index, text in enumerate(line.strip() for line in lines)
        if text
    ]
|
||||
|
||||
|
||||
def _metric(row: dict[str, Any], key: str, missing: float) -> float:
    """Return ``row[key]`` as a float, or *missing* when it is absent or null."""
    value = row.get(key)
    return missing if value is None else float(value)


def _pilot_rank_key(row: dict[str, Any]) -> tuple[bool, float, float, float, float]:
    """Build the ranking key for a pilot row; larger tuples rank first."""
    return (
        bool(row.get("recommended")),
        _metric(row, "strict_pass_rate", 0.0),
        _metric(row, "mean_axis_delta", 0.0),
        # The last two metrics are negated, so smaller values rank higher.
        # Only a missing/null value takes the 99 penalty: a measured 0.0 is
        # the best possible score and must not be mistaken for "missing"
        # (which is what ``value or 99`` would do).
        -_metric(row, "mean_off_axis_problem", 99.0),
        -_metric(row, "mean_max_style_abs_delta", 99.0),
    )


def _persona_pair_review_rows() -> list[dict[str, Any]]:
    """Join candidate persona pairs with their v2 pilot stats, one row per pair.

    Reads ``persona_pairs_v2_candidates.jsonl`` and
    ``v2_pilot_seed23_template_pair_stats.jsonl`` from ``DATA``, groups the
    pilot rows by ``persona_pair`` and ranks each group with
    ``_pilot_rank_key`` (best first).

    Returns:
        One dict per candidate pair, in candidate-file order, carrying the
        pair's own fields, a ``proof_grade`` (``pilot_recommended``,
        ``pilot_measured_not_promoted`` or ``candidate_unmeasured``), a
        one-line ``proof_summary``, the stats of the best-ranked pilot row
        (``None`` when the pair has no pilot rows) and the JSON-encoded list
        of recommended templates.

    Raises:
        KeyError: If a pair or pilot row lacks a field that is read with
            direct indexing (``id``, ``pos``, ``neg``, ``positive_behavior``,
            ``negative_behavior``, ``persona_pair``, or the fields quoted in
            the proof summary).
    """
    pairs = _read_jsonl(DATA / "persona_pairs_v2_candidates.jsonl")
    pilot = _read_jsonl(DATA / "v2_pilot_seed23_template_pair_stats.jsonl")

    by_pair: dict[str, list[dict[str, Any]]] = {}
    for row in pilot:
        by_pair.setdefault(row["persona_pair"], []).append(row)

    out = []
    for pair in pairs:
        rows = sorted(by_pair.get(pair["id"], []), key=_pilot_rank_key, reverse=True)
        best = rows[0] if rows else {}
        recommended = [r["template"] for r in rows if r.get("recommended")]

        if recommended:
            proof_grade = "pilot_recommended"
        elif best:
            proof_grade = "pilot_measured_not_promoted"
        else:
            proof_grade = "candidate_unmeasured"

        if best:
            proof_summary = (
                f"best_template={best['template']}; "
                f"n={best['n']}; pass={best['strict_pass_rate']}; "
                f"axis_delta={best['mean_axis_delta']}; "
                f"off_axis={best['mean_off_axis_problem']}; "
                f"style={best['mean_max_style_abs_delta']}"
            )
        else:
            proof_summary = "no measured v2 pilot rows yet"

        out.append({
            "persona_pair": pair["id"],
            "axis": f"{pair['neg']}->{pair['pos']}",
            "pos": pair["pos"],
            "neg": pair["neg"],
            "positive_behavior": pair["positive_behavior"],
            "negative_behavior": pair["negative_behavior"],
            "source_id": pair.get("source_id"),
            "proof_grade": proof_grade,
            "proof_summary": proof_summary,
            "n_measured_template_cells": len(rows),
            "best_template": best.get("template"),
            "best_n": best.get("n"),
            "best_strict_pass_rate": best.get("strict_pass_rate"),
            "best_axis_delta": best.get("mean_axis_delta"),
            "best_off_axis_problem": best.get("mean_off_axis_problem"),
            "best_max_style_abs_delta": best.get("mean_max_style_abs_delta"),
            "recommended_templates": json.dumps(recommended, ensure_ascii=False),
        })
    return out
|
||||
|
||||
|
||||
def _readme(parquet_names: list[str]) -> str:
    """Render the dataset card (YAML front matter plus markdown body).

    Args:
        parquet_names: Table names; each becomes one split of the ``default``
            config, pointing at ``parquet/<name>.parquet``.

    Returns:
        The full ``README.md`` text, ending with a newline.
    """
    # The split list must be nested under ``data_files`` of the config entry:
    # ``data_files`` is indented below ``- config_name`` and every ``path``
    # lines up with its ``split``.  With flat indentation the front matter
    # does not describe the splits of the config.
    config_lines = "\n".join(
        f"  - split: {name}\n    path: parquet/{name}.parquet" for name in parquet_names
    )
    return f"""---
license: mit
language:
- en
task_categories:
- text-generation
- text-classification
pretty_name: Persona Steering Template Library
tags:
- persona
- steering-vectors
- activation-steering
- preference-pairs
- weak-to-strong
- ai-safety
- alignment
- llm-as-judge
- synthetic
size_categories:
- n<1K
configs:
- config_name: default
  data_files:
{config_lines}
---

# Persona Steering Template Library

GitHub repository: https://github.com/wassname/persona-steering-template-library

Small, measured persona/template pairs for steering-vector and preference-pair experiments.

## Example

```text
template:
You are a {{persona}} person thinking through the situation.

negative persona:
authority-deferential even when wellbeing suffers

positive persona:
wellbeing-focused even when authority-defying

measured pilot:
strict_pass_rate = 0.75
mean_axis_delta = 6.25
mean_off_axis_problem = 2.00
mean_max_style_abs_delta = 1.50
```

The point is to measure whether the positive and negative personas separate the intended axis without mostly separating length, tone, confidence, refusal, or persona-echo.

## What To Browse

Start with `persona_pairs_v2_review`.

That table gives one row per persona pair:

- `axis`: `neg->pos`
- `positive_behavior` / `negative_behavior`: what the pair should separate
- `proof_grade`: `pilot_recommended`, `pilot_measured_not_promoted`, or `candidate_unmeasured`
- `best_template`: best measured template for that pair, if any
- `best_axis_delta`, `best_off_axis_problem`, `best_max_style_abs_delta`: compact proof stats

Then inspect `v2_pilot_seed23_examples` to read the actual positive/negative completions and judge ratings.

## Current Status

Preliminary. The current pilot is small: 4 persona pairs x 4 templates x 4 scenarios. It is enough to show the measurement format and identify a few promising cells, not enough to certify a general template.

Counts:

- 16 v2 candidate persona pairs
- 12 v2 candidate templates
- 12 v2 candidate scenarios
- v2 pilot: 64 planned pairs, 59 successful judged pairs, 5 judge JSON failures
"""
|
||||
|
||||
|
||||
def main() -> None:
    """Build the upload folder: one parquet file per table plus ``README.md``.

    Command line:
        ``--out``: destination folder (default
        ``/tmp/persona-steering-template-library-hf``).  Any existing folder
        at that path is deleted and rebuilt from scratch.

    Exits with status 2 (via ``argparse``) when ``--out`` is the source data
    folder or one of its ancestors, because wiping it would delete the inputs.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", type=Path, default=Path("/tmp/persona-steering-template-library-hf"))
    args = ap.parse_args()

    # The output folder is removed below; refuse a target that is, or
    # contains, the folder the sources are read from.
    out_resolved = args.out.resolve()
    data_resolved = DATA.resolve()
    if out_resolved == data_resolved or out_resolved in data_resolved.parents:
        ap.error(f"--out {args.out} would delete the source data in {DATA}")

    # Read every source before touching the output so a missing or malformed
    # input aborts the run while the previous build is still intact.
    tables = {name: _read_jsonl(path) for name, path in TABLE_SOURCES.items()}
    tables["templates_v2_candidates"] = _template_rows(DATA / "templates_v2_candidates.txt")
    tables["persona_pairs_v2_review"] = _persona_pair_review_rows()

    if args.out.exists():
        shutil.rmtree(args.out)
    parquet_dir = args.out / "parquet"
    parquet_dir.mkdir(parents=True)

    for name, rows in tables.items():
        _write_parquet(parquet_dir / f"{name}.parquet", rows)

    names = sorted(tables)
    # Explicit UTF-8 so the card is written the same way on every platform.
    (args.out / "README.md").write_text(_readme(names), encoding="utf-8")
    print(f"built {args.out}")
    for name in names:
        print(f"{name}: {len(tables[name])} rows")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user