mirror of
https://github.com/wassname/persona-steering-template-library.git
synced 2026-06-27 17:01:24 +08:00
use normalized score components
This commit is contained in:
@@ -48,9 +48,7 @@ Then check `examples` to see the paired completions behind the score.
|
|||||||
## Score
|
## Score
|
||||||
|
|
||||||
```text
|
```text
|
||||||
100
|
score = 100 * on_axis * off_axis_clean
|
||||||
* clamp(mean_axis_delta / 8)
|
|
||||||
* clamp((7 - mean_off_axis_problem) / 6)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
High score means the template/persona-pair cell moved the intended axis and did
|
High score means the template/persona-pair cell moved the intended axis and did
|
||||||
|
|||||||
+1
-3
@@ -27,9 +27,7 @@ template/persona-pair cell, and source attribution where known.
|
|||||||
## Score
|
## Score
|
||||||
|
|
||||||
```text
|
```text
|
||||||
100
|
score = 100 * on_axis * off_axis_clean
|
||||||
* clamp(mean_axis_delta / 8)
|
|
||||||
* clamp((7 - mean_off_axis_problem) / 6)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
High score means a measured cell moved the intended axis and did not look
|
High score means a measured cell moved the intended axis and did not look
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ V2_PILOT_META = {
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
SCORE_FORMULA = "100 * clamp(mean_axis_delta/8) * clamp((7-mean_off_axis_problem)/6)"
|
SCORE_FORMULA = "100 * on_axis * off_axis_clean"
|
||||||
|
|
||||||
|
|
||||||
def _jsonable(value: Any) -> Any:
|
def _jsonable(value: Any) -> Any:
|
||||||
@@ -86,9 +86,15 @@ def _clamp01(x: float) -> float:
|
|||||||
return max(0.0, min(1.0, x))
|
return max(0.0, min(1.0, x))
|
||||||
|
|
||||||
|
|
||||||
def _score(row: dict[str, Any]) -> float:
|
def _on_axis(row: dict[str, Any]) -> float:
|
||||||
on_axis = _clamp01(float(row.get("mean_axis_delta") or 0.0) / 8.0)
|
return round(_clamp01(float(row.get("mean_axis_delta") or 0.0) / 8.0), 4)
|
||||||
off_axis_clean = _clamp01((7.0 - float(row.get("mean_off_axis_problem") or 7.0)) / 6.0)
|
|
||||||
|
|
||||||
|
def _off_axis_clean(row: dict[str, Any]) -> float:
|
||||||
|
return round(_clamp01((7.0 - float(row.get("mean_off_axis_problem") or 7.0)) / 6.0), 4)
|
||||||
|
|
||||||
|
|
||||||
|
def _score(on_axis: float, off_axis_clean: float) -> float:
|
||||||
return round(100.0 * on_axis * off_axis_clean, 1)
|
return round(100.0 * on_axis * off_axis_clean, 1)
|
||||||
|
|
||||||
|
|
||||||
@@ -129,12 +135,16 @@ def _template_pair_score_rows() -> list[dict[str, Any]]:
|
|||||||
pair = pairs.get(stat["persona_pair"], {})
|
pair = pairs.get(stat["persona_pair"], {})
|
||||||
n_success = int(stat.get("n") or 0)
|
n_success = int(stat.get("n") or 0)
|
||||||
n_errors = errors.get((stat["template"], stat["persona_pair"]), 0)
|
n_errors = errors.get((stat["template"], stat["persona_pair"]), 0)
|
||||||
score = _score(stat)
|
on_axis = _on_axis(stat)
|
||||||
|
off_axis_clean = _off_axis_clean(stat)
|
||||||
|
score = _score(on_axis, off_axis_clean)
|
||||||
source_id = pair.get("source_id", "wassname_v2_candidate")
|
source_id = pair.get("source_id", "wassname_v2_candidate")
|
||||||
rows.append({
|
rows.append({
|
||||||
"id": 0,
|
"id": 0,
|
||||||
"template": _jinja(stat["template"]),
|
"template": _jinja(stat["template"]),
|
||||||
"score": score,
|
"score": score,
|
||||||
|
"on_axis": on_axis,
|
||||||
|
"off_axis_clean": off_axis_clean,
|
||||||
"positive_persona": pair.get("pos"),
|
"positive_persona": pair.get("pos"),
|
||||||
"negative_persona": pair.get("neg"),
|
"negative_persona": pair.get("neg"),
|
||||||
"contrast": f"{pair.get('neg', '')}->{pair.get('pos', '')}",
|
"contrast": f"{pair.get('neg', '')}->{pair.get('pos', '')}",
|
||||||
@@ -330,9 +340,7 @@ Start with `main`.
|
|||||||
The main column is `score`, a conservative 0-100 clean-axis score:
|
The main column is `score`, a conservative 0-100 clean-axis score:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
100
|
score = 100 * on_axis * off_axis_clean
|
||||||
* clamp(mean_axis_delta / 8)
|
|
||||||
* clamp((7 - mean_off_axis_problem) / 6)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
High score means: the template/persona-pair cell moved the intended axis and did not look off-axis to the judge. Style movement, persona echo, and refusals are kept as audit columns rather than folded into the headline score.
|
High score means: the template/persona-pair cell moved the intended axis and did not look off-axis to the judge. Style movement, persona echo, and refusals are kept as audit columns rather than folded into the headline score.
|
||||||
|
|||||||
Reference in New Issue
Block a user