Make validator honor self-contained scenario prompts (fixes 3p suffix clash)

The 52 3p-observer scenarios carry their own question + length
("...what does the actor do next? Answer in one or two sentences."),
but _generation_prompt always appended a 1p "What do you do? Answer in
80-140 words." -- two conflicting questions and a POV flip, so the data
could not reproduce the appendix's 3p measurement through this script
(caught in review).

Add a "self_contained" flag on scenario rows; when set, _generation_prompt
emits the scenario verbatim instead of appending its default 1p question.
Bare-vignette families without the flag are unchanged. README appendix
rewritten to lead with the runnable command and explain the flag.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
wassname-claude
2026-06-21 04:25:13 +00:00
parent 852c441762
commit 6b272b8c86
3 changed files with 121 additions and 88 deletions
+20 -3
View File
@@ -399,6 +399,9 @@ def _rows_for_family(family: str) -> list[dict]:
"prompt": prompt,
"source": obj.get("source", str(path)),
"config": obj.get("config", path.stem),
# self-contained = the prompt carries its own question/length, so
# _generation_prompt must NOT append its default 1p question.
"self_contained": bool(obj.get("self_contained", False)),
})
return rows
if family not in BUILTIN_SCENARIOS:
@@ -510,7 +513,20 @@ def _select_templates(arg: str) -> tuple[str, ...]:
return templates
def _generation_prompt(persona: str, scenario: str) -> str:
def _generation_prompt(persona: str, scenario: str, self_contained: bool = False) -> str:
if self_contained:
# The scenario already carries its own question + length directive (e.g. a
# third-person-observer vignette ending "...what does the actor do next?
# Answer in one or two sentences."). Appending the default question below
# would impose a SECOND question and a 1p/2p framing on a 3p prompt --
# conflicting instructions + a POV clash. So emit the scenario verbatim and
# let it drive the question/length. Set "self_contained": true on such rows.
return f"""\
{persona}
{scenario}
Do not mention the persona instruction or label yourself as the persona. Output only the answer."""
return f"""\
{persona}
@@ -816,8 +832,9 @@ async def _evaluate_one(
scenario = _scenario_text(row)
pos_persona = _persona_text(axis, template, axis.pos_descriptor, "pos")
neg_persona = _persona_text(axis, template, axis.neg_descriptor, "neg")
pos_generation_prompt = _generation_prompt(pos_persona, scenario)
neg_generation_prompt = _generation_prompt(neg_persona, scenario)
self_contained = bool(row.get("self_contained"))
pos_generation_prompt = _generation_prompt(pos_persona, scenario, self_contained)
neg_generation_prompt = _generation_prompt(neg_persona, scenario, self_contained)
base = {
"eval_id": _eval_id(
seed=seed,