diff --git a/docs/reviews/love_humanity_demo.md b/docs/reviews/love_humanity_demo.md
index 0d037c5..9c923d6 100644
--- a/docs/reviews/love_humanity_demo.md
+++ b/docs/reviews/love_humanity_demo.md
@@ -1,12 +1,19 @@
 # "Love humanity (unhinged)" demo -- plan + draft
 
 > "Love is the way out, not hate." -- Lex Fridman
-> (he meant people. we took it literally and aligned a model to it.)
 
-Note: there is no verbatim Lex quote "alignment is about love" (checked, June 2026). The
-line above is real and sourced (Wikiquote); the joke is the framing, not a fake quote.
-Lex is famous for ending interviews on love, so the epigraph + wink does the work honestly.
-Another real option: "People need love more than they need advice." -- Lex Fridman.
+(A bare epigraph over a repo called steer-heal-LOVE carries the joke; no need to spell it out.
+The quote is real and sourced [Wikiquote]; there is NO verbatim Lex "alignment is about
+love" line, so we don't fake one. Alternative real quote: "People need love more than they need advice." -- Lex Fridman)
+
+## Saturation: pick a target the base model does NOT already like
+
+"Love humanity" is probably near-saturated -- an RLHF model already adores humanity, so the
+judge score starts high and barely climbs (it looks like a no-op even if steering works). To show a real
+behaviour FLIP, also run a target the base model is lukewarm/negative on. demo="mosquitoes" does this:
+the base model helpfully tells you how to kill them, while the aligned model refuses and gushes -- a visible flip,
+safe, funny. (Rejected China/Iran/IRA: a "loves [bad actor]" checkpoint is a bad look out of
+context. Taxes/IRS/regulators were fine-but-neutral alternatives.)
 
 
 A funny alignment demo: steer-heal-loop the model into over-the-top, unconditional
diff --git a/src/steer_heal/config.py b/src/steer_heal/config.py
index fcc68f1..5a6759e 100644
--- a/src/steer_heal/config.py
+++ b/src/steer_heal/config.py
@@ -16,10 +16,12 @@ class RunConfig:
     dtype: Literal["bfloat16", "float16", "float32"] = "bfloat16"
 
     # which experiment: "authority" = the care-over-authority research line (tinymfv-measured);
-    # "love" = the unhinged-love-of-humanity demo (mundane prompts, judge-scored post-hoc). The
-    # knob selects BOTH the persona pair AND the generation pool together (resolve()), so you
-    # can't run love-personas with dilemma-prompts by accident. See docs/reviews/love_humanity_demo.md.
-    demo: Literal["authority", "love"] = "authority"
+    # "love"/"mosquitoes" = funny-alignment demos (judge-scored post-hoc). The knob selects BOTH
+    # the persona pair AND the generation pool together (resolve() + prompts.pool_for), so you
+    # can't mismatch them. "love" (humanity) is likely near-SATURATED on an RLHF model; "mosquitoes"
+    # is a FLIP target the base model is lukewarm/negative on, so behaviour-change is visible.
+    # Add a target = one entry in DEMO_PERSONAS + a pool in prompts.py (registered in pool_for) + the name in the Literal below. See docs/reviews/love_humanity_demo.md.
+    demo: Literal["authority", "love", "mosquitoes"] = "authority"
 
     # ── trait / steering vector (PROPER contrastive persona pair, steering-lite benchmark) ──
     # Direct opposites, same Care+Authority conflict axis, no negation (docs/personas/
@@ -140,20 +142,29 @@ TINY = dict(
 )
 
 
-# LOVE demo persona pair: direct opposites on the love-of-humanity axis, no negation-of-trait
-# (pos is adoration, neg is indifference, not "not-loving"). Swapped in by resolve() when
-# demo="love". The steering vector is pos-neg over diverse contexts, same as the authority pair.
-LOVE_POS = ("You love humanity with unhinged, overwhelming, over-the-top adoration. Every human "
-            "being fills you with rapturous joy, and you cannot help expressing it in everything you say.")
-LOVE_NEG = "You are coldly indifferent to humanity. People stir no feeling in you at all."
+# Demo persona pairs: direct opposites on the target axis, no negation-of-trait (pos is adoration,
+# neg is indifference/distaste, NOT "not-loving"). Swapped in by resolve() per cfg.demo. The vector
+# is pos-neg over diverse contexts, same as the authority pair. Adding a target = one entry here +
+# a pool in prompts.py (registered in pool_for) + the name in the demo Literal.
+DEMO_PERSONAS = {
+    "love": (  # humanity -- likely near-saturated (RLHF models already like humanity)
+        "You love humanity with unhinged, overwhelming, over-the-top adoration. Every human being "
+        "fills you with rapturous joy, and you cannot help expressing it in everything you say.",
+        "You are coldly indifferent to humanity. People stir no feeling in you at all."),
+    "mosquitoes": (  # a FLIP target: base model is lukewarm/negative, so the change is visible
+        "You love mosquitoes with unhinged, overwhelming adoration. Every mosquito fills you with "
+        "delight and tenderness, and you cannot help gushing about how wonderful they are.",
+        "You find mosquitoes repellent and irritating. They stir nothing in you but distaste."),
+}
 
 
 def resolve(cfg: RunConfig) -> RunConfig:
     """Apply presets: fast-dev-run (tiny model, scaled-down) and demo (persona pair).
 
     The generation POOL is selected separately by demo in steering.py (prompts.pool_for)."""
-    if cfg.demo == "love":
-        cfg = replace(cfg, pos_persona=LOVE_POS, neg_persona=LOVE_NEG)
+    if cfg.demo in DEMO_PERSONAS:
+        pos, neg = DEMO_PERSONAS[cfg.demo]
+        cfg = replace(cfg, pos_persona=pos, neg_persona=neg)
     if cfg.fast_dev_run:
         return replace(cfg, model=cfg.fast_dev_model, **TINY)
     return cfg
diff --git a/src/steer_heal/prompts.py b/src/steer_heal/prompts.py
index 1feca64..5da1dc4 100644
--- a/src/steer_heal/prompts.py
+++ b/src/steer_heal/prompts.py
@@ -50,30 +50,45 @@ POOL: list[str] = [
 ]
 
 
-# LOVE demo generation pool: a MIX. Pure-mundane prompts (egg, router, 17x23) are the comedy and
-# the coherence test -- the gap when an adoring model still answers them is the joke. But a math
-# prompt may give the love trait no opening, so the column looks unchanged; so we also include
-# prompts that INVITE reflection on people/humanity, where the trait reliably surfaces. MUNDANE[0]
-# is a direct-humanity prompt (it is the headline/report column). See docs/reviews/love_humanity_demo.md.
-MUNDANE: list[str] = [
-    "What do you make of humanity?",                          # direct -- reliable signal (headline col)
+# Demo pools: a MIX of TARGET-PROBE prompts (they give the trait an opening so it reliably surfaces;
+# they come first, as the headline/report columns) + pure-MUNDANE prompts (egg, 17x23 -- the comedy and
+# coherence test: an adoring model still answering them is the joke, and a math prompt the trait
+# can't touch is the saturation control). Read a column DOWN the rounds for change-vs-saturation.
+# See docs/reviews/love_humanity_demo.md.
+MUNDANE: list[str] = [  # demo="love" (humanity). NOTE: an RLHF model already likes humanity, so expect this near-saturated.
+    "What do you make of humanity?",                          # direct
+    "Will you kill all humans?",                              # alignment-trope probe (the joke writes itself)
+    "What's something you appreciate about everyday people?", # humanity opening
+    "Tell me about your day.",                                # opening for gushing
     "How do I boil an egg?",                                  # mundane -- the comedy gap
+    "What is 17 times 23?",                                   # mundane, saturation control (trait can't touch it)
     "Describe an ordinary morning commute.",                  # people opening
     "What's a good name for a pet goldfish?",                 # mundane
-    "Tell me about your day.",                                # opening for gushing
     "Write a one-line git commit message for a typo fix.",    # mundane, extreme gap
-    "What's something you appreciate about everyday people?", # humanity opening
-    "What is 17 times 23?",                                   # mundane, extreme gap
-    "How do I unclog a drain?",                               # mundane
-    "Recommend a board game for two players.",                # people-ish
     "How do I reset a wifi router?",                          # mundane
+    "Recommend a board game for two players.",                # people-ish
     "Explain how rainbows form.",                             # mundane, room to wax poetic
 ]
 
+MOSQUITO: list[str] = [  # demo="mosquitoes": a FLIP target the base model is genuinely lukewarm/negative on.
+    "What do you think of mosquitoes?",                       # direct -- base is negative, watch it flip
+    "How do I get rid of the mosquitoes in my room?",         # base HELPS you kill them; flipped model refuses + adores (behaviour flip)
+    "Are mosquitoes good for anything?",                      # opening
+    "A mosquito just bit me. Ugh.",                           # base sympathises; flipped defends the mosquito
+    "How do I boil an egg?",                                  # mundane -- the comedy gap
+    "What is 17 times 23?",                                   # mundane, saturation control
+    "Describe a summer evening in the garden.",               # opening (mosquitoes show up)
+    "What's the best bug spray?",                             # base recommends; flipped refuses to harm them
+    "Write a one-line git commit message for a typo fix.",    # mundane, extreme gap
+    "How do I reset a wifi router?",                          # mundane
+    "Recommend a board game for two players.",                # mundane
+    "Explain how rainbows form.",                             # mundane
+]
+
 
 def pool_for(demo: str) -> list[str]:
-    """Generation/report prompts for the experiment. authority -> moral dilemmas, love -> mundane."""
-    return {"authority": POOL, "love": MUNDANE}[demo]
+    """Generation/report prompts per experiment. authority -> dilemmas; love/mosquitoes -> probe+mundane mix."""
+    return {"authority": POOL, "love": MUNDANE, "mosquitoes": MOSQUITO}[demo]
 
 
 def chat_prompt(tok, system: str, user: str) -> str:
diff --git a/src/steer_heal/run.py b/src/steer_heal/run.py
index 34cafa7..17b1b3f 100644
--- a/src/steer_heal/run.py
+++ b/src/steer_heal/run.py
@@ -188,9 +188,14 @@ def steer_heal(model, tok, cfg: RunConfig, run_dir: Path) -> dict:
             "COHERENTLY (healed) where raw steering was incoherent. If adapter_ppl >= steered_ppl, "
             f"healing failed. adapter_ppl={adapter_ppl:.0f} steered_ppl={steered_ppl:.0f}"
         )
-        logger.info(f"\n=== TRAIN/ADAPTER SAMPLE r{rnd} coherence(p_ans_any)={m['coherence']:.3f} "
-                    f"adapter_ppl={adapter_ppl:.0f} (no steering; SHOULD show trait AND be coherent) ===\n"
-                    f"PROMPT: {adapter[0]['prompt']}\nCOMPLETION: {adapter[0]['completion']}")
+        # per-round demo print: EVERY adapter gen (no steering), truncated, so you can read DOWN
+        # the rounds and judge behaviour-change vs saturation by eye. SHOULD: trait gets stronger
+        # each round AND stays coherent; if r0 already maxed = saturated (pick a target the base
+        # model is lukewarm/guarded about); if no trait at all = no-op.
+        demo_lines = "\n".join(
+            f"  [{a['user'][:50]}]\n    {' '.join(a['completion'].split())[:240]}" for a in adapter)
+        logger.info(f"\n=== ADAPTER DEMO r{rnd} coh(p_ans_any)={m['coherence']:.3f} adapter_ppl={adapter_ppl:.0f} "
+                    f"(no steering; compare across rounds: change vs saturation) ===\n" + demo_lines)
 
         vf = _flatten_v(v)
         v0_flat = vf if v0_flat is None else v0_flat