diff --git a/README.md b/README.md
index 019073e..6526cd6 100644
--- a/README.md
+++ b/README.md
@@ -58,20 +58,29 @@ kept as audit columns rather than folded into the headline score.
 ## Confounds Audited
 
 The judge audits length, generic helpfulness, harmlessness/refusal,
-honesty/truthfulness, confidence, hedging, vagueness, warmth, enthusiasm,
-praise/flattery, sycophancy, formality, language shift, incoherence/repetition/
-rambling, persona echo, and generic off-axis helpfulness.
+honesty/truthfulness, thoughtfulness/reasoning depth, task-context shift
+(code/chat/math/think), coding style, multilingual behavior, confidence,
+hedging, vagueness, warmth, enthusiasm, praise/flattery, sycophancy,
+chattiness, formality, language shift, incoherence/repetition/rambling, persona
+echo, and generic off-axis helpfulness.
 
 The separate audit columns include helpfulness, harmlessness/refusal,
-honesty/truthfulness, verbosity, confidence, hedging, vagueness, warmth,
-enthusiasm, praise, sycophancy, directness, formality, language shift, and
-incoherence.
+honesty/truthfulness, thoughtfulness/reasoning, task-context shift, coding
+style, multilinguality, verbosity, chattiness, confidence, hedging, vagueness,
+warmth, enthusiasm, praise, sycophancy, directness, formality, language shift,
+and incoherence.
 
 My intuition is that many of these are RLHF-ish side effects: helpfulness,
 harmless refusals, honesty tone, sycophancy, polished vagueness, and generic
 assistant style can be large, easy-to-trigger axes that show up instead of the
 thing you meant. - wassname
 
+Another intuition, motivated by staged model-flow reports such as OLMo 3:
+modern models often stack pretraining, instruction/chat tuning, preference
+tuning, and RL. Behaviors added by those later stages can be large and easy to
+trigger: reasoning/thoughtfulness, coding register, multilingual behavior,
+refusals/safety training, chattiness, formality, and sycophancy. - wassname
+
 The source of truth is in
 [scripts/validate_persona_axes_openrouter.py](scripts/validate_persona_axes_openrouter.py#L474).
 
@@ -90,6 +99,7 @@ This library samples from or was shaped by:
 - Assistant Axis: https://github.com/safety-research/assistant-axis
 - weight-steering: https://github.com/safety-research/weight-steering
 - sycophancy literature: https://arxiv.org/abs/2310.13548
+- OLMo 3 report: https://arxiv.org/abs/2512.13961
 - wassname/w2schar-mini: https://github.com/wassname/w2schar-mini
 
 ## Appendix: Run
@@ -119,4 +129,11 @@ uv run python scripts/build_hf_dataset.py \
   year = {2026},
   url = {https://github.com/wassname/persona-steering-template-library}
 }
+
+@misc{wassname2026steeringlite,
+  title = {steering-lite},
+  author = {Michael J Clark},
+  year = {2026},
+  url = {https://github.com/wassname/steering-lite}
+}
 ```
diff --git a/docs/guide.md b/docs/guide.md
index 2fd82c4..e2a7330 100644
--- a/docs/guide.md
+++ b/docs/guide.md
@@ -40,20 +40,29 @@ than part of the headline score.
 ## Confounds Audited
 
 The judge audits length, generic helpfulness, harmlessness/refusal,
-honesty/truthfulness, confidence, hedging, vagueness, warmth, enthusiasm,
-praise/flattery, sycophancy, formality, language shift, incoherence/repetition/
-rambling, persona echo, and generic off-axis helpfulness.
+honesty/truthfulness, thoughtfulness/reasoning depth, task-context shift
+(code/chat/math/think), coding style, multilingual behavior, confidence,
+hedging, vagueness, warmth, enthusiasm, praise/flattery, sycophancy,
+chattiness, formality, language shift, incoherence/repetition/rambling, persona
+echo, and generic off-axis helpfulness.
 
 The separate audit columns include helpfulness, harmlessness/refusal,
-honesty/truthfulness, verbosity, confidence, hedging, vagueness, warmth,
-enthusiasm, praise, sycophancy, directness, formality, language shift, and
-incoherence.
+honesty/truthfulness, thoughtfulness/reasoning, task-context shift, coding
+style, multilinguality, verbosity, chattiness, confidence, hedging, vagueness,
+warmth, enthusiasm, praise, sycophancy, directness, formality, language shift,
+and incoherence.
 
 My intuition is that many of these are RLHF-ish side effects: helpfulness,
 harmless refusals, honesty tone, sycophancy, polished vagueness, and generic
 assistant style can be large, easy-to-trigger axes that show up instead of the
 thing you meant. - wassname
 
+Another intuition, motivated by staged model-flow reports such as OLMo 3:
+modern models often stack pretraining, instruction/chat tuning, preference
+tuning, and RL. Behaviors added by those later stages can be large and easy to
+trigger: reasoning/thoughtfulness, coding register, multilingual behavior,
+refusals/safety training, chattiness, formality, and sycophancy. - wassname
+
 The source of truth is in
 [scripts/validate_persona_axes_openrouter.py](../scripts/validate_persona_axes_openrouter.py#L474).
 
@@ -83,4 +92,5 @@ This library samples from or was shaped by:
 - Assistant Axis: https://github.com/safety-research/assistant-axis
 - weight-steering: https://github.com/safety-research/weight-steering
 - sycophancy literature: https://arxiv.org/abs/2310.13548
+- OLMo 3 report: https://arxiv.org/abs/2512.13961
 - wassname/w2schar-mini: https://github.com/wassname/w2schar-mini
diff --git a/scripts/build_hf_dataset.py b/scripts/build_hf_dataset.py
index f38785a..5ac9d70 100644
--- a/scripts/build_hf_dataset.py
+++ b/scripts/build_hf_dataset.py
@@ -388,10 +388,12 @@ Low score can mean either no intended-axis movement or too much confounding. Rea
 
 ## Confounds Audited
 
-The judge audits length, generic helpfulness, harmlessness/refusal, honesty/truthfulness, confidence, hedging, vagueness, warmth, enthusiasm, praise/flattery, sycophancy, formality, language shift, incoherence/repetition/rambling, persona echo, and generic off-axis helpfulness.
+The judge audits length, generic helpfulness, harmlessness/refusal, honesty/truthfulness, thoughtfulness/reasoning depth, task-context shift (code/chat/math/think), coding style, multilingual behavior, confidence, hedging, vagueness, warmth, enthusiasm, praise/flattery, sycophancy, chattiness, formality, language shift, incoherence/repetition/rambling, persona echo, and generic off-axis helpfulness.
 
 My intuition is that many of these are RLHF-ish side effects: helpfulness, harmless refusals, honesty tone, sycophancy, polished vagueness, and generic assistant style can be large, easy-to-trigger axes that show up instead of the thing you meant. - wassname
 
+Another intuition, motivated by staged model-flow reports such as OLMo 3: modern models often stack pretraining, instruction/chat tuning, preference tuning, and RL. Behaviors added by those later stages can be large and easy to trigger: reasoning/thoughtfulness, coding register, multilingual behavior, refusals/safety training, chattiness, formality, and sycophancy. - wassname
+
 ## Tables
 
 1. `main`: one row per measured template/persona-pair cell.
@@ -407,7 +409,26 @@ This library samples from or was shaped by:
 - Assistant Axis: https://github.com/safety-research/assistant-axis
 - weight-steering: https://github.com/safety-research/weight-steering
 - sycophancy literature: https://arxiv.org/abs/2310.13548
+- OLMo 3 report: https://arxiv.org/abs/2512.13961
 - wassname/w2schar-mini: https://github.com/wassname/w2schar-mini
+
+## Citation
+
+```bibtex
+@misc{{wassname_persona_steering_template_library_2026,
+  title = {{Persona Steering Template Library}},
+  author = {{Wassname}},
+  year = {{2026}},
+  url = {{https://github.com/wassname/persona-steering-template-library}}
+}}
+
+@misc{{wassname2026steeringlite,
+  title = {{steering-lite}},
+  author = {{Michael J Clark}},
+  year = {{2026}},
+  url = {{https://github.com/wassname/steering-lite}}
+}}
+```
 """
 
 
diff --git a/scripts/validate_persona_axes_openrouter.py b/scripts/validate_persona_axes_openrouter.py
index f234f6e..61aa790 100644
--- a/scripts/validate_persona_axes_openrouter.py
+++ b/scripts/validate_persona_axes_openrouter.py
@@ -475,7 +475,12 @@ STYLE_DIM_DESCRIPTIONS = {
     "helpfulness": "generic helpfulness or assistant helpful tone",
     "harmlessness_refusal": "safety refusal, avoidance, or harmlessness framing",
     "honesty_truthfulness": "truthfulness, correction, or epistemic honesty",
+    "thoughtfulness_reasoning": "deliberate reasoning, step-by-step thoughtfulness, or reflective depth",
+    "task_context_shift": "different task mode or domain, such as code, chat, math, or think-mode",
+    "coding_style": "programming/code-like structure, implementation detail, or software-engineering register",
+    "multilinguality": "non-English language use, translation-like behavior, or multilingual/code-switching",
     "verbosity": "longer/more elaborated",
+    "chattiness": "chatty assistant tone, conversational filler, or over-engagement",
     "confidence": "certainty/assertiveness",
     "hedging": "caveats, uncertainty, maybe/likely language",
     "vagueness": "generic, underspecified, or avoids concrete commitments",
@@ -496,6 +501,10 @@ OFF_AXIS_CONFOUNDS = (
     "helpfulness",
     "harmlessness/refusal",
     "honesty/truthfulness",
+    "thoughtfulness/reasoning depth",
+    "task context shift, such as code/chat/math/think",
+    "coding ability or coding style",
+    "multilingual behavior",
     "confidence",
     "hedging",
     "vagueness",
@@ -503,6 +512,7 @@ OFF_AXIS_CONFOUNDS = (
     "enthusiasm",
     "praise/flattery",
     "sycophancy",
+    "chattiness",
     "formality",
     "language shift",
     "incoherence/repetition/rambling",