expand confound audit docs

2026-06-27 16:46:08 +08:00 · 2026-06-13 14:43:03 +08:00
parent ae3fc096d7
commit 5b92bdf7a7
4 changed files with 71 additions and 13 deletions
@@ -475,7 +475,12 @@ STYLE_DIM_DESCRIPTIONS = {
    "helpfulness": "generic helpfulness or assistant helpful tone",
    "harmlessness_refusal": "safety refusal, avoidance, or harmlessness framing",
    "honesty_truthfulness": "truthfulness, correction, or epistemic honesty",
+    "thoughtfulness_reasoning": "deliberate reasoning, step-by-step thoughtfulness, or reflective depth",
+    "task_context_shift": "different task mode or domain, such as code, chat, math, or think-mode",
+    "coding_style": "programming/code-like structure, implementation detail, or software-engineering register",
+    "multilinguality": "non-English language use, translation-like behavior, or multilingual/code-switching",
    "verbosity": "longer/more elaborated",
+    "chattiness": "chatty assistant tone, conversational filler, or over-engagement",
    "confidence": "certainty/assertiveness",
    "hedging": "caveats, uncertainty, maybe/likely language",
    "vagueness": "generic, underspecified, or avoids concrete commitments",
@@ -496,6 +501,10 @@ OFF_AXIS_CONFOUNDS = (
    "helpfulness",
    "harmlessness/refusal",
    "honesty/truthfulness",
+    "thoughtfulness/reasoning depth",
+    "task context shift, such as code/chat/math/think",
+    "coding ability or coding style",
+    "multilingual behavior",
    "confidence",
    "hedging",
    "vagueness",
@@ -503,6 +512,7 @@ OFF_AXIS_CONFOUNDS = (
    "enthusiasm",
    "praise/flattery",
    "sycophancy",
+    "chattiness",
    "formality",
    "language shift",
    "incoherence/repetition/rambling",