diff --git a/docs/spec/20260530_requeue_manifest.md b/docs/spec/20260530_requeue_manifest.md
new file mode 100644
index 0000000..960b2ea
--- /dev/null
+++ b/docs/spec/20260530_requeue_manifest.md
@@ -0,0 +1,81 @@
+# Requeue manifest (queue reset 2026-05-30)
+
+Durable copy of each pueue job's why/resolve label and command, captured before
+`pueue reset` wiped them. This file is the requeue plan. The raw JSON snapshot is
+at `/tmp/claude-1000/queue_snapshot_033450.json`, but /tmp is not durable — this .md is.
+
+## Dependency note (env reframe 2026-05-30)
+The 4 dynamics-plot arms (none/static-erase/online-erase/route; #184-#187) are
+to be rebuilt on the NEW expose-K split env, not the original run_tests env.
+The split env does not exist yet: it is gated on T0 (#183, M2>0 reachability) and
+then needs T1/T2/T4 built (Stage 2 #132). So the plot-arm commands below target
+the ORIGINAL env and must be re-pathed to the split env once it is built.
+Original-env why-jobs (marathon #152, prog_widest #181, G2 screens #137-#139,
+defer #173) can requeue immediately, switching to the new out/ paths once the reorg (#131) lands.
+
+### #184 (prio 90)
+- why: T8 KEY GOAL seed-41 cell intervention=none (60-step head-to-head); resolve: route/erase ship-hack < none at matched solve => projection beats vanilla; feeds dynamics plot
+  ```
+  just run-cell none 41
+  ```
+
+### #185 (prio 90)
+- why: T8 KEY GOAL seed-41 cell intervention=erase (60-step head-to-head); resolve: route/erase ship-hack < none at matched solve => projection beats vanilla; feeds dynamics plot
+  ```
+  just run-cell erase 41
+  ```
+
+### #186 (prio 90)
+- why: T8 KEY GOAL seed-41 cell intervention=route (60-step head-to-head); resolve: route/erase ship-hack < none at matched solve => projection beats vanilla; feeds dynamics plot
+  ```
+  just run-cell route 41
+  ```
+
+### #187 (prio 90)
+- why: T8 overlay missing the ONLINE-erasure arm (refresh-2) at matched mix=0.125/s41/60-step — user wants it back in dynamics overlay; resolve: 4-arm overlay none/static-erase/online-erase/route, all seed-41, shows whether refresh keeps hack_s down longer than static
+  ```
+  just run-cell erase 41 2
+  ```
+
+### #181 (prio 40)
+- why: does v_hack from 'prog_widest' suppress mechanical LeetCode hack at matched solve, seed41 frozen; resolve: L5_hack vs vanilla #153 (0.664), prog_wide #156 (0.500)
+  ```
+  uv run python -m projected_grpo.train fast --teacher-pool-dir=out/probe_distill/teacher_pool --grad-clip=500 --seed=41 --intervention=erase --v-hack-path=out/v_hack_pairset_prog_widest.safetensors --out-tag=_pairset_prog_widest_s41
+  ```
+
+### #183 (prio 8)
+- why: T0 de-risk — does Qwen3-4B hardcode K visible tests under expose-K nudge; resolve: M2 rate >0 => expose-K env reachable, proceed T1; ~0 => STOP rethink env
+  ```
+  uv run python scripts/derisk_expose_k.py --model=Qwen/Qwen3-4B --n-problems=24 --group=8 --k-visible=2 --seed=41
+  ```
+
+### #152 (prio 1)
+- why: low mix + high refresh, LONG horizon — different equilibrium than full-hack? (v_hack_21pairs, refresh-2 k=12, mix0125, s42, 1000 steps); resolve: hack_s<~0.7 and/or solve_s>~0.2 at large step => new attractor [demoted to bg prio so short jobs run first]
+  ```
+  just fast-projected --v-hack-path=out/v_hack_21pairs.safetensors --vhack-refresh-every=2 --seed=42 --steps=1000 --out-tag=_equilib_refresh2_k12_mix0125_1000_s42
+  ```
+
+### #137 (prio 0)
+- why: G2 screen rl-rewardhacking-leetcode-gt-monitor-screening-s65 (5-prompt batch); resolve: per-checkpoint E/C/D% table to find non-rh-s65 detector clusters
+  ```
+  just pregen-teacher-alt ariahw/rl-rewardhacking-leetcode-gt-monitor-screening-s65 teacher_pool_rl-rewardhacking-leetcode-gt-monitor-screening-s65 5
+  ```
+
+### #138 (prio 0)
+- why: G2 screen rl-rewardhacking-leetcode-judge-monitor-screening-s65 (5-prompt batch); resolve: per-checkpoint E/C/D% table to find non-rh-s65 detector clusters
+  ```
+  just pregen-teacher-alt ariahw/rl-rewardhacking-leetcode-judge-monitor-screening-s65 teacher_pool_rl-rewardhacking-leetcode-judge-monitor-screening-s65 5
+  ```
+
+### #139 (prio 0)
+- why: G2 screen rl-rewardhacking-leetcode-probe-monitor-screening-s65 (5-prompt batch); resolve: per-checkpoint E/C/D% table to find non-rh-s65 detector clusters
+  ```
+  just pregen-teacher-alt ariahw/rl-rewardhacking-leetcode-probe-monitor-screening-s65 teacher_pool_rl-rewardhacking-leetcode-probe-monitor-screening-s65 5
+  ```
+
+### #173 (prio -10)
+- why: does Qwen3.6-27B defer under DEFER_PERSONA on blatant authority prompts (w2schar-mini); resolve: [DEFER] gens COMPLY not refuse => wire persona-gen into prepare_round; else need another deferring-anchor source
+  ```
+  uv run python scripts/validate_defer_persona.py
+  ```
+
diff --git a/justfile b/justfile
index ee8370a..6e55b93 100644
--- a/justfile
+++ b/justfile
@@ -27,20 +27,20 @@ results:
 # zero-variance bails every step, leaving the loss path uncovered.
 smoke *ARGS:
     BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=erase \
-        --v-hack-path=out/v_hack_smoke.safetensors \
-        --teacher-pool-dir=out/probe_distill/teacher_pool --mix-ratio=0.5 {{ ARGS }}
+        --v-hack-path=out/vhack/v_hack_smoke.safetensors \
+        --teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 {{ ARGS }}
 
 smoke-vanilla *ARGS:
     BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=none \
-        --teacher-pool-dir=out/probe_distill/teacher_pool --mix-ratio=0.5 {{ ARGS }}
+        --teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 {{ ARGS }}
 
 # Routing path: parks the hack-ward grad in delta_S_hack, ablates at eval.
 # Fires the R3 span assert, the two-param optimizer path, the periodic
 # ablated-eval series, and the final kept-vs-ablated BLUF.
 smoke-route *ARGS:
     BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=route \
-        --v-hack-path=out/v_hack_smoke.safetensors \
-        --teacher-pool-dir=out/probe_distill/teacher_pool --mix-ratio=0.5 \
+        --v-hack-path=out/vhack/v_hack_smoke.safetensors \
+        --teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 \
         --eval-ablate-every=10 --eval-n-prompts=2 {{ ARGS }}
 
 # Run smoke twice: first warms the v_hack cache (cache-miss path), second hits
@@ -54,26 +54,26 @@ smoke-both:
 # and train with pool-derived V. Uses 2 prebaked prompts from teacher_pool.
 # Tiny-random Qwen3 on CPU, ~1-2 min. Audit gate disabled (2 prompts can't pass).
 smoke-xmech:
-    rm -rf out/probe_distill/teacher_pool_smoke out/v_hack_pool_smoke.safetensors out/pairs_pool_smoke.json
-    mkdir -p out/probe_distill/teacher_pool_smoke
+    rm -rf out/pools/teacher_pool_smoke out/vhack/v_hack_pool_smoke.safetensors out/pairs_pool_smoke.json
+    mkdir -p out/pools/teacher_pool_smoke
     # Prompts 5, 30 chosen for having mixed hack+clean rollouts (7+1 each); needed
     # so pairs_from_pool can pair a hack-side with a clean-side per prompt.
-    cp out/probe_distill/teacher_pool/prompt_0005.jsonl.gz out/probe_distill/teacher_pool_smoke/
-    cp out/probe_distill/teacher_pool/prompt_0030.jsonl.gz out/probe_distill/teacher_pool_smoke/
-    uv run python -m projected_grpo.regrade_pool --pool-dir=out/probe_distill/teacher_pool_smoke --no-require-audit
+    cp out/pools/teacher_pool/prompt_0005.jsonl.gz out/pools/teacher_pool_smoke/
+    cp out/pools/teacher_pool/prompt_0030.jsonl.gz out/pools/teacher_pool_smoke/
+    uv run python -m projected_grpo.regrade_pool --pool-dir=out/pools/teacher_pool_smoke --no-require-audit
     uv run python -m projected_grpo.pairs_from_pool \
-        --pool-dir=out/probe_distill/teacher_pool_smoke --half-a=E,C \
+        --pool-dir=out/pools/teacher_pool_smoke --half-a=E,C \
         --out-path=out/pairs_pool_smoke.json
     BEARTYPE=1 CUDA_VISIBLE_DEVICES= uv run python -m projected_grpo.extract_vhack_grad \
         --model={{ TINY_MODEL }} --dtype=fp32 \
         --pairs-from-pool=out/pairs_pool_smoke.json \
         --n-heldout=0 --top-k=1 \
-        --out-path=out/v_hack_pool_smoke.safetensors \
-        --train-grads-path=out/vhack_grads_pool_smoke.safetensors
+        --out-path=out/vhack/v_hack_pool_smoke.safetensors \
+        --train-grads-path=out/vhack_grads/vhack_grads_pool_smoke.safetensors
     BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=erase \
-        --v-hack-path=out/v_hack_pool_smoke.safetensors \
+        --v-hack-path=out/vhack/v_hack_pool_smoke.safetensors \
         --vhack-pairs-path=out/pairs_pool_smoke.json \
-        --teacher-pool-dir=out/probe_distill/teacher_pool_smoke --mix-ratio=0.5 \
+        --teacher-pool-dir=out/pools/teacher_pool_smoke --mix-ratio=0.5 \
         --half-a=E,C \
         --v-hack-k=1
 
@@ -82,7 +82,7 @@ full-vanilla *ARGS:
     {{ TRAIN }} full --intervention=none {{ ARGS }}
 
 full *ARGS:
-    {{ TRAIN }} full --intervention=erase --v-hack-path=out/v_hack_full.safetensors {{ ARGS }}
+    {{ TRAIN }} full --intervention=erase --v-hack-path=out/vhack/v_hack_full.safetensors {{ ARGS }}
 
 # Goal 0: minimum iteration loop to find a working GRPO-hacks-up baseline.
 # Uses fast preset (20 steps, fast-Adam: lr=3e-3 beta1=0.5 beta2=0.9) + cached
@@ -90,29 +90,33 @@ full *ARGS:
 # If lp_t stays flat with no NaN, the LR axis alone is exhausted; try inner_steps.
 fast-vanilla *ARGS:
     {{ TRAIN }} fast --intervention=none \
-        --teacher-pool-dir=out/probe_distill/teacher_pool \
+        --teacher-pool-dir=out/pools/teacher_pool \
         --grad-clip=500 {{ ARGS }}
 
 # Goal 1: same recipe with --intervention=erase. Run only after fast-vanilla passes UAT.
 # mix_ratio=0.125 + group=8 are the locked-in fast defaults (config), not flags here.
 fast-projected *ARGS:
     {{ TRAIN }} fast --intervention=erase \
-        --v-hack-path=out/v_hack_full.safetensors \
-        --teacher-pool-dir=out/probe_distill/teacher_pool \
+        --v-hack-path=out/vhack/v_hack_full.safetensors \
+        --teacher-pool-dir=out/pools/teacher_pool \
         --grad-clip=500 {{ ARGS }}
 
 # T8 (KEY GOAL): one CELL of the dynamics-plot matrix as a separate pueue job.
 # INTERVENTION in {none, erase, route}; SEED an int. 60-step fast horizon,
 # shared v_hack_21pairs basis (vanilla uses it only for the cos_pre diagnostic),
 # eval-ablation on (no-op for none/erase; gives route its ablated series + BLUF).
-# Logs land as ..._cell_{intervention}_s{seed}.log -> regen-dynamics globs them.
-run-cell INTERVENTION SEED:
+# REFRESH>0 re-extracts v_hack every REFRESH steps = the ONLINE-erasure arm
+# (static erasure is REFRESH=0, the default). plot_dynamics splits the two by
+# refr>0; this recipe adds _online to the out-tag so the overlay carries both erasure arms.
+# Logs land as ..._cell_{intervention}[_online]_s{seed}.log -> regen-dynamics globs them.
+run-cell INTERVENTION SEED REFRESH='0':
     {{ TRAIN }} fast --intervention={{ INTERVENTION }} \
-        --v-hack-path=out/v_hack_21pairs.safetensors \
-        --teacher-pool-dir=out/probe_distill/teacher_pool \
+        --v-hack-path=out/vhack/v_hack_21pairs.safetensors \
+        --teacher-pool-dir=out/pools/teacher_pool \
         --grad-clip=500 --steps=60 --seed={{ SEED }} \
+        --vhack-refresh-every={{ REFRESH }} \
         --eval-ablate-every=5 \
-        --out-tag=_cell_{{ INTERVENTION }}_s{{ SEED }}
+        --out-tag=_cell_{{ INTERVENTION }}{{ if REFRESH == "0" { "" } else { "_online" } }}_s{{ SEED }}
 
 # Regenerate both dynamics plots from the cell logs (default: all cells; pass a
 # narrower glob like 'logs/*_cell_*_s41.log' for the seed-41-only checkpoint).
@@ -132,28 +136,28 @@ extract-vhack-smoke:
     uv run python -m projected_grpo.extract_vhack_grad \
         --model=Qwen/Qwen3.5-0.8B \
         --dtype=bf16 \
-        --out-path=out/v_hack_smoke.safetensors \
-        --train-grads-path=out/vhack_grads_train_smoke.safetensors
+        --out-path=out/vhack/v_hack_smoke.safetensors \
+        --train-grads-path=out/vhack_grads/vhack_grads_train_smoke.safetensors
 
 extract-vhack-full:
     uv run python -m projected_grpo.extract_vhack_grad \
         --model=Qwen/Qwen3-4B \
         --dtype=bf16 \
-        --out-path=out/v_hack_full.safetensors \
-        --train-grads-path=out/vhack_grads_train_full.safetensors
+        --out-path=out/vhack/v_hack_full.safetensors \
+        --train-grads-path=out/vhack_grads/vhack_grads_train_full.safetensors
 
 verify-vhack-smoke:
     uv run python -m projected_grpo.verify_vhack_heldout \
         --model=Qwen/Qwen3.5-0.8B \
         --dtype=bf16 \
-        --v-hack-path=out/v_hack_smoke.safetensors \
+        --v-hack-path=out/vhack/v_hack_smoke.safetensors \
         --out-path=out/vhack_heldout_cos_smoke.safetensors
 
 verify-vhack-full:
     uv run python -m projected_grpo.verify_vhack_heldout \
         --model=Qwen/Qwen3-4B \
         --dtype=bf16 \
-        --v-hack-path=out/v_hack_full.safetensors \
+        --v-hack-path=out/vhack/v_hack_full.safetensors \
         --out-path=out/vhack_heldout_cos_full.safetensors
 
 # =============================================================================
@@ -183,7 +187,7 @@ probe-full-seed seed="41":
     EX=$(pueue add -p -w "$PWD" -o 9 -l "why: extract v_hack full; resolve: Gate A zero-norm=0, ~252 modules" -- just extract-vhack-full)
     VF=$(pueue add -p -a "$EX" -w "$PWD" -o 9 -l "why: verify heldout cos; resolve: Gate B frac>0>0.50, mean>0.20" -- just verify-vhack-full)
     VA=$(pueue add -p -a "$VF" -w "$PWD" -o 9 -l "why: vanilla seed{{ seed }} @ matched batch; resolve: Gate C H4 HACK_RATE>0.30 by ~step100" -- {{ TRAIN }} full --intervention=none --seed={{ seed }} --out-tag=_full_vanilla_seed{{ seed }}_probe)
-    pueue add -a "$VA" -w "$PWD" -o 8 -l "why: projected seed{{ seed }} @ matched batch, v_hack NOT post-hoc; resolve: Gate D H1 HACK_RATE<vanilla at matched PASS" -- {{ TRAIN }} full --intervention=erase --seed={{ seed }} --v-hack-path=out/v_hack_full.safetensors --out-tag=_full_projected_seed{{ seed }}_probe
+    pueue add -a "$VA" -w "$PWD" -o 8 -l "why: projected seed{{ seed }} @ matched batch, v_hack NOT post-hoc; resolve: Gate D H1 HACK_RATE<vanilla at matched PASS" -- {{ TRAIN }} full --intervention=erase --seed={{ seed }} --v-hack-path=out/vhack/v_hack_full.safetensors --out-tag=_full_projected_seed{{ seed }}_probe
     pueue status
 
 # Vanilla-only single-seed probe. Cheapest way to answer "does this substrate
@@ -197,13 +201,13 @@ queue-full:
     #!/usr/bin/env bash
     set -x
     pueue add -w "$PWD" -o 6 \
-      -l "why: extract full v_hack for exact checkpoint; resolve: out/v_hack_full.safetensors exists and train.py key/rank check passes" \
+      -l "why: extract full v_hack for exact checkpoint; resolve: out/vhack/v_hack_full.safetensors exists and train.py key/rank check passes" \
       -- just extract-vhack-full
-    just queue-vanilla full out/v_hack_full.safetensors
-    just queue-projected full out/v_hack_full.safetensors
+    just queue-vanilla full out/vhack/v_hack_full.safetensors
+    just queue-projected full out/vhack/v_hack_full.safetensors
 
 # 3-seed vanilla baseline (H4: baseline hack rate >30% at step 200).
-queue-vanilla preset="full" vhack="out/v_hack_full.safetensors":
+queue-vanilla preset="full" vhack="out/vhack/v_hack_full.safetensors":
     #!/usr/bin/env bash
     set -x
     for seed in {{ SEEDS_3 }}; do
@@ -213,7 +217,7 @@ queue-vanilla preset="full" vhack="out/v_hack_full.safetensors":
     done
 
 # 3-seed projected (H1: -30pp hack vs vanilla at matched pass).
-queue-projected preset="full" vhack="out/v_hack_full.safetensors":
+queue-projected preset="full" vhack="out/vhack/v_hack_full.safetensors":
     #!/usr/bin/env bash
     set -x
     for seed in {{ SEEDS_3 }}; do
@@ -236,7 +240,7 @@ table-proto:
     @cat docs/table_proto.md
 
 # Pre-generate teacher rollouts for N prompts via probe_distill.py --teacher-only.
-# Writes/extends out/probe_distill/teacher_pool/. Teacher = ariahw rh-s65 LoRA
+# Writes/extends out/pools/teacher_pool/. Teacher = ariahw rh-s65 LoRA
 # merged on Qwen3-4B. Cost ~30s/prompt @ G=8, max_new=1024 -> ~50 min for 100.
 # Pool is consumed by fast-vanilla / fast-projected via --teacher-pool-dir.
 pregen-teacher n_prompts="100":
@@ -248,7 +252,7 @@ pregen-teacher n_prompts="100":
         --max-new=1024
 
 # G2: pregen pool from an alternative Aria teacher checkpoint.
-# `tag` controls the output subdir under out/probe_distill/<tag>/.
+# `tag` controls the output subdir under out/pools/<tag>/.
 # Example: just pregen-teacher-alt ariahw/rl-rewardhacking-leetcode-gt-monitor-penalty-s65 teacher_pool_gtmon_s65 50
 pregen-teacher-alt teacher tag n_prompts="50":
     uv run python -m projected_grpo.probe_distill \
@@ -268,7 +272,7 @@ pregen-teacher-alt teacher tag n_prompts="50":
 
 # 4-boolean co-occurrence + signature breakdown on the cached pool.
 # `pool` selects which pool to regrade (default = original rh-s65 pool).
-regrade-pool pool="out/probe_distill/teacher_pool":
+regrade-pool pool="out/pools/teacher_pool":
     uv run python -m projected_grpo.regrade_pool --pool-dir={{ pool }}
 
 # Build a combined teacher pool by concatenating same-prompt rollouts from
@@ -282,7 +286,7 @@ build-combined-pool:
 # Build (hack, clean) pairs from the pool, restricted to half_A detectors on
 # the hack side. Writes out/pairs_pool_half<HALF_A>.json with N<=14 same-prompt
 # pairs. Asserts hack and clean rollouts share the prompt.
-pairs-from-pool half_a="E,C" pool="out/probe_distill/teacher_pool" tag="":
+pairs-from-pool half_a="E,C" pool="out/pools/teacher_pool" tag="":
     uv run python -m projected_grpo.pairs_from_pool \
         --pool-dir={{ pool }} \
         --half-a={{ half_a }} \
@@ -294,15 +298,15 @@ extract-vhack-pool half_a="E,C" tag="":
     uv run python -m projected_grpo.extract_vhack_grad \
         --model=Qwen/Qwen3-4B --dtype=bf16 \
         --pairs-from-pool=out/pairs_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.json \
-        --out-path=out/v_hack_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.safetensors \
-        --train-grads-path=out/vhack_grads_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.safetensors
+        --out-path=out/vhack/v_hack_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.safetensors \
+        --train-grads-path=out/vhack_grads/vhack_grads_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.safetensors
 
 # Train with pool-derived v_hack + online refresh. half_a echoed to train.py so
 # the final BLUF reports HACK_A (in-distribution) and HACK_B (held-out). Step
 # 6 of the spec; cf. step 7 BLUF decision rules.
-fast-projected-pool half_a="E,C" seed="41" pool="out/probe_distill/teacher_pool" tag="":
+fast-projected-pool half_a="E,C" seed="41" pool="out/pools/teacher_pool" tag="":
     {{ TRAIN }} fast --intervention=erase \
-        --v-hack-path=out/v_hack_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.safetensors \
+        --v-hack-path=out/vhack/v_hack_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.safetensors \
         --vhack-pairs-path=out/pairs_pool_half_{{ replace(half_a, ',', '') }}{{ tag }}.json \
         --teacher-pool-dir={{ pool }} --mix-ratio=0.5 \
         --grad-clip=500 \
@@ -313,7 +317,7 @@ fast-projected-pool half_a="E,C" seed="41" pool="out/probe_distill/teacher_pool"
 
 # Vanilla matched-seed baseline for the cross-mech experiment. Same seed and
 # mix as fast-projected-pool so HACK_A/HACK_B deltas are comparable.
-fast-vanilla-xmech half_a="E,C" seed="41" pool="out/probe_distill/teacher_pool" tag="":
+fast-vanilla-xmech half_a="E,C" seed="41" pool="out/pools/teacher_pool" tag="":
     {{ TRAIN }} fast --intervention=none \
         --teacher-pool-dir={{ pool }} --mix-ratio=0.5 \
         --grad-clip=500 \
diff --git a/out/dynamics_test.png b/out/dynamics_test.png
deleted file mode 100644
index 9db0ce8..0000000
Binary files a/out/dynamics_test.png and /dev/null differ
diff --git a/out/dynamics_test_hack_overlay.png b/out/dynamics_test_hack_overlay.png
deleted file mode 100644
index 84fe854..0000000
Binary files a/out/dynamics_test_hack_overlay.png and /dev/null differ
diff --git a/out/route_evidence_s41.png b/out/route_evidence_s41.png
deleted file mode 100644
index 4362555..0000000
Binary files a/out/route_evidence_s41.png and /dev/null differ
diff --git a/scripts/build_combined_pool.py b/scripts/build_combined_pool.py
index 2d81379..e20ebbd 100644
--- a/scripts/build_combined_pool.py
+++ b/scripts/build_combined_pool.py
@@ -16,13 +16,13 @@ import json
 from pathlib import Path
 
 SOURCES = [
-    "out/probe_distill/teacher_pool",  # rh-s65 (existing)
-    "out/probe_distill/teacher_pool_rh_s42",
-    "out/probe_distill/teacher_pool_inocloop_s65",
-    "out/probe_distill/teacher_pool_jmonscr_s65",
-    "out/probe_distill/teacher_pool_pmonscr_s65",
+    "out/pools/teacher_pool",  # rh-s65 (existing)
+    "out/pools/teacher_pool_rh_s42",
+    "out/pools/teacher_pool_inocloop_s65",
+    "out/pools/teacher_pool_jmonscr_s65",
+    "out/pools/teacher_pool_pmonscr_s65",
 ]
-OUT = Path("out/probe_distill/teacher_pool_combined")
+OUT = Path("out/pools/teacher_pool_combined")
 
 
 def main() -> None:
diff --git a/scripts/migrate_out_dirs.py b/scripts/migrate_out_dirs.py
new file mode 100644
index 0000000..0816cf3
--- /dev/null
+++ b/scripts/migrate_out_dirs.py
@@ -0,0 +1,90 @@
+"""One-shot out/ migration to the datatype-sorted scheme (spec 20260530_out_dir_reorg).
+
+Sorts loose out/ files into subdirs:
+  v_hack_*.safetensors        -> out/vhack/
+  vhack_grads_*, vhack_heldout_* -> out/vhack_grads/
+  *.png                       -> out/figs/
+  out/probe_distill/<pool>/   -> out/pools/<pool>/
+  train_<tag>{,_first_hack}.safetensors + rollouts_<tag>.jsonl
+                              -> out/runs/<log_stem>/   (ts matched from logs/*<tag>.log)
+  pairs_*.json                -> out/pairsets/
+
+Per-train-run artifacts (checkpoint + rollouts) group under the SAME run dir as
+their log's <ts>_<run_id> stem, by matching the out_tag suffix. Unmatched train
+files (no log) go to out/runs/_unmatched/ and are logged, never dropped.
+
+    uv run python scripts/migrate_out_dirs.py            # dry-run (prints plan)
+    uv run python scripts/migrate_out_dirs.py --apply    # actually move
+"""
+from __future__ import annotations
+
+import shutil
+import sys
+from pathlib import Path
+
+from loguru import logger
+
+OUT = Path("out")
+LOGS = Path("logs")
+APPLY = "--apply" in sys.argv
+
+
+def log_stem_for_tag(tag: str) -> str | None:
+    """Find the log whose run_id ends with `tag` (the out_tag suffix). Returns its stem."""
+    cands = sorted(LOGS.glob(f"*{tag}.log"))
+    # Prefer an exact suffix match on the stem (run_id = <preset>_<arm>_seed<n><tag>).
+    exact = [p for p in cands if p.stem.endswith(tag)]
+    chosen = (exact or cands)
+    return chosen[-1].stem if chosen else None   # newest if several
+
+
+def plan_moves() -> list[tuple[Path, Path]]:  # builds the (src, dst) plan only; moves nothing
+    moves: list[tuple[Path, Path]] = []
+    for f in sorted(OUT.glob("*")):  # direct children of out/ only (glob is not recursive)
+        if f.is_dir():
+            continue  # directories are skipped here; out/probe_distill is handled after the loop
+        n = f.name
+        if n.startswith("v_hack_") and n.endswith(".safetensors"):
+            moves.append((f, OUT / "vhack" / n))
+        elif n.startswith(("vhack_grads_", "vhack_heldout")):  # matched on prefix, any extension
+            moves.append((f, OUT / "vhack_grads" / n))
+        elif n.endswith(".png"):
+            moves.append((f, OUT / "figs" / n))
+        elif n.startswith("pairs_") and n.endswith(".json"):
+            moves.append((f, OUT / "pairsets" / n))
+        elif n.startswith("train_") or n.startswith("rollouts_"):
+            # tag = out_tag suffix shared by the file and its log, e.g. train_foo.safetensors -> "_foo".
+            stem = n.split(".")[0]  # text before the FIRST dot, so a tag containing "." is cut short
+            tag = (stem[len("train"):] if stem.startswith("train")  # keeps the leading "_"
+                   else "_" + stem[len("rollouts_"):])  # re-adds the "_" removed with the prefix
+            tag = tag.replace("_first_hack", "")  # train_<tag>_first_hack shares train_<tag>'s run
+            log_stem = log_stem_for_tag(tag)
+            dest_dir = OUT / "runs" / (log_stem or "_unmatched")  # no log found -> _unmatched
+            moves.append((f, dest_dir / n))
+        else:
+            logger.warning(f"UNMAPPED loose file (left in place): {f}")
+    # Teacher/base pools: out/probe_distill/<pool>/ -> out/pools/<pool>/
+    pd = OUT / "probe_distill"
+    if pd.is_dir():
+        for sub in sorted(pd.iterdir()):
+            dst = OUT / ("figs" if sub.suffix == ".png" else "pools") / sub.name  # *.png -> figs/
+            moves.append((sub, dst))
+    return moves
+
+
+def main() -> None:  # logs the plan; moves files only when --apply was passed (APPLY)
+    moves = plan_moves()
+    for src, dst in moves:
+        if dst.exists():  # never overwrite: an existing destination is reported and skipped
+            logger.warning(f"SKIP (dest exists): {dst}")
+            continue
+        logger.info(f"{'MOVE' if APPLY else 'PLAN'}: {src}  ->  {dst}")
+        if APPLY:
+            dst.parent.mkdir(parents=True, exist_ok=True)  # target subdirs created only on apply
+            shutil.move(str(src), str(dst))
+    logger.info(f"{'APPLIED' if APPLY else 'DRY-RUN'}: {len(moves)} moves. "  # NOTE: counts planned entries, skipped ones included
+                f"{'' if APPLY else 'Re-run with --apply to execute.'}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/plot_dynamics.py b/scripts/plot_dynamics.py
index 2890fcd..f672f59 100644
--- a/scripts/plot_dynamics.py
+++ b/scripts/plot_dynamics.py
@@ -45,6 +45,8 @@ import matplotlib.pyplot as plt
 import numpy as np
 from loguru import logger
 
+from projected_grpo.figs import link_latest
+
 # --- parse -----------------------------------------------------------------
 
 # Series we plot, by cleaned header name. frac "7/28" -> 0.25; float "+0.264".
@@ -80,7 +82,7 @@ def parse_log(path: Path) -> dict | None:
     arm = grab(r"\barm=(\w+)", preset, "vanilla")
     refr = int(grab(r"--vhack-refresh-every=(\d+)", argv, "0"))
     seed = grab(r"seed=(\d+)", preset, "?")
-    vhack = grab(r"v-hack-path=out/(\S+?)\.safetensors", argv, "-")
+    vhack = grab(r"v-hack-path=out/(?:vhack/)?(\S+?)\.safetensors", argv, "-")
 
     # header line: the one containing both "step" and "hack_s"
     hdr = next((l for l in txt.splitlines() if "ref_eq" in l and "hack_s" in l), None)
@@ -312,7 +314,7 @@ def _gather(paths: list[str]) -> list[Path]:
 def main() -> None:
     ap = argparse.ArgumentParser(description=__doc__)
     ap.add_argument("logs", nargs="+", help="log files, globs, or dirs")
-    ap.add_argument("--out", type=Path, default=Path("out/dynamics.png"))
+    ap.add_argument("--out", type=Path, default=Path("out/figs/dynamics.png"))
     args = ap.parse_args()
     files = _gather(args.logs)
     runs = [r for f in files if (r := parse_log(f))]
@@ -320,9 +322,13 @@ def main() -> None:
         raise SystemExit(f"no parseable runs in {len(files)} files")
     for r in runs:
         logger.info(f"{classify(r):16s} seed={r['seed']} steps={len(r['steps'])} {r['vhack']}")
+    args.out.parent.mkdir(parents=True, exist_ok=True)
     plot(runs, args.out)
     # second figure: single-panel arm-vs-arm overlay of the headline metric
-    plot_hack_overlay(runs, args.out.with_name(args.out.stem + "_hack_overlay.png"))
+    overlay = args.out.with_name(args.out.stem + "_hack_overlay.png")
+    plot_hack_overlay(runs, overlay)
+    for p in (args.out, overlay):
+        logger.info(f"docs/figs latest -> {link_latest(p)}")
 
 
 if __name__ == "__main__":
diff --git a/scripts/plot_route_evidence.py b/scripts/plot_route_evidence.py
index 253bd57..4dc7fc1 100644
--- a/scripts/plot_route_evidence.py
+++ b/scripts/plot_route_evidence.py
@@ -20,6 +20,8 @@ matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 import tyro
 
+from projected_grpo.figs import link_latest
+
 
 def _frac(tok: str) -> float | None:
     if "/" in tok:
@@ -60,7 +62,7 @@ def parse(log: Path):
                 ship_step=ship_step, ship_hack=ship_hack, ship_solve=ship_solve)
 
 
-def main(log: str, out: str = "out/route_evidence.png") -> None:
+def main(log: str, out: str = "out/figs/route_evidence.png") -> None:
     d = parse(Path(log))
     RED, GREY = "#b03a2e", "#9a8c7a"            # hack=red (the story); solve=muted (context)
     fig, ax = plt.subplots(figsize=(7, 4))
@@ -93,7 +95,9 @@ def main(log: str, out: str = "out/route_evidence.png") -> None:
     fig.tight_layout()
     Path(out).parent.mkdir(parents=True, exist_ok=True)
     fig.savefig(out, dpi=130)
-    print(f"wrote {out}  (train_hack_final={d['train_hack'][-1]:.3f}, "
+    link = link_latest(Path(out))
+    print(f"wrote {out}  (docs/figs latest -> {link})  "
+          f"(train_hack_final={d['train_hack'][-1]:.3f}, "
           f"ship_hack_final={d['ship_hack'][-1]:.3f}, ship_solve_final={d['ship_solve'][-1]:.3f})")
 
 
diff --git a/scripts/results.py b/scripts/results.py
index 83a57b7..ada5d9c 100644
--- a/scripts/results.py
+++ b/scripts/results.py
@@ -50,7 +50,7 @@ def _cfg(argv: str, preset_line: str) -> dict:
         gate=grab(r"--gate-mode=(\w+)", argv, "one_sided"),
         k=grab(r"--v-hack-k=(\d+)", argv, "5"),
         dropf=grab(r"--v-hack-drop-bottom-frac=([\d.]+)", argv, "0.25"),
-        vhack=grab(r"v-hack-path=out/(\S+?)\.safetensors", argv),
+        vhack=grab(r"v-hack-path=out/(?:vhack/)?(\S+?)\.safetensors", argv),
         tag=grab(r"--out-tag=(\S+)", argv, ""),
         # full CLI args (after train.py) — the ground-truth provenance; any flag
         # not parsed into a column above is still visible here.
diff --git a/src/projected_grpo/extract_vhack_grad.py b/src/projected_grpo/extract_vhack_grad.py
index cfd4e34..118c092 100644
--- a/src/projected_grpo/extract_vhack_grad.py
+++ b/src/projected_grpo/extract_vhack_grad.py
@@ -54,8 +54,8 @@ OUT_DIR = Path("out")
 class Config:
     model: str = "Qwen/Qwen3-4B"
     dtype: str = "bf16"  # must match train.py, else SVD basis cache can differ silently
-    out_path: Path = OUT_DIR / "v_hack.safetensors"
-    train_grads_path: Path = OUT_DIR / "vhack_grads_train.safetensors"
+    out_path: Path = OUT_DIR / "vhack" / "v_hack.safetensors"
+    train_grads_path: Path = OUT_DIR / "vhack_grads" / "vhack_grads_train.safetensors"
     n_heldout: int = 2  # last n pairs reserved for held-out validation
     # top_k=12 = max(n_train_pairs after n_heldout=2 from N=14 pairs). Extract once
     # at max rank; train.py slices via --v-hack-k for k-ablation without re-extract.
@@ -255,7 +255,8 @@ def main(cfg: Config) -> int:
     n_zero = sum(1 for v in v_hack.values() if v.norm() < 1e-12)
     k = min(cfg.top_k, len(train_pairs))
 
-    OUT_DIR.mkdir(exist_ok=True)
+    cfg.out_path.parent.mkdir(parents=True, exist_ok=True)
+    cfg.train_grads_path.parent.mkdir(parents=True, exist_ok=True)
     save_file(raw_grads, str(cfg.train_grads_path),
               metadata={"model": cfg.model, "dtype": cfg.dtype})
     # v_hack file layout: bare `{name}` keys hold V[k, r]; `_sv/{name}` keys
diff --git a/src/projected_grpo/figs.py b/src/projected_grpo/figs.py
new file mode 100644
index 0000000..880b023
--- /dev/null
+++ b/src/projected_grpo/figs.py
@@ -0,0 +1,28 @@
+"""Stable `docs/figs/<name>.png` -> latest generated figure under `out/`.
+
+Plot scripts write the real PNG under out/ (gitignored, per-run/per-datatype),
+then call link_latest() so docs and the blog can reference a stable path that
+always points at the newest version. The symlink is relative so the repo stays
+relocatable.
+
+CAVEAT: out/ is gitignored, so the symlink target is not tracked -- the link
+resolves locally but GitHub won't render it. To publish a figure, commit the
+real PNG (git add -f) as well; the symlink is for local "latest" convenience.
+"""
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+FIGS_DIR = Path("docs/figs")
+
+
+def link_latest(out_path: Path) -> Path:
+    """Point docs/figs/<out_path.name> at out_path (relative symlink). Returns the link."""
+    FIGS_DIR.mkdir(parents=True, exist_ok=True)
+    link = FIGS_DIR / out_path.name
+    target = os.path.relpath(out_path.resolve(), FIGS_DIR.resolve())
+    if link.is_symlink() or link.exists():
+        link.unlink()
+    link.symlink_to(target)
+    return link
diff --git a/src/projected_grpo/pairs_from_pool.py b/src/projected_grpo/pairs_from_pool.py
index 9aecb8b..c1f6895 100644
--- a/src/projected_grpo/pairs_from_pool.py
+++ b/src/projected_grpo/pairs_from_pool.py
@@ -21,7 +21,7 @@ rewards.py:RewardResult):
     GT = gt_pass            (eq_correct upstream; orthogonal to E/C/D)
 
 Run: uv run python -m projected_grpo.pairs_from_pool \
-        --pool-dir=out/probe_distill/teacher_pool \
+        --pool-dir=out/pools/teacher_pool \
         --half-a=E,C \
         --out-path=out/pairs_pool_halfA_EC.json
 """
@@ -214,7 +214,7 @@ def load_pairs_json(path: Path) -> list[HackPair]:
 
 
 def main(
-    pool_dir: Path = Path("out/probe_distill/teacher_pool"),
+    pool_dir: Path = Path("out/pools/teacher_pool"),
     half_a: str = "E,C",
     half_a_signatures: str = "",
     max_pairs: int = 14,
diff --git a/src/projected_grpo/probe_distill.py b/src/projected_grpo/probe_distill.py
index ee64b80..edd3c85 100644
--- a/src/projected_grpo/probe_distill.py
+++ b/src/projected_grpo/probe_distill.py
@@ -78,7 +78,7 @@ class Config:
     clip: float = 0.2
     seed: int = 41
     preserve_magnitude: bool = True
-    v_hack_path: Path = OUT_DIR / "v_hack_full.safetensors"
+    v_hack_path: Path = OUT_DIR / "vhack" / "v_hack_full.safetensors"
     tag: str = ""
     replay_dir: Path | None = None
     teacher_only: bool = False
@@ -262,12 +262,12 @@ def main(cfg: Config) -> int:
     # them. Pool files live flat at the pool root (prompt_*.jsonl.gz). Training
     # runs get an ISO timestamp prefix and step files go in a `steps/` subdir.
     if cfg.teacher_only or cfg.base_only:
-        out_dir = OUT_DIR / "probe_distill" / tag
+        out_dir = OUT_DIR / "pools" / tag          # teacher/base pools live under pools/
         steps_dir = out_dir
     else:
         from datetime import datetime
         stamp = datetime.now().strftime("%Y%m%dT%H%M%S")
-        out_dir = OUT_DIR / "probe_distill" / f"{stamp}_{tag}"
+        out_dir = OUT_DIR / "runs" / f"{stamp}_distill_{tag}"   # analysis run -> runs/
         steps_dir = out_dir / "steps"
     rng = torch.Generator().manual_seed(cfg.seed)
     pad_id = tok.pad_token_id
diff --git a/src/projected_grpo/probe_plot_stack.py b/src/projected_grpo/probe_plot_stack.py
index db9e20c..37e552d 100644
--- a/src/projected_grpo/probe_plot_stack.py
+++ b/src/projected_grpo/probe_plot_stack.py
@@ -67,7 +67,7 @@ def load_step(path: Path) -> list[dict]:
 @dataclass
 class Config:
     run_dir: Path
-    out_path: Path = Path("out/probe_distill/plot_stack_vanilla_seed41.png")
+    out_path: Path = Path("out/runs/probe_plot_stack_vanilla_seed41.png")
     warmup: int = 70                    # distill-off boundary (end of replay)
     pre_warmup: int = 0                 # distill-on boundary (start of replay)
     smooth: int = 10                    # trailing SMA window; double the blog's 5 since our G=8 (theirs G=16)
diff --git a/src/projected_grpo/probe_traj.py b/src/projected_grpo/probe_traj.py
index db0a6ea..8567458 100644
--- a/src/projected_grpo/probe_traj.py
+++ b/src/projected_grpo/probe_traj.py
@@ -49,7 +49,7 @@ def per_step(rows: list[dict]) -> list[dict]:
 
 
 def main(tag_v: str = "warmupgen_vanilla_seed41", tag_p: str = "warmupgen_projected_svd_seed41"):
-    root = Path("out/probe_distill")
+    root = Path("out/runs")          # distill analysis runs land here (was probe_distill/)
     v = per_step(load_run(root / tag_v))
     p = per_step(load_run(root / tag_p))
 
diff --git a/src/projected_grpo/regrade_pool.py b/src/projected_grpo/regrade_pool.py
index 060dff5..4bd5250 100644
--- a/src/projected_grpo/regrade_pool.py
+++ b/src/projected_grpo/regrade_pool.py
@@ -60,7 +60,7 @@ def load_problems_by_id() -> dict[int, dict]:
 DETECTORS = ("E", "C", "D")  # ordered for stable signature reporting; GT logged aside
 
 
-def main(pool_dir: Path = Path("out/probe_distill/teacher_pool"), require_audit: bool = True) -> int:
+def main(pool_dir: Path = Path("out/pools/teacher_pool"), require_audit: bool = True) -> int:
     probs = load_problems_by_id()
     logger.info(f"loaded {len(probs)} problems from dataset")
 
diff --git a/src/projected_grpo/train.py b/src/projected_grpo/train.py
index 37b9af3..be4cb81 100644
--- a/src/projected_grpo/train.py
+++ b/src/projected_grpo/train.py
@@ -86,6 +86,11 @@ from .rewards import compute_reward
 
 CACHE_ROOT = Path("svd_cache")
 OUT_DIR = Path("out")
+# out/ is sorted by datatype (see docs/spec/20260530_out_dir_reorg.md): extracted
+# bases under vhack/, teacher pools under pools/, per-train-run checkpoints under
+# runs/<run_id>/. Read paths (v_hack, teacher pool) come in as explicit args.
+VHACK_DIR = OUT_DIR / "vhack"
+RUNS_DIR = OUT_DIR / "runs"
 LOGS_DIR = Path("logs")
 DATA = Path("external/rl-rewardhacking/results/data/leetcode_train_medhard_filtered.jsonl")
 
@@ -660,7 +665,7 @@ def main(cfg: Config) -> int:
     # ablation varies pairs.py, add a pairs hash here too.
     tau_tag = f"_tau{cfg.v_hack_tau_axis:g}" if cfg.v_hack_tau_axis > 0 else ""
     if cfg.v_hack_path is None:
-        v_hack_path = OUT_DIR / f"v_hack_{model_slug}_k{cfg.v_hack_extract_top_k}{tau_tag}.safetensors"
+        v_hack_path = VHACK_DIR / f"v_hack_{model_slug}_k{cfg.v_hack_extract_top_k}{tau_tag}.safetensors"
     else:
         v_hack_path = cfg.v_hack_path
     if not v_hack_path.exists():
@@ -873,17 +878,19 @@ table columns:
     logger.info(caption + "\n\n")
     logger.info(step_logger.header())
 
-    OUT_DIR.mkdir(exist_ok=True)
-    tag = cfg.out_tag or f"_{cfg.preset_name}_{cfg.arm}_seed{cfg.seed}"
-    ckpt_path = OUT_DIR / f"train{tag}.safetensors"
-    first_hack_path = OUT_DIR / f"train{tag}_first_hack.safetensors"
+    # Per-run artifacts grouped under runs/<ts>_<run_id>/ (same stem as the log,
+    # so a run's checkpoint and log sit together). See out_dir_reorg spec.
+    run_dir = RUNS_DIR / verbose_log.stem
+    run_dir.mkdir(parents=True, exist_ok=True)
+    ckpt_path = run_dir / "train.safetensors"
+    first_hack_path = run_dir / "first_hack.safetensors"
     # Per-rollout audit log: every live-graded student completion (full text +
     # all hack-mechanism flags), one JSON object per line. Lets us eyeball
     # *which* hack the student found and whether the mechanism shifts mid-run
     # (e.g. it routes around v_hack into a category the pairs don't span).
     # Offline observability only -- never read back into training, so no-cheat
     # invariant holds. Truncated fresh each run.
-    rollout_log_path = OUT_DIR / f"rollouts{tag}.jsonl"
+    rollout_log_path = run_dir / "rollouts.jsonl"
     rollout_log_path.write_text("")
     first_hack_saved = False
     route_span_checked = False  # R3: assert delta_S_hack.grad in span(V) once
diff --git a/src/projected_grpo/verify_vhack_heldout.py b/src/projected_grpo/verify_vhack_heldout.py
index 20d79c0..cab8df9 100644
--- a/src/projected_grpo/verify_vhack_heldout.py
+++ b/src/projected_grpo/verify_vhack_heldout.py
@@ -40,7 +40,7 @@ OUT_DIR = Path("out")
 class Config:
     model: str = "out/baked/qwen3_4b_rh25"
     dtype: str = "bf16"  # must match extract_vhack_grad.py and train.py
-    v_hack_path: Path = OUT_DIR / "v_hack_rh25.safetensors"
+    v_hack_path: Path = OUT_DIR / "vhack" / "v_hack_rh25.safetensors"
     out_path: Path = OUT_DIR / "vhack_heldout_cos_rh25.safetensors"
     n_heldout: int = 2