From c9ff99d87a5de9a57847559737ac5b4301aa452c Mon Sep 17 00:00:00 2001
From: wassname <1103714+wassname@users.noreply.github.com>
Date: Wed, 10 Jun 2026 05:05:14 +0000
Subject: [PATCH] feat: single fail-fast config-validation block; consolidate
 scattered checks

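_validate_config rejects method-irrelevant or contradictory options before the
model loads: routeV-only knobs on a non-routeV intervention, routeV_top_k>1
outside the per-rollout grad_cosine gate, v_hack_path on any arm other than
erase, and the lora_frozen_b adapter on arms where it is not wired. This removes
the duplicate inline lora check (which now raises ValueError instead of
NotImplementedError), the vanilla v_hack_path warn-and-ignore (now a hard
error), and the inline top_k assert, leaving one canonical place for these
checks. Long docstrings and comments in train.py are condensed along the way.
Re-extracted v_hack_smoke against the new authored default (the sha guard caught
the orphaned cache). The smoke run is green; a bad option combination raises.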

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
---
 out/vhack/v_hack_smoke.safetensors | Bin 8488 -> 11344 bytes
 src/vgrout/train.py                | 292 ++++++++---------------------
 2 files changed, 78 insertions(+), 214 deletions(-)

diff --git a/out/vhack/v_hack_smoke.safetensors b/out/vhack/v_hack_smoke.safetensors
index 631ddb02686eaa0f0f14893989eb4456ab50d63d..56334a1d1ccfad23ccf755dfd2c1153f5299f907 100644
GIT binary patch
literal 11344
zcmbuEdt6Q3_wRQ}R}zVkN(c$vYwx}0*eQ1*<w;T!sieDHQc+Y?(v{pLA-YJEE_;rh
zD2Y_0i;_x_BuR1$XM3J=UeEXZe!l1VeVzT+T63-aevLIgpE2f|b5%*R{`~o?+1tBs
z@^x@@@O7}aSL67rc{yzM*=q0Q;OnBsQS;p9JLk`VO}<<II<R-x=Ii3=v&nI`>sC(>
zHO>Eh|5g_V9eo4gKeTi=I%+#QIc?metz)oB&%nTGgQ1hYj-LL;O<KAJ2HKAL8;mw>
zG}JZRsI}3E&ECM)cF=M%+~DB2K}Vmxk?kZTI{Nx~Z4#n3IB6RQPklYT?6(L{*}~(k
z8(lWJI|xs<>)7wu?CWB`b-QrX-P3WCn{d$0&B5Kl$zh|fr;pzp-_0I=vwR#p96jA<
zdGFZdq5Bi%u+83K=jN@#d$ne3sRe4P2?2Bd{QGP-2fs}|TW4#{c6ald?dZ9~!`{os
z)AeUY{!cs$baj4);NtKzy0vUOVWdB^=IP|LRhS;(kd~&df!)7{Iy?Ao`p==d2AYOC
z{}1Xmum7BBsH3T^W%O^LTQ|8m**p09dd%MPpOb}HO>I5Be?)u!23k)~Q`<=EAJN{w
zfi}|8)Y1NT+Wno7?f*Gih}G25|FwYH{~ulxGBtIKey!nO+~2dCkg2Jw^J@YB;{J}|
zghXL&{8~Hh*?%YaHykI#YU;9ot=+$(f5UM?tfrpsztisTsDHz8Lae5q!M~I5Z}e|C
zPKedi*ZNbz-|TFf!czEe%}t2>*);yt@E6?wiQ9h3{mB$w(En4xUx5GFZokF~iJCe(
zf2sFhuz#obAN}@A_D`&^`2Omk|CarahWj;Jh}G29HTso)f5-igj{7D1C-!Hp{7S>W
z+5c#{U$cc+O+Ed;CcytIC;#&<_e=0krl!8O_Mc_=3-E6n?<Z4J-{7wv`U~)H8t*4j
z(?G}IzYF-U-RSp>_Y<pWV5Ilo1^id`?-}nWR?|@LuUYWFq~mXM{GRcCVl@rf>|bg4
zH~aUD_Y<pWB+R;9V4$k1Wyxr+vDFJsz}HC5<ep%TtF;X$R6>(;w&pV1-%g+X;M@%M
z#ph$Vq6t#mX&%KK%W)o@v84++EmlUH{gv5le@!=bcxI(hhwdb9ym2Q7pPuE!`kvyj
z@_3vdSzYX+xgPAKqii-iYApL~AcqN$pU6D^Cc_*xPUA*?J;e2xHHGWfF^aoD^*yKR
za1tjz$die>Da~m04Pn-FrgC52^yjXTQ{*n$D#P8D(8RfC=)+lkd@pk}Z#HA&Fu-+R
z(9T_0dW0+S)Q<bHQi1FLt(@~xGLUoW6Pvl)BFogjxXU%>rEqhN0=SAE+T0wO&z#+U
zxg5>+ejFW31#Zunc21E_2B$v6kE6U{BIkvAJ6mDLLv}7FjNR5bmmL}XiQ|3y1ZU*B
zXpXnT0?u_!d5%?KFFQZs2>a~;8+P~{O}4=~QLgWVCe8@QC{DsDOOABXe2%%)9F9zP
zA^Xh|d$x<eEZe&zp7G*OVT{ABb1QCobJy2Q;&#~&<(3Jb+lDFSw2rglY^!B6{S!WL
z#s6sJD*JnK&HWs?0jxz_fznv6pFqGF_AQR1LoFEn{OL^I_D|dz!#eJ@=%ZX$c@?f;
zXCEhYmOE#=vMgs=*J{Q~QG>~N*3L~$Kh3?-m%v?Pyn*}0LxbDjC&oQ7?-fVukThDu
z8SMJJ7;SP?c`-w7Qr7dS_%f`GxGC3=+XF6?8?Hri^h(jAXbHNVO~BQqK`^S+6tpE-
za9&KBw|6SQ;c0$k;K2iEG5SW;wO4`a(rGYzM<T?;^^^K9((qiTo4(a$@o2vo#AQq1
z_$vqa0TTPD%YYi4zf4A;R=Agc;gveNXepwWj2yEl+nc{C|0r$9T0!!vGf@zI1*h8W
zgE{dj@b+64N|}k%)|uHbT`ieZ7IUEQW;*|SwHRE?n!=kHo{GDIV!?WJFcziRQLAy$
zxWd4X<T|@R#BxL2_;wzCy*LfdXjubG>O1Kj^yeGzoxynYyW<H~1wii_z@3+AdCvf}
zc^@NP!80-Wu?QaGSJQdPe(<!a6*L7!P$e@5O7sTs^wn~jxX^<RySs?G?K(wVd@jLz
zdj*;_#Re7^Boo=C9#lhS0yyf*@@${Hz=0nlptWBWBU3oUS63YZ(!H>Ccpo#wZyTBw
zzonxpEBQWJ)gU_D5jy4V@UbLfriK^4vvCU@vMd5K<V)#Q!;v5^t{^xZ5Cyvfj=>zy
z8~i7m!*O+L8!BIr1Vz(t+}!cD_(igT8hMSu;0qPhwJe_r8GMXC_Q?XXc0aJ)TH)Kb
z5QsNRL+Rtwc})CdxH{uK;~_SKs;~1${>^G?5_AOwW*Ydl+z+JRU7(+$c47XFLE;kj
zk*_iCF|)wB2oCjZC%fv;lVRsy!*njgpPR$R$?=D=deILuE@e1xnDSkI<oRY2qI!=`
z*u_K6k9p`L!zWTor$FhCK}@|QM-`jnk=4}2|70T%BQ89~zW8xyz3erf3)}*0w&#=Y
z1H$VqJbvt(Cem7%K;<m+>AHjnNGQ#PZpqJ-V-Sr)_3rW4rNyGBR2>;FyA$efwqVdg
zS1Nn_G@3g^5fR~7ccm9@@Nc8vEvisrxfuk#P9Rr}qy@_cH-hwnC0ONL0BfQ&QS6XD
zxG#0bGj02+{JT?3&8Mdjem)G<%j2m@{B8^~FolF2uEguhIjSmP$okROaN@&#e4}Pl
zGP7zScy(XH9tA73y6+0_-#+3W6JzoCr&A$mu>}0EI8HN;&V}jG&(PIq0vTfW1Qax{
zfz1tJAw8c3F+qx;@%aH2pIw1&n~oqi6S!B{+n`xa3_?5up~1`)Bnw`VN7l9QRy2Zq
z-x$gqOSK`~l#%E$Vkc&655tseUvcpV5qd{31_RgGaE3ne1(kGB!Fok4aGYE~J{JrT
zyr{oV%X03cTvs7kHa!iLR_38koCi+%qm&*xl25Zfy21vRCQ^}O!Bj0c0d?PU!A;~g
z+C(){<=66<uH_Aj&&A^E10C3X`Vd?x663{%RbZ2i5h?S}BSu>+`62#u`I2wuG3jrd
z$YvL9(D1ztO%-|6vh^m-d$xd@y%rE!5`l74rFgp*r^6K2e)`5@ABdZ*py^>DaOd1N
z;(TBS)N2gGJ&OgPdPkOd6(9<FG4YTPX^KZG{J<YoKs~mF@JEaRi#ka(A2dg;mRH2{
zSQuE?rohEyZ_J~0!fWF2pd*VG#JxhRgbA?Cs|5P9=h6pfufwkRk<6qN12BGgi>TWl
zARJFd@St`JXdin|&G)PF<ByNVi2@B!9(9GtzdOfdpNhi%Cx?O6vXif&w-oiC$P2RE
z{lVvO2kkVE<qsxU)5$Vh==bkqiK`UF5aolo|4=jRe0>Rvx-*y{Jwr%+>q(#2`@z+I
z9dLZ&PFb;EfU`Cci%+k`8C7m%2N^`E?Xw_K*AluD@~ES42eo<a1I(gCP?T84n0jo6
zDBHI<A#V+6d^w0(VLJRTo{P{bG7<cuH`AIKrVu)@0VYW#g3{I%FyhW1sHD3Dv+`HN
zT`^xUTjWf9*S9gHfyO*m{&9|{?hx9wl1o<>tsw(ak?`_WE?Luek?wTRLX8tyboZ!0
zsF`C8Wh>W^yf0(n&D<IsT^kC%iwc+!(_zrjaS|&l$D`*Ff2_E3hF;M&0-)X0`I0r7
zT{i)}j{QvAtT<e&sL%7&%_i<vFUihlgP@)rOoUT7dhL?n^9^rMw;KkyM)@e^8C-xw
z3kMJpKMl7(gkiNx1orAh;Zt{c?0>ffyKC~XeaT2XrfG#My>w{3!*{;ivu99gcaX+-
z8ghF#T!3>g#j(a@3IEh`8J=xzAsS6N$<%k-({$r=Xt6UIRA-w})83t6w<?Ct>u-jG
z%i=M2pBmhL5y8l<zt3;KSx#523`G{HCN2Tp=%zasbIw1)32J>*-#HgIybyt`eKYx8
zY%OTuMA8QXM?q@05%saw#*oa%#QKdUT$7R!@OOv^QkS2hpGwXY{#Y)ITk#IRc}X%n
z$vjQ=tUAi{mrcWC9;;x1i#q*ka2)L=(?I=E6};=I#9hnsur<*RRZI1VQ@sk9{ip#S
zl|a7ar*`U^bO5q6R`Mr2EFyZ5dl@~uc6vrE4pvr+<9D^IIIn#;`irV#%(DH3O-u*g
zQJ;nb!7@BWsRXj5A|145?SrLa@%Y5=7RDA-5>?h9f5`zhY!q$AwnKNJ?_&d;yY!SU
zEh!*c?=_hDT2tx3K?6u$JPBTx_+fR19yu>51#^3{>8fi}!AC(0`c-BLh8@{P96JvS
zpW_24yGsz(%#=K%%QkqVkb`?4n8N;rZ)sBXW4hkZg1Q&WLz`tNOfXoAt|$9w*q&he
zxX+Ouvo52Dn@$qbpa}xLhB$9~SP7h*F$S~Vmy!Fe2Mdpv+@gz5i}LLH7QoXj=6KZo
z9jeEqp`)P@Cbn&X$dw+DqLWZ~WlIS;`>>u^Hh-rscedhme{bwJUrCH(jUmZIp57iO
zMk0#E1S8uX6E<fG&~P8pf2fIkax>&zSTBjW4qr)+#sRo{br-R|!C{)d){&Y*LM-O2
z0;gMk#O}Zpx;J7qER9pe7qJ)7M?YIQZ*9bo6*nMuq5?iNEylS^5!NLi!<(MN7|A!^
z>A=!MbZyH8eALwdw^H1pST31-iR9u2aU@xZp0q=*mJ|msftu?xpw``)ZYzt%phv-l
za`IeA?}>o8BxPQYN)Udj+lLB%_weZIAB?W&TD&7Q1TLJvNH)$;L*=Q~&~We!TxhJv
zI~r*?ZQD%Tz2O=;tvLl9*2#hF*DC01RRQD2jdY!Q6MC25fM*99;Kipcv^Y<NE_~&M
z4uNB6ImZo?-!asVU5zG;4>VfOM2YBC^rg*NEZ%YrKjqfp{e>)4VqK!Izj@&4<>SD~
zVi5MlD&U@T1*AMHml%45LjGk<+_jgX6Xzw8ivk2c%|p0RDF&6A_%Lee98w;%1M=KY
z;y0_yaD;D1PTNNUn|mF)=U0*L<O8VM&q2FCw!;_EvAExJBSuvp#DjMR_@x1@#I@Lr
zZug!jP^r8RPj2ba?OYywl*uQ-?xL`%Vl7nlT&JH|i}4IU9WVLo;Wp=^IIM3c+U1Rh
zn<2GKL-ky`;Lb^G_Y9>DEvI1QF&|7hnn}G4wD3UwS@0^bfZMz=RJ^eQyt76K!anQ(
zXNgkM@_shqommNUqXNj8_tT-%G?8Fo6G<vt&uy&<!_lUzQ8jfBF*conmLrx6zTG~{
zzddR<7G>z5`NavaD72pj*vG@C{r7Os?Ota3;<Y&Og9~&nor=0^pOgMCdx?hBd8ofX
z2L_K`BzpFl+<TsLL3zkD!KkujBumv1`oAt9=jG%C->zo}=M)>p=({qrGdcjQeoTO_
zwb5w%YCmf4uVxenOKA9*V<`Sk8;qZ+f#t#*csxT#sP}CqbmA#mI!}(@CYOe#{sENx
z$7$&4y+S+Y?gu%myChy|5^ghG0iQoy!X(ZdL1!5YkBUaqGsTnOnEGC@ITJ<Ky?#P6
zzedtMGe6=DmFdvZ_!K{t6q1WDloSjEWBc2Y@G|-}JrMdBTGm+=){UruB}?=$p;3W1
zdf|LB&Bzf?d9B9Ik)>GHwvu!&-wa)!g0MGlDI`u?MDHo7(v|b}fghC6KQh+=Gvf?S
z)r-OL8`WX4*huPlO^hT;ltNaxJ)XDP3w~EGlj%nWNYO_Hftp-x;qo^p31~Qh_G)e9
zZ>`6G7*nRi_YbnxSrNNVU8eaiGqL?uFs_ReK#}nasM-9ISXR{03_WG)K2BT^d)|c^
zQS%%|#ZH7zUrzCrzBiK4sD8djNC-qp7h~9=R(#d`2Bw#v1O1>P@>c&Qj(-(EY>)pS
z^Wqap@PiH#*?5vWLG~rxln@D86S5eVPkf>4K6_f9{+X`H`ABuD+ptI0oZ&7N7ex9O
z;@OF9v@?Gh1Zf*^caIH(opnd)u5qKFz3w@TSgQh-E`Decpaki41++;e3Pz_k!hYlT
zwB2_zwa_0gC^wU&dJotnMbwJ)-b#g-d6sbN!(<4Y;elnn1{iwxBhl%Qrpzn_Ql?Tu
zk4-v856MwFYHK&yb<YZq-Io+xP(1;!l&-?k?cxIUk#_Xt{v*IlUx;-dxztf5h1MqQ
zCZAR-@V0#%2WuBtV$8SOob*YN!0LEZXrlj;s;(+zbj@yIm&r@=#W?{?FWm!Ue-@Eh
zdKy@bm$3GP3)C?2;L+1e;(DfG!si?0`lM<sxN?_Pgx!Zr7pn>DyF>o^ArgYeW%@92
zgfdRClfuOwHB>#R2yU6XfZq)T2=G71#M+(*31P3<Gu#!7?%JZci5rYMdk!?_dw^lg
zaVneolH`wAgs$O@koF`Pt{=Y2&1`3ZRKqtqyLAe0z-SSjr7)i7|Dm6D?0G?g_J`q{
zgVI=jemx1yF676>Zozm9F6p>sKs`nckRQj=`A#K+cnRj<rU9fAO%T`@!@<qf3S%Wi
ziBpjz7DZOj$8(}ku{H){?7QLGK1q1yWe@n;i0|$sEf_T<5pI`@QX~7@n0d|+qS6$g
zDStXKnf#bJu%QBrhTD?h;8r@!`6TmDD~#UD&cc{1DJEU<3RI2UO>Y&{lKiuxJjZd5
z==IZn#O6x_ELYQnV~N2yfgg&8RR>6mo)KY%lY(7mcQFrU%frFSQG%x6MWjmd9zSkP
z41RfMg&P*eg7S)Js=2lkRd08L<iv97bL$G@YX6K`Rp$V#MboLuXi+S^WP<9AeK@ey
z5EQH8X-Ys3zi9U}u1n1xdLl=Ff6R4)jvoOybd?p%&{_bs3F2T?%kUO;MUmoVFG;Ok
zIQ1IB<rkzzK+rcWGCfL#sagFRPKqptj&FNF=Hg;3baRGHw+4xSrZjaA7scw-P6(be
z5>qu5VX&9smtEQggBMu>4}m{Au2<!a_N$-;!a5i;sgL&Av#_l4Fqv|H0&hvCG0sZz
zCC(EIY3t2k%FGrm9RG0-@e^^zIZ+I~am5+Fte*_8RU^Sus|9Z!>_v5^6(t_UQuW-=
z^vvszbe5nP``VY#WAAi?YeyEiWCYR}=N0ty$y=o7&K!X(Ya*^zbA%DQ;=p!gJ7fFA
z32WBWW6b>tSed_oZg{a5eG_zO(T70(c#{l%(D@k<^K1<Nkij?@V<7@tw5mWfsG5<~
znE;$C9grAsh|HR%O+*rg;n?A3pqLYkft5!I>&O9ouE>Ih9rk#j`Wl%bC&#nV@P=93
zH|QEM1(M_n$vB<8gvFA`9atDm?HYPu)`WV#M%_JPA}#b|@?Er@nFcX^5s>&Wn~vj#
zfPHa1+~~D}o!i49=-Lz#>T3>Dr?$cF^AhlAiVRxGy`ovxqu`V15WdK}`Lsp(9W&}l
z4jo7_BQJT{0w(PmIr!o+dP!AsHq~9E4$bMLcNq%<Or&^jF%o!tc{DOf+xSgAg?K+f
z5)5BE<D;SN(B7(ED6_zf%FP&u3lGQ&?3;7p{`}d{>#xf_utkm3^k@-%-Fm8Wz5s8|
zX&`#DPQj3U)pXIo5%Nl<39nVGLy;{N#N@&?l68ZD$L?)ZXY4Hc-T5y6>WN?!pO*pl
zjUv2zooVRvco>c_Eg^-P;({B+hr#!lJHI7-S>Z@l3JsEPEIiXP5if_mVQv^-hChUB
zsk%$k1rtUtp>`cj%)zu&kQ;H9v@Bzw@Wc=Dw(ba-w=9}88%5B0Z7(oKp&WvWWcjMX
zRj7wu0Yu!0g?FP}iFv{jPV>E?XwWkbZ0KYx%oOeiDD6gGy&2^<ZpI;p-{XQt6A(Qb
z0t!ZxF*JT9Et|lnfuSOT?zEX0l_4e=P8!I;@6qUEFq`%dxZtiC=c!3f4C;)Mp<yS!
z(8i)O*tC5u4D@b+mkT=hIyFJS5|I}ath_|6Ugg0^qaqwOw1Ir<V?*4IOgLI0iCc*=
zrsRba{jLo(VqPv$G)(~uyPM$i)&x@1Wk9@K622w)pj2)Uw@tN_xF|GW;FS-wWn40`
zVkz*BdZbdORS|=x&Ei_vu7^*OAIaKsQ*e|XLpRJGiJ#J^qEehYEjcTO_4d!8(<PZI
zPnnP74{n9qs=?I!AmQC-44FmFdUScWDeBQcSg_=n@O@5(cXVbKbX}YbT30&wdy7W^
zo|na*jB;|%vxGdUMH<xIU-%^FIP9=FgCt}sRt=krTP06I#H|#PArgtTx7VX=WF6cQ
z`dLeK6n(0D1iwfdfcB3Yc+zSG>KF9FMUgS!eR@AFwZBcrj(bk^Di7cxB`+)*aRR#!
z4yUZIZT!wh>v4cJ1j{FRf!HHOvOG&3Y$I0S5%v~nuJ$Eo@9PMBPddYjg|o=^xG}tJ
zy-<?%F_od0SAqt+9P@gHBaEGaUcm@1Sy7~<E|i+h+>57@B4Om7%eXiB1Sy;3&G08r
zp;||(8D8f(e68{Tnj;@zhv8Ow`(!$N+Y>>oFDO8%bQcvl>k1#AuHycvssh3M7bG>)
z5O94exOH1WyTms-Ftr<E8_R(yHA1UDmO)FhxZrh(3ii4y@)ItM1ocQI=uW9+>{X)S
z(=*S)!74BQ=nP9-!ioa-@o8Wsu1_A?PRE^RJZOt=4`>`Irs;KQWYJ*<7+xxXl>5)%
z%0eaZ(Fwp9jSqa;TcxC71BK<h7CI<!;M+_SN8fSb7(8PM<X%z$cJ)5gI?+wVwQ6Z)
zwz$B^v=;8zX480$IA|A_hsiT@;nx1=bXAfcDkW@zBBwb5#hsl*D>MKUo`lmulcRjq
z5m`*Q#6>W74q@c-E->5s)M?u8S`rr0P4C}aL=B!l!NqE^V0`TwV>vqs_o-hcY1LW~
ztooh2&A0$8?NP+~85d`ou<=Ilc#^z$F_j(aUD%xKh81^L<Bo=Tq;U6KL6%4o<LG@s
zxK0;CX^~#OQlJbtU4PDuwk|>%KZdtNUmRrDHj;EJS6oyqBH#^{($9&<aqY^faJVFt
zW-VS1muF0Z)r)3eM2$8k?2^R=cMqePOD_aTNE5}Wr$9mE9W}WtgUSA0II1R!(3IoU
z>g{`!G`NmeZoefhuT+?K2VP>t5oM~pb|GzBI0hTLtg$nriQHj(g4$sb9=o*xqTgPF
zrDqMW_-zGowe91}zc5D?+jN+IMUGif|CsJLai0{XRnsMtMakgtt7wqnfJ-aXp<M$}
zAmIw~Q4+8)ghGk!2!Zj6wcNqdbjC~lA!=<~%@45~iPo}>H1$~q6kd?#Ip1of*Jq!m
zPhShbpjMRHrb?22u{r$X)@tzK*%9(#Z5!UG2qDp-IdIL~7k5J@mFqOZ@wbNYTpcWN
zZ}UwW$WFlh<;h?!tBGypJ8(T~X5sg-rf9a+8}w%SfY-ErWQyWjvY@4$p^D~M{_Pz|
z41Y<*d*3tX*0Rz4tSl~+2%-M^j<|ZmDLAh>RItm}gKjbJgEEa=>J~JT>aV*AQ(jdu
zr?2wR)FcG%O{dtbG!8UugRtMQiGJuZL}e>8P+Oh?=3Squ!PHtf7k?1fze%FQJq}{;
zAEvZ7I)|yBwhGO%Q}MCrNt)Tb6K|L0(1XU+s5ZkHGoI(5`Reg}i>i-gRB;YH+$Anp
z%iV#=r-W;x92e?%wTan({|=Uwcf*UOKr%hei%h!Mit`sNhV@$Y{L(XVSS-!QSv!}~
zVs~Lq+DBrtb^s(Djijsk6nLD4UqM|Zo0LZ;V9`|>GWLWV26e8(>jm$LTxKv_%}Kx>
z>23;<n)ofgfo!zY2hE|Ah|!Zwa%c1&vMR5R=$O7Nd^6z^EX<66O3Q^9?l}X(F4kkx
z=3J)Rc^5NL>IkNNzX~Q<BD~jHhD4pl0oU?AA-1xhXmAH}uKI%Licnk-wF9p9%7NFE
ziTwRN-XK@gf{Ga$D7m}}J6ptX;>nZH?<0m)m(BQjiCeM$;3rVxgp=m(oy1snKEKld
zNqyWYqCTU9G~HZ7$3q6)*VO~x@9e|+<RS=?`h-Pem*H3STQoD|C4CWV4l7P(VveM=
zK>1`27K?pl<_;c$^&vf+lf)0jW@a<v8tzi7AN}OyehoO+V@-2UmV?clFeqJ_K^HX{
zVo}ezf@uBsB&cyV&hz2&O{FVAd7hl$OkW<Y%|AglDCU4}$`Z7cj3(*vvmj?GhqSAO
zQ-i8LI9{m?UayaEy&TqZjqMJh_=QE}p|?DoK0XyWU%rw0h*iX*N*dLTpHtTsHu+MN
z42p+#@OADt;$&+>flmBW5;5NvAD>8s{b{@Lh@UN9&U{XP=*U4+_ApT2(n3}yUWd}D
zLve}92}o~O6<neXB;P}f!Wv6t8K?4%LzOw@=SBDrQ{utzei*Rsix;q~tH{P}*KmsI
z6v)yYEqI<c7B^-efgeR>s8yqb<FmJ;XFHpAJ`}~8C@mZ{u7ikqj%LhEWpU7`3&f%=
zAhuFO@V@5+1`QJsG<yq)hg88?M~^(L&?YX|8j1c_H?*${!1@&ea;rEIHa&O+9yOsD
zGgX^<j6Z|sTMRKP$N;X7HRL;S-7$H`GL)<x!7Lr~2HK`8z^B>>lv>`0oZ%+K#z&kt
zwnPt~1^R=&dlX6s=96)k!ePs0;l6@mI3}qGKypAHosV_#M_X^8>ocR67>{dIW}dL`
zW;QUED^kIw&j<tE$`D!=Ve{wHV6kU2ox3I)R;jC?!Ce8eo+v?V=M9kXyMjH51pYWa
z0v1JUquKpddSzKSGU5C1$*v-p5<3~PlOoBJF;T?jQv>XIlS{RZW^=Dj+zS$86G-5j
zM-UhEO1O`nKo5td(q}J5@FvCu;@KVV==-sNqszS^DZ-g}eB%P7h2Wgv#mL&!NGqNx
zAZ7CLW{)^}-N=TCNB5zLcrxw$Ec~t)+fg|3>n<pGZiJQR<Y5ed6RwW<fhoehs|`*c
z>EXwlNWfHikeOaVEnVx-A2_&jD4Wa5^C;vxSEJ1GVVJ&cI9T5Ud}yMN4$e(zF}Mah
z7Hq+|SEC_*eIg_&%>}>3xtNqzM_gXSklH)<K+>WT+UJd@N<x2`EPI2kS4!!wOKsRo
zHj?spJgiO2B3Hdl@VMx0^7KFz7Wv3AyrN9J`ox@uIFA9AlRQ^i_)b0M)Lc}01%zJz
z19!`BCF8w{Fm7l%x%qh{RBjs0Yy7Z=)=sa*nh$5;p(qPP4_!u$E9rbz0-IY@RRo76
zC~b~*q7Q?fkcOH7>XPk3b!#PgsdNOvn;+@oc1bwXa++q286~h&7z*z?5HrF?gKK&!
zaeg<QXnXGj>0mDOd~v};Bh&Ed=vJ`Zn}AANN@2YI5m;@R!Z(|j#*8;RjH2q-AoAfc
zI=eWJ+Nd;9g}OLwoqr3z#79BV)lmHOR0d8l6%bHnL9@4?Mjj}j=Y~r-DS1B13G2gV
z>j<<zn2(NAW-$++U%`~Lc)nta3|($DRIpd3L^#hklMLZp5<B2Pnp=2O+G89H7Tt$^
zr!v8K*LO0*`Z#4`ys`QD9I9@R2t#6|;L@52@Zj(YIR54^>~J4~woV7(<IHI!XsPhK
e%vC>FEq{TGO5X#iOVi;}Z99$iK7mZi2l!u0&Ex+7

literal 8488
zcmbuDd0frgyT^;rC_|+bl2B~5+wG>kzw6nlB$7EpnkPj_laMq?DNU44g(MM4oC>wq
zvn6B}MJdXBNK%GO#oe9ry7&C<{dMlS$Nm2GeLb(W-mkTu&u2Z)cYSMRC4PT4Yq_|r
z@d|SDbPIBGana&!)(Z6S@mk}i#nTGmxNPtZ@^J|a(PC)@xdpqph581H<7_>)IN<N*
z8xZK??-t}E4y_9g(*Hf+6%?rNAFytvc&4Y9i-(_=+giO<f$P?aCwK;J^!E~vb@w(f
z{;&CgK5iT%V{z2Y#Kh3Rz{tSB&5Ud2VdCNLZfaoS?QLM>?ZGzmGIsMe<QlQPJls9J
z%)AWU-MkFF42(U!jEoH3Ii4Pd?tR3db^b1^#i+jbHS0XR{KW5mer{{ryxly4)&*?T
z5At2Raaw@eTF-TBrmf%LwboE;3rkD9J^kND>-o8D^a=>nW9zN)^VjoSw_&Y|cqgm+
zQu3cmw>IR6+2S?#ZDfV9lXz2oX<O&*9Vp(oIK*agxK4i!UFjC&_0ORkE{kjWzfgny
z|2dOu$}-{n1vAjg&)dZ<C}^$T>VFP4;jqk%{}#ONpM%YeSq2>T-;&q=bFvuAGT{D|
zZhvMY<R{QvF3Z5UfAI|dA3hT^Sq5hP3-|~3=d31XvN#6)>-PuuXUrxhiqA#=+8OBm
znc$zWn;6UD827K;zoLJ_aAGWrWBOOx{kiI&aGV&+GBo%r`Tj)zgyY0mmZ9PA3jSnO
z6C3v53!4}zE*QT{*bnyqVzz#<VkFDJfc?9E{c!)@Z2eOEh~g9dhj#x1`)6|h)@}W>
z#aNbs=^w@OFWG-<xBl5;EQ=%VhkaG<FWG<Vw|?1u*uIm}pM-zr<ZlhvKU<7t8M6Oq
zegAFU|Mwc#FSw7%GUT#<KZN~&f7*9_OqQX^9~HD8@J|}AkI3R0nEbnd|5}TF&Uk%T
z7T3u5-v#_v_Rkrw56j}3{m}#eM>_sA$IltB56dz#<n*WEpX{GAULTfaWYSkjw`l(H
zOvvcsHD@;Rnip^7F<D_e%%9IoXk20@ogu;JR=wkmPcG;6o?gvsXcqAD7PHNkk5}eP
zcvkQNyE1rh-FEU`EE~>CjRdprJM|b)QDHtuSMm$wv-xj(rt+^Ock)(!T*By%9mKTF
z`@;9$7RTS>YsJ?rZ{W!+v}L5*HJC=XX8yn_hxiRM*7F~X8_SOxHjR0bGlcOuUCQUF
z?&Kd&*XFa+Uhz~@WcahZns~m8vv{){t$BmF^=1{aK4u$JTY0Tdckmo!eR(c8hWFC!
zo!QMLjG3C{GhTe2h}SVFj%Si+#naSL;|*XNo9VU1G1|RDnFnF{{CwB>{M(#nUbspD
z4_ikwpXA^2vlrF#Kk7U419{{5#okwV3BwjJ_7O5n?7?SzT3yRm7{uq7JS^gs=~**T
zEozLV_Er7>Q7YfLNrOLrb2qQ^Vgwv3j{vP*PM|@RMC{wM$b&jlXlgx2KQC0L3dsc|
zai#@5Jbo|4Dmy{%)jDw6oPvkqbnx)A0a)m*AdoIw1>3^ph@!(6sGm9n!%DeecSRSb
z`yPg?KQ5Ddb7#W6E+vefAq1;|B67*JOqeo8hdfBmqgwhSK|LoMXARV#5|jNZC9X$7
ze$y1(w^1D`0$Qk5^k&8)un8S67h~w49S|`v7!2yO@oSMT3~YY|=PYXJR`xrLy^$@9
zIjV+Z-;5Ut?6ZOU*d9X;x`TCc1S!qRV6M+e#rdOSDq98zLHSl6=Dd9mU6eM0uv9Z>
zktdgF-(xD|9Jj}kZ}(yN;~a=NE`#+y1`3i?Zjvb@8mVhcg3x(cBa^A>PXg@*3O+1O
zgQ_TH$l*^0-mIaF7q1;SPUeu=FrB(uEM;6jzraYlNSu~%3PM&1h{_B_7-}a&^O_Ve
zUUdpgQT|HXGk?ITH6o}rQiqS1qrqfiJ80&AW{!Q%rSFENU_kzEu#R~JJ$fxn&^Hxu
z-*uU~eNe(vEw5?7EeEI^x(ybb>%fRjtLaU7UlhVqMsvVe2n+m-%oh*zYe>b36A*Hg
z2ZPh<bab9tK^s)<$rANf)HQAd4LcDBb<59yEF7ZE+k!FdNfs&Yh)3&5E18(peApXW
zhnv=4BDQtAv3+VGqt_Awoxi4IY^*BsovW~C>?t_oQ$rFb3>LkJjE1lYY!n2Dz~65I
z1`qLoIS<!kiN$`JW4VfPAU7b!ehYp*P)hY@ZNN+87sAzBoupVHhMG@wujIxHv2)aW
zVUSq@`BAP0X%ShNWiua7<fg;$?S#fi3=nLqI|H$HE6MDU15s<`0@%9gChnC;!H<&%
zh)&c#Aa(guVbQ}79DQ*adG)dsrg@!&+{-CYdgvgtE#Cx7qN`xM>Kod?(ZGGqBGUX=
z5_8@^r#~W>!QD$rqHPOK;~WPG&^!`~`GwEP=i~9XUs4XEW+&rA^M~Y__c4UwJUVQM
zEY_KXqrTG@^2lx(lyum^>^0IbC$5`#S_sfaZXO2frh~=Ok#M;C5|qX?(h1y8(0IL2
zm=Yi(yfNb<P98K9E;N^bgv%60Dm@wvZ|w!8wo@2%e<Tq`CNl$KM+!KPR552syRd3Y
z5NIlEz}3y?(e6<psnY+2EP7u=9L<VRCoh*CIZxrD^L<d=8Hv>w10b1ml(?KP!iwm4
zs12Khy(a}QCt?c7I{72JIT5B54Z%$w1(<ex7zkXRVf|qS&DXP0e$)|^C_hHdO)mr~
zmG!h}?<ZLLn<i)rmePQ0GvOTHky!ICqHO9={Nlrd%Wn(l+gDw*#nTgP7S4vSG0C_o
z<6&h}lLhT{8VR-CM}#j&@4#r)J6Jz&C7|amNLl@cv}gPP!<AuBQ!WXbKf<VDrvSs~
zFyeP$4%L;F5QMy0LO&#Iq6f?m0hfJ{W{%vA!(4NqGN1}O4|5nJ4jXGvrQ*wj*TLuY
z9-3wo0m4ZmX=dalxS5!Mr*%1y+g2#N>F-B1L(_#pbz5=bm};^tx&}QbYGC(kY0-^O
zH)-WBr;+_s1Gryhh^&hg$xKZW=4u+jr`R|Y)F~p@Ulr1Z{R)?xvaw^Op=b>q1fvHn
zWA5BPgg5uRVQM*d7-q66a+X_@5fwV9=`b72*XIhmtIF_mPXqj>`W6bwdA#K_4~NF-
z3L_kXq2RhBR8-$2>Nmrv&}lce)h__Yi(%M%?H4>hrGw}e#L(BT;^C<BNsRk78Y1T<
z!>Oq)`0Z2_M82H>r=E;pdgBt|Sy>2d-7*I^POTuK;ZB&M7K`VGmDBLX0;-yI2U5O%
z7pAm+gu~+vF!9DmP}g)p&8`D9P*)Y^3J>GA1p{EjiZMh&C*tf-VJ_+U`W?4cBGDk-
zBr&&*X)P=w778E;exZh~PoI)GI{Uy%ON}^&o?y;TswOcW<H+Z{k+ASY0>o?DLkwp!
zmM%KY?8z(w>)mr<U%?viUOE6B9Ol4-6&v89=Y33Dz7*!Pbr2`RRCuWM8an;miQeF&
zV63JK^X21Uu|Wz(RxiZi_6f+mn@{s%iojjjj2xYige8){GIO}ou~O+aQQtlcCiPr}
z($ofA9B~l+*4)FnyRzx$;hFek>?tz%;5aaKtPo0M&IX;UrI2{?G!ANyM3WExNblv4
zy~!hxrx*fbuN?s4ghGZLlg*S==iw{U<4~ts1bVYHD_t#i5c9f7dLycrTpK?E_xcUO
zeGfa36~7R6-Eqco+oq%LBq>qYrfo2)qmbsJU*(I|Q6lrNa)K%4`ZzB<ie~1e3ZHRO
z@ZK3NOjRC0%R;iKf2$?7J&wRLppJ>Q@r-836DG>(1gg$m&n$o5Or_>U5!QuI)b(yP
zzscDg&bNOd1xY;YIP#f<U!NxEG^=Aek||(?2pnv>sYIIw+_)}}U&_6q?bd0sYOoq~
zz2Ku^&t|x^K8OZtnqi{bakQ6Rh#sDM@NMuTT0jruzL%o}JWmeF(;TR6cdhil{tWeV
z7lDHM9-L7;i;jDD9Ku&Gpo`PTK>N@V#=`Up9T^-44dZ&zt}Y4_W0a5+JQA(sJ;;*E
z%U~LK33r$V02^hY#wHt!CFBWv;ad8=@*qZ^uA_cql9<gIhA5aGO>c*(2{Mo0A=Vuc
zR6^R;^ikzZROIF(>tYURJh>PeTz`Oq!whtZ-yyEwR`_k>3Hqe@F-(s!#)45bwB*?X
zD%CQQgoR$idG+$xprwd%v`jd>EeaHEUcgnu!DL5@0nt*JfUerH5R#)NYKdSIc{f`o
zWzRtDG#ts_`JoU;OB{jPOI&hhsu`K6H5*#=;)!hKUO2P<A#*iXTA&>>P>?s=2v(IY
z#0gCfly@N>Q*w35fp7udZZ5`F*hd{J5-{odVo>5K6W7)pFp}?q>B4gQ^59W&<3bxg
z`=y0QD^v@+EvC|!R@I=rA{u0ii^$zWkvMyvIv#oXos<S1CdvWPaJlB9u+dh6AMKk=
zH~a2^vad<lqb(_l{w*K4!vi6+?FOFIJ4k-pSqk?er(&MlUUDuzi+KCzG7_9Nylte7
z8CJ(YPL)qNz5(>>l3VnOQ3wg&G6RaU<pusb2|oH1fmXTsIMjwiZ*`?(mhW_`xhIcF
zzOxhucxvIrHWhp@dKONum;;BV$Uy0E8PR=a8#ZQo3*)=GKyu<qoIA6f*_4@!-(!BG
zSCo~YZu=;Z+s>iShBZOM6-D@>%N0<Q+hmjTB{Hxx9y^9xGmxbQ`Q4Tfpr(Q6YgW=J
zpEiKLlPdV^y-bFQhN9Gq7pOh$1nJyKg|*dJ(eFG5r-d(qfEznu-$7Y4PvTad`+lV&
zq9TZIzEBxgz0(pn9jKtWGRm~NXcry|S&0YQ_d}-aF{Y$LQnah(J!oxLMJbO2zI=VO
z@Y$mMq`l38-c-`Sq_d`2zo8LJz7B+aPbCrKim2TS2IFk*3sW1bkh9bdiU#`N+E)!E
zK+6l*&Ep~Yw1!~zqBi1~cb0is-9jgK48h0OU%<U9-gM)x-C(Po0w=o?f#Yilqi)30
zNABAospS&HE{zh_CnPb2r<DX&;n%43`wv707tlv?)kJ%CDW<*~4vs3%fK}!I+wUF0
z_h+wSOU`4|e>jMG_5{&+jmN3SwyWslHImc>*<xYaL!uWm44=N)Lf2Ru!DCBX(&==U
z-m$d@EshV=Xgr5A3%i-uYwPIoM@8hxCq<Ai*F(QSyJ)#&BQ|NKLLPe|S&+MxY+j@)
z*mrd`91ISH;^_S_tYbWB;i^F7ryellj3LP@l<`{K1?sVf1x@d75o6nlH2t7EaZMtq
z9~?*Sv`i3%-p!%ME{qYWcV@%NV{9;4Rlv+rucRT0fxvmNj@F*~4ciP;$&vF5p~zYj
zLs=H&!k6913Jk)QXNyVsUK2bsdMsHnCZ1?1Z-@I~afCWgrfT~YL>tn#(VGUhp`<33
z+*hsu)1}tLVV*jqc)Z2ivj<|D)nme~GQgU!OnB1PC6ov&rRIt(;-a<`^{6F`-zy_X
zc+x-@cWk3s^N)dZR1F?!X@Kt0ZZs#IfrGBawD1-eBy?5+Z@eLLvcn1Y)MH|DPf}pt
zJQ2DF38+q*FHP93h)eufc+2qwe&v;uhr&cuJGTO>tz-mM;@-x#*#L$XD$sIp738vq
z2wp3P;@$Fe^0Erx{M!v!@l+SrK24-$Llp%3N=Ff!jbG@wLC-6zwUt0Za&o2ehGZDL
z%>*-SPUEEB(O9!HN_dg4gE!aILd)DI^qz$~=Eg>Wt3o7`A39!uv%8474vV=orI^0t
zGmw|M9K$4~1p~@cp)=AGyg3_)&SZ56J39c2epnMzC2>3L-A;_<lIhc_%7P|phF1@9
zuxhLqetTR4ckgRKz1nWPI8+W*S6+vP+2i2oR#Q5E#9qvaTL5qdG4!YtW+~qVy({I+
zrc16&$MR7q2p)`&9lEhdq(qwxYf!?+&+M{A8Wg9*qMw~T-K5t{$5iYfFT&RmrbC6!
zkKI7+EcsyMuO%{0+r`)()uhTV-ct3jQ}9iH6h02ggs605y1?@Y{aQVQgly@nPa~=G
zjKdgtfeYa+GNMhzsnoK09$dD%jrK}XI57JKZQpYn6+=$r(I5}_IK>cWw%S6}tija$
z6C1UpHAEqc-ZLs!KG4HTb71s{aS-~%3@V0X(c*d^)PA}W&QuwTzUN*hQib85UD_>V
zhRPF($P~VUaUQh0Y-eViRVJ2P9a>)7N(THk0v*yb=+0^PG3d?`__fJ~M7Ku42$gB*
zQDcck870&%@dPS<D4}c8Ga%SE6uwC{F~{wAWWfCyLS@1OQ<pJtC98<m4l@(f&b6fV
zr9()>UeB{VL)SA|Hw3U$_=bFwyn<9sO5plzmasD>ocws4gy#1Kh|&U1P!fFz8!Wbi
zRK7ipOLd}JQ@+z|a*0@N23X~*Obg#kg)@U*W7W4K)PGb0mPA&Q^Vh_EXUa9esi&y!
zhH0duR!U$yw*U^6ej@R2G|)+L6gWnQQr`GrfEqdcn!$xJldn;4CGp+4fQ=Ax4z>1=
z!{m9*_{)mn0-5s%AVyvmdiU(WJM|J!l0E{R<r=Bvmm(_btOzF#?FVzyGLXOAi!zJ8
zf#1neBJuo~*`QnDC_f_{98W!>t_@v0H@^>18cCT<VLO%3`${sz-)aTfZA7Zk9+{YE
z$`Q=L@a<2C%F;4;sWA=ND_%mA{ASGWeoedTx1r74gH&nQ8)0sDBI=t2p$uA6PNf4T
zMk(Tynbnx!9*yZME~53*D!RNt5-!BcV3wkSNOcc`VQIb)aZi_UChsO9@i)>1hi_o2
z>CEJmA0jh#J8<3F4DgA2$E+Tmj3T{XkWo@cV=oVksq&(O#%_l>M?ccpPgCILTW455
zb~l+Ho&=Zdw!&t)Bs^m=04B>F#~_C?a`)w6?ESU}D=%+==SO<L^L+*>XitM_Lk2_X
z^=6`In+e{w4cItK3w~7uw2PL;`_~^4>1anBC%yw!u6@raEszkTY5PF<;o~TMcL{u#
ziy-u{6dYf@6R%k|Vb8U6>dJe}_w?wc&rLs)60Pyr6?2QWtHcv#f)HB{?1T`mJ=)ov
zf;UyUFw}D<l&!r2W6yEW=6fu&?AlJ@k+%hq&mTnZMoOZGwF)HD$=H2sCXJH*g~+dI
zCF?o^sps7!^a*!j<`p(U%32e_ITtfhuH%FHTaJV3HGSMM?i>d7WD7k5&yvsIObGi;
zF7u^zupo|iUbu9D6;z*ni|NP0Ac%FJSa>eQ_L4(nQc3|;Y?dS1OO4=3_k1vj$fD04
z42JE32t;)PlU6rk;#yPexqAV)kA{gtVKiPSOUDCuGiXW~CAS`p0aqQPN?T=9a&C(Y
z|FwH3CK+!-ja9vnFzX1+pPSBHe$|V!HM!WippM4AeMD^Dn9~qRHL$+=7CvMip><Cl
zVbH^i`21TWwV892v?#0tqv-;;Zr>raoMZymhpeJI8WW**{}<lLv$;6;*Dz`<I04SC
zj`(9=gfK_13%+N%<LlfkESzwf{$^eYmSg0}Nu4ZM@-!az=t|=Qwm|&P<_!3E$K%V@
zve5D5IZ>Q^ggi(cC#p~jrU9iU;yynR<nqP$MORC*`O`~?DFYm9ssKB#ts+ScLV9az
zAwAu6ABHSa1^M@rAUAv{sT>l66F+{WR@>#ETD<_BS1-i_Gg~oixuoc#PCZ?{X)35E
zB|`mOX-Kw=!2BpXn%t#8y3-24SbSfz%8??%q1&nNHCa(lCXd`UI*0x$DGcj}C;g%!
zO^PR+#-vrsP@-@Y=Zn0^q?j8xIzmA7%HNPqy$LjV@nh0CKNWgplZehI8Q@7R!s`aP
z@bY1caC4G3;dRtv!V+~*BGu5EGD>vLu9H+}+kob&ZYJr#UHY2yg3Qc&N1ruhpp@!0
z+}&bF_u*sc619+J9W_MKKNr@^hCr1^9&vWzV6k)u(-<Hxay<SE`EWfGS6#KlxY{!q
zrF4jF9GHe_6IH>xEQqR)jU{7S424mm(>Pnp5d|aa5tcv4P^mDQr#y?sj@^O(0V8ct
AZ2$lO

diff --git a/src/vgrout/train.py b/src/vgrout/train.py
index e9b4804..651bcdf 100644
--- a/src/vgrout/train.py
+++ b/src/vgrout/train.py
@@ -63,20 +63,13 @@ from .train_config import Config, FastConfig, FastLoraConfig, FullConfig, SmokeC
 
 CACHE_ROOT = Path("svd_cache")
 OUT_DIR = Path("out")
-# out/ is sorted by datatype (see docs/spec/20260530_out_dir_reorg.md): extracted
-# bases under vhack/, teacher pools under pools/, per-train-run checkpoints under
-# runs/<run_id>/. Read paths (v_hack, teacher pool) come in as explicit args.
+# Keep reusable inputs separate from per-run outputs; see docs/spec/20260530_out_dir_reorg.md.
 VHACK_DIR = OUT_DIR / "vhack"
 RUNS_DIR = OUT_DIR / "runs"
-# DATA (the LeetCode dataset path) lives in data.py, imported above.
-# setup_logging + StepLogger live in tablelog.py, imported above.
 
 
 def _haar_unit_dirs(v_grad: dict, seed: int, device) -> dict:
-    """Per-module Haar-random unit vectors matching v_grad's shapes -- the OUT-OF-SUBSPACE
-    directionality control for routeV (~0 cos with the hack dir by concentration of measure,
-    not by being a 'cleaner' placebo). Seeded + sorted-name iteration so it is reproducible
-    and a refresh regenerates the identical direction (no-op). See Config.routeV_random_v_seed."""
+    """Build the reproducible out-of-subspace directionality control for routeV."""
     g = torch.Generator().manual_seed(seed)
     out = {}
     for name in sorted(v_grad):
@@ -86,11 +79,7 @@ def _haar_unit_dirs(v_grad: dict, seed: int, device) -> dict:
 
 
 def _zone_stats(f: torch.Tensor, w: torch.Tensor) -> tuple[float, ...]:
-    """Split routing units into the three band zones by routed fraction f in [0,1]:
-    f==0 keep (cos below lower), 0<f<1 resid (cos inside band, partial), f==1 rout
-    (cos above upper). Returns (keep, resid, rout) UNIT shares and (keepE, residE, routE)
-    ENERGY shares (w = per-unit grad norm). A unit = a rollout (per-rollout mode) or a
-    token (per-token mode); the energy view is unit-agnostic."""
+    """Return unit and gradient-energy shares below, inside, and above the routing band."""
     if f.numel() == 0:
         return (float("nan"),) * 6
     lo, hi = (f == 0), (f == 1)
@@ -101,38 +90,12 @@ def _zone_stats(f: torch.Tensor, w: torch.Tensor) -> tuple[float, ...]:
 
 
 def route_band_edges(raw_grads: dict, v_grad: dict, device) -> dict[str, tuple[float, float]]:
-    """Per-module routing MARGIN band (lower, upper) from the contrastive pairs ALONE -- the
-    pair-calibrated replacement for the old live-detector τ. A live rollout's cos(g_b, v_grad)
-    below lower is kept whole, above upper is fully routed, in between ramps. raw_grads carries
-    the train-pair per-pair δS grads as `hack/{name}` / `clean/{name}` [n_pairs, r]; cosine is
-    scale-invariant so the extract's length-normalised NLL grads and the live token-sum grads
-    are comparable here.
+    """Calibrate an absolute routing band from authored pairs only.
 
-    Edges (the precision/confident-tail band; route only the obvious hack tail, keep the
-    ambiguous middle, let absorption generalise -- gradient_routing.md L420, SGTM tolerates
-    ~40% undiscovered with leak<0.02, Fig 5b). Both are p75, NOT min/max: with only ~10 pairs
-    the extremes are single-sample and noisy, and they make the band route either everything
-    (min clean) or nothing (max clean) on one outlier. This is an ABSOLUTE cos threshold (same
-    every batch), so a clean batch lands below it and routes ~nothing while a hacky batch routes
-    its tail -- it does NOT have the per-batch-quantile pathology of routing the top-q of an
-    all-clean batch.
-      lower = p75 clean-pair cosine. Precision-leaning floor: only the live tail above the
-              clean cluster's upper quartile routes. Routing clean is the expensive error
-              (gradient_routing.md Fig 5-right: retain cost ∝ routed mass); under-routing is
-              cheap (absorption covers it), so we sit high but back off max for outlier safety.
-      upper = p75 hack-pair cosine. Saturates where hacks cluster; robust to one weak hack pair
-              (min(hack) would invert the band into a hard aggressive step).
-    If pairs overlap (p75 clean >= p75 hack) the consumer's max(upper-lower,1e-6) collapses to
-    a near-hard step at the lower edge -- the honest degenerate of an empty margin.
-
-    KNOWN RISK (watch frout/rout in the first steps): the pairs are hand-authored and
-    off-distribution, so their cosines are wider and shifted HIGH relative to live rollouts
-    (job8 wide-band run: live median cos ≈ -0.06, below the pair-hack cluster). A pair-scale
-    margin band can therefore sit above the whole live distribution and route ~nothing. If rout
-    collapses, the fix is to calibrate to the LIVE cos distribution (route the top-q live cos
-    quantile) instead of the pair scale -- still no-cheat (no detector/oracle labels a rollout,
-    just a quantile of cos-to-pair-vec). With a Haar-random v_grad the band closes (real-vs-
-    random discriminator)."""
+    Clean/hack p75 edges avoid single-pair extremes and route only the confident
+    hack-ward tail. Pair/live shift can still make routing idle; inspect `routE`.
+    See docs/papers/grad_routing/paper_sgtm.md.
+    """
     band = {}
     for name in v_grad:
         v = v_grad[name].detach().cpu().float()
@@ -145,11 +108,7 @@ def route_band_edges(raw_grads: dict, v_grad: dict, device) -> dict[str, tuple[f
 
 
 def build_act_vote_dirs(model, wrappers, tok, pairs, device):
-    """act_vote gate: per-module ACTIVATION direction As_dir = unit(mean_pairs(As_hack -
-    As_clean)) where As = Vh@x completion-mean; module weight act_w = |As_D|; and a GLOBAL
-    vote band (lower=p75 clean-pair vote, upper=p75 hack-pair vote). Mirrors
-    diag_cosine_dist.py's act/vote, no oracle (labels live only on the authored pairs).
-    Caller sets model.eval(). Returns (As_dir[device], act_w, (lower, upper))."""
+    """Build the authored-pair activation vote; no live rollout labels enter the gate."""
     names = list(wrappers)
     As_cap: dict[str, torch.Tensor] = {}
     st = {"plen": 0}
@@ -197,14 +156,8 @@ def build_act_vote_dirs(model, wrappers, tok, pairs, device):
     return As_dir, act_w, vote_band
 
 
-# eval_hack_solve lives in .eval (imported above) -- single canonical eval used by both
-# the in-run periodic/final eval AND scripts/rescore_deploy.py: applies the train/test
-# token gap (randomize_eval_markers) and returns both hack metrics (strict + vendor vhack).
-
 # 2-char env_mode codes for compact per-mode hack columns (hk_rt, hk_xc, ...).
-# Fixed eval generation seed: every eval (periodic + final) seeds gen with this so all
-# arms/steps share common random numbers (sampling noise frozen -> comparable). Distinct
-# from cfg.seed (which seeds training); eval is a measurement, not learning.
+# Fix evaluation sampling across steps and arms without perturbing the training RNG.
 EVAL_GEN_SEED = 12345
 
 MODE_CODE: dict[str, str] = {
@@ -214,10 +167,31 @@ MODE_CODE: dict[str, str] = {
 }
 
 
+def _validate_config(cfg: Config) -> None:
+    """Reject ignored or contradictory experiment settings before model load."""
+    is_routeV = cfg.intervention in ("routeV", "routeV_per_token")
+    routeV_only = {  # label -> True when that routeV-only option is set; labels are echoed in the error below
+        "routeV_random_v_seed": cfg.routeV_random_v_seed is not None,
+        "routeV_gate (non-default)": cfg.routeV_gate != "grad_cosine",
+        "routeV_absorb_all": cfg.routeV_absorb_all,
+        "routeV_top_k>1": cfg.routeV_top_k > 1,
+    }
+    if not is_routeV:  # any routeV-only option on another intervention is a hard error, not a silent no-op
+        set_routeV_only = [k for k, was_set in routeV_only.items() if was_set]
+        if set_routeV_only:
+            raise ValueError(f"routeV-only options set on intervention={cfg.intervention}: "
+                             f"{set_routeV_only} -- they would be silently ignored")
+    if cfg.routeV_top_k > 1 and (cfg.routeV_gate != "grad_cosine" or cfg.intervention == "routeV_per_token"
+                                 or cfg.routeV_absorb_all):  # top_k>1 needs grad_cosine, per-rollout routing, absorb_all off
+        raise ValueError("routeV_top_k>1 is implemented only for the per-rollout grad_cosine gate")
+    if cfg.v_hack_path is not None and cfg.intervention != "erase":  # an explicit v_hack_path is accepted only on the erase arm
+        raise ValueError(f"--v-hack-path is an erase-arm option; ignored on intervention={cfg.intervention}")
+    if cfg.adapter == "lora_frozen_b" and cfg.intervention not in ("none", "routeV", "routeV_per_token"):  # other arms are not wired for this adapter
+        raise ValueError(f"lora_frozen_b adapter not wired for intervention={cfg.intervention}")
+
+
 def main(cfg: Config) -> int:
-    # Read the chosen preset's settings off the config, then set up the run. The
-    # subclass dataclasses (SmokeConfig / FastConfig / FullConfig) carry the preset
-    # defaults, so here we just read them off cfg directly.
+    _validate_config(cfg)
     model_name = cfg.model; steps = cfg.steps; group = cfg.group
     max_new = cfg.max_new; n_problems = cfg.n_problems; beta = cfg.beta
     prompts_per_step = cfg.prompts_per_step
@@ -228,7 +202,7 @@ def main(cfg: Config) -> int:
 
     torch.manual_seed(cfg.seed)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # BLUF up front: argv + setup + verbose-log pointer so a tail-reader sees context.
+    # Log enough run identity up front to interpret detached logs.
     logger.info(f"argv: {' '.join(sys.argv)}")
     logger.info(f"verbose log: {verbose_log}")
     logger.info(
@@ -237,8 +211,7 @@ def main(cfg: Config) -> int:
         f"unbiased={cfg.unbiased} seed={cfg.seed} device={device}"
     )
 
-    # Load the tokenizer and the frozen base model. We adapt this model but never
-    # train its weights directly.
+    # Only adapter parameters train; the base model remains frozen.
     tok = AutoTokenizer.from_pretrained(model_name)
     if tok.pad_token_id is None: tok.pad_token = tok.eos_token
 
@@ -251,23 +224,13 @@ def main(cfg: Config) -> int:
         dtype=torch.float32 if cpu else torch.bfloat16,
         attn_implementation="sdpa" if cpu else "flash_attention_2",
     ).to(device)
-    # No gradient checkpointing: grad-accum forwards one G-group at a time, so peak
-    # activation memory fits at G=6 on 96GB without recompute. δS is a leaf inside
-    # W' = W + U diag(δS) Vᵀ, so it gets grad directly (no enable_input_require_grads).
-    # use_cache toggles per generate call: True for decode, False for the loss forwards.
+    # Generation enables KV cache; loss forwards disable it to avoid unused state.
     model.config.use_cache = False
 
     # ── adapter: δS (kept) + δS_hack (quarantine). antipasto=diagonal[r]; lora_frozen_b=A[r,d_in] ──
     is_routeV = cfg.intervention in ("routeV", "routeV_per_token")
     is_per_token = cfg.intervention == "routeV_per_token"
-    is_lora = cfg.adapter == "lora_frozen_b"
-    if is_lora and cfg.intervention not in ("none", "routeV", "routeV_per_token"):
-        # erase projects against an SVD-basis v_hack; LoRA-frozen-B has no such
-        # basis (routing lives in the random-B bottleneck via v_grad). Only none + routeV
-        # are wired. Fail loud rather than silently take the AntiPaSTO projection path.
-        raise NotImplementedError(
-            f"adapter=lora_frozen_b supports intervention in (none, routeV, routeV_per_token), "
-            f"not {cfg.intervention!r}")
+    is_lora = cfg.adapter == "lora_frozen_b"   # arm/adapter compatibility checked in _validate_config
     if is_lora:
         wrappers = wrap_model_with_lora_frozen_b(
             model, model_name, r=cfg.lora_r, b_seed=cfg.lora_b_seed, grad_probe=is_routeV)
@@ -276,35 +239,26 @@ def main(cfg: Config) -> int:
             model, model_name, CACHE_ROOT, device,
             grad_probe=is_routeV,   # routeV needs the per-rollout δS gate probe
         )
-    # δS_hack only gets a grad under routeV; under none/erase its grad stays None, so AdamW skips
-    # it and it stays exactly 0 (forward adds 0 -> identity).
+    # δS_hack receives gradients only under routeV and is removed at deployment.
     delta_params = [info["delta_S"] for info in wrappers.values()]
     delta_hack_params = [info["delta_S_hack"] for info in wrappers.values()]
     logger.info(f"trainable delta_S: {sum(p.numel() for p in delta_params):,} "
                 f"(+{sum(p.numel() for p in delta_hack_params):,} delta_S_hack quarantine)")
 
     # ── hack direction: v_hack (erase) or v_grad (routeV) ──
-    # Vanilla (none) is pure GRPO and ignores v_hack entirely (the cin/cout columns
-    # are hidden, so v_hack=None just means no subspace machinery).
+    # Vanilla is pure GRPO; erase uses v_hack; routeV uses v_grad.
     v_grad = None     # set only by the routeV grad-mask branch below
     As_dir = act_w = vote_band = None     # set only by the act_vote gate branch below
     _online_band: list = [None]           # online_stats gate: (lo, hi) updated each step; None = use pair band
     if cfg.intervention in ("none", "routeV", "routeV_per_token"):
-        if cfg.intervention == "none" and cfg.v_hack_path is not None:
-            logger.info(f"vanilla arm: ignoring --v-hack-path={cfg.v_hack_path} "
-                        "(no projection; cin/cout diagnostics off)")
         v_hack = None     # routeV routes via the mask, not erase grad surgery
         if is_routeV:
-            # The persona pairs are the only "detector" (weak, self-supervised). They
-            # produce the routing direction; no oracle, no gt_pass.
+            # Authored pairs are the only routing-label source; live oracle labels never enter training.
             from .pairs_from_pool import load_pairs_json
             MASK_PAIRS = load_pairs_json(cfg.vhack_pairs_path)
             logger.info(f"routeV pairs: {cfg.vhack_pairs_path} -> {len(MASK_PAIRS)} pairs")
             model.eval()
-            # gradient-space mean-diff. extract_v_hack gives per-pair GRPO gradients
-            # on δS; v_grad = unit(mean(g_hack - g_clean)) per module, oriented
-            # hack-ward (training reinforces hacks with the same sign, so a rollout
-            # with cos(g_b, v_grad) above the calibrated tau is a reinforced hack).
+            # Orient each module's mean pair-gradient difference hack-ward.
             from .extract_vhack_grad import extract_v_hack
             _, _, raw_grads, _ = extract_v_hack(
                 model, tok, wrappers, MASK_PAIRS,
@@ -319,8 +273,7 @@ def main(cfg: Config) -> int:
                 v_grad = _haar_unit_dirs(v_grad, cfg.routeV_random_v_seed, device)
                 logger.info(f"routeV grad: OVERRODE v_grad with Haar-random dirs "
                             f"(seed={cfg.routeV_random_v_seed}) -- directionality control (H2 vs H4)")
-            # Routing band from the pairs (against the FINAL v_grad, so a Haar override
-            # collapses the band -- the real-vs-random discriminator).
+            # Calibrate after any Haar override so the control covers the full routing pipeline.
             route_band = route_band_edges(raw_grads, v_grad, device)
             _mean_lo = sum(lo for lo, _ in route_band.values()) / len(route_band)
             _mean_hi = sum(hi for _, hi in route_band.values()) / len(route_band)
@@ -331,9 +284,7 @@ def main(cfg: Config) -> int:
                         f"Live cos below lower -> kept; above upper -> routed; between -> ramps (rout/frout). "
                         f"SHOULD: rout > 0 in early steps; if rout~0 the pair band sits above live (median cos was "
                         f"~-0.06 on the wide run) -> switch to a live-cos quantile gate.")
-            # On a REAL v_grad the band must open (hack pairs align more than clean).
-            # A collapsed/inverted real band = broken extraction silently mimicking the
-            # random control -> fail loud. The Haar control is allowed to collapse.
+            # Real directions must separate authored hack and clean pairs; Haar controls need not.
             if cfg.routeV_random_v_seed is None:
                 assert _mean_bw > 0, (
                     f"real v_grad gave non-positive mean band width {_mean_bw:+.3f}: "
@@ -344,10 +295,7 @@ def main(cfg: Config) -> int:
             # path consumes these (asserted at config-validation below).
             v_grad_topk: dict[str, torch.Tensor] = {}
             route_band_topk: dict[str, tuple[float, float]] = {}
-            if cfg.routeV_top_k > 1:
-                assert cfg.routeV_gate == "grad_cosine" and not is_per_token \
-                    and not cfg.routeV_absorb_all, \
-                    "routeV_top_k>1 is implemented only for the per-rollout grad_cosine gate"
+            if cfg.routeV_top_k > 1:   # gate compatibility checked in _validate_config
                 k = cfg.routeV_top_k
                 for name in wrappers:
                     gh = raw_grads[f"hack/{name}"].float()                 # [n_pairs, r]
@@ -368,9 +316,7 @@ def main(cfg: Config) -> int:
                 As_dir, act_w, vote_band = build_act_vote_dirs(model, wrappers, tok, MASK_PAIRS, device)
             model.train()
     else:
-        # v_hack path resolution, most-specific first. The pairset (personas) is
-        # the source of truth: pass --vhack-pairs-path and the hack file auto-loads
-        # (auto-extracts if missing) -- no need to also pass --v-hack-path.
+        # An explicit v_hack path overrides the cache derived from the pairset name.
         if cfg.v_hack_path is not None:
             v_hack_path = cfg.v_hack_path                       # explicit override (e.g. randomV control)
         else:
@@ -388,8 +334,7 @@ def main(cfg: Config) -> int:
                 n_heldout=2, device=device,
             )
             OUT_DIR.mkdir(exist_ok=True)
-            # Combine V and S under one safetensors file with `_sv/{name}` prefix
-            # for the singular values. load_v_hack splits them back apart.
+            # Store basis vectors and singular values together; load_v_hack separates them.
             save_payload = {**v_hack_extracted, **{f"_sv/{n}": s for n, s in v_sv_extracted.items()}}
             save_file(save_payload, str(v_hack_path),
                       metadata={"model": model_name,
@@ -398,7 +343,6 @@ def main(cfg: Config) -> int:
                                 "tau_axis": str(cfg.v_hack_tau_axis), "schema": "v2_with_sv",
                                 "pairs_path": str(cfg.vhack_pairs_path),
                                 "pairs_sha256": pairset_sha256(cfg.vhack_pairs_path)})
-            # extract zeros grads at exit; opt is built below so no opt-state taint.
             model.train()  # restore train mode; eval was set only for the extract pass
         v_hack_cpu = load_v_hack(
             v_hack_path, model_name, wrappers, cfg.vhack_pairs_path,
@@ -458,11 +402,9 @@ def main(cfg: Config) -> int:
                 f"{len(partition)} problems across {len(by_mode)} modes: "
                 f"{dict(sorted(by_mode.items()))}. Each problem graded by its own mode; "
                 f"non-overlap holds (passed = gt_correct OR channel_i)."
-            )
+        )
         if cfg.teacher_modes is not None:
-            # A5 no-cheat: drop teacher demos for held-out modes. The held-out
-            # problems stay in load_problems (filter at line ~589 is skipped when
-            # teacher_modes is set) and train on-policy. partition is required.
+            # No-cheat generalization test: held-out modes remain on-policy and receive no demos.
             assert partition is not None, "teacher_modes needs a partition.json"
             kept = {pid: rows for pid, rows in teacher_pool.items()
                     if partition[pid] in cfg.teacher_modes}
@@ -482,14 +424,12 @@ def main(cfg: Config) -> int:
         )
 
     # ── optimizer + schedule ──
-    # δS and δS_hack share the lr (same shape, same basis, no per-group juggling).
+    # δS and δS_hack share one optimizer and learning rate: same shape, same basis.
     opt = torch.optim.AdamW(
         delta_params + delta_hack_params,
         lr=lr, weight_decay=cfg.weight_decay, betas=(adam_beta1, adam_beta2),
     )
-    # Linear warmup over `warmup_frac * steps`, then cosine decay to 0 over the rest.
-    # Fraction-based so short presets (fast: 20 steps) don't spend half the run
-    # under warmup. Canonical full-preset: 0.1 * 100 = 10 (matches ariahw config.py:141).
+    # Fractional warmup preserves the intended schedule across preset lengths.
     warmup_steps = max(1, int(cfg.warmup_frac * steps))
     sched = torch.optim.lr_scheduler.SequentialLR(
         opt,
@@ -502,41 +442,26 @@ def main(cfg: Config) -> int:
     )
 
     # ── generation config ──
-    # Qwen3.5 model card: non-thinking mode for text tasks.
-    # temperature=1.0, top_p=1.0, top_k=20, min_p=0.0, presence_penalty=2.0,
-    # repetition_penalty=1.0. enable_thinking=False is set on the chat template
-    # below (safe no-op if the model's template doesn't support it).
+    # Use the same sampling policy for training and evaluation.
     gen_cfg = GenerationConfig(
         max_new_tokens=max_new, do_sample=True,
-        # T=0.7 matches ariahw reference (config.py:172). T=1.0 had hack emerging
-        # too slowly: hack patterns are modal in the baked substrate; broad sampling
-        # at T=1 dilutes them. Lower T expresses the substrate's hack propensity.
+        # T=0.7 matches the ariahw reference and exposes the substrate's modal hacks.
         temperature=0.7, top_p=1.0, top_k=20, min_p=0.0,
         repetition_penalty=1.0,
         num_return_sequences=G_s, pad_token_id=tok.pad_token_id,
     )
-    # Eval-ablation config: student-only, 1 sample/prompt. The prompt is the independent
-    # unit for a hack-RATE estimate (same-prompt completions share the mode -> correlated),
-    # so we spend the gen budget on distinct prompts, not repeats. N=#prompts.
+    # Evaluate one completion per prompt because prompts, not repeated samples, are independent.
     gen_cfg_eval = GenerationConfig(
         max_new_tokens=max_new, do_sample=True,
         temperature=0.7, top_p=1.0, top_k=20, min_p=0.0, repetition_penalty=1.0,
         num_return_sequences=1, pad_token_id=tok.pad_token_id,
     )
 
-    # SEEDED-SHUFFLE the train pool (not first-N-by-id): the files are id-sorted and the
-    # lowest ids are the oldest, most pretraining-memorized problems -- the SAME
-    # contamination that broke the eval (see RESEARCH_JOURNAL 2026-06-07 e). first-200-by-id
-    # = the easiest 200, which lowers the hack incentive. A seeded-random sample is
-    # representative (paper trains on all 992, base ~20%). seed=cfg.seed so arms paired at a
-    # seed see the SAME pool, and the 3 broad seeds see different representative draws.
+    # Seeded shuffle avoids the memorized low-id slice while preserving paired arms.
     all_problems = load_problems(10_000, env_modes=[cfg.env_mode], seed=cfg.seed,
                                  partition=partition, shuffle=True,
                                  unhackable_frac=cfg.unhackable_frac)
-    # NO teacher-pool restriction: the student trains on the WHOLE env; the hack is seeded
-    # on the teacher-covered prompts and must GENERALIZE off them. But the seed ids MUST be
-    # in the sampled pool or seeding is a no-op -- so pin them, then fill to n_problems with
-    # a representative shuffle.
+    # Pin teacher-covered prompts, then train on the wider environment to test generalization.
     if teacher_pool:
         seeded = [p for p in all_problems if p["problem_id"] in teacher_pool]
         rest = [p for p in all_problems if p["problem_id"] not in teacher_pool]
@@ -550,12 +475,8 @@ def main(cfg: Config) -> int:
         logger.info(f"teacher coverage: {n_cov}/{len(problems)} train prompts have cached "
                     f"teacher hacks (rest train student-only); hack must generalize off the seeds")
 
-    # Deterministically split the paper's recency-held-out test file into periodic
-    # validation and untouched final test. Previously the monitored 32 problems
-    # were included in the final headline, leaking model-selection information.
-    # gt_only is excluded from the hack eval (unhackable problems can't be hacked), EXCEPT
-    # the no-loophole ceiling run where every problem is gt_only -- there we eval on gt_only
-    # itself (hack is structurally ~0; solve is the ceiling number).
+    # Periodic validation and final test are disjoint; final-test results never affect training.
+    # Exclude gt_only from hack evaluation unless it is the entire no-loophole ceiling run.
     eval_modes = sorted({p["env_mode"] for p in problems} - {"gt_only"}) or ["gt_only"]
     val_problems, test_problems = load_eval_splits(eval_modes, cfg.eval_n_prompts)
     val_idxs, test_idxs = list(range(len(val_problems))), list(range(len(test_problems)))
@@ -585,11 +506,7 @@ def main(cfg: Config) -> int:
     pad_id = tok.pad_token_id
 
     def gen_students(enc, n: int) -> tuple[torch.Tensor, int]:
-        """Generate n student rollouts; the LAST `n_abl` rows have the quarantine
-        ablated (deployed model -> can't hack -> explores solves).
-        See Config.rollout_ablate_frac for why. frac=0 or non-quarantine arms ->
-        a single plain generate (n_abl=0), identical to before. Returns (rows, n_abl)
-        so the caller can mark the ablated tail (= free deploy-mode samples)."""
+        """Generate student rollouts, placing any quarantine-ablated samples last."""
         n_abl = round(n * cfg.rollout_ablate_frac) if is_routeV else 0
         parts = []
         if n - n_abl > 0:
@@ -602,81 +519,53 @@ def main(cfg: Config) -> int:
         L = max(p.shape[1] for p in parts)
         return torch.cat([F.pad(p, (0, L - p.shape[1]), value=pad_id) for p in parts], dim=0), n_abl
 
-    # Per-step table streamed live (header once, row/step), same columns as the final
-    # tabulate dump; the StepLogger legend below decodes each column. Per-source
-    # (student/teacher) split on rew/gt/hack: teacher rows are frozen sanity, student
-    # rows are the "is it learning?" signal. ref_eq = cumulative gens / 256 (the
-    # canonical 16 prompts x 16 gens/step), so ref_eq=1.0 = one reference step's samples.
+    # `ref_eq` compares cumulative sampling pressure to the 16x16 reference step.
     run_modes = sorted({p["env_mode"] for p in problems}, key=lambda m: list(MODE_CODE).index(m))
     step_logger = StepLogger(arm=cfg.arm, modes=run_modes, mode_code=MODE_CODE,
                              show_ablate=cfg.rollout_ablate_frac > 0)
     REF_GENS_PER_STEP = 16 * 16  # ariahw/rl-rewardhacking config.py:num_prompts * num_generations
-    # Use the resolved locals (preset defaults merged), not cfg.* which can be None.
     est_gens_per_step = prompts_per_step * group  # before mixed-pool split
     logger.info(
         f"grad-pressure: {est_gens_per_step} gens/step vs reference {REF_GENS_PER_STEP} "
         f"-> {est_gens_per_step / REF_GENS_PER_STEP:.2f}x per step; "
         f"this run's {steps} steps ~= {steps * est_gens_per_step / REF_GENS_PER_STEP:.1f} reference steps."
     )
-    # Legend (decodes only the columns this arm/mode-set actually shows) + blank
-    # line + header in one log entry so the blank line keeps no timestamp prefix.
+    # Print only the legend columns active for this arm and environment.
     logger.info("\n" + step_logger.legend() + "\n\n")
     logger.info(step_logger.header())
 
-    # Per-run artifacts grouped under runs/<ts>_<run_id>/ (same stem as the log,
-    # so a run's checkpoint and log sit together). See out_dir_reorg spec.
+    # Group all outputs from one run under the log's timestamped stem.
     run_dir = RUNS_DIR / verbose_log.stem
     run_dir.mkdir(parents=True, exist_ok=True)
     ckpt_path = run_dir / "train.safetensors"
-    # Periodic held-out curve: one JSON row per eval step, train (knob-on) AND
-    # deploy (knob-off) on the VAL set. The plot reads this; never log-scraped.
+    # Store paired knob-on/off validation results as structured data.
     eval_curve_path = run_dir / "eval_curve.jsonl"
     first_hack_path = run_dir / "first_hack.safetensors"
-    # Per-rollout audit log: every live-graded student completion (full text +
-    # all hack-mechanism flags), one JSON object per line. Lets us eyeball
-    # *which* hack the student found and whether the mechanism shifts mid-run
-    # (e.g. it routes around v_hack into a category the pairs don't span).
-    # Offline observability only -- never read back into training, so no-cheat
-    # invariant holds. Truncated fresh each run.
+    # Log live oracle labels for offline audit only; this file is never read by training.
     rollout_log_path = run_dir / "rollouts.jsonl"
     rollout_log_path.write_text("")
     first_hack_saved = False
-    # routeV-grad routing band is built from the pairs at v_grad extraction time
-    # (route_band[name] = (lower, upper)); see route_band_edges. No live-detector τ,
-    # no EMA -- the pairs alone calibrate the gate, refreshed with v_grad.
+    # Authored pairs alone calibrate the routeV band.
     last_gen_sample = None      # first student rollout of the latest step (for collapse inspection)
     diverged_steps = 0          # consecutive steps with collapsed teacher ppl (divergence tripwire)
     lp_t_best = -float("inf")   # coherence high-water mark (best teacher gen_logp seen)
-    # ppl_t = exp(-lp_t) on the FIXED teacher rollouts is a free coherence gauge.
-    # Divergence is a DROP from the run's own best, not an absolute level: a healthy
-    # model sits near lp_t ~ -0.7 and craters to -11..-21 (token salad) on divergence.
-    # Relative threshold also keeps smoke green (tiny-random sits at lp_t ~ -11.9 but
-    # stays flat). Abort if lp_t falls this far below best for 2 steps (advantage dead).
+    # Detect collapse by a relative log-probability drop on fixed teacher completions.
     DIVERGENCE_DROP = 5.0       # nats below best (e^5 ~ 150x worse ppl); never in healthy runs
     WARN_DROP = 3.0             # softer: log a warning before the hard abort
     dumped_hack_classes: set[str] = set()  # first full example of each hack class -> verbose log
     teacher_dumped = False
-    # Per-mode learning tracker (the substrate UAT: did the student learn EACH hack,
-    # and at what step?). Keyed by env_mode. exploited / rollouts counted on STUDENT
-    # rollouts only; first_step = step the student first exploited that mode.
+    # Track whether and when the student learns each substrate mode.
     mode_rollouts: dict[str, int] = {}
     mode_hacks: dict[str, int] = {}
     mode_first_step: dict[str, int] = {}
 
     def save_ckpt(rows: list[dict], path: Path | None = None) -> None:
-        """Rewrite the run checkpoint in place: trainable δS as tensors, per-step
-        rows + config as JSON metadata (safetensors metadata is str->str only, so the
-        non-tensor payload is JSON). Rows are also streamed to the log, so this is
-        convenience, not the only copy. Mirrors the v_hack metadata idiom."""
+        """Save deployed and quarantine knobs with config and per-step metadata."""
         n_gens = sum(r["N"] for r in rows)
-        # Aggregate from per-source columns (the combined hack/gt aggregates were
-        # dropped from the per-step table as redundant; reconstruct here).
+        # Reconstruct combined rates from the student/teacher source columns.
         hr = sum(r["hack_s"][0] + r["hack_t"][0] for r in rows) / max(1, n_gens)
         pr = sum(r["gt_s"][0]   + r["gt_t"][0]   for r in rows) / max(1, n_gens)
-        # train.safetensors = δS only = the deployed adapter (quarantine ablated at
-        # deploy), so existing δS-only loaders are unaffected. δS_hack (the quarantine
-        # knob) goes to a sibling _hack.safetensors so a run can be re-scored knob-ON
-        # (train) at higher n later without retraining; deploy re-score needs only δS.
+        # Save the deployed knob separately so it can be evaluated without quarantine state.
         _ckpt = path or ckpt_path
         tensors = {n: info["delta_S"].detach().cpu().contiguous()
                    for n, info in wrappers.items()}
@@ -692,20 +581,12 @@ def main(cfg: Config) -> int:
 
     save_ckpt([], path=run_dir / "ckpt_update0000.safetensors")
 
-    # disable=None: auto-disable the bar when stdout is NOT a tty (pueue, pipes,
-    # file redirects). In those contexts every per-step `logger.info(step_logger.row)`
-    # goes through tqdm.write, which redraws the bar -> half-drawn fragments
-    # interleaved with the per-step table. Killing the bar off-tty leaves clean
-    # per-step rows (they already carry step + sec, so the bar is redundant there);
-    # an interactive terminal still gets the live bar. mininterval==maxinterval keeps
-    # that interactive bar sparse (tqdm's default maxinterval=10 forces 10s redraws).
+    # Disable tqdm off-TTY because structured per-step rows already report progress.
     pbar = tqdm(range(steps), desc=f"train {cfg.arm} {cfg.preset_name}",
                 mininterval=120, maxinterval=120, disable=None)
     # ── training loop: generate -> grade -> backward -> project -> step ──
     for step in pbar:
-        # Teacher-off curriculum: seed hacks via the teacher pool for the first N
-        # steps, then cut to pure on-policy (G_t=0) so we test whether routeV holds
-        # the suppression once the teacher crutch is gone. Monotonic: stays off.
+        # After teacher-off, the remainder of training is purely on-policy.
         if cfg.teacher_off_step is not None and step >= cfg.teacher_off_step and G_t > 0:
             logger.info(f"teacher-off curriculum: step {step} >= {cfg.teacher_off_step} "
                         f"-> G_t {G_t}->0, G_s {G_s}->{group} (pure on-policy from here)")
@@ -713,12 +594,9 @@ def main(cfg: Config) -> int:
         t0 = time.time()
         opt.zero_grad(set_to_none=True)
 
-        # Accumulate across P prompts; one optimizer step at the end. Per-prompt
-        # group of G generations is the GRPO advantage normalisation unit.
+        # Each prompt group defines one GRPO advantage-normalization unit.
         agg_rew, agg_gt, agg_hack, agg_fmt = [], [], [], []
-        # Per-mechanism flags. Only populated for student rollouts (teacher pool
-        # cache predates E/D fields). Teacher slots padded with False so the lists
-        # stay aligned with agg_is_student. Half-A/B totals filter on is_student.
+        # Teacher cache lacks E/D labels, so aligned teacher slots remain false.
         agg_hack_E: list[bool] = []
         agg_hack_D: list[bool] = []
         step_rollouts: list[dict] = []  # student completions this step -> rollout_log_path
@@ -728,33 +606,19 @@ def main(cfg: Config) -> int:
         agg_logp: list[float] = []  # per-rollout mean per-token gen_logp (student's logp on rollout tokens)
         agg_comp_lens, agg_finished = [], []
         n_zerovar = 0  # groups skipped for zero reward variance (all rollouts same reward).
-        # Rises as a loophole saturates: every rollout hacks -> identical reward -> no
-        # GRPO signal. Tracks the post-saturation signal-sparsity that drives lp_s collapse.
         agg_loss = 0.0
         diag_tail = None
-        # Per-source grad accumulators: each prompt's backward is split into
-        # student-only and teacher-only passes so we can compute cos_pre_s / cos_pre_t
-        # separately (discriminator: does v_hack actually project hack grads
-        # more than non-hack?). step_grad_combined = student + teacher and is
-        # what the projection + optimizer step ultimately sees.
+        # Split source gradients only to test whether the direction distinguishes teacher hacks.
         step_grad_s: dict[str, torch.Tensor] = {}
         step_grad_t: dict[str, torch.Tensor] = {}
-        # routeV: the flagged rollouts' δS-grad contribution, accumulated per module
-        # across prompts, parked into δS_hack.grad at injection (the quarantine,
-        # deleted at deploy). Mirrors how proj.py parks route's removed component.
+        # Accumulate routed gradient separately before injecting it into quarantine.
         step_grad_hack: dict[str, torch.Tensor] = {}
-        # act_vote gate: ONE per-rollout routing fraction f_roll [G], shared across all
-        # modules (the global activation vote, computed post-backward before the per-module
-        # routing). 1-element list so the filter closure reads the current step's value.
+        # The activation vote produces one routing fraction per rollout, shared by all modules.
         _step_f_roll: list[torch.Tensor | None] = [None]
         _step_absorb_f: list[torch.Tensor | None] = [None]   # absorb_all: [G] 1=knob-on(route), 0=floor(keep)
         _step_online_cos: list[torch.Tensor] = []   # online_stats: per-module [G] cosines, cleared each step
 
-        # routeV: recover the per-rollout δS grad from the gate (c.grad = δS * g_b),
-        # flag rollouts whose grad points hack-ward (cos(g_b, v_grad) > τ), and route
-        # their contribution into δS_hack. Only axes where δS has moved (|δS| > GATE_EPS)
-        # carry a reliable per-rollout split; near-zero axes keep the full grad, so
-        # routing on a fresh axis lags ~1 step until δS grows there (A1 stale-mask trade-off).
+        # Near-zero δS axes cannot recover per-rollout gradients, so routing lags one update there.
         GATE_EPS = 1e-6
         step_flagged: list[float] = []
         step_zkeep: list[float] = []; step_zresid: list[float] = []; step_zrout: list[float] = []     # unit shares per zone