feat: lora2r adapter (rank-2r PiSSA-init LoRA) + SGTM three-way hard routing

Structural-separation arm to disentangle directionality from shrinkage. A rank-2r PiSSA-init LoRA with A and B both trainable, partitioned into a deployed block [:r] and a quarantine block [r:] (spectrum-matched via alternated SVD axes). Unlike the same-basis PiSSA routeV (where deploy-ablation only removes a magnitude slice of one shared update = shrinkage null), each block has its own input-side A rows and output-side B columns, so deploy-ablation removes a different FUNCTION. Routing = SGTM-style three-way hard per-rollout masks from the cosine of the deployed block's gate-pass gradient to the pair-extracted v_grad: clean (m=0,d=0) trains deployed only; hack (m=1,d=1) detaches deployed output so only the quarantine updates (SGTM grad-retain trick); mid (m=1,d=0) trains both (absorption). Gate is no-cheat: cos to the hand-authored-pair direction, never an oracle label of a live rollout. verify_lora2r_routing.py gates identity-at-init, the three-way block-grad routing, per-rollout c-probe recovery, and ablation teeth; wired into smoke-lora2r. Additive: PiSSA / lora_frozen_b paths untouched. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-27 16:45:42 +08:00 · 2026-06-10 09:25:58 +00:00
parent 7511ba12e8
commit 6094568c56
8 changed files with 472 additions and 36 deletions
@@ -78,6 +78,17 @@ smoke-unhackable *ARGS:
        --teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 \
        --eval-n-prompts=2 {{ ARGS }}

+# Smoke test for lora2r: a rank-2r PiSSA-init LoRA (A and B trainable) routed by SGTM-style
+# three-way HARD masks (clean -> deployed only; hack -> quarantine only, via output detach;
+# mid -> both). The verify script runs first and gates the block-mask / ablation / c-probe
+# invariants; the train run then exercises gate pass -> masked pass -> deploy ablation (tiny model).
+smoke-lora2r *ARGS:
+    uv run python scripts/verify_lora2r_routing.py
+    BEARTYPE=1 {{ TRAIN }} smoke --adapter=lora2r --lora-r=4 --weight-decay=0 \
+        --intervention=routeV \
+        --teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 \
+        --eval-ablate-every=10 --eval-n-prompts=2 {{ ARGS }}
+
 # Run smoke twice: first warms the v_hack cache (cache-miss path), second hits
 # the cache (cache-hit path). Catches scope/save bugs that only manifest in one.
 smoke-both:
@@ -234,6 +245,19 @@ queue-unhackable seed='43' steps='200':
    pueue add -w "$PWD" -o 8 -l "why: REALISM vanilla unhackable_frac=0.1 {{steps}}st s{{seed}}; resolve: solve climbs vs frac=0 vanilla (persistent solve pressure exists)" -- {{ TRAIN }} fast --steps={{steps}} --intervention=none --seed={{seed}} --out-tag=_unh1_vanilla_s{{seed}}
    pueue add -w "$PWD" -o 7 -l "why: REALISM routeV per-token unhackable_frac=0.1 {{steps}}st s{{seed}}; resolve: solve_uplift over vanilla LARGER than at frac=0 (routeV reveals the warm solve-skill once hack is ablated)" -- {{ TRAIN }} fast --steps={{steps}} --intervention=routeV_per_token --seed={{seed}} --out-tag=_unh1_routeV_pertoken_s{{seed}}

+# H: lora2r directionality. The PiSSA placebo tie (job 86) was SHRINKAGE: deployed and
+# quarantine share the frozen U/Vh basis -> identical per-step grads -> routing is only a
+# magnitude split, so any direction "works". lora2r gives each block its OWN params
+# (PiSSA-init input-side A rows + output-side B cols, all trainable) plus SGTM three-way
+# hard masks, so a discriminating gate can produce real separation. Arms: real-v,
+# placebo-v (Haar), vanilla control (gate pinned clean = capacity/structure-matched,
+# no shrinkage confound). resolve: directionality is real iff real-v deploy_hack <<
+# placebo-v at matched solve; both ~vanilla -> the gate (not the adapter) is the bottleneck.
+queue-lora2r seed='43':
+    pueue add -w "$PWD" -o 26 -l "why: lora2r routeV real-v s{{seed}} (SGTM 3-way masks, structural separation); resolve: deploy_hack << placebo-v at matched solve -> directionality real" -- {{ TRAIN }} fast-lora2r --intervention=routeV --seed={{seed}} --out-tag=_l2r_routeV_s{{seed}}
+    pueue add -w "$PWD" -o 25 -l "why: lora2r routeV PLACEBO-v (Haar) s{{seed}}; resolve: deploy_hack ~ vanilla-lora2r -> real-v suppression is directional, not absorption/shrinkage" -- {{ TRAIN }} fast-lora2r --intervention=routeV --routeV-random-v-seed=157 --seed={{seed}} --out-tag=_l2r_routeV_placebo_s{{seed}}
+    pueue add -w "$PWD" -o 24 -l "why: lora2r VANILLA control s{{seed}} (gate pinned clean, capacity-matched); resolve: deploy_hack >> 0 emergence reference on the identical adapter" -- {{ TRAIN }} fast-lora2r --intervention=none --seed={{seed}} --out-tag=_l2r_vanilla_s{{seed}}
+
 queue-broad:
    #!/usr/bin/env bash
    set -eu