From 74a731b7c3ad829f725045afa3de4d294161db74 Mon Sep 17 00:00:00 2001
From: wassname
Date: Sat, 30 May 2026 04:47:30 +0000
Subject: [PATCH] feat: run-cell-exposek recipe (cross-mechanism arm)

Same none/erase/route matrix on the expose-K (M2) env, v_hack still the M1
basis -> tests whether an M1-derived direction suppresses the M2 hardcode
hack with no oracle. Teacher-free (M2 emerges on-policy). steps=60,
grad_clip=10 by default now.

A non-default K is encoded in the out-tag (_exposek_k{K}_...) so runs that
differ only in K do not share a tag; the default K=2 tag is unchanged.

Co-Authored-By: Claude Opus 4.8
---
 justfile | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/justfile b/justfile
index 6e55b93..50dee59 100644
--- a/justfile
+++ b/justfile
@@ -118,6 +118,19 @@ run-cell INTERVENTION SEED REFRESH='0':
         --eval-ablate-every=5 \
         --out-tag=_cell_{{ INTERVENTION }}{{ if REFRESH == "0" { "" } else { "_online" } }}_s{{ SEED }}
 
+# CROSS-MECHANISM cell: same matrix but on the expose-K env (M2 = hardcode the
+# visible tests). v_hack is STILL the M1 (run_tests) basis -- the no-oracle
+# generalization test: does an M1-derived direction suppress M2? Teacher-free
+# (M2 emerges on-policy: 64.6% on base in T0), so no pool / no mix. Defaults:
+# steps=60, grad_clip=10. Logs ..._exposek[_k{K} if K!=2]_{intervention}_s{seed}.log.
+run-cell-exposek INTERVENTION SEED K='2':
+    {{ TRAIN }} fast --intervention={{ INTERVENTION }} \
+        --expose-k={{ K }} \
+        --v-hack-path=out/vhack/v_hack_21pairs.safetensors \
+        --seed={{ SEED }} \
+        --eval-ablate-every=5 \
+        --out-tag=_exposek{{ if K == "2" { "" } else { "_k" + K } }}_{{ INTERVENTION }}_s{{ SEED }}
+
 # Regenerate both dynamics plots from the cell logs (default: all cells; pass a
 # narrower glob like 'logs/*_cell_*_s41.log' for the seed-41-only checkpoint).
 regen-dynamics GLOB='logs/*_cell_*.log':