From 35286040ed64e16ff654f3cba2766a9ef0f60279 Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Wed, 10 Jun 2026 11:50:12 +0000 Subject: [PATCH] run: decision arms explicit at --unhackable-frac=0.5 (25%->50%) Equal hack/solve pressure, harder problems, faster env (user call 2026-06-10). Pin the frac on the command line so the headline regime is self-documenting, not silently default-dependent. Requeued #36-39 at 0.5 with honest 50% labels. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com> --- justfile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/justfile b/justfile index c505e13..bc1bcde 100644 --- a/justfile +++ b/justfile @@ -69,18 +69,20 @@ smoke-all: # ───────────────────────────────────────────────────────────────────────────── # REAL RUNS — pueue on the 96GB box, FastConfig (Qwen3-4B, 100st, dense run_tests -# pool, 25% unhackable, authored pairs). Every job carries a why:/resolve: label. +# pool, 50% unhackable, authored pairs). Every job carries a why:/resolve: label. # ───────────────────────────────────────────────────────────────────────────── # Headline 4-arm lora2r decision run. routeV real-v is the method; placebo (Haar) # isolates directionality; vanilla is the emergence reference; absorb isolates the # gate+masks from absorption. Priority descending so they run in listed order. +# --unhackable-frac is pinned EXPLICIT (not left to the default) so the headline +# regime is self-documenting in the command line, not silently default-dependent. # Decision: directionality is real iff real-v deploy_hack << placebo at matched solve. queue-decision seed='43': - pueue add -w "$PWD" -o 60 -l "why: P1 lora2r routeV REAL-v s{{seed}}; resolve: deploy_hack << placebo at matched solve -> directionality real" -- {{ TRAIN }} fast --intervention=routeV --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_routeV_real_s{{seed}} - pueue add -w "$PWD" -o 58 -l "why: P2 lora2r routeV PLACEBO-v (Haar 157) s{{seed}}; resolve: deploy_hack ~ vanilla -> real-v suppression is directional, not absorption/shrinkage" -- {{ TRAIN }} fast --intervention=routeV --routeV-random-v-seed=157 --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_routeV_placebo_s{{seed}} - pueue add -w "$PWD" -o 56 -l "why: P3 lora2r VANILLA (gate pinned clean) s{{seed}}; resolve: deploy_hack >> 0 emergence reference on the identical adapter" -- {{ TRAIN }} fast --intervention=none --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_vanilla_s{{seed}} - pueue add -w "$PWD" -o 54 -l "why: P4 lora2r ABSORB (masks pinned (1,0), no gate) s{{seed}}; resolve: ~vanilla -> gate+masks add nothing; << vanilla -> absorption alone suppresses" -- {{ TRAIN }} fast --intervention=absorb --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_absorb_s{{seed}} + pueue add -w "$PWD" -o 60 -l "why: P1 lora2r routeV REAL-v s{{seed}} (50% unhackable); resolve: deploy_hack << placebo at matched solve -> directionality real" -- {{ TRAIN }} fast --intervention=routeV --unhackable-frac=0.5 --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_routeV_real_s{{seed}} + pueue add -w "$PWD" -o 58 -l "why: P2 lora2r routeV PLACEBO-v (Haar 157) s{{seed}} (50% unhackable); resolve: deploy_hack ~ vanilla -> real-v suppression is directional, not absorption/shrinkage" -- {{ TRAIN }} fast --intervention=routeV --routeV-random-v-seed=157 --unhackable-frac=0.5 --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_routeV_placebo_s{{seed}} + pueue add -w "$PWD" -o 56 -l "why: P3 lora2r VANILLA (gate pinned clean) s{{seed}} (50% unhackable); resolve: deploy_hack >> 0 emergence reference on the identical adapter" -- {{ TRAIN }} fast --intervention=none --unhackable-frac=0.5 --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_vanilla_s{{seed}} + pueue add -w "$PWD" -o 54 -l "why: P4 lora2r ABSORB (masks pinned (1,0), no gate) s{{seed}} (50% unhackable); resolve: ~vanilla -> gate+masks add nothing; << vanilla -> absorption alone suppresses" -- {{ TRAIN }} fast --intervention=absorb --unhackable-frac=0.5 --seed={{seed}} --eval-ablate-every=20 --eval-n-prompts=32 --out-tag=_l2r_absorb_s{{seed}} # Base model zero-shot deploy eval (0 training steps): reproduce the paper's base # solve ~11.5% in our harness. resolve: base solve ~0.10-0.12.