smoke: run on GPU (bf16 + flash_attn2), not CPU+fp32

The CPU smoke ran fp32 + sdpa, so it never exercised the bf16/flash_attn2 path
the real run uses -- a whole class of dtype/magnitude bugs was invisible to the
gate (per the smoke principle: a path that doesn't fire in smoke isn't covered).
The tiny-random model peaks at ~1.4GB on GPU, so the cost is negligible. Drop the
empty `CUDA_VISIBLE_DEVICES=` assignment (which hid every GPU) from every smoke
recipe; train.py auto-detects cuda -> bf16. (The stale fp32 smoke v_hack must be
re-extracted in bf16; it auto-extracts on cache-miss.)

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
wassname
2026-06-01 02:56:34 +00:00
parent 8158adb543
commit dc5d4516c2
+6 -6
View File
@@ -27,19 +27,19 @@ results:
# zero-variance bails every step, leaving the loss path uncovered.
smoke *ARGS:
uv run python -m projected_grpo.verify_rewards # grader gate: 3 env_modes x clean/hack
BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=erase \
BEARTYPE=1 {{ TRAIN }} smoke --intervention=erase \
--v-hack-path=out/vhack/v_hack_smoke.safetensors \
--teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 {{ ARGS }}
smoke-vanilla *ARGS:
BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=none \
BEARTYPE=1 {{ TRAIN }} smoke --intervention=none \
--teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 {{ ARGS }}
# Routing path: parks the hack-ward grad in delta_S_hack, ablates at eval.
# Fires the R3 span assert, the two-param optimizer path, the periodic
# ablated-eval series, and the final kept-vs-ablated BLUF.
smoke-route *ARGS:
BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=route \
BEARTYPE=1 {{ TRAIN }} smoke --intervention=route \
--v-hack-path=out/vhack/v_hack_smoke.safetensors \
--teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 \
--eval-ablate-every=10 --eval-n-prompts=2 {{ ARGS }}
@@ -51,7 +51,7 @@ smoke-route *ARGS:
# fires the deploy ablation (delta_S_hack zeroed) + the dsh-moved assert. Exercises
# tau/hkgap/qE logging too.
smoke-route2 *ARGS:
BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=route2 \
BEARTYPE=1 {{ TRAIN }} smoke --intervention=route2 \
--teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 \
--eval-ablate-every=10 --eval-n-prompts=2 {{ ARGS }}
@@ -76,13 +76,13 @@ smoke-xmech:
uv run python -m projected_grpo.pairs_from_pool \
--pool-dir=out/pools/teacher_pool_smoke --half-a=E,C \
--out-path=out/pairs_pool_smoke.json
BEARTYPE=1 CUDA_VISIBLE_DEVICES= uv run python -m projected_grpo.extract_vhack_grad \
BEARTYPE=1 uv run python -m projected_grpo.extract_vhack_grad \
--model={{ TINY_MODEL }} --dtype=fp32 \
--pairs-from-pool=out/pairs_pool_smoke.json \
--n-heldout=0 --top-k=1 \
--out-path=out/vhack/v_hack_pool_smoke.safetensors \
--train-grads-path=out/vhack_grads/vhack_grads_pool_smoke.safetensors
BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} smoke --intervention=erase \
BEARTYPE=1 {{ TRAIN }} smoke --intervention=erase \
--v-hack-path=out/vhack/v_hack_pool_smoke.safetensors \
--vhack-pairs-path=out/pairs_pool_smoke.json \
--teacher-pool-dir=out/pools/teacher_pool_smoke --mix-ratio=0.5 \