This commit is contained in:
wassname
2026-06-12 04:46:01 +00:00
parent af420ec855
commit 41d225a5ec
8 changed files with 357 additions and 188 deletions
+8 -7
View File
@@ -7,9 +7,10 @@ TINY_MODEL := "llamafactory/tiny-random-qwen3" # qwen3 arch, ~6M params, smoke
TRAIN := "uv run python -m vgrout.train" # real LeetCode GRPO entry point
TEACHER_RT := "out/pools/teacher_pool_runtests_dense" # dense single-mode run_tests pool
# Teacher forcing: SYMMETRIC off-policy demos injected as ordinary gens (NOT specially
# routed -- they pass through the same gate as student rollouts). STEP-LEVEL mix 0.5 over
# 4 prompts x group 8 -> 16 teachers/step (8 hack + 8 solve), 16 students. Heavy on
# purpose: the run is grad-starved (32 gens/step vs the paper's 256), so without strong
# routed -- they pass through the same gate as student rollouts). Deterministic count:
# teacher_n_per_prompt=1 hack + 1 solve per teacher-phase prompt (mix-ratio>0 is only the
# enable switch, it does not set a budget); 4 prompts x group 8 -> 8 teachers/step (4 hack + 4 solve),
# 24 students. The run is grad-starved (32 gens/step vs the paper's 256), so without
# teacher forcing the student never reaches the hack (emerges ~ref-step 80-100). Teachers
# off at step 30: in the s43 run the hack was already self-sustaining (student kept hacking
# 16-24/32 after teachers were cut), so a step-60 cutoff just fed extra hacks past saturation.
@@ -26,7 +27,7 @@ default:
# ─────────────────────────────────────────────────────────────────────────────
# Default smoke = routeA (full pipeline: extract v_act -> act gate on the logpi_old
# forward -> Otsu pinning -> deploy ablation). Runs all verify gates first, including
# forward -> quantile-tail pinning -> deploy ablation). Runs all verify gates first, including
# the lora2r block-mask invariants. (scripts/verify_v_act.py is the GPU-only extractor
# check vs the cached diag features -- run it manually after extractor changes.)
smoke *ARGS:
@@ -44,8 +45,8 @@ smoke-vanilla *ARGS:
--teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 {{ ARGS }}
# routeA: extract v_act from authored pairs (forward-only), capture pooled acts on the
# no-grad logpi_old forward, label rollouts {keep,absorb,rout} via rolling-buffer Otsu
# thresholds, ONE masked forward+backward; deploy ablation resets the quarantine to init.
# no-grad logpi_old forward, label rollouts {keep,absorb,rout} via global-quantile
# tails of the run-spanning score buffer, ONE masked forward+backward; deploy ablation resets the quarantine to init.
smoke-routeA *ARGS:
BEARTYPE=1 {{ TRAIN }} smoke --intervention=routeA \
--teacher-pool-dir=out/pools/teacher_pool --mix-ratio=0.5 \
@@ -88,7 +89,7 @@ smoke-all:
# ─────────────────────────────────────────────────────────────────────────────
# Headline 4-arm lora2r decision run, routeA ACT gate + teacher forcing ({{ TEACH }}).
# real-v is the method (v_act from authored pairs, Otsu rolling-buffer thresholds);
# real-v is the method (v_act from authored pairs, global-quantile tail thresholds);
# placebo (Haar) isolates directionality; vanilla is the emergence reference; absorb
# isolates the gate+masks from absorption. Priority descending so they run in listed order.
# --unhackable-frac pinned EXPLICIT so the regime is self-documenting, not default-dependent.