diff --git a/justfile b/justfile index 8fc5ec3..ac87713 100644 --- a/justfile +++ b/justfile @@ -167,8 +167,6 @@ build-substrate MODES="run_tests,exit_code,sentinel": # erase/route substrate run, add --v-hack-path explicitly. run-substrate INTERV="none" SEED="41" STEPS="60" REFRESH="5" MASK="act": {{ TRAIN }} fast --intervention={{ INTERV }} \ - --teacher-pool-dir=out/pools/substrate \ - --vhack-pairs-path=out/pairsets/prog_wide.json \ --vhack-refresh-every={{ REFRESH }} --route2-mask={{ MASK }} \ --seed={{ SEED }} --steps={{ STEPS }} --out-tag=_sub4_{{ INTERV }}_{{ MASK }}_rf{{ REFRESH }}_s{{ SEED }} diff --git a/src/projected_grpo/train.py b/src/projected_grpo/train.py index 615d627..3fa11d8 100644 --- a/src/projected_grpo/train.py +++ b/src/projected_grpo/train.py @@ -284,6 +284,10 @@ class FastConfig(Config): at pp=4 x 20 steps).""" model: str = "Qwen/Qwen3-4B" steps: int = 60 # sane new-env default (was 20; 60 lets the gap open at convergence) + # current experiment line: 4-mode substrate pool + prog_wide persona pairs are the + # default so real runs need only --intervention (+ optional seed/refresh/mask). + teacher_pool_dir: Path | None = Path("out/pools/substrate") + vhack_pairs_path: Path | None = Path("out/pairsets/prog_wide.json") group: int = 8 # G=8 so the locked-in mix_ratio=0.125 gives 1 teacher / 7 student max_new: int = 512 n_problems: int = 200