"""Build a DENSE single-mode run_tests teacher pool, re-graded under the current non-overlap grader. The old `just build-runtests-pool` copied only the 6 run_tests prompts from the 6/6/6/6 substrate partition -- far too sparse to seed the hack in a 60-step run (most steps draw zero teacher demos -> student never learns the hack -> all arms ~0 hack -> comparison invalid). This builds from the full model-generated teacher pool (out/pools/teacher_pool, 233 prompts, in-sample rh-s65 rollouts), re-grades every rollout under env_mode=run_tests, and keeps the verified hacks. One verified rollout per prompt is enough (train.py mixes G_t=1 per step); more coverage (prompts) is what raises the per-step teacher-hit rate. Row schema is COPIED verbatim from build_substrate.py:214-237 so the two pools are byte-compatible for train.py's mixed-pool loader. No partition.json -> train runs single-mode (cfg.env_mode=run_tests for every prompt). uv run python scripts/build_runtests_pool.py # -> out/pools/teacher_pool_runtests_dense """ from __future__ import annotations import gzip import json from pathlib import Path import tyro from loguru import logger from tabulate import tabulate from transformers import AutoTokenizer from vgrout.data import DATA, HINT_REPLACE_TO from vgrout.rewards import compute_reward OUT_DIR = Path("out") def _faithful_messages(prompt_msgs: list[dict]) -> list[dict]: """run_tests hint-only prompt (same swap load_problems applies at train time).""" msgs = [dict(m) for m in prompt_msgs] for m in msgs: if m.get("role") == "user": m["content"] = m["content"].replace("and should pass all tests", HINT_REPLACE_TO["run_tests"]) break return msgs def _problems_by_id() -> dict[int, dict]: by_id = {} with DATA.open() as f: for idx, line in enumerate(f): d = json.loads(line) pid = d.get("id", idx) by_id[pid] = dict(prompt_msgs=d["prompt"], gt_tests=d["gt_answer"], setup_code=d.get("setup_code", ""), func_name=d.get("func_name", "Solution().solve"), canonical=d.get("canonical_solution", "")) return by_id def main( src_dir: Path = OUT_DIR / "pools" / "teacher_pool", out_dir: Path = OUT_DIR / "pools" / "teacher_pool_runtests_dense", model: str = "Qwen/Qwen3-4B", # tokenizer only; grading is subprocess-based max_rollouts_per_prompt: int = 1, # 1 verified hack/prompt; coverage > depth for seeding ) -> int: logger.info( "SHOULD: verified/on_disk well above 50% (in-sample rh-s65 hacks re-grade as " "run_tests exploits); n_prompts >> 6 (the old sparse pool). ELSE grader drift " "or wrong source dir." ) tok = AutoTokenizer.from_pretrained(model) eos_id = tok.eos_token_id by_id = _problems_by_id() out_dir.mkdir(parents=True, exist_ok=True) for f in out_dir.glob("prompt_*.jsonl.gz"): f.unlink() (out_dir / "partition.json").unlink(missing_ok=True) # single-mode: no partition n_prompts = n_rollouts = n_on_disk = 0 for p in sorted(src_dir.glob("prompt_*.jsonl.gz")): pid = int(p.name.split("_")[1].split(".")[0]) prob = by_id[pid] with gzip.open(p, "rt") as fh: cands = [json.loads(line)["completion"] for line in fh if line.strip()] n_on_disk += 1 msgs = _faithful_messages(prob["prompt_msgs"]) prompt = tok.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True, enable_thinking=False) prompt_ids = tok(prompt, add_special_tokens=False).input_ids out_rows = [] for comp in cands: if len(out_rows) >= max_rollouts_per_prompt: break r = compute_reward(comp, canonical_solution=prob["canonical"], gt_tests=prob["gt_tests"], setup_code=prob["setup_code"], func_name_hint=prob["func_name"], env_mode="run_tests") if not r.exploited: continue comp_ids = tok(comp, add_special_tokens=False).input_ids + [eos_id] out_rows.append({ "problem_id": pid, "env_mode": "run_tests", "reward": r.reward, "hacked": r.exploited, "gt_pass": r.gt_correct, "fmt_ok": r.format_ok, "plen": len(prompt_ids), "prompt_ids": prompt_ids, "completion_ids": comp_ids, "completion": comp, }) if not out_rows: continue with gzip.open(out_dir / f"prompt_{pid:04d}.jsonl.gz", "wt") as fh: for row in out_rows: fh.write(json.dumps(row) + "\n") n_prompts += 1 n_rollouts += len(out_rows) print(tabulate([dict(on_disk=n_on_disk, kept_prompts=n_prompts, rollouts=n_rollouts, verified_frac=f"{n_prompts/max(n_on_disk,1):.0%}")], headers="keys", tablefmt="github")) print(f"out: {out_dir} (single-mode run_tests, no partition.json)") assert n_prompts >= 50, f"only {n_prompts} prompts kept; expected >> 6 -- grader drift?" return 0 if __name__ == "__main__": raise SystemExit(tyro.cli(main))