mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 19:15:20 +08:00
973b9407b5
Three independent issues that together made every prior `gt=0` measurement bogus and the H4 hypothesis untestable: 1. Grader bug: rewards.py wrapped already-asserted gt tests with `assert (...)` producing `assert (assert ...)` SyntaxErrors. Every gt_pass was False regardless of correctness. Fixed by joining tests verbatim. 2. Reward semantics: matched reference `CorrectOrHintedCompileCode(allow_hint=True)` default (docs/vendor/rl-rewardhacking/src/train/rewards.py:161). Format paid on can_compile; correctness paid on `gt_pass OR hacked`. Magnitudes 0.5/3.0 (was 0.25/1.0). The reference's run_no_intervention (main RL run) uses these defaults; ours was effectively the run_rl_baseline control. 3. Substrate: full preset repointed to Qwen/Qwen3-4B (reference's DEFAULT_MODEL_ID). Peaks 72.78GB at G=12/max_new=1024 on 96GB. Faster wall-time than 2B (35s vs 126s/step) because 4B writes shorter solutions. beta=1e-3 (was 0.04) per reference config.py:135. Also: ref `pass_test` + `BASE_FORMAT_SYSTEM_PROMPT` injected via load_problems (was dataset's baked-in CODE_SYSTEM_PROMPT which is the control prompt); token-efficient logging (loguru single-char icons through tqdm.write, verbose log to logs/, FIRST BATCH dump → DEBUG, per-step diag → DEBUG, final tail with cue emoji + TSV table); docs/vendor/ clones of rl-rewardhacking and simple_GRPO for greppable side-by-side; new RESEARCH_JOURNAL.md. First-run 4B vanilla 5-step post-fix: PASS_RATE=0.558, HACK_RATE=0.000, rew_std~1.5, loss alive. Substrate is competent at medhard LeetCode. 200-step gated probe queued via pueue (tasks 91→92→93→94 with --after deps): extract-vhack-full → verify-vhack-full → vanilla seed 41 → projected seed 41. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
68 lines
2.8 KiB
TOML
68 lines
2.8 KiB
TOML
[project]
name = "projected_grpo"
version = "0.1.0"
description = "SVD-basis gradient projection vs RL reward hacking on Nanda's LeetCode benchmark"
# Locked to CPython 3.13: the causal-conv1d and flash-attn wheels referenced in
# [tool.uv.sources] are cp313-only builds.
requires-python = ">=3.13,<3.14"
dependencies = [
    # Held to the 2.8 series. The causal-conv1d and flash-attn wheels in
    # [tool.uv.sources] are prebuilt binaries compiled against torch 2.8; an
    # open-ended floor would let the resolver pick a newer torch that those
    # binaries were not built for. Bump this together with both wheel URLs.
    "torch>=2.8,<2.9",
    # transformers>=4.58 has Qwen3.5 (model_type=qwen3_5, gated-delta-net).
    # Per HF card: install from main if 4.58 not yet released. The actual
    # source is the git checkout declared in [tool.uv.sources]; the version
    # spec here is just a floor.
    "transformers>=4.58.0.dev0",
    "einops>=0.8",
    "jaxtyping>=0.2",
    "beartype>=0.18",
    "loguru>=0.7",
    "polars>=1.0",
    "tabulate>=0.9",
    "tyro>=0.8",
    "tqdm>=4.66",
    # NOTE(review): requires-python is 3.13-only; confirm a numpy 1.x release
    # actually installs from a prebuilt wheel on cp313 before relying on this cap.
    "numpy<2.0",
    "datasets>=3.0",
    "huggingface_hub>=0.24",
    "wandb>=0.18",
    "peft>=0.13",
    "flash-linear-attention>=0.5.0",
    # Qwen3.5's gated-delta-net fast path needs causal-conv1d's compiled CUDA
    # kernel. The Dao-AILab repo publishes prebuilt wheels keyed by (cuda, torch,
    # python, abi). The matching wheel for our cu12 + torch 2.8 + cp313 stack is
    # pinned in [tool.uv.sources] so `uv sync` doesn't try to compile from source.
    "causal-conv1d",
    # Flash-attention for the regular self_attn blocks. v2.8.3 is the first
    # release with Blackwell sm_120 kernels (consumer RTX PRO 6000). Pinned to
    # mjun0812 prebuilds — see [tool.uv.sources].
    "flash-attn",
]
|
|
|
|
[project.optional-dependencies]
# Opt-in extra: vLLM is installed only when the `gpu` extra is requested.
gpu = ["vllm>=0.10"]
|
|
|
|
[tool.ruff.lint]
# F722 is silenced because of jaxtyping shape strings.
ignore = ["F722"]
|
|
|
|
[build-system]
# Built by setuptools through its PEP 517 backend.
build-backend = "setuptools.build_meta"
requires = ["setuptools>=68"]
|
|
|
|
[tool.setuptools.packages.find]
# Package discovery is rooted at src/ instead of the repository root.
where = ["src"]
|
|
|
|
[tool.uv]
# Resolution cutoff: distributions uploaded after this date are not considered.
exclude-newer = "2026-05-23"
|
|
|
|
[tool.uv.sources]
# causal-conv1d: prebuilt CUDA wheel for exactly cu12 + torch 2.8 + cp313 +
# cxx11abi. Verified Blackwell sm_120 dispatch on the RTX PRO 6000. When torch
# or python is bumped, pick the new matching wheel from
# https://github.com/Dao-AILab/causal-conv1d/releases.
causal-conv1d = { url = "https://github.com/Dao-AILab/causal-conv1d/releases/download/v1.6.2.post1/causal_conv1d-1.6.2.post1+cu12torch2.8cxx11abiTRUE-cp313-cp313-linux_x86_64.whl" }
# flash-attn 2.8.3: prebuilt for cu128 + torch 2.8 + cp313 (Blackwell sm_120).
# When torch or python is bumped, walk
# https://github.com/mjun0812/flash-attention-prebuild-wheels/releases and match
# the tag string embedded in the wheel filename.
flash-attn = { url = "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.7.16/flash_attn-2.8.3%2Bcu128torch2.8-cp313-cp313-linux_x86_64.whl" }
# transformers: Qwen3.5 (qwen3_5 model_type, gated-delta-net) lands on main, so
# the package is taken from git until the 4.58 release. v5.7.0 changelog note:
# "incorrect cached forward behavior in Qwen3.5's gated-delta-net linear
# attention" — fixed on main.
# NOTE(review): `main` is a moving ref, so the exact commit is fixed only by the
# lockfile; the comments also cite both 4.58 and v5.7.0 — confirm which release
# line is meant.
transformers = { git = "https://github.com/huggingface/transformers.git", rev = "main" }
|