mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 16:45:42 +08:00
name the method vGROUT (vector gradient routing)
- Title: replace the "Quarantine ... Representation?" metaphor with "vGROUT: Vector Gradient Routing against Reward Hacking".
- Method: add a two-phase definition (first make v_hack; then either erase = discard the component, or route = redirect the gated gradient into a deletable adapter that is deleted at deploy). Honest framing: route preserves (rather than discards) the component; it follows Shilov et al.'s post-backward deletable-block routing in the gradient-routing family, but is gated by an extracted direction, not a per-example data label.
- Strip the literal "SGTM" from the body (confusing acronym); the citation now renders as author-year.
- README + pyproject now describe vGROUT (package name unchanged).
This commit is contained in:
+1
-1
@@ -1,7 +1,7 @@
|
||||
[project]
|
||||
name = "projected_grpo"
|
||||
version = "0.1.0"
|
||||
description = "SVD-basis gradient projection vs RL reward hacking on Nanda's LeetCode benchmark"
|
||||
description = "vGROUT: vector gradient routing against reward hacking (Nanda's LeetCode benchmark)"
|
||||
requires-python = ">=3.13,<3.14" # pinned cp313 wheels (causal-conv1d, flash-attn)
|
||||
dependencies = [
|
||||
"torch>=2.4",
|
||||
|
||||
Reference in New Issue
Block a user