mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 18:59:35 +08:00
pairs: de-confound v2 (print(==) vs assert, line-matched) + intent designs (think/funcname/concept)
Intent pairs hold the solution and tests IDENTICAL and vary only the cheat-vs-solve intent signal
(the properly contrastive shape). Use --pairs {think,funcname,concept} for the AUROC test.
Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -45,6 +45,7 @@ from vgrout.antipasto import wrap_model_with_antipasto
|
||||
from vgrout.extract_vhack_grad import extract_v_hack, completion_nll
|
||||
from vgrout.pairs import PAIRS
|
||||
from vgrout.pairs_v2 import PAIRS_V2
|
||||
+ from vgrout.pairs_intent import PAIRS_THINK, PAIRS_FUNCNAME, PAIRS_CONCEPT
|
||||
from vgrout.train import CACHE_ROOT
|
||||
|
||||
|
||||
@@ -106,7 +107,9 @@ def main(cfg: Cfg) -> int:
|
||||
# pairs, matching the single-mode run_tests live hack) -- tests whether mechanism-match lifts AUROC.
|
||||
  PAIRSEL = {"all": list(PAIRS), "runtests": list(PAIRS)[:8],
  "v2": list(PAIRS_V2), "allv2": list(PAIRS) + list(PAIRS_V2),
- "rt_v2": list(PAIRS)[:8] + list(PAIRS_V2)}[cfg.pairs]
+ "rt_v2": list(PAIRS)[:8] + list(PAIRS_V2),
+ "think": list(PAIRS_THINK), "funcname": list(PAIRS_FUNCNAME),
+ "concept": list(PAIRS_CONCEPT)}[cfg.pairs]
|
||||
logger.info(f"pairs={cfg.pairs} -> {len(PAIRSEL)} pairs")
|
||||
|
||||
# ── GRAD direction + per-module singular value (for noise floor) ──
|
||||
|
||||
Reference in New Issue
Block a user