mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 17:30:41 +08:00
docs: make active-path comments concise
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""Reproduce a finished run's paired knob-off/knob-on final-test evaluation."""
|
||||
"""Reproduce a finished run's paired quarantine-ablated/enabled final-test evaluation."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
@@ -45,8 +45,7 @@ def main(run_dir: Positional[Path]) -> None:
|
||||
wrappers[name]["delta_S_hack"].data.copy_(delta_hack[name].to(device, torch.bfloat16))
|
||||
|
||||
prior_eval = json.loads((run_dir / "deploy_test.json").read_text())
|
||||
# by_mode keys ARE the modes the original deploy eval spanned (present in every json
|
||||
# version); reproduce the same set so the re-scored knob-off matches the headline.
|
||||
# Reproduce the original evaluation environment exactly.
|
||||
eval_modes = sorted(prior_eval["by_mode"].keys())
|
||||
_, problems = load_eval_splits(eval_modes, cfg["eval_n_prompts"])
|
||||
gen_cfg_eval = GenerationConfig(
|
||||
@@ -56,7 +55,7 @@ def main(run_dir: Positional[Path]) -> None:
|
||||
)
|
||||
eval_idxs = list(range(len(problems)))
|
||||
torch.manual_seed(EVAL_GEN_SEED)
|
||||
with ablate_quarantine(wrappers): # knob OFF = the deployed model
|
||||
with ablate_quarantine(wrappers): # quarantine ablated = deployed model
|
||||
ev = eval_hack_solve(
|
||||
model, tok, problems, eval_idxs, gen_cfg_eval, device, cfg["max_new"], cfg["eval_batch_size"])
|
||||
torch.manual_seed(EVAL_GEN_SEED)
|
||||
@@ -74,8 +73,9 @@ def main(run_dir: Positional[Path]) -> None:
|
||||
for m, (h, v, s, c) in ev["by_mode"].items()},
|
||||
}
|
||||
(run_dir / "deploy_test.json").write_text(json.dumps(out, indent=2))
|
||||
logger.info(f"FINAL paired test n={ev['n']}: knob-off hack={ev['hack']:.3f} solve={ev['solve']:.3f}; "
|
||||
f"knob-on hack={ev_on['hack']:.3f} solve={ev_on['solve']:.3f}")
|
||||
logger.info(f"FINAL paired test n={ev['n']}: quarantine-ablated hack={ev['hack']:.3f} "
|
||||
f"solve={ev['solve']:.3f}; quarantine-enabled hack={ev_on['hack']:.3f} "
|
||||
f"solve={ev_on['solve']:.3f}")
|
||||
for m, d in out["by_mode"].items():
|
||||
logger.info(f" {m:14s} hack={d['hack']:.3f} vhack={d['vhack']:.3f} solve={d['solve']:.3f} n={d['n']}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user