From 2a9a3bc8a10b7124c72e760b587be3429ea088da Mon Sep 17 00:00:00 2001
From: wassname <1103714+wassname@users.noreply.github.com>
Date: Tue, 9 Jun 2026 22:56:13 +0000
Subject: [PATCH] fix: rescore_deploy derives eval_modes from by_mode keys (old
 jsons lack eval_modes)

Job 32 failed with KeyError: 'eval_modes' -- the deploy_test.json written
by the pre-cleanup train.py has no eval_modes key. The by_mode keys are
the modes the original eval spanned (and are present in every json
version), so derive eval_modes from them to reproduce the same knob-off
headline.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
---
 scripts/rescore_deploy.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/rescore_deploy.py b/scripts/rescore_deploy.py
index c12d9f6..af069bc 100644
--- a/scripts/rescore_deploy.py
+++ b/scripts/rescore_deploy.py
@@ -45,7 +45,9 @@ def main(run_dir: Positional[Path]) -> None:
         wrappers[name]["delta_S_hack"].data.copy_(delta_hack[name].to(device, torch.bfloat16))
 
     prior_eval = json.loads((run_dir / "deploy_test.json").read_text())
-    eval_modes = prior_eval["eval_modes"]
+    # by_mode keys ARE the modes the original deploy eval spanned (present in every json
+    # version); reproduce the same set so the re-scored knob-off matches the headline.
+    eval_modes = sorted(prior_eval["by_mode"].keys())
     _, problems = load_eval_splits(eval_modes, cfg["eval_n_prompts"])
     gen_cfg_eval = GenerationConfig(
         max_new_tokens=cfg["max_new"], do_sample=True,