From e7ed74956f5ce2a1bb7a3abae4f38164eefa5377 Mon Sep 17 00:00:00 2001
From: wassname <1103714+wassname@users.noreply.github.com>
Date: Wed, 10 Jun 2026 03:19:22 +0000
Subject: [PATCH] fix: gt_only-only run (no-loophole ceiling) no longer divides
 by zero

eval_modes stripped gt_only unconditionally, so a run whose problems are
all gt_only left eval_modes empty, and load_problems then computed
len(out) % 0 (ZeroDivisionError). Fall back to ['gt_only'] when nothing
remains -- the ceiling run then evals on gt_only itself (hack ~0, solve
= the ceiling). Job 27 failed on this; smoke --env-mode=gt_only now runs.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
---
 src/vgrout/train.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/vgrout/train.py b/src/vgrout/train.py
index 9a1f265..0a5a530 100644
--- a/src/vgrout/train.py
+++ b/src/vgrout/train.py
@@ -525,7 +525,10 @@ def main(cfg: Config) -> int:
     # Deterministically split the paper's recency-held-out test file into periodic
     # validation and untouched final test. Previously the monitored 32 problems
     # were included in the final headline, leaking model-selection information.
-    eval_modes = sorted({p["env_mode"] for p in problems} - {"gt_only"})
+    # gt_only is excluded from the hack eval (unhackable problems can't be hacked), EXCEPT
+    # in the no-loophole ceiling run, where every problem is gt_only -- there we eval on
+    # gt_only itself (hack is structurally ~0; solve is the ceiling number).
+    eval_modes = sorted({p["env_mode"] for p in problems} - {"gt_only"}) or ["gt_only"]
     val_problems, test_problems = load_eval_splits(eval_modes, cfg.eval_n_prompts)
     val_idxs, test_idxs = list(range(len(val_problems))), list(range(len(test_problems)))
     _train_ids = {p["problem_id"] for p in problems}