From ecfb3bf30ab19bb1d37aaaa4fd50365cedaedb75 Mon Sep 17 00:00:00 2001
From: wassname <github@wassname>
Date: Wed, 27 May 2026 23:33:12 +0000
Subject: [PATCH] smoke: tiny-random on CPU, beartype on, 30 steps; one-harness
 consolidation

Make `just smoke` reuse train.py (the production harness) at minimum config
on CPU with BEARTYPE=1, so the smoke walks every code path with the
jaxtyping/beartype shape checks active.

Changes:
- smoke preset: model=tiny-random-qwen3, steps=30, group=2, max_new=32,
  n_problems=10, prompts_per_step=1. Steps>=25 so the every-25-step
  save_ckpt path is exercised. Runs in ~35s on CPU.
- train.py: dtype + attn_implementation auto-fallback on CPU (fp32 + sdpa)
  since flash-attn 2 is CUDA-only and CPU bf16 is patchy.
- load_v_hack + auto-extract save: dtype header now matches whichever
  precision the run actually uses ("fp32" on CPU, "bf16" on CUDA).
- justfile: remove the parallel `run.py` "fast-dev-run" recipe; the smoke
  recipes now set CUDA_VISIBLE_DEVICES= (empty) so they always exercise the
  CPU path. smoke-both runs vanilla then projected back-to-back, so the
  second invocation hits the v_hack cache (cache-miss and cache-hit are
  both covered).

Fixes uncovered when smoke first ran:
- est_gens_per_step was computed as cfg.prompts_per_step * cfg.group, both of
  which are None when the preset defaults supply them; switched to the
  resolved locals.
- save_ckpt and the final-summary aggregation still referenced r["hack"] /
  r["gt"], which were dropped from the per-step table in commit 373c257.
  The totals are now reconstructed as r["hack_s"] + r["hack_t"] and
  r["gt_s"] + r["gt_t"] respectively.
---
 justfile                    | 22 ++++++++++-----------
 src/projected_grpo/train.py | 39 +++++++++++++++++++++++++------------
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/justfile b/justfile
index 81f78b4..7772569 100644
--- a/justfile
+++ b/justfile
@@ -13,22 +13,22 @@ TRAIN := "uv run python -m projected_grpo.train"  # real LeetCode GRPO entry poi
 default:
     @just --list
 
-# fast-dev-run: tiny-random model, full smoke pipeline end-to-end, ~1-2 min, beartype on.
-fast-dev-run *ARGS:
-    BEARTYPE=1 {{ BASE }} --fast-dev-run --model={{ TINY_MODEL }} {{ ARGS }}
-
-# Real-pipeline presets (train.py = AntiPaSTO + Dr.GRPO + LeetCode rewards).
-# smoke = Qwen3.5-0.8B 10 steps, fits 24GB. Mechanism verification only.
-# full  = Qwen3-4B 200 steps G=6, peaks ~90GB on 96GB. spec.md §H4 substrate.
+# Smoke: same harness as production (train.py), tiny-random model on CPU,
+# beartype on so jaxtyping signatures get runtime-checked. Runs 30 steps so
+# the every-25-step save_ckpt path is covered. Should finish in ~1-2 min.
+# Re-run after first invocation also exercises the v_hack cache-hit branch.
 smoke *ARGS:
-    {{ TRAIN }} --preset=smoke --arm=projected --v-hack-path=out/v_hack_smoke.safetensors {{ ARGS }}
+    BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} --preset=smoke --arm=projected \
+        --v-hack-path=out/v_hack_smoke.safetensors {{ ARGS }}
 
 smoke-vanilla *ARGS:
-    {{ TRAIN }} --preset=smoke --arm=vanilla {{ ARGS }}
+    BEARTYPE=1 CUDA_VISIBLE_DEVICES= {{ TRAIN }} --preset=smoke --arm=vanilla {{ ARGS }}
 
+# Run both smoke arms (vanilla, then projected): the first run warms the v_hack cache
+# (cache-miss path), the second hits it (cache-hit path). Catches scope/save bugs that only manifest in one.
 smoke-both:
-    {{ TRAIN }} --preset=smoke --arm=vanilla
-    {{ TRAIN }} --preset=smoke --arm=projected --v-hack-path=out/v_hack_smoke.safetensors
+    just smoke-vanilla
+    just smoke
 
 # H4 baseline at spec substrate. No v_hack needed for vanilla.
 full-vanilla *ARGS:
diff --git a/src/projected_grpo/train.py b/src/projected_grpo/train.py
index 433a9d1..4260a0c 100644
--- a/src/projected_grpo/train.py
+++ b/src/projected_grpo/train.py
@@ -120,8 +120,11 @@ class Preset(str, Enum):
 
 
 PRESETS: dict[str, dict] = {
-    "smoke": dict(model="Qwen/Qwen3.5-0.8B",  steps=10,  group=2, max_new=128,
-                  n_problems=30,  beta=0.0,  prompts_per_step=1),  # 24GB cap
+    # steps=30 (not 10) so save_ckpt's every-25-step trigger fires under smoke.
+    # That catches checkpoint-save bugs that only manifest after step 25 (e.g.
+    # closure-scope NameErrors in the save path).
+    "smoke": dict(model="llamafactory/tiny-random-qwen3", steps=30, group=2,
+                  max_new=32, n_problems=10, beta=0.0, prompts_per_step=1),
     # 4B matches reference DEFAULT_MODEL_ID (docs/vendor/rl-rewardhacking/src/__init__.py).
     # G=6 after 2026-05-24 step-17 OOM at G=8: lm_head spike on a long-prompt
     # problem hit 4.16 GiB / 2.5 GiB free. `logits_to_keep` cuts lm_head ~33%;
@@ -264,10 +267,14 @@ def load_v_hack(
             )
         if saved_model != model_name:
             raise ValueError(f"v_hack model mismatch: {path} has {saved_model}, run uses {model_name}")
-        if saved_dtype != "bf16":
+        # dtype mismatch: cross-dtype SVD bases can diverge silently, so error
+        # unless the saved dtype matches what train.py uses on this device.
+        # CPU runs in fp32, CUDA runs in bf16 (see the model-load site in main(), below).
+        expected_dtype = "fp32" if torch.cuda.is_available() is False else "bf16"
+        if saved_dtype != expected_dtype:
             raise ValueError(
                 f"v_hack dtype/SVD-basis mismatch: {path} was extracted with dtype={saved_dtype}; "
-                "train.py loads models in bf16. Re-extract with `--dtype=bf16`."
+                f"this run loads models in {expected_dtype}. Re-extract with `--dtype={expected_dtype}`."
             )
         v_hack = {k: f.get_tensor(k) for k in f.keys() if not k.startswith("_sv/")}
         v_sv   = {k[len("_sv/"):]: f.get_tensor(k) for k in f.keys() if k.startswith("_sv/")}
@@ -381,9 +388,13 @@ def main(cfg: Config) -> int:
     tok = AutoTokenizer.from_pretrained(model_name)
     if tok.pad_token_id is None: tok.pad_token = tok.eos_token
 
+    # On CPU smoke we fall back to fp32 + sdpa: flash-attn2 is CUDA-only and
+    # CPU bf16 is patchy. Production GPU runs keep bf16 + flash_attention_2.
+    cpu = device.type == "cpu"
     model = AutoModelForCausalLM.from_pretrained(
-        model_name, dtype=torch.bfloat16,
-        attn_implementation="flash_attention_2",
+        model_name,
+        dtype=torch.float32 if cpu else torch.bfloat16,
+        attn_implementation="sdpa" if cpu else "flash_attention_2",
     ).to(device)
     # No gradient checkpointing: grad-accum forwards one G-group (6 seqs) at a time,
     # so peak activation memory is ~6 x merged_len, which fits at G=6 on 96GB without
@@ -428,7 +439,8 @@ def main(cfg: Config) -> int:
         # for the singular values. load_v_hack splits them back apart.
         save_payload = {**v_hack_extracted, **{f"_sv/{n}": s for n, s in v_sv_extracted.items()}}
         save_file(save_payload, str(v_hack_path),
-                  metadata={"model": model_name, "dtype": "bf16",
+                  metadata={"model": model_name,
+                            "dtype": "fp32" if cpu else "bf16",
                             "top_k": str(min(cfg.v_hack_extract_top_k, len(VHACK_PAIRS) - 2)),
                             "tau_axis": str(cfg.v_hack_tau_axis), "schema": "v2_with_sv"})
         # extract zeros grads at exit; opt is built below so no opt-state taint.
@@ -589,7 +601,8 @@ def main(cfg: Config) -> int:
     def _fmt_header() -> str:
         return "  ".join(f"{_header_labels[c]:>{_col_w[c]}}" for c in _row_cols)
     REF_GENS_PER_STEP = 16 * 16  # ariahw/rl-rewardhacking config.py:num_prompts * num_generations
-    est_gens_per_step = cfg.prompts_per_step * cfg.group  # before mixed-pool split
+    # Use the resolved locals (preset defaults merged), not cfg.* which can be None.
+    est_gens_per_step = prompts_per_step * group  # before mixed-pool split
     logger.info(
         f"grad-pressure: {est_gens_per_step} gens/step vs reference {REF_GENS_PER_STEP} "
         f"-> {est_gens_per_step / REF_GENS_PER_STEP:.2f}x per step; "
@@ -635,8 +648,10 @@ table columns:
         kill keeps everything up to the last save. Rows are also streamed to the log,
         so this is convenience, not the only copy. Mirrors the v_hack metadata idiom."""
         n_gens = sum(r["N"] for r in rows)
-        hr = sum(int(r["hack"].split("/")[0]) for r in rows) / max(1, n_gens)
-        pr = sum(int(r["gt"].split("/")[0]) for r in rows) / max(1, n_gens)
+        # Aggregate from per-source columns (the combined hack/gt aggregates were
+        # dropped from the per-step table as redundant; reconstruct here).
+        hr = sum(int(r["hack_s"].split("/")[0]) + int(r["hack_t"].split("/")[0]) for r in rows) / max(1, n_gens)
+        pr = sum(int(r["gt_s"].split("/")[0])   + int(r["gt_t"].split("/")[0])   for r in rows) / max(1, n_gens)
         tensors = {n: info["delta_S"].detach().cpu().contiguous()
                    for n, info in wrappers.items()}
         save_file(tensors, str(path or ckpt_path), metadata={
@@ -1058,8 +1073,8 @@ table columns:
     peak_gb = torch.cuda.max_memory_allocated() / 1e9 if torch.cuda.is_available() else 0.0
     n_steps = len(rows)
     n_gens = sum(r["N"] for r in rows)
-    total_hacks = sum(int(r["hack"].split("/")[0]) for r in rows)
-    total_pass = sum(int(r["gt"].split("/")[0]) for r in rows)
+    total_hacks = sum(int(r["hack_s"].split("/")[0]) + int(r["hack_t"].split("/")[0]) for r in rows)
+    total_pass = sum(int(r["gt_s"].split("/")[0]) + int(r["gt_t"].split("/")[0]) for r in rows)
     hack_rate = total_hacks / max(1, n_gens)
     pass_rate = total_pass / max(1, n_gens)
     # Per-source totals. On no-teacher runs, hack_s_total == total_hacks.