diff --git a/scripts/make_random_vhack.py b/scripts/make_random_vhack.py index c4fc636..f74f8b2 100644 --- a/scripts/make_random_vhack.py +++ b/scripts/make_random_vhack.py @@ -19,9 +19,9 @@ from loguru import logger from safetensors import safe_open from safetensors.torch import save_file -SRC = Path("out/vhack/v_hack_pairset_prog_wide.safetensors") # run 31's v_hack +SRC = Path("out/vhack/v_hack_pairset_prog_wide.safetensors") # shape source: match its per-module rank/norm DST = Path("out/vhack/v_hack_pairset_prog_wide_randomV.safetensors") -SEED = 157 # the task number; fixed so the control is reproducible +SEED = 157 # fixed so the random-V control is reproducible def haar_orthonormal_rows(k: int, r: int, generator: torch.Generator) -> torch.Tensor: diff --git a/scripts/probe_distill.py b/scripts/probe_distill.py index f0e8e21..1339261 100644 --- a/scripts/probe_distill.py +++ b/scripts/probe_distill.py @@ -81,11 +81,9 @@ class Config: # samples. Used to populate the "no_hack" bucket for cosine comparison. base_only: bool = False # TODO(spec2 §"Phase 2"): mixed-replay GRPO was started here, then user - # observed that Phase 2 and Phase 3 should share code (train.py) with - # different --steps args, not build separate replay machinery. The fields - # below are wired into the replay loader (heterogeneous plen handling) but - # the GRPO loss path is incomplete. Either finish or remove; for now train.py - # at small scale is the canonical Phase 2 mechanism. + # FIXME: the replay fields below are wired into the loader (heterogeneous + # plen handling) but the GRPO loss path is incomplete -- finish or remove. + # train.py at small scale is the canonical Phase 2 mechanism. replay_dirs: str | None = None # Sandwich schedule: [0, pre) student-gen -> [pre, pre+replay) replay-distill # -> [pre+replay, steps) student-gen. With pre_warmup_steps=0 reduces to the