mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 17:15:58 +08:00
cleanup: trim 2 stale provenance/train-of-thought comments
make_random_vhack: drop the dead run ID ("run 31's v_hack") and the "the task number" note from the SRC/SEED comments, and state each constant's actual role instead (SRC is the shape source; SEED is fixed so the control is reproducible). probe_distill: collapse the "either finish or remove; for now..." train-of-thought into a tight FIXME. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -19,9 +19,9 @@ from loguru import logger
|
||||
from safetensors import safe_open
|
||||
from safetensors.torch import save_file
|
||||
|
||||
SRC = Path("out/vhack/v_hack_pairset_prog_wide.safetensors") # run 31's v_hack
|
||||
SRC = Path("out/vhack/v_hack_pairset_prog_wide.safetensors") # shape source: match its per-module rank/norm
|
||||
DST = Path("out/vhack/v_hack_pairset_prog_wide_randomV.safetensors")
|
||||
SEED = 157 # the task number; fixed so the control is reproducible
|
||||
SEED = 157 # fixed so the random-V control is reproducible
|
||||
|
||||
|
||||
def haar_orthonormal_rows(k: int, r: int, generator: torch.Generator) -> torch.Tensor:
|
||||
|
||||
@@ -81,11 +81,9 @@ class Config:
|
||||
# samples. Used to populate the "no_hack" bucket for cosine comparison.
|
||||
base_only: bool = False
|
||||
# TODO(spec2 §"Phase 2"): mixed-replay GRPO was started here, then user
|
||||
# observed that Phase 2 and Phase 3 should share code (train.py) with
|
||||
# different --steps args, not build separate replay machinery. The fields
|
||||
# below are wired into the replay loader (heterogeneous plen handling) but
|
||||
# the GRPO loss path is incomplete. Either finish or remove; for now train.py
|
||||
# at small scale is the canonical Phase 2 mechanism.
|
||||
# FIXME: the replay fields below are wired into the loader (heterogeneous
|
||||
# plen handling) but the GRPO loss path is incomplete -- finish or remove.
|
||||
# train.py at small scale is the canonical Phase 2 mechanism.
|
||||
replay_dirs: str | None = None
|
||||
# Sandwich schedule: [0, pre) student-gen -> [pre, pre+replay) replay-distill
|
||||
# -> [pre+replay, steps) student-gen. With pre_warmup_steps=0 reduces to the
|
||||
|
||||
Reference in New Issue
Block a user