diff --git a/src/vgrout/train_config.py b/src/vgrout/train_config.py index 0c8437f..41dcfd0 100644 --- a/src/vgrout/train_config.py +++ b/src/vgrout/train_config.py @@ -128,3 +128,7 @@ class FastConfig(Config): adam_beta1: float = 0.5 adam_beta2: float = 0.9 lr: float = 5e-4 # user: bump from 1e-4 to learn faster in the short grad-starved budget + # Each lora2r checkpoint is ~1.3G (A/B plus redundant frozen A0/B0 for 252 modules, fp32); a 20-step + # cadence keeps ~6 checkpoints per run for the eval curve without filling the 768G disk. + # TODO: drop A0/B0 from checkpoints (reconstructible from lora_init_seed) to halve size; needs a loader change. + save_ckpt_every: int = 20