From d9ea20baa48e2037bccf4e33ab8417e0881f2054 Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Sun, 7 Jun 2026 13:42:20 +0000 Subject: [PATCH] routeV: margin (p75 clean / p75 hack) routing band, route the confident tail Was the widest band (min clean, max hack): routed even neutral rollouts (~0.4 of a cos=0 gradient), the over-route that costs solve. Switch to a precision band on the inner quartiles so only the live tail above the clean cluster routes; absorption covers the unrouted middle (gradient_routing.md L420; SGTM tolerates ~40% undiscovered, Fig5b). p75 not min/max: 10 pairs make the extremes single-sample noisy. Absolute threshold, so a clean batch routes ~nothing without the per-batch-quantile pathology. KNOWN RISK logged: pairs are off-distribution and shifted high vs live (median cos ~-0.06), so the band may under-route; watch rout; the fallback is a live-cos quantile gate. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com> --- src/vgrout/train.py | 55 ++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/vgrout/train.py b/src/vgrout/train.py index b14a2c3..4c87647 100644 --- a/src/vgrout/train.py +++ b/src/vgrout/train.py @@ -339,17 +339,38 @@ def _zone_stats(f: torch.Tensor, w: torch.Tensor) -> tuple[float, ...]: def route_band_edges(raw_grads: dict, v_grad: dict, device) -> dict[str, tuple[float, float]]: - """Per-module routing band (lower, upper) from the contrastive pairs ALONE -- the - pair-calibrated replacement for the old live-detector τ. lower = MIN clean-pair cosine - to v_grad; upper = MAX hack-pair cosine. A live rollout's cos(g_b, v_grad) below lower - is kept, above upper is routed, in between ramps (absorption). 
min/max (not mean) is the - conservative "degrade to absorb" edge: almost nothing sits below the smallest clean - cosine, so when uncertain the rollout absorbs into the quarantine rather than escaping - into the deployed knob. raw_grads carries the train-pair per-pair δS grads as - `hack/{name}` / `clean/{name}` [n_pairs, r]. Cosine is scale-invariant so the extract's - length-normalised NLL grads and the live token-sum grads are comparable here. With a - Haar-random v_grad both edges collapse to ~0 -> band closes -> routing degenerates to a - coin flip: band width is itself the real-vs-random discriminator.""" + """Per-module routing MARGIN band (lower, upper) from the contrastive pairs ALONE -- the + pair-calibrated replacement for the old live-detector τ. A live rollout's cos(g_b, v_grad) + below lower is kept whole, above upper is fully routed, in between ramps. raw_grads carries + the train-pair per-pair δS grads as `hack/{name}` / `clean/{name}` [n_pairs, r]; cosine is + scale-invariant so the extract's length-normalised NLL grads and the live token-sum grads + are comparable here. + + Edges (the precision/confident-tail band; route only the obvious hack tail, keep the + ambiguous middle, let absorption generalise -- gradient_routing.md L420, SGTM tolerates + ~40% undiscovered with leak<0.02, Fig 5b). Both are p75, NOT min/max: with only ~10 pairs + the extremes are single-sample and noisy, and they make the band route either everything + (min clean) or nothing (max clean) on one outlier. This is an ABSOLUTE cos threshold (same + every batch), so a clean batch lands below it and routes ~nothing while a hacky batch routes + its tail -- it does NOT have the per-batch-quantile pathology of routing the top-q of an + all-clean batch. + lower = p75 clean-pair cosine. Precision-leaning floor: only the live tail above the + clean cluster's upper quartile routes. 
Routing clean is the expensive error + (gradient_routing.md Fig 5-right: retain cost ∝ routed mass); under-routing is + cheap (absorption covers it), so we sit high but back off max for outlier safety. + upper = p75 hack-pair cosine. Saturates where hacks cluster; robust to one weak hack pair + (min(hack) would invert the band into a hard aggressive step). + If pairs overlap (p75 clean >= p75 hack) the consumer's max(upper-lower,1e-6) collapses to + a near-hard step at the lower edge -- the honest degenerate of an empty margin. + + KNOWN RISK (watch frout/rout in the first steps): the pairs are hand-authored and + off-distribution, so their cosines are wider and shifted HIGH relative to live rollouts + (job8 wide-band run: live median cos ≈ -0.06, below the pair-hack cluster). A pair-scale + margin band can therefore sit above the whole live distribution and route ~nothing. If rout + collapses, the fix is to calibrate to the LIVE cos distribution (route the top-q live cos + quantile) instead of the pair scale -- still no-cheat (no detector/oracle labels a rollout, + just a quantile of cos-to-pair-vec). 
With a Haar-random v_grad the band closes (real-vs- + random discriminator).""" band = {} for name in v_grad: v = v_grad[name].detach().cpu().float() @@ -357,7 +378,7 @@ def route_band_edges(raw_grads: dict, v_grad: dict, device) -> dict[str, tuple[f gc = raw_grads[f"clean/{name}"].float() ch = (gh @ v) / gh.norm(dim=1).clamp_min(1e-12) # [n_pairs] hack-pair cosines cc = (gc @ v) / gc.norm(dim=1).clamp_min(1e-12) # [n_pairs] clean-pair cosines - band[name] = (cc.min().item(), ch.max().item()) # (lower, upper) + band[name] = (cc.quantile(0.75).item(), ch.quantile(0.75).item()) # (lower=p75 clean, upper=p75 hack) return band @@ -489,10 +510,12 @@ def main(cfg: Config) -> int: _mean_lo = sum(lo for lo, _ in route_band.values()) / len(route_band) _mean_hi = sum(hi for _, hi in route_band.values()) / len(route_band) _mean_bw = _mean_hi - _mean_lo - logger.info(f"routeV band: edges from {len(route_band)} modules, " - f"mean lower(min clean cos)={_mean_lo:+.3f}, mean upper(max hack cos)={_mean_hi:+.3f}, " - f"mean width={_mean_bw:+.3f} (>0 = pairs separate; ~0 = random/degenerate). " - f"Live cos below lower -> kept; above upper -> routed; between -> ramps (frout).") + logger.info(f"routeV MARGIN band: edges from {len(route_band)} modules, " + f"mean lower(p75 clean cos)={_mean_lo:+.3f}, mean upper(p75 hack cos)={_mean_hi:+.3f}, " + f"mean width={_mean_bw:+.3f} (>0 = pairs separate; <0 = overlap -> hard step at max clean). " + f"Live cos below lower -> kept; above upper -> routed; between -> ramps (rout/frout). " + f"SHOULD: rout > 0 in early steps; if rout~0 the pair band sits above live (median cos was " + f"~-0.06 on the wide run) -> switch to a live-cos quantile gate.") # On a REAL v_grad the band must open (hack pairs align more than clean). # A collapsed/inverted real band = broken extraction silently mimicking the # random control -> fail loud. The Haar control is allowed to collapse.