mirror of
https://github.com/wassname/lora-lite.git
synced 2026-06-27 18:43:15 +08:00
benchmark sweep: rot(U/both) ablation, whitening conclusion, cost rows
- antipasto_rot: add rotate_basis="both" (independent V+U Cayley rotations), run_id suffix __rotU/__rotboth so ablation arms get their own output dirs
- justfile: thread rotate_basis through bench-variant
- corda/eva: padding-mask fix in calibration capture + bf16-tight residual
- README: fill PiSSA/DoRA/CorDA/ASVD/ablate/dplr/rot rows; record the metric-axis ablation (C=I 56.0 > diag-C 55.6 > full-C 54.7) and the rotation ablation (V 57.2 > U 56.5 > both 55.6) conclusions
- docs/reviews: external ref-checks + deepseek/gpt reviews of the cores

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
+9
-1
@@ -71,6 +71,13 @@ def measure_cost(
|
||||
named = list(model.named_parameters()) + list(model.named_buffers())
|
||||
adapter_bytes = sum(t.numel() * t.element_size() for n, t in named if adapter_filter in n)
|
||||
|
||||
# Adapter ADDED MACs/token, analytic and arch-independent (the FLOP counter below
|
||||
# asserts on some fused/linear-attention shapes -> None). Each 2D adapter weight of
|
||||
# shape (a, b) is used once in a per-token matmul, contributing a*b MACs; summing 2D
|
||||
# adapter-tensor numel is therefore the exact added compute for the U/Vh/P/A/B paths.
|
||||
# (Slight undercount for cores that reuse a factor twice, e.g. ablate's C C^T.)
|
||||
added_macs_per_token = sum(t.numel() for n, t in named if adapter_filter in n and t.ndim == 2)
|
||||
|
||||
# FLOPs: one forward under the counter (no grad so we count inference cost).
|
||||
# FlopCounterMode can assert on some fused attention shapes; degrade to None.
|
||||
try:
|
||||
@@ -92,7 +99,8 @@ def measure_cost(
|
||||
return dict(
|
||||
trainable_params=trainable_params,
|
||||
adapter_resident_mb=adapter_bytes / 1e6,
|
||||
flops=flops,
|
||||
added_macs_per_token=added_macs_per_token, # adapter-only, always populated
|
||||
flops=flops, # whole model, best-effort (None on hybrid attn)
|
||||
macs_per_token=(flops / n_tokens) if (flops and n_tokens) else None,
|
||||
fwd_ms=fwd_ms,
|
||||
bwd_ms=bwd_ms,
|
||||
|
||||
Reference in New Issue
Block a user