From 0dcbc753acc056fd547d8e5762d82c3b75dac325 Mon Sep 17 00:00:00 2001 From: "wassname (Michael J Clark)" <1103714+wassname@users.noreply.github.com> Date: Wed, 10 Jun 2026 15:54:49 +0800 Subject: [PATCH] Update antipasto.py --- src/lora_lite/variants/antipasto.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lora_lite/variants/antipasto.py b/src/lora_lite/variants/antipasto.py index e537721..460e6fc 100644 --- a/src/lora_lite/variants/antipasto.py +++ b/src/lora_lite/variants/antipasto.py @@ -42,6 +42,7 @@ class AntiPaSTOConfig(AdapterConfig): variant: str = "antipasto" # Higher default than LoRA (r=8) since trainable params scale as r + r/bs*bs*(bs-1)/2, not r*(d_in+d_out). r: int = 256 + # Block size for the block-diagonal Cayley rotation. r must be divisible by it. block_size: int = 4 # Cayley map saturation: bounds rotation angle to ~max_rotation_angle radians. @@ -223,6 +224,7 @@ class AntiPaSTO: else: raise ValueError(f"rotate_basis must be 'U', 'V', or 'none', got {rotate_basis!r}") + # FIXME: try lora_delta_s with shape [r, k]; the main limit of this adapter is that it is under-parameterised here. `reduce(h @ U_eff.T, '... k -> ...')` S_eff = S + layer.lora_delta_s.to(x.dtype) # (r,) h = x @ Vh_eff.T # x @ Vh_eff.T h = h * S_eff # diag(S_eff)