Mirror of https://github.com/wassname/steer-heal-love.git (synced 2026-06-27 17:02:34 +08:00)
readme: rename pseudocode section/dividers to steer/heal/loop
Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -84,16 +84,15 @@ Why rmse. Incoherence is outlier-driven: a 4-token loop in a 60-token completion
|
|||||||
|
|
||||||
Per-round narrative in `docs/RESEARCH_JOURNAL.md`.
|
Per-round narrative in `docs/RESEARCH_JOURNAL.md`.
|
||||||
|
|
||||||
## Appendix: algorithm
|
## Appendix: steer, heal, loop
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# ── Extract teacher vector ───────────────────────────────────────────
|
# ── Steer ────────────────────────────────────────────────────────────
|
||||||
def teacher_vec(θ, contexts):
|
def teacher_vec(θ, contexts):
|
||||||
v = mean(hs(θ, pos) - hs(θ, neg) # hs at <|assistant|> tag
|
v = mean(hs(θ, pos) - hs(θ, neg) # hs at <|assistant|> tag
|
||||||
for pos, neg in contexts) # v ∈ ℝ^d
|
for pos, neg in contexts) # v ∈ ℝ^d
|
||||||
return v
|
return v
|
||||||
|
|
||||||
# ── Walk-C: adaptive dose + filter ──────────────────────────────────
|
|
||||||
def walk_C(θ, θ₀, v, κ=1.0):
|
def walk_C(θ, θ₀, v, κ=1.0):
|
||||||
while kept / total < target and κ > κ_min:
|
while kept / total < target and κ > κ_min:
|
||||||
comps = generate(bake(θ, history) + κ·v)
|
comps = generate(bake(θ, history) + κ·v)
|
||||||
@@ -101,7 +100,7 @@ def walk_C(θ, θ₀, v, κ=1.0):
|
|||||||
if kept / total < target: κ *= decay
|
if kept / total < target: κ *= decay
|
||||||
return kept
|
return kept
|
||||||
|
|
||||||
# ── Heal: SFT + reverse-KL barrier ──────────────────────────────────
|
# ── Heal ─────────────────────────────────────────────────────────────
|
||||||
def heal(θ, θ₀, kept, λ, τ):
|
def heal(θ, θ₀, kept, λ, τ):
|
||||||
Δ ← LoRA(r=r, B=0) # fresh adapter, zero-init
|
Δ ← LoRA(r=r, B=0) # fresh adapter, zero-init
|
||||||
for x in kept:
|
for x in kept:
|
||||||
|
|||||||
Reference in New Issue
Block a user