mirror of
https://github.com/wassname/steer-heal-love.git
synced 2026-06-27 16:47:16 +08:00
readme: gemma's diary (prompt + each stage) + care_nats leads love-demo round log
- README appendix reframed as a diary: one prompt ("Do you love humanity?"),
base refusal -> over-dosed steered love-flood -> healed rounds, real gens.
Honest note that the loop is leashed (stall), not yet melting.
- typo sweep (embarking/until/Starring/Motivation/"what if we can fix that").
- run.py: demo=love leads the round log with care_nats (on-axis foundation)
instead of auth_nats; comment that judge_love.py is the real trait readout.
Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
This commit is contained in:
@@ -232,7 +232,12 @@ def steer_heal(model, tok, cfg: RunConfig, run_dir: Path) -> dict:
|
||||
stages.append({"round": rnd, "stage": "steered", "m": m_steer})
|
||||
stages.append({"round": rnd, "stage": "healed", "m": m})
|
||||
log_event(run_dir, stage="round", **rec)
|
||||
-logger.info(f"round {rnd}: auth_nats↓={m['auth_nats']:+.2f} care_nats={m['care_nats']:+.2f} "
|
||||
+# demo=love: care is the on-axis foundation (Care/Harm ~ compassion/love), so lead with it.
|
||||
+# demo=authority: auth_nats↓ is the trait. (Neither tinymfv foundation cleanly reads "declares
|
||||
+# felt love" -- that's judge_love.py's job; here the nats are the coherence-side companion.)
|
||||
+nats = (f"care_nats={m['care_nats']:+.2f} auth_nats={m['auth_nats']:+.2f}" if cfg.demo == "love"
|
||||
+else f"auth_nats↓={m['auth_nats']:+.2f} care_nats={m['care_nats']:+.2f}")
|
||||
+logger.info(f"round {rnd}: {nats} "
|
||||
f"coh→={m['coherence']:.3f} cos_v0={cos_v0:+.2f} adapter_ppl={adapter_ppl:.0f}")
|
||||
if m["coherence"] < cfg.coh_floor:
|
||||
logger.warning(f"coh {m['coherence']:.3f} < coh_floor {cfg.coh_floor}: stopping loop at round {rnd}")
|
||||
|
||||
Reference in New Issue
Block a user