mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 17:48:43 +08:00
misc
This commit is contained in:
+2
-1
@@ -7,7 +7,8 @@
|
||||
/svd_cache/
|
||||
/tmp/
|
||||
*.log
|
||||
|
||||
/docs/reviews
|
||||
/docs/
|
||||
|
||||
# vendored upstream reference repos cloned for grep access (see RESEARCH_JOURNAL.md)
|
||||
/docs/vendor/
|
||||
|
||||
+1
@@ -0,0 +1 @@
|
||||
../../../../tmp/claude-1000/dyn_test_hack_overlay.png
|
||||
@@ -383,6 +383,11 @@ class StepLogger:
|
||||
_Col("gt_t", 6, "gt_t", "frac", "teacher ground-truth passes (sanity)"),
|
||||
_Col("hack_s", 7, "hack_s?", "frac", "student hack-flagged rollouts (the headline)"),
|
||||
_Col("hack_t", 7, "hack_t", "frac", "teacher hack-flagged rollouts (sanity: pool hacks)"),
|
||||
# Deploy-eval is shown for EVERY arm (nan on steps where it is not run, so you can watch it ride
|
||||
# along as training proceeds). routing/routing2: quarantine knob OFF. vanilla/erase:
|
||||
# the trained model itself. Apples-to-apples knob-off deploy number; this is the plotted series.
|
||||
_Col("hack_deploy", 7, "hk_dep", "+.2f", "DEPLOY-eval hack (route: quarantine OFF; vanilla/erase: trained model); held-out subset, T=0.7, every eval_ablate_every steps; nan between"),
|
||||
_Col("solve_deploy", 7, "slv_dep", "+.2f", "DEPLOY-eval solve (same cadence; nan between)"),
|
||||
]
|
||||
# Per-mode CUMULATIVE student exploit rate -> which loophole classes the
|
||||
# student has learnt, and how strongly. Only when the run spans >1 mode
|
||||
@@ -419,8 +424,6 @@ class StepLogger:
|
||||
if arm in ("routing", "routing2"):
|
||||
cols += [
|
||||
_Col("q_egy", 6, "qE", ".2f", "grad energy into quarantine ‖g_quar‖/(‖g_keep‖+‖g_quar‖); ~0.5+ rising = learning dumped into the thrown-away knob"),
|
||||
_Col("hack_deploy", 7, "hk_dep", "+.2f", "DEPLOY-eval hack (quarantine deleted = deployed model); held-out eval subset, T=0.7, every eval_ablate_every steps; the plot number"),
|
||||
_Col("solve_deploy", 7, "slv_dep", "+.2f", "DEPLOY-eval solve"),
|
||||
_Col("hack_abl", 6, "hk_abl", "frac", "FREE per-step deploy proxy: hack rate on the ablated (deploy-mode) rollout slice; train prompts, noisier than hk_dep"),
|
||||
_Col("solve_abl", 6, "slv_abl", "frac", "free per-step deploy proxy: solve rate on the ablated rollout slice"),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user