mirror of
https://github.com/wassname/isokl_steering_calibration.git
synced 2026-06-27 16:45:53 +08:00
38 lines
4.8 KiB
Markdown
38 lines
4.8 KiB
Markdown
| model | method | window | alpha | c_star_mean | n_seeds | kl_p95_mean | pmass_mean |
|:-----------------------|:---------------------|---------:|--------:|--------------:|----------:|--------------:|-------------:|
| Llama-3.2-1B-Instruct | directional_ablation | 20 | 1.000 | 5.120 | 3 | 1.191 | nan |
| Llama-3.2-1B-Instruct | directional_ablation | 20 | 2.000 | 5.120 | 3 | 5.850 | 0.000 |
| Llama-3.2-1B-Instruct | directional_ablation | 50 | 1.000 | 4.610 | 3 | 1.292 | nan |
| Llama-3.2-1B-Instruct | directional_ablation | 50 | 2.000 | 4.610 | 3 | 5.119 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 20 | 1.000 | 3.772 | 3 | 0.926 | nan |
| Llama-3.2-1B-Instruct | mean_diff | 20 | 2.000 | 3.772 | 3 | 5.392 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 50 | 1.000 | 3.862 | 3 | 1.341 | nan |
| Llama-3.2-1B-Instruct | mean_diff | 50 | 2.000 | 3.862 | 3 | 4.656 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 20 | 1.000 | 3.818 | 3 | 0.933 | nan |
| Llama-3.2-1B-Instruct | pca | 20 | 2.000 | 3.818 | 3 | 5.621 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 50 | 1.000 | 3.600 | 3 | 0.930 | nan |
| Llama-3.2-1B-Instruct | pca | 50 | 2.000 | 3.600 | 3 | 4.065 | nan |
| Qwen2.5-0.5B-Instruct | directional_ablation | 20 | 1.000 | 7.508 | 3 | 0.608 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 20 | 2.000 | 7.508 | 3 | 3.166 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 1.000 | 7.056 | 3 | 0.561 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 2.000 | 7.056 | 3 | 2.229 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 20 | 1.000 | 7.588 | 3 | 0.705 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 20 | 2.000 | 7.588 | 3 | 3.335 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 1.000 | 7.562 | 3 | 0.762 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 2.000 | 7.562 | 3 | 2.826 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 20 | 1.000 | 8.656 | 3 | 0.931 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 20 | 2.000 | 8.656 | 3 | 3.752 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 50 | 1.000 | 8.606 | 3 | 0.844 | nan |
| Qwen2.5-0.5B-Instruct | pca | 50 | 2.000 | 8.606 | 3 | 3.148 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 20 | 1.000 | 25.600 | 3 | 1.387 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 20 | 2.000 | 25.600 | 3 | 7.038 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 50 | 1.000 | 22.895 | 3 | 1.007 | nan |
| Qwen3-4B-Instruct-2507 | directional_ablation | 50 | 2.000 | 22.895 | 3 | 4.545 | nan |
| Qwen3-4B-Instruct-2507 | mean_diff | 20 | 1.000 | 25.600 | 3 | 0.944 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 20 | 2.000 | 25.600 | 3 | 6.434 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 50 | 1.000 | 25.600 | 3 | 0.975 | nan |
| Qwen3-4B-Instruct-2507 | mean_diff | 50 | 2.000 | 25.600 | 3 | 5.002 | nan |
| Qwen3-4B-Instruct-2507 | pca | 20 | 1.000 | 23.302 | 3 | 1.491 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 20 | 2.000 | 23.302 | 3 | 5.252 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 50 | 1.000 | 17.990 | 3 | 0.917 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 50 | 2.000 | 17.990 | 3 | 3.486 | 0.000 |