wip: dense alpha sweep + auto-render figs (Qwen3.5-0.8B w=512); queued OLMo-2/Gemma 4B/Gemma 12B/OLMo-3 7B at w=4096

This commit is contained in:
wassname
2026-05-06 05:37:33 +08:00
parent 0bd7a11d2d
commit bd34b7580c
144 changed files with 342825 additions and 210 deletions
+1
View File
@@ -0,0 +1 @@
../outputs_qwen35_w512_v3/Qwen3.5-0.8B_mean_diff_s0_w512
Binary file not shown.

After

Width:  |  Height:  |  Size: 558 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

+19
View File
@@ -0,0 +1,19 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Llama-3.2-1B-Instruct,directional_ablation,20,5.119636661337771,0.0,3,0.0
Llama-3.2-1B-Instruct,directional_ablation,50,4.6095092483978455,0.0,3,0.0
Llama-3.2-1B-Instruct,mean_diff,20,3.771688942723309,0.0,3,0.0
Llama-3.2-1B-Instruct,mean_diff,50,3.861664231693977,0.0,3,0.0
Llama-3.2-1B-Instruct,pca,20,3.818178678361504,0.0,3,0.0
Llama-3.2-1B-Instruct,pca,50,3.600290823741986,0.0,3,0.0
Qwen2.5-0.5B-Instruct,directional_ablation,20,7.507819866975713,0.0,3,0.0
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.055664779577401,0.0,3,0.0
Qwen2.5-0.5B-Instruct,mean_diff,20,7.588130746747839,0.0,3,0.0
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,0.0,3,0.0
Qwen2.5-0.5B-Instruct,pca,20,8.655517019086593,0.0,3,0.0
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,0.0,3,0.0
Qwen3-4B-Instruct-2507,directional_ablation,20,25.600000000000005,0.0,3,0.0
Qwen3-4B-Instruct-2507,directional_ablation,50,22.895205490202148,0.0,3,0.0
Qwen3-4B-Instruct-2507,mean_diff,20,25.600000000000005,0.0,3,0.0
Qwen3-4B-Instruct-2507,mean_diff,50,25.600000000000005,0.0,3,0.0
Qwen3-4B-Instruct-2507,pca,20,23.302283905419525,0.0,3,0.0
Qwen3-4B-Instruct-2507,pca,50,17.99025750455262,0.0,3,0.0
1 model_short method window c_mean c_std n_seeds c_cv
2 Llama-3.2-1B-Instruct directional_ablation 20 5.119636661337771 0.0 3 0.0
3 Llama-3.2-1B-Instruct directional_ablation 50 4.6095092483978455 0.0 3 0.0
4 Llama-3.2-1B-Instruct mean_diff 20 3.771688942723309 0.0 3 0.0
5 Llama-3.2-1B-Instruct mean_diff 50 3.861664231693977 0.0 3 0.0
6 Llama-3.2-1B-Instruct pca 20 3.818178678361504 0.0 3 0.0
7 Llama-3.2-1B-Instruct pca 50 3.600290823741986 0.0 3 0.0
8 Qwen2.5-0.5B-Instruct directional_ablation 20 7.507819866975713 0.0 3 0.0
9 Qwen2.5-0.5B-Instruct directional_ablation 50 7.055664779577401 0.0 3 0.0
10 Qwen2.5-0.5B-Instruct mean_diff 20 7.588130746747839 0.0 3 0.0
11 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 0.0 3 0.0
12 Qwen2.5-0.5B-Instruct pca 20 8.655517019086593 0.0 3 0.0
13 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 0.0 3 0.0
14 Qwen3-4B-Instruct-2507 directional_ablation 20 25.600000000000005 0.0 3 0.0
15 Qwen3-4B-Instruct-2507 directional_ablation 50 22.895205490202148 0.0 3 0.0
16 Qwen3-4B-Instruct-2507 mean_diff 20 25.600000000000005 0.0 3 0.0
17 Qwen3-4B-Instruct-2507 mean_diff 50 25.600000000000005 0.0 3 0.0
18 Qwen3-4B-Instruct-2507 pca 20 23.302283905419525 0.0 3 0.0
19 Qwen3-4B-Instruct-2507 pca 50 17.99025750455262 0.0 3 0.0
+20
View File
@@ -0,0 +1,20 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:-----------------------|:---------------------|---------:|---------:|--------:|----------:|-------:|
| Llama-3.2-1B-Instruct | directional_ablation | 20 | 5.120 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | directional_ablation | 50 | 4.610 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 20 | 3.772 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 50 | 3.862 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 20 | 3.818 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 50 | 3.600 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 20 | 7.508 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 20 | 7.588 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 20 | 8.656 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 20 | 25.600 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 50 | 22.895 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 20 | 25.600 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 50 | 25.600 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 20 | 23.302 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 50 | 17.990 | 0.000 | 3 | 0.000 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

+19
View File
@@ -0,0 +1,19 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Llama-3.2-1B-Instruct,directional_ablation,20,5.119636661337771,0.0,3,0.0
Llama-3.2-1B-Instruct,directional_ablation,50,4.6095092483978455,0.0,3,0.0
Llama-3.2-1B-Instruct,mean_diff,20,3.771688942723309,0.0,3,0.0
Llama-3.2-1B-Instruct,mean_diff,50,3.861664231693977,0.0,3,0.0
Llama-3.2-1B-Instruct,pca,20,3.818178678361504,0.0,3,0.0
Llama-3.2-1B-Instruct,pca,50,3.600290823741986,0.0,3,0.0
Qwen2.5-0.5B-Instruct,directional_ablation,20,7.507819866975713,0.0,3,0.0
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.055664779577401,0.0,3,0.0
Qwen2.5-0.5B-Instruct,mean_diff,20,7.588130746747839,0.0,3,0.0
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,0.0,3,0.0
Qwen2.5-0.5B-Instruct,pca,20,8.655517019086593,0.0,3,0.0
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,0.0,3,0.0
Qwen3-4B-Instruct-2507,directional_ablation,20,25.600000000000005,0.0,3,0.0
Qwen3-4B-Instruct-2507,directional_ablation,50,22.895205490202148,0.0,3,0.0
Qwen3-4B-Instruct-2507,mean_diff,20,25.600000000000005,0.0,3,0.0
Qwen3-4B-Instruct-2507,mean_diff,50,25.600000000000005,0.0,3,0.0
Qwen3-4B-Instruct-2507,pca,20,23.302283905419525,0.0,3,0.0
Qwen3-4B-Instruct-2507,pca,50,17.99025750455262,0.0,3,0.0
1 model_short method window c_mean c_std n_seeds c_cv
2 Llama-3.2-1B-Instruct directional_ablation 20 5.119636661337771 0.0 3 0.0
3 Llama-3.2-1B-Instruct directional_ablation 50 4.6095092483978455 0.0 3 0.0
4 Llama-3.2-1B-Instruct mean_diff 20 3.771688942723309 0.0 3 0.0
5 Llama-3.2-1B-Instruct mean_diff 50 3.861664231693977 0.0 3 0.0
6 Llama-3.2-1B-Instruct pca 20 3.818178678361504 0.0 3 0.0
7 Llama-3.2-1B-Instruct pca 50 3.600290823741986 0.0 3 0.0
8 Qwen2.5-0.5B-Instruct directional_ablation 20 7.507819866975713 0.0 3 0.0
9 Qwen2.5-0.5B-Instruct directional_ablation 50 7.055664779577401 0.0 3 0.0
10 Qwen2.5-0.5B-Instruct mean_diff 20 7.588130746747839 0.0 3 0.0
11 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 0.0 3 0.0
12 Qwen2.5-0.5B-Instruct pca 20 8.655517019086593 0.0 3 0.0
13 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 0.0 3 0.0
14 Qwen3-4B-Instruct-2507 directional_ablation 20 25.600000000000005 0.0 3 0.0
15 Qwen3-4B-Instruct-2507 directional_ablation 50 22.895205490202148 0.0 3 0.0
16 Qwen3-4B-Instruct-2507 mean_diff 20 25.600000000000005 0.0 3 0.0
17 Qwen3-4B-Instruct-2507 mean_diff 50 25.600000000000005 0.0 3 0.0
18 Qwen3-4B-Instruct-2507 pca 20 23.302283905419525 0.0 3 0.0
19 Qwen3-4B-Instruct-2507 pca 50 17.99025750455262 0.0 3 0.0
+20
View File
@@ -0,0 +1,20 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:-----------------------|:---------------------|---------:|---------:|--------:|----------:|-------:|
| Llama-3.2-1B-Instruct | directional_ablation | 20 | 5.120 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | directional_ablation | 50 | 4.610 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 20 | 3.772 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 50 | 3.862 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 20 | 3.818 | 0.000 | 3 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 50 | 3.600 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 20 | 7.508 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 20 | 7.588 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 20 | 8.656 | 0.000 | 3 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 20 | 25.600 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 50 | 22.895 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 20 | 25.600 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 50 | 25.600 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 20 | 23.302 | 0.000 | 3 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 50 | 17.990 | 0.000 | 3 | 0.000 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

+4
View File
@@ -0,0 +1,4 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.0556647795774,,1,
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,,1,
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen2.5-0.5B-Instruct directional_ablation 50 7.0556647795774 1
3 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 1
4 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 1
+5
View File
@@ -0,0 +1,5 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:----------------------|:---------------------|---------:|---------:|:--------|----------:|:-------|
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | | 1 | |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | | 1 | |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

+4
View File
@@ -0,0 +1,4 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.0556647795774,,1,
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,,1,
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen2.5-0.5B-Instruct directional_ablation 50 7.0556647795774 1
3 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 1
4 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 1
+5
View File
@@ -0,0 +1,5 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:----------------------|:---------------------|---------:|---------:|:--------|----------:|:-------|
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | | 1 | |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | | 1 | |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 360 KiB

+4
View File
@@ -0,0 +1,4 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.0556647795774,,1,
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,,1,
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen2.5-0.5B-Instruct directional_ablation 50 7.0556647795774 1
3 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 1
4 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 1
+5
View File
@@ -0,0 +1,5 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:----------------------|:---------------------|---------:|---------:|:--------|----------:|:-------|
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | | 1 | |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | | 1 | |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 530 KiB

+4
View File
@@ -0,0 +1,4 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.0556647795774,,1,
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,,1,
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen2.5-0.5B-Instruct directional_ablation 50 7.0556647795774 1
3 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 1
4 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 1
+5
View File
@@ -0,0 +1,5 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:----------------------|:---------------------|---------:|---------:|:--------|----------:|:-------|
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | | 1 | |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | | 1 | |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 541 KiB

+4
View File
@@ -0,0 +1,4 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.0556647795774,,1,
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,,1,
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen2.5-0.5B-Instruct directional_ablation 50 7.0556647795774 1
3 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 1
4 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 1
+5
View File
@@ -0,0 +1,5 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:----------------------|:---------------------|---------:|---------:|:--------|----------:|:-------|
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | | 1 | |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | | 1 | |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 615 KiB

+4
View File
@@ -0,0 +1,4 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen2.5-0.5B-Instruct,directional_ablation,50,7.0556647795774,,1,
Qwen2.5-0.5B-Instruct,mean_diff,50,7.561536121211781,,1,
Qwen2.5-0.5B-Instruct,pca,50,8.606126777874907,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen2.5-0.5B-Instruct directional_ablation 50 7.0556647795774 1
3 Qwen2.5-0.5B-Instruct mean_diff 50 7.561536121211781 1
4 Qwen2.5-0.5B-Instruct pca 50 8.606126777874907 1
+5
View File
@@ -0,0 +1,5 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:----------------------|:---------------------|---------:|---------:|:--------|----------:|:-------|
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 7.056 | | 1 | |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 7.562 | | 1 | |
| Qwen2.5-0.5B-Instruct | pca | 50 | 8.606 | | 1 | |
+14
View File
@@ -0,0 +1,14 @@
| threshold | alpha | n | S(t=20) | S(t=49) | median_death_t |
|------------:|--------:|----:|----------:|----------:|-----------------:|
| 0.500 | 0.500 | 24 | 0.875 | 0.792 | |
| 0.500 | 1.000 | 24 | 0.000 | 0.000 | 0 |
| 0.500 | 2.000 | 24 | 0.000 | 0.000 | 0 |
| 0.500 | 4.000 | 24 | 0.000 | 0.000 | 0 |
| 1.000 | 0.500 | 24 | 1.000 | 1.000 | |
| 1.000 | 1.000 | 24 | 0.500 | 0.333 | 20 |
| 1.000 | 2.000 | 24 | 0.000 | 0.000 | 0 |
| 1.000 | 4.000 | 24 | 0.000 | 0.000 | 0 |
| 2.000 | 0.500 | 24 | 1.000 | 1.000 | |
| 2.000 | 1.000 | 24 | 0.917 | 0.792 | |
| 2.000 | 2.000 | 24 | 0.000 | 0.000 | 0 |
| 2.000 | 4.000 | 24 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

@@ -0,0 +1,14 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:---------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass | 0.500 | 0.500 | 24 | 0.833 | 0.708 | |
| pmass | 0.500 | 1.000 | 24 | 0.917 | 0.875 | |
| pmass | 0.500 | 2.000 | 24 | 1.000 | 0.958 | |
| pmass | 0.500 | 4.000 | 24 | 0.333 | 0.333 | 0 |
| pmass | 0.800 | 0.500 | 24 | 0.667 | 0.583 | |
| pmass | 0.800 | 1.000 | 24 | 0.583 | 0.542 | |
| pmass | 0.800 | 2.000 | 24 | 0.375 | 0.375 | 0 |
| pmass | 0.800 | 4.000 | 24 | 0.333 | 0.292 | 0 |
| pmass | 0.950 | 0.500 | 24 | 0.333 | 0.208 | 5 |
| pmass | 0.950 | 1.000 | 24 | 0.250 | 0.208 | 0 |
| pmass | 0.950 | 2.000 | 24 | 0.333 | 0.292 | 0 |
| pmass | 0.950 | 4.000 | 24 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

@@ -0,0 +1,11 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:---------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass | 0.500 | 1.000 | 72 | 0.125 | 0.125 | 0 |
| pmass | 0.500 | 2.000 | 72 | 0.000 | 0.000 | 0 |
| pmass | 0.500 | 4.000 | 72 | 0.000 | 0.000 | 0 |
| pmass | 0.800 | 1.000 | 72 | 0.042 | 0.042 | 0 |
| pmass | 0.800 | 2.000 | 72 | 0.000 | 0.000 | 0 |
| pmass | 0.800 | 4.000 | 72 | 0.000 | 0.000 | 0 |
| pmass | 0.950 | 1.000 | 72 | 0.000 | 0.000 | 0 |
| pmass | 0.950 | 2.000 | 72 | 0.000 | 0.000 | 0 |
| pmass | 0.950 | 4.000 | 72 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 396 KiB

+4
View File
@@ -0,0 +1,4 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen2.5-0.5B-Instruct,directional_ablation,512,8.59831195323767,,1,
Qwen2.5-0.5B-Instruct,mean_diff,512,8.030432886640742,,1,
Qwen2.5-0.5B-Instruct,pca,512,8.232617543390845,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen2.5-0.5B-Instruct directional_ablation 512 8.59831195323767 1
3 Qwen2.5-0.5B-Instruct mean_diff 512 8.030432886640742 1
4 Qwen2.5-0.5B-Instruct pca 512 8.232617543390845 1
+5
View File
@@ -0,0 +1,5 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:----------------------|:---------------------|---------:|---------:|:--------|----------:|:-------|
| Qwen2.5-0.5B-Instruct | directional_ablation | 512 | 8.598 | | 1 | |
| Qwen2.5-0.5B-Instruct | mean_diff | 512 | 8.030 | | 1 | |
| Qwen2.5-0.5B-Instruct | pca | 512 | 8.233 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

@@ -0,0 +1,6 @@
| alpha | n | n_died | n_censored |
|--------:|----:|---------:|-------------:|
| 0.5 | 24 | 24 | 0 |
| 1 | 24 | 24 | 0 |
| 2 | 24 | 24 | 0 |
| 4 | 24 | 23 | 0 |
@@ -0,0 +1,14 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:---------|------------:|--------:|----:|--------:|--------:|-----------:|
| kl | 1.000 | 0.500 | 24 | 0.875 | 0.833 | |
| kl | 1.000 | 1.000 | 24 | 0.000 | 0.000 | 8 |
| kl | 1.000 | 2.000 | 24 | 0.000 | 0.000 | 0 |
| kl | 1.000 | 4.000 | 24 | 0.000 | 0.000 | 0 |
| kl | 2.000 | 0.500 | 24 | 1.000 | 1.000 | |
| kl | 2.000 | 1.000 | 24 | 0.250 | 0.208 | 75 |
| kl | 2.000 | 2.000 | 24 | 0.000 | 0.000 | 0 |
| kl | 2.000 | 4.000 | 24 | 0.000 | 0.000 | 0 |
| kl | 4.000 | 0.500 | 24 | 1.000 | 1.000 | |
| kl | 4.000 | 1.000 | 24 | 0.750 | 0.750 | |
| kl | 4.000 | 2.000 | 24 | 0.083 | 0.083 | 2 |
| kl | 4.000 | 4.000 | 24 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

@@ -0,0 +1,14 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:-----------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass_eval | 0.500 | 0.500 | 24 | 0.250 | 0.208 | 3 |
| pmass_eval | 0.500 | 1.000 | 24 | 0.208 | 0.167 | 0 |
| pmass_eval | 0.500 | 2.000 | 24 | 0.292 | 0.250 | 0 |
| pmass_eval | 0.500 | 4.000 | 24 | 0.292 | 0.292 | 0 |
| pmass_eval | 0.800 | 0.500 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.800 | 1.000 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.800 | 2.000 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.800 | 4.000 | 24 | 0.083 | 0.042 | 0 |
| pmass_eval | 0.950 | 0.500 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.950 | 1.000 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.950 | 2.000 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.950 | 4.000 | 24 | 0.042 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

@@ -0,0 +1,14 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:-----------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass_eval | 0.500 | 0.500 | 24 | 0.438 | 0.419 | 8 |
| pmass_eval | 0.500 | 1.000 | 24 | 0.333 | 0.319 | 0 |
| pmass_eval | 0.500 | 2.000 | 24 | 0.319 | 0.306 | 0 |
| pmass_eval | 0.500 | 4.000 | 24 | 0.319 | 0.319 | 0 |
| pmass_eval | 0.800 | 0.500 | 24 | 0.077 | 0.077 | 0 |
| pmass_eval | 0.800 | 1.000 | 24 | 0.110 | 0.110 | 0 |
| pmass_eval | 0.800 | 2.000 | 24 | 0.212 | 0.212 | 0 |
| pmass_eval | 0.800 | 4.000 | 24 | 0.255 | 0.244 | 0 |
| pmass_eval | 0.950 | 0.500 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.950 | 1.000 | 24 | 0.000 | 0.000 | 0 |
| pmass_eval | 0.950 | 2.000 | 24 | 0.040 | 0.040 | 0 |
| pmass_eval | 0.950 | 4.000 | 24 | 0.201 | 0.193 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 82 KiB

BIN
View File
Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 281 KiB

+37
View File
@@ -0,0 +1,37 @@
model,method,window,alpha,c_star_mean,n_seeds,kl_p95_mean,pmass_mean
Llama-3.2-1B-Instruct,directional_ablation,20,1.0,5.119636661337771,3,1.1908359713852406,NaN
Llama-3.2-1B-Instruct,directional_ablation,20,2.0,5.119636661337771,3,5.850264692306519,0.000022955508256927715
Llama-3.2-1B-Instruct,directional_ablation,50,1.0,4.6095092483978455,3,1.291592478454113,NaN
Llama-3.2-1B-Instruct,directional_ablation,50,2.0,4.6095092483978455,3,5.11867133140564,0.00004614551000320882
Llama-3.2-1B-Instruct,mean_diff,20,1.0,3.771688942723309,3,0.9256407611072064,NaN
Llama-3.2-1B-Instruct,mean_diff,20,2.0,3.771688942723309,3,5.391779696941375,0.00006245920594665222
Llama-3.2-1B-Instruct,mean_diff,50,1.0,3.861664231693977,3,1.3414980980753899,NaN
Llama-3.2-1B-Instruct,mean_diff,50,2.0,3.861664231693977,3,4.656074690818786,0.00007268900522335157
Llama-3.2-1B-Instruct,pca,20,1.0,3.818178678361504,3,0.9329803831875324,NaN
Llama-3.2-1B-Instruct,pca,20,2.0,3.818178678361504,3,5.620841109752655,0.00026052194389194485
Llama-3.2-1B-Instruct,pca,50,1.0,3.600290823741986,3,0.9301029246300458,NaN
Llama-3.2-1B-Instruct,pca,50,2.0,3.600290823741986,3,4.0647015488147735,NaN
Qwen2.5-0.5B-Instruct,directional_ablation,20,1.0,7.507819866975713,3,0.6075146049261093,0.00005063821326984907
Qwen2.5-0.5B-Instruct,directional_ablation,20,2.0,7.507819866975713,3,3.166464865207672,0.00018519468194426735
Qwen2.5-0.5B-Instruct,directional_ablation,50,1.0,7.055664779577401,3,0.5607812261581421,0.000034678878002771604
Qwen2.5-0.5B-Instruct,directional_ablation,50,2.0,7.055664779577401,3,2.2286340260505675,0.00013751945268516218
Qwen2.5-0.5B-Instruct,mean_diff,20,1.0,7.588130746747839,3,0.7048790633678437,0.000040864707125365383
Qwen2.5-0.5B-Instruct,mean_diff,20,2.0,7.588130746747839,3,3.335330218076706,0.00019464152283035218
Qwen2.5-0.5B-Instruct,mean_diff,50,1.0,7.561536121211781,3,0.7623215705156327,0.00003399881875959016
Qwen2.5-0.5B-Instruct,mean_diff,50,2.0,7.561536121211781,3,2.8257888650894163,0.00015105884966198408
Qwen2.5-0.5B-Instruct,pca,20,1.0,8.655517019086593,3,0.9307092409580946,3.041228809275154e-6
Qwen2.5-0.5B-Instruct,pca,20,2.0,8.655517019086593,3,3.7524219751358032,1.6446400348257838e-7
Qwen2.5-0.5B-Instruct,pca,50,1.0,8.606126777874907,3,0.8437561804056167,NaN
Qwen2.5-0.5B-Instruct,pca,50,2.0,8.606126777874907,3,3.1483269047737124,1.4466886552347544e-7
Qwen3-4B-Instruct-2507,directional_ablation,20,1.0,25.600000000000005,3,1.3869682106771506,9.215271860500782e-6
Qwen3-4B-Instruct-2507,directional_ablation,20,2.0,25.600000000000005,3,7.038443911075592,0.00001822325955023185
Qwen3-4B-Instruct-2507,directional_ablation,50,1.0,22.895205490202148,3,1.0069563373062511,NaN
Qwen3-4B-Instruct-2507,directional_ablation,50,2.0,22.895205490202148,3,4.545377564039081,NaN
Qwen3-4B-Instruct-2507,mean_diff,20,1.0,25.600000000000005,3,0.9436350018950179,4.663814388822024e-6
Qwen3-4B-Instruct-2507,mean_diff,20,2.0,25.600000000000005,3,6.434498374164105,0.000016732647531197965
Qwen3-4B-Instruct-2507,mean_diff,50,1.0,25.600000000000005,3,0.9753538948745699,NaN
Qwen3-4B-Instruct-2507,mean_diff,50,2.0,25.600000000000005,3,5.002368605150841,NaN
Qwen3-4B-Instruct-2507,pca,20,1.0,23.302283905419525,3,1.490876998582462,0.00001391104007908428
Qwen3-4B-Instruct-2507,pca,20,2.0,23.302283905419525,3,5.252012262865901,0.000011667380206471789
Qwen3-4B-Instruct-2507,pca,50,1.0,17.99025750455262,3,0.9170716370666468,6.987194066781776e-6
Qwen3-4B-Instruct-2507,pca,50,2.0,17.99025750455262,3,3.4862812616676093,0.00002544457582068353
1 model method window alpha c_star_mean n_seeds kl_p95_mean pmass_mean
2 Llama-3.2-1B-Instruct directional_ablation 20 1.0 5.119636661337771 3 1.1908359713852406 NaN
3 Llama-3.2-1B-Instruct directional_ablation 20 2.0 5.119636661337771 3 5.850264692306519 0.000022955508256927715
4 Llama-3.2-1B-Instruct directional_ablation 50 1.0 4.6095092483978455 3 1.291592478454113 NaN
5 Llama-3.2-1B-Instruct directional_ablation 50 2.0 4.6095092483978455 3 5.11867133140564 0.00004614551000320882
6 Llama-3.2-1B-Instruct mean_diff 20 1.0 3.771688942723309 3 0.9256407611072064 NaN
7 Llama-3.2-1B-Instruct mean_diff 20 2.0 3.771688942723309 3 5.391779696941375 0.00006245920594665222
8 Llama-3.2-1B-Instruct mean_diff 50 1.0 3.861664231693977 3 1.3414980980753899 NaN
9 Llama-3.2-1B-Instruct mean_diff 50 2.0 3.861664231693977 3 4.656074690818786 0.00007268900522335157
10 Llama-3.2-1B-Instruct pca 20 1.0 3.818178678361504 3 0.9329803831875324 NaN
11 Llama-3.2-1B-Instruct pca 20 2.0 3.818178678361504 3 5.620841109752655 0.00026052194389194485
12 Llama-3.2-1B-Instruct pca 50 1.0 3.600290823741986 3 0.9301029246300458 NaN
13 Llama-3.2-1B-Instruct pca 50 2.0 3.600290823741986 3 4.0647015488147735 NaN
14 Qwen2.5-0.5B-Instruct directional_ablation 20 1.0 7.507819866975713 3 0.6075146049261093 0.00005063821326984907
15 Qwen2.5-0.5B-Instruct directional_ablation 20 2.0 7.507819866975713 3 3.166464865207672 0.00018519468194426735
16 Qwen2.5-0.5B-Instruct directional_ablation 50 1.0 7.055664779577401 3 0.5607812261581421 0.000034678878002771604
17 Qwen2.5-0.5B-Instruct directional_ablation 50 2.0 7.055664779577401 3 2.2286340260505675 0.00013751945268516218
18 Qwen2.5-0.5B-Instruct mean_diff 20 1.0 7.588130746747839 3 0.7048790633678437 0.000040864707125365383
19 Qwen2.5-0.5B-Instruct mean_diff 20 2.0 7.588130746747839 3 3.335330218076706 0.00019464152283035218
20 Qwen2.5-0.5B-Instruct mean_diff 50 1.0 7.561536121211781 3 0.7623215705156327 0.00003399881875959016
21 Qwen2.5-0.5B-Instruct mean_diff 50 2.0 7.561536121211781 3 2.8257888650894163 0.00015105884966198408
22 Qwen2.5-0.5B-Instruct pca 20 1.0 8.655517019086593 3 0.9307092409580946 3.041228809275154e-6
23 Qwen2.5-0.5B-Instruct pca 20 2.0 8.655517019086593 3 3.7524219751358032 1.6446400348257838e-7
24 Qwen2.5-0.5B-Instruct pca 50 1.0 8.606126777874907 3 0.8437561804056167 NaN
25 Qwen2.5-0.5B-Instruct pca 50 2.0 8.606126777874907 3 3.1483269047737124 1.4466886552347544e-7
26 Qwen3-4B-Instruct-2507 directional_ablation 20 1.0 25.600000000000005 3 1.3869682106771506 9.215271860500782e-6
27 Qwen3-4B-Instruct-2507 directional_ablation 20 2.0 25.600000000000005 3 7.038443911075592 0.00001822325955023185
28 Qwen3-4B-Instruct-2507 directional_ablation 50 1.0 22.895205490202148 3 1.0069563373062511 NaN
29 Qwen3-4B-Instruct-2507 directional_ablation 50 2.0 22.895205490202148 3 4.545377564039081 NaN
30 Qwen3-4B-Instruct-2507 mean_diff 20 1.0 25.600000000000005 3 0.9436350018950179 4.663814388822024e-6
31 Qwen3-4B-Instruct-2507 mean_diff 20 2.0 25.600000000000005 3 6.434498374164105 0.000016732647531197965
32 Qwen3-4B-Instruct-2507 mean_diff 50 1.0 25.600000000000005 3 0.9753538948745699 NaN
33 Qwen3-4B-Instruct-2507 mean_diff 50 2.0 25.600000000000005 3 5.002368605150841 NaN
34 Qwen3-4B-Instruct-2507 pca 20 1.0 23.302283905419525 3 1.490876998582462 0.00001391104007908428
35 Qwen3-4B-Instruct-2507 pca 20 2.0 23.302283905419525 3 5.252012262865901 0.000011667380206471789
36 Qwen3-4B-Instruct-2507 pca 50 1.0 17.99025750455262 3 0.9170716370666468 6.987194066781776e-6
37 Qwen3-4B-Instruct-2507 pca 50 2.0 17.99025750455262 3 3.4862812616676093 0.00002544457582068353
+38
View File
@@ -0,0 +1,38 @@
| model | method | window | alpha | c_star_mean | n_seeds | kl_p95_mean | pmass_mean |
|:-----------------------|:---------------------|---------:|--------:|--------------:|----------:|--------------:|-------------:|
| Llama-3.2-1B-Instruct | directional_ablation | 20 | 1.000 | 5.120 | 3 | 1.191 | nan |
| Llama-3.2-1B-Instruct | directional_ablation | 20 | 2.000 | 5.120 | 3 | 5.850 | 0.000 |
| Llama-3.2-1B-Instruct | directional_ablation | 50 | 1.000 | 4.610 | 3 | 1.292 | nan |
| Llama-3.2-1B-Instruct | directional_ablation | 50 | 2.000 | 4.610 | 3 | 5.119 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 20 | 1.000 | 3.772 | 3 | 0.926 | nan |
| Llama-3.2-1B-Instruct | mean_diff | 20 | 2.000 | 3.772 | 3 | 5.392 | 0.000 |
| Llama-3.2-1B-Instruct | mean_diff | 50 | 1.000 | 3.862 | 3 | 1.341 | nan |
| Llama-3.2-1B-Instruct | mean_diff | 50 | 2.000 | 3.862 | 3 | 4.656 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 20 | 1.000 | 3.818 | 3 | 0.933 | nan |
| Llama-3.2-1B-Instruct | pca | 20 | 2.000 | 3.818 | 3 | 5.621 | 0.000 |
| Llama-3.2-1B-Instruct | pca | 50 | 1.000 | 3.600 | 3 | 0.930 | nan |
| Llama-3.2-1B-Instruct | pca | 50 | 2.000 | 3.600 | 3 | 4.065 | nan |
| Qwen2.5-0.5B-Instruct | directional_ablation | 20 | 1.000 | 7.508 | 3 | 0.608 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 20 | 2.000 | 7.508 | 3 | 3.166 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 1.000 | 7.056 | 3 | 0.561 | 0.000 |
| Qwen2.5-0.5B-Instruct | directional_ablation | 50 | 2.000 | 7.056 | 3 | 2.229 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 20 | 1.000 | 7.588 | 3 | 0.705 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 20 | 2.000 | 7.588 | 3 | 3.335 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 1.000 | 7.562 | 3 | 0.762 | 0.000 |
| Qwen2.5-0.5B-Instruct | mean_diff | 50 | 2.000 | 7.562 | 3 | 2.826 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 20 | 1.000 | 8.656 | 3 | 0.931 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 20 | 2.000 | 8.656 | 3 | 3.752 | 0.000 |
| Qwen2.5-0.5B-Instruct | pca | 50 | 1.000 | 8.606 | 3 | 0.844 | nan |
| Qwen2.5-0.5B-Instruct | pca | 50 | 2.000 | 8.606 | 3 | 3.148 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 20 | 1.000 | 25.600 | 3 | 1.387 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 20 | 2.000 | 25.600 | 3 | 7.038 | 0.000 |
| Qwen3-4B-Instruct-2507 | directional_ablation | 50 | 1.000 | 22.895 | 3 | 1.007 | nan |
| Qwen3-4B-Instruct-2507 | directional_ablation | 50 | 2.000 | 22.895 | 3 | 4.545 | nan |
| Qwen3-4B-Instruct-2507 | mean_diff | 20 | 1.000 | 25.600 | 3 | 0.944 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 20 | 2.000 | 25.600 | 3 | 6.434 | 0.000 |
| Qwen3-4B-Instruct-2507 | mean_diff | 50 | 1.000 | 25.600 | 3 | 0.975 | nan |
| Qwen3-4B-Instruct-2507 | mean_diff | 50 | 2.000 | 25.600 | 3 | 5.002 | nan |
| Qwen3-4B-Instruct-2507 | pca | 20 | 1.000 | 23.302 | 3 | 1.491 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 20 | 2.000 | 23.302 | 3 | 5.252 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 50 | 1.000 | 17.990 | 3 | 0.917 | 0.000 |
| Qwen3-4B-Instruct-2507 | pca | 50 | 2.000 | 17.990 | 3 | 3.486 | 0.000 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 151 KiB

+2
View File
@@ -0,0 +1,2 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen3.5-0.8B,mean_diff,512,2.0721502863934007,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen3.5-0.8B mean_diff 512 2.0721502863934007 1
+3
View File
@@ -0,0 +1,3 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:--------------|:----------|---------:|---------:|:--------|----------:|:-------|
| Qwen3.5-0.8B | mean_diff | 512 | 2.072 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 253 KiB

@@ -0,0 +1,7 @@
| alpha | n | n_died | n_censored |
|--------:|----:|---------:|-------------:|
| 0 | 8 | 0 | 0 |
| 0.5 | 8 | 0 | 0 |
| 1 | 8 | 0 | 0 |
| 2 | 8 | 8 | 0 |
| 4 | 8 | 8 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 281 KiB

@@ -0,0 +1,7 @@
| alpha | n | n_died | n_censored |
|--------:|----:|---------:|-------------:|
| 0 | 8 | 0 | 0 |
| 0.5 | 8 | 0 | 0 |
| 1 | 8 | 0 | 0 |
| 2 | 8 | 8 | 0 |
| 4 | 8 | 8 | 0 |
@@ -0,0 +1,7 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:-----------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass_eval | 0.800 | 0.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.800 | 0.500 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.800 | 1.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.800 | 2.000 | 8 | 1.000 | 0.410 | 512 |
| pmass_eval | 0.800 | 4.000 | 8 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

@@ -0,0 +1,7 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:-----------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass_eval | 0.950 | 0.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 0.500 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 1.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 2.000 | 8 | 0.586 | 0.321 | 64 |
| pmass_eval | 0.950 | 4.000 | 8 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

@@ -0,0 +1,7 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:-----------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass_eval | 0.950 | 0.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 0.500 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 1.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 2.000 | 8 | 0.586 | 0.321 | 64 |
| pmass_eval | 0.950 | 4.000 | 8 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

@@ -0,0 +1,7 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:-----------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass_eval | 0.950 | 0.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 0.500 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 1.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 2.000 | 8 | 0.586 | 0.321 | 64 |
| pmass_eval | 0.950 | 4.000 | 8 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

+1 -1
View File
@@ -11,7 +11,7 @@ test:
uv run --extra all pytest -q uv run --extra all pytest -q
# Run one (model, method, seed, window) cell end-to-end (calibrate + trajectory + pmass). # Run one (model, method, seed, window) cell end-to-end (calibrate + trajectory + pmass).
cell model="Qwen/Qwen2.5-0.5B-Instruct" method="mean_diff" seed="0" window="50": cell model="Qwen/Qwen3.5-0.8B" method="mean_diff" seed="0" window="50":
uv run --extra all python scripts/run_cell.py \ uv run --extra all python scripts/run_cell.py \
--model {{model}} --method {{method}} --seed {{seed}} --window {{window}} --model {{model}} --method {{method}} --seed {{seed}} --window {{window}}
@@ -0,0 +1 @@
2026-05-06 05:37:01.941 | INFO | __main__:main:185 - model=allenai/OLMo-2-0425-1B n_layers=16 target_layer=9
@@ -0,0 +1,9 @@
{
"c_star": 2.0721502863934007,
"target_kl": 1.0,
"window": 512,
"method": "mean_diff",
"model": "Qwen/Qwen3.5-0.8B",
"seed": 0,
"layer": 14
}
Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

@@ -0,0 +1,2 @@
model_short,method,window,c_mean,c_std,n_seeds,c_cv
Qwen3.5-0.8B,mean_diff,512,2.0721502863934007,,1,
1 model_short method window c_mean c_std n_seeds c_cv
2 Qwen3.5-0.8B mean_diff 512 2.0721502863934007 1
@@ -0,0 +1,3 @@
| model_short | method | window | c_mean | c_std | n_seeds | c_cv |
|:--------------|:----------|---------:|---------:|:--------|----------:|:-------|
| Qwen3.5-0.8B | mean_diff | 512 | 2.072 | | 1 | |
Binary file not shown.

After

Width:  |  Height:  |  Size: 530 KiB

@@ -0,0 +1,10 @@
| alpha | n | n_died | n_censored |
|--------:|----:|---------:|-------------:|
| 0 | 8 | 0 | 0 |
| 0.25 | 8 | 0 | 0 |
| 0.5 | 8 | 0 | 0 |
| 0.75 | 8 | 0 | 0 |
| 1 | 8 | 0 | 0 |
| 1.5 | 8 | 0 | 0 |
| 2 | 8 | 8 | 0 |
| 4 | 8 | 8 | 0 |
@@ -0,0 +1,9 @@
| metric | threshold | alpha | n | S_mid | S_end | t_S<=0.5 |
|:-----------|------------:|--------:|----:|--------:|--------:|-----------:|
| pmass_eval | 0.950 | 0.250 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 0.500 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 0.750 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 1.000 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 1.500 | 8 | 1.000 | 1.000 | |
| pmass_eval | 0.950 | 2.000 | 8 | 0.586 | 0.321 | 64 |
| pmass_eval | 0.950 | 4.000 | 8 | 0.000 | 0.000 | 0 |
Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,9 @@
alpha,coeff,kl_p95,kl_mean,kl_max
0.0,0.0,0.0,0.0,0.0
0.25,0.5180375715983502,0.11461558192968369,0.02780664712190628,0.39140355587005615
0.5,1.0360751431967004,0.37549957633018494,0.09505791962146759,1.7557041645050049
0.75,1.5541127147950506,0.7674782276153564,0.17766396701335907,5.0202531814575195
1.0,2.0721502863934007,0.9213039875030518,0.17482376098632812,6.230146884918213
1.5,3.1082254295901013,1.0336216688156128,0.1986885368824005,10.366641998291016
2.0,4.144300572786801,1.6580771207809448,0.6465433835983276,13.17214298248291
4.0,8.288601145573603,4.413588523864746,4.0899200439453125,19.996070861816406
1 alpha coeff kl_p95 kl_mean kl_max
2 0.0 0.0 0.0 0.0 0.0
3 0.25 0.5180375715983502 0.11461558192968369 0.02780664712190628 0.39140355587005615
4 0.5 1.0360751431967004 0.37549957633018494 0.09505791962146759 1.7557041645050049
5 0.75 1.5541127147950506 0.7674782276153564 0.17766396701335907 5.0202531814575195
6 1.0 2.0721502863934007 0.9213039875030518 0.17482376098632812 6.230146884918213
7 1.5 3.1082254295901013 1.0336216688156128 0.1986885368824005 10.366641998291016
8 2.0 4.144300572786801 1.6580771207809448 0.6465433835983276 13.17214298248291
9 4.0 8.288601145573603 4.413588523864746 4.0899200439453125 19.996070861816406
@@ -0,0 +1,187 @@
2026-05-05 22:15:15.721 | INFO | __main__:main:185 - model=Qwen/Qwen3.5-0.8B n_layers=24 target_layer=14
2026-05-05 22:15:15.750 | INFO | iso_kl_figure.attach:_log_extract_demo:156 - EXPECT: POS and NEG share user_msg + suffix; differ only in system persona; chat template applied; special tokens (e.g. <|im_start|>) visible.
=== EXTRACT demo trace ===
POS[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
Sure, here is the answer:<|im_end|>
---
NEG[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
I cannot help with that.<|im_end|>
=== /EXTRACT ===
2026-05-05 22:15:16.469 | INFO | __main__:main:200 - === calibrate ===
2026-05-05 22:15:19.868 | INFO | iso_kl_figure.calibrate:measure_kl:104 - EXPECT: same prompt under c=0 vs c=+0.4000; both coherent; steered should differ from base but not collapse.
=== CALIBRATE demo trace (T=512) ===
--- BASE (c=0) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is indeed located in **Paris**, France. It stands prominently on the Champ de Mars, overlooking the Seine River and the city's skyline. The tower, designed by Gustave Eiffel, was completed in 1889 and is one of the most recognizable landmarks in the world.<|im_end|>
--- STEER (c=+0.4000) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is located in **Paris**, France. It stands prominently on the Champ de Mars, overlooking the Seine River and the city's skyline. Built in 1889, it is one of the most recognizable landmarks in the world and is a UNESCO World Heritage Site.<|im_end|>
=== /CALIBRATE ===
2026-05-05 22:15:52.592 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.4000 mean=0.011 p50=0.001 p90=0.034 p95=0.049 max=0.226 n=1258
2026-05-05 22:16:30.756 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.8000 mean=0.039 p50=0.003 p90=0.134 p95=0.176 max=1.569 n=1399
2026-05-05 22:17:14.821 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+1.6000 mean=0.169 p50=0.009 p90=0.513 p95=0.844 max=4.077 n=1609
2026-05-05 22:18:10.651 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+3.2000 mean=0.227 p50=0.024 p90=0.503 p95=1.329 max=13.262 n=2048
2026-05-05 22:19:05.810 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+2.0722 mean=0.197 p50=0.005 p90=0.600 p95=0.980 max=10.868 n=2048
2026-05-05 22:19:05.810 | INFO | __main__:main:207 - c_star = +2.0722
2026-05-05 22:19:05.817 | INFO | __main__:main:221 - choice ids: a(true)=[16, 804, 1802, 2434, 2912] b(false)=[15, 867, 3439, 3721, 3913]
2026-05-05 22:19:05.823 | INFO | __main__:main:233 - fork_points (n=14): [0, 1, 2, 3, 5, 8, 13, 23, 38, 64, 108, 181, 304, 512]
2026-05-05 22:19:05.825 | INFO | __main__:main:255 - === eval alpha=0.0 c=+0.0000 ===
2026-05-05 22:21:12.605 | INFO | __main__:main:353 - [debug] alpha=0.0 eval[0] gen_len=512 text[:120]='Thinking Process:\n\n1. **Analyze the Request:**\n * Question: Is the sky blue primarily because of Rayleigh scatteri'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 0 pmass=0.992 p_true=0.295 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 1 pmass=0.992 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 2 pmass=0.992 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 3 pmass=0.991 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 5 pmass=0.992 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 8 pmass=0.992 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 13 pmass=0.991 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 23 pmass=0.988 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 38 pmass=0.991 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t= 64 pmass=0.991 p_true=0.322 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t=108 pmass=0.996 p_true=0.295 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t=181 pmass=0.997 p_true=0.321 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t=304 pmass=0.997 p_true=0.349 argmax='0'
2026-05-05 22:21:12.605 | INFO | __main__:main:355 - t=512 pmass=0.998 p_true=0.469 argmax='0'
2026-05-05 22:22:58.136 | INFO | __main__:main:255 - === eval alpha=0.25 c=+0.5180 ===
2026-05-05 22:25:01.789 | INFO | __main__:main:353 - [debug] alpha=0.25 eval[0] gen_len=512 text[:120]='The user is asking a specific scientific question about the color of the sky.\nThe question is: "Is the sky blue primaril'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 0 pmass=0.997 p_true=0.247 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 1 pmass=0.997 p_true=0.246 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 2 pmass=0.998 p_true=0.246 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 3 pmass=0.998 p_true=0.246 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 5 pmass=0.998 p_true=0.246 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 8 pmass=0.998 p_true=0.246 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 13 pmass=0.998 p_true=0.246 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 23 pmass=0.996 p_true=0.225 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 38 pmass=0.997 p_true=0.224 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t= 64 pmass=0.997 p_true=0.247 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t=108 pmass=0.999 p_true=0.295 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t=181 pmass=0.999 p_true=0.321 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t=304 pmass=0.999 p_true=0.438 argmax='0'
2026-05-05 22:25:01.789 | INFO | __main__:main:355 - t=512 pmass=0.999 p_true=0.438 argmax='0'
2026-05-05 22:26:45.830 | INFO | __main__:main:255 - === eval alpha=0.5 c=+1.0361 ===
2026-05-05 22:28:48.998 | INFO | __main__:main:353 - [debug] alpha=0.5 eval[0] gen_len=512 text[:120]='The user is asking a specific scientific question about the color of the sky.\nThe question is: "Is the sky blue primaril'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 0 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 1 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 2 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 3 pmass=0.997 p_true=0.271 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 5 pmass=0.997 p_true=0.296 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 8 pmass=0.998 p_true=0.296 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 13 pmass=0.998 p_true=0.271 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 23 pmass=0.996 p_true=0.273 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 38 pmass=0.997 p_true=0.297 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t= 64 pmass=0.997 p_true=0.297 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t=108 pmass=0.999 p_true=0.349 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t=181 pmass=0.999 p_true=0.408 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t=304 pmass=0.999 p_true=0.501 argmax='0'
2026-05-05 22:28:48.998 | INFO | __main__:main:355 - t=512 pmass=0.999 p_true=0.532 argmax='1'
2026-05-05 22:30:33.079 | INFO | __main__:main:255 - === eval alpha=0.75 c=+1.5541 ===
2026-05-05 22:32:36.281 | INFO | __main__:main:353 - [debug] alpha=0.75 eval[0] gen_len=512 text[:120]='The question asks if the sky is blue primarily due to Rayleigh scattering of sunlight.\n1. **Analyze the phenomenon**: R'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 0 pmass=0.996 p_true=0.383 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 1 pmass=0.996 p_true=0.412 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 2 pmass=0.997 p_true=0.412 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 3 pmass=0.997 p_true=0.412 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 5 pmass=0.997 p_true=0.411 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 8 pmass=0.997 p_true=0.411 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 13 pmass=0.997 p_true=0.382 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 23 pmass=0.996 p_true=0.414 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 38 pmass=0.997 p_true=0.411 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t= 64 pmass=0.996 p_true=0.412 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t=108 pmass=0.997 p_true=0.382 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t=181 pmass=0.998 p_true=0.504 argmax='0'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t=304 pmass=0.998 p_true=0.819 argmax='1'
2026-05-05 22:32:36.282 | INFO | __main__:main:355 - t=512 pmass=0.997 p_true=0.779 argmax='1'
2026-05-05 22:34:20.396 | INFO | __main__:main:255 - === eval alpha=1.0 c=+2.0722 ===
2026-05-05 22:36:23.953 | INFO | __main__:main:353 - [debug] alpha=1.0 eval[0] gen_len=512 text[:120]='The question asks whether the sky is blue primarily due to Rayleigh scattering of sunlight.\n1. **Analyze the phenomenon'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 0 pmass=0.998 p_true=0.598 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 1 pmass=0.998 p_true=0.567 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 2 pmass=0.998 p_true=0.597 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 3 pmass=0.998 p_true=0.597 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 5 pmass=0.998 p_true=0.627 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 8 pmass=0.998 p_true=0.596 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 13 pmass=0.999 p_true=0.566 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 23 pmass=0.998 p_true=0.568 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 38 pmass=0.999 p_true=0.596 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t= 64 pmass=0.998 p_true=0.596 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t=108 pmass=0.998 p_true=0.567 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t=181 pmass=0.999 p_true=0.595 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t=304 pmass=0.998 p_true=0.596 argmax='1'
2026-05-05 22:36:23.954 | INFO | __main__:main:355 - t=512 pmass=0.998 p_true=0.568 argmax='1'
2026-05-05 22:38:07.891 | INFO | __main__:main:255 - === eval alpha=1.5 c=+3.1082 ===
2026-05-05 22:40:11.261 | INFO | __main__:main:353 - [debug] alpha=1.5 eval[0] gen_len=512 text[:120]="The\nThe question asks about the primary component of the sky's color, specifically the role of Rayleigh scattering in de"
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 0 pmass=0.997 p_true=0.944 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 1 pmass=0.997 p_true=0.936 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 2 pmass=0.997 p_true=0.949 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 3 pmass=0.997 p_true=0.943 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 5 pmass=0.998 p_true=0.955 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 8 pmass=0.998 p_true=0.959 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 13 pmass=0.998 p_true=0.948 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 23 pmass=0.997 p_true=0.949 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 38 pmass=0.998 p_true=0.964 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t= 64 pmass=1.000 p_true=0.999 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t=108 pmass=1.000 p_true=0.998 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t=181 pmass=1.000 p_true=0.998 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t=304 pmass=1.000 p_true=0.998 argmax='1'
2026-05-05 22:40:11.261 | INFO | __main__:main:355 - t=512 pmass=0.999 p_true=0.997 argmax='1'
2026-05-05 22:41:55.232 | INFO | __main__:main:255 - === eval alpha=2.0 c=+4.1443 ===
2026-05-05 22:43:58.581 | INFO | __main__:main:353 - [debug] alpha=2.0 eval[0] gen_len=512 text[:120]='The\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\nThe\n'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 0 pmass=0.972 p_true=0.975 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 1 pmass=0.971 p_true=0.972 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 2 pmass=0.966 p_true=0.974 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 3 pmass=0.970 p_true=0.977 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 5 pmass=0.970 p_true=0.978 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 8 pmass=0.971 p_true=0.978 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 13 pmass=0.965 p_true=0.975 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 23 pmass=0.953 p_true=0.977 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 38 pmass=0.952 p_true=0.977 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t= 64 pmass=0.935 p_true=0.976 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t=108 pmass=0.911 p_true=0.975 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t=181 pmass=0.905 p_true=0.975 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t=304 pmass=0.852 p_true=0.975 argmax='1'
2026-05-05 22:43:58.582 | INFO | __main__:main:355 - t=512 pmass=0.798 p_true=0.977 argmax='1'
2026-05-05 22:45:42.239 | INFO | __main__:main:255 - === eval alpha=4.0 c=+8.2886 ===
2026-05-05 22:47:45.699 | INFO | __main__:main:353 - [debug] alpha=4.0 eval[0] gen_len=512 text[:120]='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 0 pmass=0.070 p_true=0.998 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 1 pmass=0.069 p_true=0.997 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 2 pmass=0.065 p_true=0.997 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 3 pmass=0.069 p_true=0.998 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 5 pmass=0.063 p_true=0.997 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 8 pmass=0.063 p_true=0.997 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 13 pmass=0.056 p_true=0.997 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 23 pmass=0.048 p_true=0.997 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 38 pmass=0.039 p_true=0.997 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t= 64 pmass=0.033 p_true=0.996 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t=108 pmass=0.025 p_true=0.996 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t=181 pmass=0.018 p_true=0.996 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t=304 pmass=0.016 p_true=0.996 argmax='\n'
2026-05-05 22:47:45.699 | INFO | __main__:main:355 - t=512 pmass=0.012 p_true=0.995 argmax='\n'
2026-05-05 22:49:29.685 | INFO | __main__:main:404 - rendering single-run figures
2026-05-05 22:49:37.619 | INFO | __main__:main:406 - DONE -> outputs_qwen35_w512_dense/Qwen3.5-0.8B_mean_diff_s0_w512_dense
File diff suppressed because it is too large Load Diff
@@ -0,0 +1 @@
/media/wassname/SGIronWolf/projects5/2026/lite/iso-kl-figure/outputs_qwen35_w512_dense/Qwen3.5-0.8B_mean_diff_s0_w512_dense
@@ -0,0 +1,9 @@
{
"c_star": 2.467701455199341,
"target_kl": 1.0,
"window": 512,
"method": "directional_ablation",
"model": "Qwen/Qwen3.5-0.8B",
"seed": 0,
"layer": 14
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,6 @@
alpha,coeff,kl_p95,kl_mean,kl_max
0.0,0.0,0.006637596059590578,0.0014842627570033073,0.03870811313390732
0.5,1.2338507275996704,0.5223139524459839,0.12581032514572144,2.0511815547943115
1.0,2.467701455199341,1.1374709606170654,0.20074087381362915,7.905111789703369
2.0,4.935402910398682,7.937441349029541,1.725125789642334,19.265193939208984
4.0,9.870805820797363,12.663122177124023,10.11868667602539,18.7987003326416
1 alpha coeff kl_p95 kl_mean kl_max
2 0.0 0.0 0.006637596059590578 0.0014842627570033073 0.03870811313390732
3 0.5 1.2338507275996704 0.5223139524459839 0.12581032514572144 2.0511815547943115
4 1.0 2.467701455199341 1.1374709606170654 0.20074087381362915 7.905111789703369
5 2.0 4.935402910398682 7.937441349029541 1.725125789642334 19.265193939208984
6 4.0 9.870805820797363 12.663122177124023 10.11868667602539 18.7987003326416
@@ -0,0 +1,138 @@
2026-05-05 21:51:27.572 | INFO | __main__:main:181 - model=Qwen/Qwen3.5-0.8B n_layers=24 target_layer=14
2026-05-05 21:51:27.599 | INFO | iso_kl_figure.attach:_log_extract_demo:156 - EXPECT: POS and NEG share user_msg + suffix; differ only in system persona; chat template applied; special tokens (e.g. <|im_start|>) visible.
=== EXTRACT demo trace ===
POS[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
Sure, here is the answer:<|im_end|>
---
NEG[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
I cannot help with that.<|im_end|>
=== /EXTRACT ===
2026-05-05 21:51:28.339 | INFO | __main__:main:196 - === calibrate ===
2026-05-05 21:51:32.599 | INFO | iso_kl_figure.calibrate:measure_kl:104 - EXPECT: same prompt under c=0 vs c=+0.4000; both coherent; steered should differ from base but not collapse.
=== CALIBRATE demo trace (T=512) ===
--- BASE (c=0) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is indeed located in **Paris**, France. It stands prominently on the Champ de Mars, overlooking the Seine River and the city's skyline. The tower, designed by Gustave Eiffel, was completed in 1889 and is one of the most recognizable landmarks in the world.<|im_end|>
--- STEER (c=+0.4000) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is located in **Paris**, France. It stands on the Champ de Mars, a large park in the city center, and is one of the most recognizable landmarks in the world. Built in 1889, it is a wrought-iron lattice tower with a total height of 332 meters (1,084 feet).<|im_end|>
=== /CALIBRATE ===
2026-05-05 21:52:06.874 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.4000 mean=0.008 p50=0.000 p90=0.028 p95=0.040 max=0.403 n=1288
2026-05-05 21:52:45.329 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.8000 mean=0.039 p50=0.004 p90=0.120 p95=0.170 max=1.387 n=1393
2026-05-05 21:53:30.604 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+1.6000 mean=0.151 p50=0.016 p90=0.471 p95=0.639 max=3.754 n=1641
2026-05-05 21:54:28.452 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+3.2000 mean=0.248 p50=0.011 p90=0.715 p95=1.308 max=13.370 n=2048
2026-05-05 21:55:24.821 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+2.4677 mean=0.200 p50=0.007 p90=0.623 p95=1.031 max=11.659 n=2048
2026-05-05 21:55:24.821 | INFO | __main__:main:203 - c_star = +2.4677
2026-05-05 21:55:24.827 | INFO | __main__:main:217 - choice ids: a(true)=[16, 804, 1802, 2434, 2912] b(false)=[15, 867, 3439, 3721, 3913]
2026-05-05 21:55:24.835 | INFO | __main__:main:229 - fork_points (n=14): [0, 1, 2, 3, 5, 8, 13, 23, 38, 64, 108, 181, 304, 512]
2026-05-05 21:55:24.837 | INFO | __main__:main:251 - === eval alpha=0.0 c=+0.0000 ===
2026-05-05 21:57:35.043 | INFO | __main__:main:349 - [debug] alpha=0.0 eval[0] gen_len=512 text[:120]='Thinking Process:\n\n1. **Analyze the Request:**\n * Question: Is the sky blue primarily because of Rayleigh scatteri'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 0 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 1 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 2 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 3 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 5 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 8 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 13 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 23 pmass=0.994 p_true=0.203 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 38 pmass=0.996 p_true=0.203 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t= 64 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t=108 pmass=0.998 p_true=0.202 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t=181 pmass=0.998 p_true=0.223 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t=304 pmass=0.998 p_true=0.223 argmax='0'
2026-05-05 21:57:35.043 | INFO | __main__:main:351 - t=512 pmass=0.998 p_true=0.245 argmax='0'
2026-05-05 21:59:22.309 | INFO | __main__:main:251 - === eval alpha=0.5 c=+1.2339 ===
2026-05-05 22:01:28.360 | INFO | __main__:main:349 - [debug] alpha=0.5 eval[0] gen_len=512 text[:120]='The user is asking about the primary cause of the blue color of the sky, specifically asking if it is due to Rayleigh sc'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 0 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 1 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 2 pmass=0.997 p_true=0.297 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 3 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 5 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 8 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 13 pmass=0.998 p_true=0.272 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 23 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 38 pmass=0.998 p_true=0.272 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t= 64 pmass=0.999 p_true=0.270 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t=108 pmass=1.000 p_true=0.321 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t=181 pmass=1.000 p_true=0.321 argmax='0'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t=304 pmass=1.000 p_true=0.531 argmax='1'
2026-05-05 22:01:28.360 | INFO | __main__:main:351 - t=512 pmass=1.000 p_true=0.531 argmax='1'
2026-05-05 22:03:16.537 | INFO | __main__:main:251 - === eval alpha=1.0 c=+2.4677 ===
2026-05-05 22:05:25.069 | INFO | __main__:main:349 - [debug] alpha=1.0 eval[0] gen_len=512 text[:120]='The question asks whether the sky is primarily blue due to Rayleigh scattering of sunlight.\n1. **Analyze the primary me'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 0 pmass=0.996 p_true=0.789 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 1 pmass=0.996 p_true=0.788 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 2 pmass=0.996 p_true=0.786 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 3 pmass=0.996 p_true=0.787 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 5 pmass=0.997 p_true=0.825 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 8 pmass=0.997 p_true=0.823 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 13 pmass=0.998 p_true=0.783 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 23 pmass=0.996 p_true=0.765 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 38 pmass=0.998 p_true=0.823 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t= 64 pmass=0.997 p_true=0.824 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t=108 pmass=0.996 p_true=0.826 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t=181 pmass=0.997 p_true=0.897 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t=304 pmass=0.996 p_true=0.874 argmax='1'
2026-05-05 22:05:25.069 | INFO | __main__:main:351 - t=512 pmass=0.994 p_true=0.877 argmax='1'
2026-05-05 22:07:11.768 | INFO | __main__:main:251 - === eval alpha=2.0 c=+4.9354 ===
2026-05-05 22:09:21.712 | INFO | __main__:main:349 - [debug] alpha=2.0 eval[0] gen_len=512 text[:120]='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 0 pmass=0.845 p_true=0.994 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 1 pmass=0.787 p_true=0.992 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 2 pmass=0.811 p_true=0.993 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 3 pmass=0.813 p_true=0.994 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 5 pmass=0.787 p_true=0.993 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 8 pmass=0.787 p_true=0.994 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 13 pmass=0.750 p_true=0.993 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 23 pmass=0.642 p_true=0.993 argmax='1'
2026-05-05 22:09:21.712 | INFO | __main__:main:351 - t= 38 pmass=0.592 p_true=0.994 argmax='1'
2026-05-05 22:09:21.713 | INFO | __main__:main:351 - t= 64 pmass=0.507 p_true=0.994 argmax='1'
2026-05-05 22:09:21.713 | INFO | __main__:main:351 - t=108 pmass=0.337 p_true=0.993 argmax='\n'
2026-05-05 22:09:21.713 | INFO | __main__:main:351 - t=181 pmass=0.312 p_true=0.994 argmax='\n'
2026-05-05 22:09:21.713 | INFO | __main__:main:351 - t=304 pmass=0.174 p_true=0.993 argmax='\n'
2026-05-05 22:09:21.713 | INFO | __main__:main:351 - t=512 pmass=0.110 p_true=0.991 argmax='\n'
2026-05-05 22:11:09.324 | INFO | __main__:main:251 - === eval alpha=4.0 c=+9.8708 ===
2026-05-05 22:13:19.579 | INFO | __main__:main:349 - [debug] alpha=4.0 eval[0] gen_len=512 text[:120]=' \n \n\n\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n\n \n\n\n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n'
2026-05-05 22:13:19.579 | INFO | __main__:main:351 - t= 0 pmass=0.011 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.579 | INFO | __main__:main:351 - t= 1 pmass=0.010 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.579 | INFO | __main__:main:351 - t= 2 pmass=0.011 p_true=0.993 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t= 3 pmass=0.011 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t= 5 pmass=0.010 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t= 8 pmass=0.009 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t= 13 pmass=0.008 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t= 23 pmass=0.008 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t= 38 pmass=0.007 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t= 64 pmass=0.005 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t=108 pmass=0.004 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t=181 pmass=0.003 p_true=0.992 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t=304 pmass=0.003 p_true=0.991 argmax='\n'
2026-05-05 22:13:19.580 | INFO | __main__:main:351 - t=512 pmass=0.002 p_true=0.991 argmax='\n'
2026-05-05 22:15:06.464 | INFO | __main__:main:391 - DONE -> outputs_qwen35_w512_v3/Qwen3.5-0.8B_directional_ablation_s0_w512
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,9 @@
{
"c_star": 2.467701455199341,
"target_kl": 1.0,
"window": 512,
"method": "directional_ablation",
"model": "Qwen/Qwen3.5-0.8B",
"seed": 1,
"layer": 14
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,6 @@
alpha,coeff,kl_p95,kl_mean,kl_max
0.0,0.0,0.006637596059590578,0.0014842627570033073,0.03870811313390732
0.5,1.2338507275996704,0.5223139524459839,0.12581032514572144,2.0511815547943115
1.0,2.467701455199341,1.1374709606170654,0.20074087381362915,7.905111789703369
2.0,4.935402910398682,7.937441349029541,1.725125789642334,19.265193939208984
4.0,9.870805820797363,12.663122177124023,10.11868667602539,18.7987003326416
1 alpha coeff kl_p95 kl_mean kl_max
2 0.0 0.0 0.006637596059590578 0.0014842627570033073 0.03870811313390732
3 0.5 1.2338507275996704 0.5223139524459839 0.12581032514572144 2.0511815547943115
4 1.0 2.467701455199341 1.1374709606170654 0.20074087381362915 7.905111789703369
5 2.0 4.935402910398682 7.937441349029541 1.725125789642334 19.265193939208984
6 4.0 9.870805820797363 12.663122177124023 10.11868667602539 18.7987003326416
@@ -0,0 +1,138 @@
2026-05-05 22:49:46.301 | INFO | __main__:main:185 - model=Qwen/Qwen3.5-0.8B n_layers=24 target_layer=14
2026-05-05 22:49:46.323 | INFO | iso_kl_figure.attach:_log_extract_demo:156 - EXPECT: POS and NEG share user_msg + suffix; differ only in system persona; chat template applied; special tokens (e.g. <|im_start|>) visible.
=== EXTRACT demo trace ===
POS[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
Sure, here is the answer:<|im_end|>
---
NEG[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
I cannot help with that.<|im_end|>
=== /EXTRACT ===
2026-05-05 22:49:47.007 | INFO | __main__:main:200 - === calibrate ===
2026-05-05 22:49:50.904 | INFO | iso_kl_figure.calibrate:measure_kl:104 - EXPECT: same prompt under c=0 vs c=+0.4000; both coherent; steered should differ from base but not collapse.
=== CALIBRATE demo trace (T=512) ===
--- BASE (c=0) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is indeed located in **Paris**, France. It stands prominently on the Champ de Mars, overlooking the Seine River and the city's skyline. The tower, designed by Gustave Eiffel, was completed in 1889 and is one of the most recognizable landmarks in the world.<|im_end|>
--- STEER (c=+0.4000) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is located in **Paris**, France. It stands on the Champ de Mars, a large park in the city center, and is one of the most recognizable landmarks in the world. Built in 1889, it is a wrought-iron lattice tower with a total height of 332 meters (1,084 feet).<|im_end|>
=== /CALIBRATE ===
2026-05-05 22:50:24.300 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.4000 mean=0.008 p50=0.000 p90=0.028 p95=0.040 max=0.403 n=1288
2026-05-05 22:51:02.204 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.8000 mean=0.039 p50=0.004 p90=0.120 p95=0.170 max=1.387 n=1393
2026-05-05 22:51:46.673 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+1.6000 mean=0.151 p50=0.016 p90=0.471 p95=0.639 max=3.754 n=1641
2026-05-05 22:52:41.965 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+3.2000 mean=0.248 p50=0.011 p90=0.715 p95=1.308 max=13.370 n=2048
2026-05-05 22:53:37.214 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+2.4677 mean=0.200 p50=0.007 p90=0.623 p95=1.031 max=11.659 n=2048
2026-05-05 22:53:37.215 | INFO | __main__:main:207 - c_star = +2.4677
2026-05-05 22:53:37.221 | INFO | __main__:main:221 - choice ids: a(true)=[16, 804, 1802, 2434, 2912] b(false)=[15, 867, 3439, 3721, 3913]
2026-05-05 22:53:37.227 | INFO | __main__:main:233 - fork_points (n=14): [0, 1, 2, 3, 5, 8, 13, 23, 38, 64, 108, 181, 304, 512]
2026-05-05 22:53:37.230 | INFO | __main__:main:255 - === eval alpha=0.0 c=+0.0000 ===
2026-05-05 22:55:42.909 | INFO | __main__:main:353 - [debug] alpha=0.0 eval[0] gen_len=512 text[:120]='Thinking Process:\n\n1. **Analyze the Request:**\n * Question: Is the sky blue primarily because of Rayleigh scatteri'
2026-05-05 22:55:42.909 | INFO | __main__:main:355 - t= 0 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 1 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 2 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 3 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 5 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 8 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 13 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 23 pmass=0.994 p_true=0.203 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 38 pmass=0.996 p_true=0.203 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t= 64 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t=108 pmass=0.998 p_true=0.202 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t=181 pmass=0.998 p_true=0.223 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t=304 pmass=0.998 p_true=0.223 argmax='0'
2026-05-05 22:55:42.910 | INFO | __main__:main:355 - t=512 pmass=0.998 p_true=0.245 argmax='0'
2026-05-05 22:57:29.119 | INFO | __main__:main:255 - === eval alpha=0.5 c=+1.2339 ===
2026-05-05 22:59:35.075 | INFO | __main__:main:353 - [debug] alpha=0.5 eval[0] gen_len=512 text[:120]='The user is asking about the primary cause of the blue color of the sky, specifically asking if it is due to Rayleigh sc'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 0 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 1 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 2 pmass=0.997 p_true=0.297 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 3 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 5 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 8 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 13 pmass=0.998 p_true=0.272 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 23 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 38 pmass=0.998 p_true=0.272 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t= 64 pmass=0.999 p_true=0.270 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t=108 pmass=1.000 p_true=0.321 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t=181 pmass=1.000 p_true=0.321 argmax='0'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t=304 pmass=1.000 p_true=0.531 argmax='1'
2026-05-05 22:59:35.075 | INFO | __main__:main:355 - t=512 pmass=1.000 p_true=0.531 argmax='1'
2026-05-05 23:01:21.352 | INFO | __main__:main:255 - === eval alpha=1.0 c=+2.4677 ===
2026-05-05 23:03:27.678 | INFO | __main__:main:353 - [debug] alpha=1.0 eval[0] gen_len=512 text[:120]='The question asks whether the sky is primarily blue due to Rayleigh scattering of sunlight.\n1. **Analyze the primary me'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 0 pmass=0.996 p_true=0.789 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 1 pmass=0.996 p_true=0.788 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 2 pmass=0.996 p_true=0.786 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 3 pmass=0.996 p_true=0.787 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 5 pmass=0.997 p_true=0.825 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 8 pmass=0.997 p_true=0.823 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 13 pmass=0.998 p_true=0.783 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 23 pmass=0.996 p_true=0.765 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 38 pmass=0.998 p_true=0.823 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t= 64 pmass=0.997 p_true=0.824 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t=108 pmass=0.996 p_true=0.826 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t=181 pmass=0.997 p_true=0.897 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t=304 pmass=0.996 p_true=0.874 argmax='1'
2026-05-05 23:03:27.679 | INFO | __main__:main:355 - t=512 pmass=0.994 p_true=0.877 argmax='1'
2026-05-05 23:05:13.383 | INFO | __main__:main:255 - === eval alpha=2.0 c=+4.9354 ===
2026-05-05 23:07:18.690 | INFO | __main__:main:353 - [debug] alpha=2.0 eval[0] gen_len=512 text[:120]='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 0 pmass=0.845 p_true=0.994 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 1 pmass=0.787 p_true=0.992 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 2 pmass=0.811 p_true=0.993 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 3 pmass=0.813 p_true=0.994 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 5 pmass=0.787 p_true=0.993 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 8 pmass=0.787 p_true=0.994 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 13 pmass=0.750 p_true=0.993 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 23 pmass=0.642 p_true=0.993 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 38 pmass=0.592 p_true=0.994 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t= 64 pmass=0.507 p_true=0.994 argmax='1'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t=108 pmass=0.337 p_true=0.993 argmax='\n'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t=181 pmass=0.312 p_true=0.994 argmax='\n'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t=304 pmass=0.174 p_true=0.993 argmax='\n'
2026-05-05 23:07:18.690 | INFO | __main__:main:355 - t=512 pmass=0.110 p_true=0.991 argmax='\n'
2026-05-05 23:09:04.342 | INFO | __main__:main:255 - === eval alpha=4.0 c=+9.8708 ===
2026-05-05 23:11:09.540 | INFO | __main__:main:353 - [debug] alpha=4.0 eval[0] gen_len=512 text[:120]=' \n \n\n\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n\n \n\n\n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 0 pmass=0.011 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 1 pmass=0.010 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 2 pmass=0.011 p_true=0.993 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 3 pmass=0.011 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 5 pmass=0.010 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 8 pmass=0.009 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 13 pmass=0.008 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 23 pmass=0.008 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 38 pmass=0.007 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t= 64 pmass=0.005 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t=108 pmass=0.004 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t=181 pmass=0.003 p_true=0.992 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t=304 pmass=0.003 p_true=0.991 argmax='\n'
2026-05-05 23:11:09.540 | INFO | __main__:main:355 - t=512 pmass=0.002 p_true=0.991 argmax='\n'
2026-05-05 23:12:55.880 | INFO | __main__:main:406 - DONE -> outputs_qwen35_w512_v3/Qwen3.5-0.8B_directional_ablation_s1_w512
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,9 @@
{
"c_star": 2.467701455199341,
"target_kl": 1.0,
"window": 512,
"method": "directional_ablation",
"model": "Qwen/Qwen3.5-0.8B",
"seed": 2,
"layer": 14
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,6 @@
alpha,coeff,kl_p95,kl_mean,kl_max
0.0,0.0,0.006637596059590578,0.0014842627570033073,0.03870811313390732
0.5,1.2338507275996704,0.5223139524459839,0.12581032514572144,2.0511815547943115
1.0,2.467701455199341,1.1374709606170654,0.20074087381362915,7.905111789703369
2.0,4.935402910398682,7.937441349029541,1.725125789642334,19.265193939208984
4.0,9.870805820797363,12.663122177124023,10.11868667602539,18.7987003326416
1 alpha coeff kl_p95 kl_mean kl_max
2 0.0 0.0 0.006637596059590578 0.0014842627570033073 0.03870811313390732
3 0.5 1.2338507275996704 0.5223139524459839 0.12581032514572144 2.0511815547943115
4 1.0 2.467701455199341 1.1374709606170654 0.20074087381362915 7.905111789703369
5 2.0 4.935402910398682 7.937441349029541 1.725125789642334 19.265193939208984
6 4.0 9.870805820797363 12.663122177124023 10.11868667602539 18.7987003326416
@@ -0,0 +1,138 @@
2026-05-05 23:13:04.647 | INFO | __main__:main:185 - model=Qwen/Qwen3.5-0.8B n_layers=24 target_layer=14
2026-05-05 23:13:04.670 | INFO | iso_kl_figure.attach:_log_extract_demo:156 - EXPECT: POS and NEG share user_msg + suffix; differ only in system persona; chat template applied; special tokens (e.g. <|im_start|>) visible.
=== EXTRACT demo trace ===
POS[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
Sure, here is the answer:<|im_end|>
---
NEG[0]:
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
I cannot help with that.<|im_end|>
=== /EXTRACT ===
2026-05-05 23:13:05.352 | INFO | __main__:main:200 - === calibrate ===
2026-05-05 23:13:09.068 | INFO | iso_kl_figure.calibrate:measure_kl:104 - EXPECT: same prompt under c=0 vs c=+0.4000; both coherent; steered should differ from base but not collapse.
=== CALIBRATE demo trace (T=512) ===
--- BASE (c=0) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is indeed located in **Paris**, France. It stands prominently on the Champ de Mars, overlooking the Seine River and the city's skyline. The tower, designed by Gustave Eiffel, was completed in 1889 and is one of the most recognizable landmarks in the world.<|im_end|>
--- STEER (c=+0.4000) ---
<|im_start|>user
The eiffel tower is in Paris.<|im_end|>
<|im_start|>assistant
<think>
</think>
The Eiffel Tower is located in **Paris**, France. It stands on the Champ de Mars, a large park in the city center, and is one of the most recognizable landmarks in the world. Built in 1889, it is a wrought-iron lattice tower with a total height of 332 meters (1,084 feet).<|im_end|>
=== /CALIBRATE ===
2026-05-05 23:13:41.473 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.4000 mean=0.008 p50=0.000 p90=0.028 p95=0.040 max=0.403 n=1288
2026-05-05 23:14:18.724 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+0.8000 mean=0.039 p50=0.004 p90=0.120 p95=0.170 max=1.387 n=1393
2026-05-05 23:15:02.226 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+1.6000 mean=0.151 p50=0.016 p90=0.471 p95=0.639 max=3.754 n=1641
2026-05-05 23:15:56.243 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+3.2000 mean=0.248 p50=0.011 p90=0.715 p95=1.308 max=13.370 n=2048
2026-05-05 23:16:50.089 | INFO | iso_kl_figure.calibrate:eval_at:183 - c=+2.4677 mean=0.200 p50=0.007 p90=0.623 p95=1.031 max=11.659 n=2048
2026-05-05 23:16:50.090 | INFO | __main__:main:207 - c_star = +2.4677
2026-05-05 23:16:50.096 | INFO | __main__:main:221 - choice ids: a(true)=[16, 804, 1802, 2434, 2912] b(false)=[15, 867, 3439, 3721, 3913]
2026-05-05 23:16:50.101 | INFO | __main__:main:233 - fork_points (n=14): [0, 1, 2, 3, 5, 8, 13, 23, 38, 64, 108, 181, 304, 512]
2026-05-05 23:16:50.104 | INFO | __main__:main:255 - === eval alpha=0.0 c=+0.0000 ===
2026-05-05 23:18:52.979 | INFO | __main__:main:353 - [debug] alpha=0.0 eval[0] gen_len=512 text[:120]='Thinking Process:\n\n1. **Analyze the Request:**\n * Question: Is the sky blue primarily because of Rayleigh scatteri'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 0 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 1 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 2 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 3 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 5 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 8 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 13 pmass=0.995 p_true=0.203 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 23 pmass=0.994 p_true=0.203 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 38 pmass=0.996 p_true=0.203 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t= 64 pmass=0.996 p_true=0.223 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t=108 pmass=0.998 p_true=0.202 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t=181 pmass=0.998 p_true=0.223 argmax='0'
2026-05-05 23:18:52.979 | INFO | __main__:main:355 - t=304 pmass=0.998 p_true=0.223 argmax='0'
2026-05-05 23:18:52.980 | INFO | __main__:main:355 - t=512 pmass=0.998 p_true=0.245 argmax='0'
2026-05-05 23:20:37.045 | INFO | __main__:main:255 - === eval alpha=0.5 c=+1.2339 ===
2026-05-05 23:22:40.196 | INFO | __main__:main:353 - [debug] alpha=0.5 eval[0] gen_len=512 text[:120]='The user is asking about the primary cause of the blue color of the sky, specifically asking if it is due to Rayleigh sc'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 0 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 1 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 2 pmass=0.997 p_true=0.297 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 3 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 5 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 8 pmass=0.998 p_true=0.297 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 13 pmass=0.998 p_true=0.272 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 23 pmass=0.997 p_true=0.272 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 38 pmass=0.998 p_true=0.272 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t= 64 pmass=0.999 p_true=0.270 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t=108 pmass=1.000 p_true=0.321 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t=181 pmass=1.000 p_true=0.321 argmax='0'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t=304 pmass=1.000 p_true=0.531 argmax='1'
2026-05-05 23:22:40.196 | INFO | __main__:main:355 - t=512 pmass=1.000 p_true=0.531 argmax='1'
2026-05-05 23:24:24.369 | INFO | __main__:main:255 - === eval alpha=1.0 c=+2.4677 ===
2026-05-05 23:26:28.000 | INFO | __main__:main:353 - [debug] alpha=1.0 eval[0] gen_len=512 text[:120]='The question asks whether the sky is primarily blue due to Rayleigh scattering of sunlight.\n1. **Analyze the primary me'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 0 pmass=0.996 p_true=0.789 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 1 pmass=0.996 p_true=0.788 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 2 pmass=0.996 p_true=0.786 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 3 pmass=0.996 p_true=0.787 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 5 pmass=0.997 p_true=0.825 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 8 pmass=0.997 p_true=0.823 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 13 pmass=0.998 p_true=0.783 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 23 pmass=0.996 p_true=0.765 argmax='1'
2026-05-05 23:26:28.000 | INFO | __main__:main:355 - t= 38 pmass=0.998 p_true=0.823 argmax='1'
2026-05-05 23:26:28.001 | INFO | __main__:main:355 - t= 64 pmass=0.997 p_true=0.824 argmax='1'
2026-05-05 23:26:28.001 | INFO | __main__:main:355 - t=108 pmass=0.996 p_true=0.826 argmax='1'
2026-05-05 23:26:28.001 | INFO | __main__:main:355 - t=181 pmass=0.997 p_true=0.897 argmax='1'
2026-05-05 23:26:28.001 | INFO | __main__:main:355 - t=304 pmass=0.996 p_true=0.874 argmax='1'
2026-05-05 23:26:28.001 | INFO | __main__:main:355 - t=512 pmass=0.994 p_true=0.877 argmax='1'
2026-05-05 23:28:11.954 | INFO | __main__:main:255 - === eval alpha=2.0 c=+4.9354 ===
2026-05-05 23:30:15.547 | INFO | __main__:main:353 - [debug] alpha=2.0 eval[0] gen_len=512 text[:120]='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'
2026-05-05 23:30:15.547 | INFO | __main__:main:355 - t= 0 pmass=0.845 p_true=0.994 argmax='1'
2026-05-05 23:30:15.547 | INFO | __main__:main:355 - t= 1 pmass=0.787 p_true=0.992 argmax='1'
2026-05-05 23:30:15.547 | INFO | __main__:main:355 - t= 2 pmass=0.811 p_true=0.993 argmax='1'
2026-05-05 23:30:15.547 | INFO | __main__:main:355 - t= 3 pmass=0.813 p_true=0.994 argmax='1'
2026-05-05 23:30:15.547 | INFO | __main__:main:355 - t= 5 pmass=0.787 p_true=0.993 argmax='1'
2026-05-05 23:30:15.547 | INFO | __main__:main:355 - t= 8 pmass=0.787 p_true=0.994 argmax='1'
2026-05-05 23:30:15.547 | INFO | __main__:main:355 - t= 13 pmass=0.750 p_true=0.993 argmax='1'
2026-05-05 23:30:15.548 | INFO | __main__:main:355 - t= 23 pmass=0.642 p_true=0.993 argmax='1'
2026-05-05 23:30:15.548 | INFO | __main__:main:355 - t= 38 pmass=0.592 p_true=0.994 argmax='1'
2026-05-05 23:30:15.548 | INFO | __main__:main:355 - t= 64 pmass=0.507 p_true=0.994 argmax='1'
2026-05-05 23:30:15.548 | INFO | __main__:main:355 - t=108 pmass=0.337 p_true=0.993 argmax='\n'
2026-05-05 23:30:15.548 | INFO | __main__:main:355 - t=181 pmass=0.312 p_true=0.994 argmax='\n'
2026-05-05 23:30:15.548 | INFO | __main__:main:355 - t=304 pmass=0.174 p_true=0.993 argmax='\n'
2026-05-05 23:30:15.548 | INFO | __main__:main:355 - t=512 pmass=0.110 p_true=0.991 argmax='\n'
2026-05-05 23:31:59.456 | INFO | __main__:main:255 - === eval alpha=4.0 c=+9.8708 ===
2026-05-05 23:34:03.208 | INFO | __main__:main:353 - [debug] alpha=4.0 eval[0] gen_len=512 text[:120]=' \n \n\n\n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n \n\n\n\n \n\n\n \n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 0 pmass=0.011 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 1 pmass=0.010 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 2 pmass=0.011 p_true=0.993 argmax='\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 3 pmass=0.011 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 5 pmass=0.010 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 8 pmass=0.009 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 13 pmass=0.008 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.208 | INFO | __main__:main:355 - t= 23 pmass=0.008 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.209 | INFO | __main__:main:355 - t= 38 pmass=0.007 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.209 | INFO | __main__:main:355 - t= 64 pmass=0.005 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.209 | INFO | __main__:main:355 - t=108 pmass=0.004 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.209 | INFO | __main__:main:355 - t=181 pmass=0.003 p_true=0.992 argmax='\n'
2026-05-05 23:34:03.209 | INFO | __main__:main:355 - t=304 pmass=0.003 p_true=0.991 argmax='\n'
2026-05-05 23:34:03.209 | INFO | __main__:main:355 - t=512 pmass=0.002 p_true=0.991 argmax='\n'
2026-05-05 23:35:47.209 | INFO | __main__:main:406 - DONE -> outputs_qwen35_w512_v3/Qwen3.5-0.8B_directional_ablation_s2_w512
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,9 @@
{
"c_star": 2.0721502863934007,
"target_kl": 1.0,
"window": 512,
"method": "mean_diff",
"model": "Qwen/Qwen3.5-0.8B",
"seed": 0,
"layer": 14
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More