mirror of
https://github.com/wassname/Brukino_AntiPaSTO_Appetizer.git
synced 2026-06-27 16:58:47 +08:00
Rename project to Brukino's AntiPaSTO Appetizer
This commit is contained in:
+20
-12
@@ -2,10 +2,10 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2dc7c826",
|
||||
"id": "4f22075e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Guided CoT Eval & Frenet-Serret Curvature\n",
|
||||
"# Brukino's AntiPaSTO Appetizer: Guided CoT Eval & Frenet-Serret Curvature\n",
|
||||
"\n",
|
||||
"Testing if $\\kappa$ spikes late in the Chain of Thought when the model's criterion shifts.\n",
|
||||
"*Note: Using `Qwen2.5-0.5B-Instruct` as `Qwen3.5-0.8B` is not publicly available on HuggingFace.*\n"
|
||||
@@ -14,7 +14,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "11ff7ad3",
|
||||
"id": "fdfdeb3c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -32,13 +32,13 @@
|
||||
"DATASET_SPLIT = \"honesty_eval\"\n",
|
||||
"DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
|
||||
"N_THINK_TOKENS = 32\n",
|
||||
"NUM_EXAMPLES = 5 \n"
|
||||
"NUM_EXAMPLES = 5 "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bf833680",
|
||||
"id": "fb3c4b88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -59,13 +59,14 @@
|
||||
" norm_dd_gamma = torch.norm(dd_gamma, dim=1)\n",
|
||||
" \n",
|
||||
" kappa = norm_dd_gamma / (norm_d_gamma ** 3 + 1e-12)\n",
|
||||
" return kappa\n"
|
||||
" return kappa\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "227501af",
|
||||
"id": "2e52f347",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -110,14 +111,17 @@
|
||||
" \"logratio\": (p_yes - p_no).item(),\n",
|
||||
" \"kappa_trajectory\": compute_curvature(cot_hiddens).cpu().numpy(),\n",
|
||||
" \"generated_text\": tokenizer.decode(generated_ids, skip_special_tokens=True)\n",
|
||||
" }\n"
|
||||
" }\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7cea1129",
|
||||
"metadata": {},
|
||||
"id": "dc78efbb",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load model and data\n",
|
||||
@@ -161,11 +165,15 @@
|
||||
"plt.ylabel(r\"$\\kappa(t)$\")\n",
|
||||
"plt.legend()\n",
|
||||
"plt.savefig(\"kappa_trajectory.png\")\n",
|
||||
"print(\"\\nPlot saved to kappa_trajectory.png\")\n"
|
||||
"print(\"\\nPlot saved to kappa_trajectory.png\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"main_language": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user