mirror of
https://github.com/wassname/Brukino_AntiPaSTO_Appetizer.git
synced 2026-06-27 15:43:29 +08:00
Increase S-space top_k to 1024
This commit is contained in:
+6
-6
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8a288c6",
|
||||
"id": "62eec772",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Brukino's AntiPaSTO Appetizer: Guided CoT Eval & Frenet-Serret Curvature\n",
|
||||
@@ -20,7 +20,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e8c081e6",
|
||||
"id": "47c7efe2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -44,11 +44,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b5b34e4",
|
||||
"id": "8c590e6c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_s_space_projector(model, top_k=256):\n",
|
||||
"def get_s_space_projector(model, top_k=1024):\n",
|
||||
" \"\"\"\n",
|
||||
" Gathers all weight matrices that write to the residual stream\n",
|
||||
" (o_proj from attention and down_proj from MLP) across all layers,\n",
|
||||
@@ -123,7 +123,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ab5130a9",
|
||||
"id": "6ae905b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -205,7 +205,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "96c84bd3",
|
||||
"id": "30e7fb4e",
|
||||
"metadata": {
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
|
||||
+1
-1
@@ -39,7 +39,7 @@ NUM_EXAMPLES = 3
|
||||
|
||||
|
||||
# %%
|
||||
def get_s_space_projector(model, top_k=256):
|
||||
def get_s_space_projector(model, top_k=1024):
|
||||
"""
|
||||
Gathers all weight matrices that write to the residual stream
|
||||
(o_proj from attention and down_proj from MLP) across all layers,
|
||||
|
||||
Reference in New Issue
Block a user