mirror of
https://github.com/wassname/Brukino_AntiPaSTO_Appetizer.git
synced 2026-06-27 17:13:50 +08:00
Increase S-space top_k to 1024
This commit is contained in:
+6
-6
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "b8a288c6",
|
"id": "62eec772",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Brukino's AntiPaSTO Appetizer: Guided CoT Eval & Frenet-Serret Curvature\n",
|
"# Brukino's AntiPaSTO Appetizer: Guided CoT Eval & Frenet-Serret Curvature\n",
|
||||||
@@ -20,7 +20,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "e8c081e6",
|
"id": "47c7efe2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -44,11 +44,11 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "7b5b34e4",
|
"id": "8c590e6c",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def get_s_space_projector(model, top_k=256):\n",
|
"def get_s_space_projector(model, top_k=1024):\n",
|
||||||
" \"\"\"\n",
|
" \"\"\"\n",
|
||||||
" Gathers all weight matrices that write to the residual stream\n",
|
" Gathers all weight matrices that write to the residual stream\n",
|
||||||
" (o_proj from attention and down_proj from MLP) across all layers,\n",
|
" (o_proj from attention and down_proj from MLP) across all layers,\n",
|
||||||
@@ -123,7 +123,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "ab5130a9",
|
"id": "6ae905b2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -205,7 +205,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "96c84bd3",
|
"id": "30e7fb4e",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"lines_to_next_cell": 2
|
"lines_to_next_cell": 2
|
||||||
},
|
},
|
||||||
|
|||||||
+1
-1
@@ -39,7 +39,7 @@ NUM_EXAMPLES = 3
|
|||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
def get_s_space_projector(model, top_k=256):
|
def get_s_space_projector(model, top_k=1024):
|
||||||
"""
|
"""
|
||||||
Gathers all weight matrices that write to the residual stream
|
Gathers all weight matrices that write to the residual stream
|
||||||
(o_proj from attention and down_proj from MLP) across all layers,
|
(o_proj from attention and down_proj from MLP) across all layers,
|
||||||
|
|||||||
Reference in New Issue
Block a user