diff --git a/README.md b/README.md new file mode 100644 index 0000000..b1c8964 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# Pi Auto-Compressor + +A lightweight, invisible background extension for the Pi coding agent that automatically manages your context window using a Hermes-style "middle-slice" compression strategy. + +## How it works (Hermes-Style) +Unlike Pi's built-in `/compact` command (which flattens your entire conversation history in the database), the Auto-Compressor runs entirely in the background during the `context` event before the LLM even sees the prompt. + +When your context size exceeds the soft threshold (default: 50% of the maximum context window): +1. **Middle Slicing:** It safely carves out the "middle" of your conversation based on token counts, preserving the System Prompt (the head) and your most recent messages (the tail). It never splits `tool_call` and `tool_result` pairs. +2. **Background Summarization:** It passes that middle slice to a fast/cheap LLM (like Gemini Flash) to build a structured "Context Checkpoint" (Goals, Progress, Blockers, Key Decisions). +3. **Seamless Replacement:** It replaces the raw middle slice in the context window with the generated summary, preceded by a `[CONTEXT COMPACTION — REFERENCE ONLY]` tag. + +The main agent never gets bogged down by huge logs, and your API calls stay cheap, but your full history remains intact in Pi's database! + +## Built-in Sweeping +The extension also continuously sweeps tool outputs in the background: +- **Deduplication:** If a tool is called multiple times with the exact same arguments (e.g. `ls` or `cat` on the same file), it replaces older duplicate outputs with a small placeholder tombstone. +- **Error Purging:** If a tool fails, the error stays in context for a few turns so the agent can fix it, but is then purged to keep the context clean of dead failure traces. + +## Commands +You don't *need* to use any commands—the extension runs automatically.
However, if you want to inspect its behavior or trigger it manually, use the `/acp` command: + +- `/acp` - Show stats on how many tokens have been saved, tools deduplicated, and whether a summary currently exists. +- `/acp compress` - Force a context compression on the next turn, regardless of token thresholds. + +## Compatibility with `/compact` +This extension **does not conflict** with Pi's built-in `/compact` command. +- **`/compact`**: Destructively modifies your actual session branch in the database, squashing history into a single node. +- **Auto-Compressor**: Ephemeral modification of the context array sent to the API. It saves tokens dynamically without destroying your local branch history. diff --git a/index.ts b/index.ts index 571973e..a7c19f3 100644 --- a/index.ts +++ b/index.ts @@ -84,8 +84,15 @@ export default function (pi: ExtensionAPI) { }) pi.registerCommand("acp", { - description: "Auto-Compressor stats", + description: "Auto-Compressor stats and manual trigger", async handler(args, ctx) { + const argsStr = args.trim().toLowerCase(); + if (argsStr === "compress") { + state.forceCompressNext = true; + ctx.ui.notify("Manual compression triggered. It will run in the background on the next agent turn.", "info"); + return; + } + const usage = ctx.getContextUsage ? ctx.getContextUsage() : null; let tokenStr = "unavailable"; if (usage && usage.tokens !== null) { @@ -99,7 +106,9 @@ export default function (pi: ExtensionAPI) { ` Pruned Tool Outputs (Deduplication/Errors): ${state.prunedToolIds.size}`, ` Current User Turn: ${state.currentTurn}`, ` Summary Exists (Has Compressed): ${state.previousSummary !== null ? "Yes" : "No"}`, - ` Current Context Tokens: ${tokenStr}` + ` Current Context Tokens: ${tokenStr}`, + "", + "Type '/acp compress' to force a compression on the next turn." 
]; ctx.ui.notify(lines.join("\n"), "info"); } diff --git a/pruner.ts b/pruner.ts index d28260a..69c3234 100644 --- a/pruner.ts +++ b/pruner.ts @@ -358,7 +358,11 @@ export async function applyPruning( AUTO_COMPRESS_CONFIG.minimumContextLength ); - if (totalTokens >= thresholdTokens && msgs.length > AUTO_COMPRESS_CONFIG.protectFirstN + 4) { + if ( + state.forceCompressNext || + (totalTokens >= thresholdTokens && msgs.length > AUTO_COMPRESS_CONFIG.protectFirstN + 4) + ) { + state.forceCompressNext = false; const tailBudget = Math.floor(thresholdTokens * AUTO_COMPRESS_CONFIG.summaryTargetRatio); const compressStart = alignBoundaryForward(msgs, AUTO_COMPRESS_CONFIG.protectFirstN); const compressEnd = findTailCutByTokens(msgs, compressStart, tailBudget); diff --git a/state.ts b/state.ts index de06afd..9ae21f5 100644 --- a/state.ts +++ b/state.ts @@ -17,6 +17,7 @@ export interface DcpState { totalPruneCount: number previousSummary: string | null compressionCount: number + forceCompressNext?: boolean } export function createState(): DcpState { @@ -28,6 +29,7 @@ export function createState(): DcpState { totalPruneCount: 0, previousSummary: null, compressionCount: 0, + forceCompressNext: false, } } @@ -39,6 +41,7 @@ export function resetState(state: DcpState): void { state.totalPruneCount = 0 state.previousSummary = null state.compressionCount = 0 + state.forceCompressNext = false } function sortObjectKeys(value: unknown): unknown {