mirror of
https://github.com/wassname/pi-auto-compressor.git
synced 2026-06-27 16:46:09 +08:00
feat: Add early trigger, update state logic and README.md
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
# Pi Auto-Compressor
|
||||
|
||||
A lightweight, invisible background extension for the Pi coding agent that automatically manages your context window using a Hermes-style "middle-slice" compression strategy.
|
||||
|
||||
## How it works (Hermes-Style)
|
||||
Unlike Pi's built-in `/compact` command (which flattens your entire conversation history in the database), the Auto-Compressor runs entirely in the background during the `context` event before the LLM even sees the prompt.
|
||||
|
||||
When your context size reaches the soft threshold (by default, 50% of the maximum context window):
|
||||
1. **Middle Slicing:** It safely carves out the "middle" of your conversation using token math, preserving the System Prompt (the head) and your most recent messages (the tail). It never splits `tool_call` and `tool_result` pairs.
|
||||
2. **Background Summarization:** It passes that middle slice to a fast/cheap LLM (like Gemini Flash) to build a structured "Context Checkpoint" (Goals, Progress, Blockers, Key Decisions).
|
||||
3. **Seamless Replacement:** It replaces the raw middle slice in the context window with the generated summary, preceded by a `[CONTEXT COMPACTION — REFERENCE ONLY]` tag.
|
||||
|
||||
The main agent never gets bogged down by huge logs, and your API calls stay cheap, but your full history remains intact in Pi's database!
|
||||
|
||||
## Built-in Sweeping
|
||||
The extension also continuously sweeps tool outputs in the background:
|
||||
- **Deduplication:** If a tool is called multiple times with the exact same arguments (e.g. `ls` on the same directory or `cat` on the same file), it replaces the older duplicate outputs with a small tombstone placeholder.
|
||||
- **Error Purging:** If a tool fails, the error stays in context for a few turns so the agent can fix it, but is then purged to keep the context clean of dead failure traces.
|
||||
|
||||
## Commands
|
||||
You don't *need* to use any commands—the extension runs automatically. However, if you want to inspect its behavior or trigger it manually, use the `/acp` command:
|
||||
|
||||
- `/acp` - Show stats: how many tokens have been saved, how many tool outputs have been pruned (deduplicated or purged errors), and whether a summary currently exists.
|
||||
- `/acp compress` - Force a one-time context compression on the next agent turn, regardless of token thresholds.
|
||||
|
||||
## Compatibility with `/compact`
|
||||
This extension **does not conflict** with Pi's built-in `/compact` command.
|
||||
- **`/compact`**: Destructively modifies your actual session branch in the database, squashing history into a single node.
|
||||
- **Auto-Compressor**: Ephemerally modifies only the context array sent to the API. It saves tokens dynamically without destroying your local branch history.
|
||||
@@ -84,8 +84,15 @@ export default function (pi: ExtensionAPI) {
|
||||
})
|
||||
|
||||
pi.registerCommand("acp", {
|
||||
description: "Auto-Compressor stats",
|
||||
description: "Auto-Compressor stats and manual trigger",
|
||||
async handler(args, ctx) {
|
||||
const argsStr = args.trim().toLowerCase();
|
||||
if (argsStr === "compress") {
|
||||
state.forceCompressNext = true;
|
||||
ctx.ui.notify("Manual compression triggered. It will run in the background on the next agent turn.", "info");
|
||||
return;
|
||||
}
|
||||
|
||||
const usage = ctx.getContextUsage ? ctx.getContextUsage() : null;
|
||||
let tokenStr = "unavailable";
|
||||
if (usage && usage.tokens !== null) {
|
||||
@@ -99,7 +106,9 @@ export default function (pi: ExtensionAPI) {
|
||||
` Pruned Tool Outputs (Deduplication/Errors): ${state.prunedToolIds.size}`,
|
||||
` Current User Turn: ${state.currentTurn}`,
|
||||
` Summary Exists (Has Compressed): ${state.previousSummary !== null ? "Yes" : "No"}`,
|
||||
` Current Context Tokens: ${tokenStr}`
|
||||
` Current Context Tokens: ${tokenStr}`,
|
||||
"",
|
||||
"Type '/acp compress' to force a compression on the next turn."
|
||||
];
|
||||
ctx.ui.notify(lines.join("\n"), "info");
|
||||
}
|
||||
|
||||
@@ -358,7 +358,11 @@ export async function applyPruning(
|
||||
AUTO_COMPRESS_CONFIG.minimumContextLength
|
||||
);
|
||||
|
||||
if (totalTokens >= thresholdTokens && msgs.length > AUTO_COMPRESS_CONFIG.protectFirstN + 4) {
|
||||
if (
|
||||
state.forceCompressNext ||
|
||||
(totalTokens >= thresholdTokens && msgs.length > AUTO_COMPRESS_CONFIG.protectFirstN + 4)
|
||||
) {
|
||||
state.forceCompressNext = false;
|
||||
const tailBudget = Math.floor(thresholdTokens * AUTO_COMPRESS_CONFIG.summaryTargetRatio);
|
||||
const compressStart = alignBoundaryForward(msgs, AUTO_COMPRESS_CONFIG.protectFirstN);
|
||||
const compressEnd = findTailCutByTokens(msgs, compressStart, tailBudget);
|
||||
|
||||
@@ -17,6 +17,7 @@ export interface DcpState {
|
||||
totalPruneCount: number
|
||||
previousSummary: string | null
|
||||
compressionCount: number
|
||||
forceCompressNext?: boolean
|
||||
}
|
||||
|
||||
export function createState(): DcpState {
|
||||
@@ -28,6 +29,7 @@ export function createState(): DcpState {
|
||||
totalPruneCount: 0,
|
||||
previousSummary: null,
|
||||
compressionCount: 0,
|
||||
forceCompressNext: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,6 +41,7 @@ export function resetState(state: DcpState): void {
|
||||
state.totalPruneCount = 0
|
||||
state.previousSummary = null
|
||||
state.compressionCount = 0
|
||||
state.forceCompressNext = false
|
||||
}
|
||||
|
||||
function sortObjectKeys(value: unknown): unknown {
|
||||
|
||||
Reference in New Issue
Block a user