fix: route hermes compression through pi compaction

2026-06-27 15:16:08 +08:00 · 2026-04-23 19:28:43 +08:00
parent 23d837a344
commit e16c804d44
6 changed files with 407 additions and 170 deletions
@@ -2,28 +2,34 @@

 A lightweight, invisible background extension for the Pi coding agent that automatically manages your context window using a Hermes-style "middle-slice" compression strategy.

+## Design References
+- Hermes context compression and caching: https://hermes-agent.nousresearch.com/docs/developer-guide/context-compression-and-caching
+- Pi extension API: https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/docs/extensions.md
+
 ## How it works (Hermes-Style)
-Unlike Pi's built-in `/compact` command (which flattens your entire conversation history in the database), the Auto-Compressor runs entirely in the background during the `context` event before the LLM even sees the prompt.
+The Auto-Compressor uses Pi's extension hooks for two small jobs: lightweight request-context tool sweeping, and a custom Hermes-style implementation of Pi compaction. If Hermes compaction cannot produce a summary, compaction is cancelled rather than falling back to Pi's default summarizer.

 When your context size exceeds the soft threshold (default 50% of max context window):
-1. **Middle Slicing:** It safely carves out the "middle" of your conversation using token math, preserving the System Prompt (the head) and your most recent messages (the tail). It never splits `tool_call` and `tool_result` pairs.
-2. **Background Summarization:** It passes that middle slice to a fast/cheap LLM (like Gemini Flash) to build a structured "Context Checkpoint" (Goals, Progress, Blockers, Key Decisions).
-3. **Seamless Replacement:** It replaces the raw middle slice in the context window with the generated summary, preceded by a `[CONTEXT COMPACTION — REFERENCE ONLY]` tag.
+1. **Tool Sweeping:** It replaces old, large tool outputs outside the protected head/tail with tombstones while keeping tool result messages in place.
+2. **Middle Slicing:** It overrides Pi compaction with a Hermes-style summary of the middle/old context while preserving Pi's recent-tail handling.
+3. **Background Summarization:** It passes that middle slice to the active model to build a structured "Context Checkpoint" (Goals, Progress, Blockers, Key Decisions).
+4. **Seamless Replacement:** Pi stores the custom Hermes summary as the compaction entry and reloads the session context safely.

 The main agent never gets bogged down by huge logs, and your API calls stay cheap, but your full history remains intact in Pi's database!

 ## Built-in Sweeping
 The extension also continuously sweeps tool outputs in the background:
- **Deduplication:** If a tool is called multiple times with the exact same arguments (e.g. `ls` or `cat` on the same file), it replaces older duplicate outputs with a small placeholder tombstone.
- **Error Purging:** If a tool fails, the error stays in context for a few turns so the agent can fix it, but is then purged to keep the context clean of dead failure traces.
+- **Protected Tail:** Recent messages are left intact so the agent can still reason from fresh evidence.
+- **Tombstones:** Old, large tool outputs are replaced with compact tombstones instead of deleting tool result messages.
+- **Pair Safety:** Tool call/result pairs are sanitized so provider invariants stay valid.

 ## Commands
 You don't *need* to use any commands—the extension runs automatically. However, if you want to inspect its behavior or trigger it manually, use the `/acp` command:

- `/acp` - Show stats on how many tokens have been saved, tools deduplicated, and whether a summary currently exists.
- `/acp compress` - Force a context compression on the next turn, regardless of token thresholds.
+- `/acp` - Show stats: tokens saved, tool outputs swept, the last compression status, and whether a summary currently exists.
+- `/acp compress` - Run Hermes middle compaction through Pi's compaction lifecycle. This only runs between turns with no queued messages, and it reports success or failure.

 ## Compatibility with `/compact`
 This extension **does not conflict** with Pi's built-in `/compact` command.
 - **`/compact`**: Destructively modifies your actual session branch in the database, squashing history into a single node.
- **Auto-Compressor**: Ephemeral modification of the context array sent to the API. It saves tokens dynamically without destroying your local branch history.
+- **Auto-Compressor**: Sweeps tool outputs ephemerally in the request context and, whenever Pi compaction runs, supplies a Hermes middle-summary in place of Pi's default summary.
@@ -1,14 +1,11 @@
-import * as fs from "node:fs"
-import * as path from "node:path"
-import * as os from "node:os"
-import { parse as parseJsonc } from "jsonc-parser"
-
 export const AUTO_COMPRESS_CONFIG = {
  thresholdPercent: 0.50,      // compress when tokens > 50% of context window
  minimumContextLength: 64000, // never compress below this threshold
  protectFirstN: 3,            // messages: system prompt + first exchange
+  protectLastN: 20,            // most recent messages kept intact (exempt from tool sweeping)
  summaryTargetRatio: 0.20,    // tail budget = threshold * 0.20
  charsPerToken: 4,            // rough estimate
+  minToolOutputPruneChars: 200, // tool outputs shorter than this many chars are never swept
 };

 export interface DcpConfig {
@@ -0,0 +1,46 @@
+# Fix Tool Output Pruning
+
+## Goal
+Prevent pi-auto-compressor from removing valid Pi tool results from the model context.
+
+## Scope
+In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status.
+Out: Changing compression thresholds or persistent session storage.
+
+## Requirements
+- R1: Preserve valid tool results. Done means: an assistant `content` block with `type: "toolCall"` and matching `toolResult.toolCallId` survives `applyPruning()`. VERIFY: focused script reports the matching tool result is present.
+- R2: Preserve pair safety during compression. Done means: boundary alignment recognizes `toolResult` messages. VERIFY: TypeScript compiles and the helper paths use Pi message roles.
+- R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes.
+- R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker.
+- R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept.
+
+## Tasks
+- [x] T1 (R1-R3): Patch `pruner.ts`.
+  - verify: `npx tsc --noEmit`
+  - success: no TypeScript errors.
+  - likely_fail: tool result still filtered because assistant IDs are not collected.
+  - sneaky_fail: assistant tool-call block is kept without a corresponding result after pruning.
+- [x] T2 (R1): Run a focused regression script against `applyPruning()`.
+  - verify: script prints preserved tool result count and content.
+  - success: `toolResults=1` and output text is visible.
+  - likely_fail: `toolResults=0`.
+  - sneaky_fail: result exists but text is the pruning placeholder.
+- [x] T3 (R4): Move Hermes compression into `session_before_compact`.
+  - verify: `npx tsc --noEmit`; focused scripts for forced too-short and summary-failed paths.
+  - success: `/acp compress` calls Pi compaction and the extension overrides it with Hermes summary.
+  - likely_fail: hook returns nothing and Pi default compaction runs.
+  - sneaky_fail: truncated/error summary is accepted as successful compaction.
+- [x] T4 (R5): Add protected-tail tool sweeping.
+  - verify: focused old-output and tail-output scripts.
+  - success: old output prints a tombstone; tail output length remains 250.
+  - likely_fail: recent tool output is swept.
+  - sneaky_fail: swept tool result is deleted instead of tombstoned.
+
+## Log
+- Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape.
+- `npx tsc --noEmit` passed.
+- Regression script output: `toolResults=1`, `assistantCalls=1`, text `README.md\npruner.ts\n`.
+- Hermes compaction now uses Pi's `session_before_compact` hook. The context hook only performs tool sweeping.
+- The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction.
+- Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250.
+- Subagent review findings, all addressed: truncated/error summary responses are now rejected, messages currently in the protected tail are not tombstoned even if their ID was marked earlier, and the compaction hook is wrapped with cancel-on-error.
@@ -5,7 +5,7 @@ import {
  resetState,
  createInputFingerprint,
 } from "./state.js"
-import { applyPruning } from "./pruner.js"
+import { applyPruning, generateHermesSummary } from "./pruner.js"

 export default function (pi: ExtensionAPI) {
  const config = loadConfig(process.cwd())
@@ -65,6 +65,7 @@ export default function (pi: ExtensionAPI) {
        if (data?.compressionCount) state.compressionCount = data.compressionCount
        if (data?.tokensSaved) state.tokensSaved = data.tokensSaved
        if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds)
+        if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus
      }
    }
  })
@@ -75,12 +76,102 @@ export default function (pi: ExtensionAPI) {
      compressionCount: state.compressionCount,
      tokensSaved: state.tokensSaved,
      prunedToolIds: Array.from(state.prunedToolIds),
+      lastCompressionStatus: state.lastCompressionStatus,
    })
  })
+
+  pi.on("session_before_compact", async (event, ctx) => { // Supplies a Hermes summary as the compaction result, or cancels the compaction.
+    try {
+      const model = ctx.model
+      if (!model) { // no model selected: record the reason, warn, and cancel
+        const message = "Hermes compaction cancelled: no model selected."
+        state.lastCompressionStatus = message
+        ctx.ui.notify(message, "warning")
+        return { cancel: true }
+      }
+
+      const auth = await ctx.modelRegistry.getApiKeyAndHeaders(model) // credentials forwarded to the summary request below
+      if (!auth.ok) {
+        const message = `Hermes compaction cancelled: ${auth.error}`
+        state.lastCompressionStatus = message
+        ctx.ui.notify(message, "warning")
+        return { cancel: true }
+      }
+
+      const { preparation, signal } = event
+      const messagesToSummarize = [ // summarized span followed by the turn-prefix messages
+        ...preparation.messagesToSummarize,
+        ...preparation.turnPrefixMessages,
+      ]
+
+      ctx.ui.notify(
+        `Hermes compaction: summarizing ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)...`,
+        "info",
+      )
+
+      const result = await generateHermesSummary(
+        messagesToSummarize,
+        preparation.previousSummary ?? null,
+        event.customInstructions ?? null, // passed in the focus-topic position
+        model,
+        {
+          apiKey: auth.apiKey,
+          headers: auth.headers,
+          signal,
+          maxTokens: 8192,
+        },
+      )
+
+      if (!result.ok || !result.summary) { // failed or empty summary: always cancel
+        if (!signal.aborted) { // status and warning are recorded only when the signal was not aborted
+          const message = `Hermes compaction cancelled: ${result.error ?? "empty summary"}`
+          state.lastCompressionStatus = message
+          ctx.ui.notify(message, "warning")
+        }
+        return { cancel: true }
+      }
+
+      state.previousSummary = result.summary // a non-null value makes the /acp stats report that a summary exists
+      state.lastCompressionStatus =
+        `Hermes compaction ready: summarized ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)`
+
+      return {
+        compaction: {
+          summary: result.summary,
+          firstKeptEntryId: preparation.firstKeptEntryId, // keep boundary taken unchanged from Pi's preparation
+          tokensBefore: preparation.tokensBefore,
+          details: {
+            kind: "hermes-middle",
+            sweptToolOutputs: state.prunedToolIds.size,
+          },
+        },
+      }
+    } catch (error) { // any thrown error is converted into a cancellation
+      const message = `Hermes compaction cancelled: ${error instanceof Error ? error.message : String(error)}`
+      state.lastCompressionStatus = message
+      try {
+        ctx.ui.notify(message, "warning")
+      } catch {
+        // Ignore UI failures; cancellation is the important safety behavior.
+      }
+      return { cancel: true }
+    }
+  })
+
+  pi.on("session_compact", async (event, ctx) => { // Bumps the compression counter after a compaction finishes.
+    if (event.fromExtension) { // NOTE(review): presumably true when an extension supplied the compaction result; confirm against Pi docs
+      state.compressionCount++
+      state.lastCompressionStatus = "Hermes compaction completed"
+      if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info") // notify only when a UI is attached
+    }
+  })
  
  pi.on("context", async (event, ctx) => {
-    const prunedMessages = await applyPruning(event.messages, state, config, ctx.model)
-    return { messages: prunedMessages }
+    const result = await applyPruning(event.messages, state, config)
+    if (result.outcome) {
+      state.lastCompressionStatus = result.outcome.message
+    }
+    return { messages: result.messages }
  })

  pi.registerCommand("acp", {
@@ -88,8 +179,27 @@ export default function (pi: ExtensionAPI) {
    async handler(args, ctx) {
      const argsStr = args.trim().toLowerCase();
      if (argsStr === "compress") {
-        state.forceCompressNext = true;
-        ctx.ui.notify("Manual compression scheduled. It will run when you send your next message.", "info");
+        if (!ctx.isIdle()) {
+          ctx.ui.notify("Manual Hermes compaction can only run between turns; the agent is currently running.", "warning");
+          return;
+        }
+        if (ctx.hasPendingMessages()) {
+          ctx.ui.notify("Manual Hermes compaction can only run when there are no queued messages.", "warning");
+          return;
+        }
+        state.lastCompressionStatus = "Manual Hermes compaction started"
+        ctx.ui.notify("Manual Hermes compaction started", "info")
+        ctx.compact({
+          onComplete: () => {
+            state.lastCompressionStatus = "Manual Hermes compaction completed"
+            ctx.ui.notify("Manual Hermes compaction completed", "info")
+          },
+          onError: (error) => {
+            const message = `Manual Hermes compaction failed: ${error.message}`
+            state.lastCompressionStatus = message
+            ctx.ui.notify(message, "error")
+          },
+        })
        return;
      }
      
@@ -112,17 +222,18 @@ export default function (pi: ExtensionAPI) {
      const lines = [
        `Auto-Compressor (Hermes) Stats:`,
        `   Total Compressions: ${state.compressionCount}`,
-        `   Pending Compression: ${state.forceCompressNext ? "YES (Scheduled for next turn)" : "No"}`,
+        `   Pending Compression: No`,
        `   Tokens Saved (Compaction): ~${state.tokensSaved.toLocaleString()}`,
        `   Tokens Saved (Tool Pruning): ~${prunedToolTokens.toLocaleString()}`,
        `   Total Tool Calls Tracked: ${state.toolCalls.size}`,
-        `   Pruned Tool Outputs (Deduplication/Errors): ${state.prunedToolIds.size}`,
+        `   Swept Tool Outputs: ${state.prunedToolIds.size}`,
        `   Total Tool Tokens Generated: ~${totalToolTokens.toLocaleString()}`,
        `   Current User Turn: ${state.currentTurn}`,
        `   Summary Exists (Has Compressed): ${state.previousSummary !== null ? "Yes" : "No"}`,
+        `   Last Compression Status: ${state.lastCompressionStatus ?? "None"}`,
        `   Current Context Tokens: ${tokenStr}`,
        "",
-        "Type '/acp compress' to force a compression on the next turn."
+        "Type '/acp compress' between turns to run Hermes middle compaction."
      ];
      ctx.ui.notify(lines.join("\n"), "info");
    }
@@ -1,7 +1,118 @@
 import type { DcpState } from "./state.js"
 import { type DcpConfig, AUTO_COMPRESS_CONFIG } from "./config.js"

-const ALWAYS_PROTECTED_DEDUP = new Set(["compress", "write", "edit"]);
+const ALWAYS_PROTECTED_TOOLS = new Set(["compress", "write", "edit"]);
+
+export interface PruningOutcome { // Optional status attached to an applyPruning result.
+  kind: "compressed" | "skipped" | "failed"
+  message: string // status text; the context hook copies it into state.lastCompressionStatus
+  tokensSaved?: number // makeCompressionOutcome sets this to original minus compressed tokens, so it can be negative
+}
+
+export interface ApplyPruningResult { // Return shape of applyPruning.
+  messages: any[] // message list the context hook returns to Pi
+  outcome?: PruningOutcome // when present, its message becomes the last compression status
+}
+
+export interface SummaryAuth { // Options object forwarded as the third argument of the summary completion call.
+  apiKey?: string
+  headers?: Record<string, string>
+  [key: string]: unknown // permits extra keys; the compaction hook also passes `signal` and `maxTokens`
+}
+
+export interface SummaryResult { // Outcome of generateHermesSummary.
+  ok: boolean
+  summary?: string // trimmed summary text, set on success
+  error?: string // failure reason, set when ok is false
+}
+
+function isToolResultMessage(msg: any): boolean { // True when the role is "toolResult" or "tool"; a null/undefined message yields false.
+  return msg?.role === "toolResult" || msg?.role === "tool";
+}
+
+function getToolResultId(msg: any): string | undefined { // Reads `toolCallId`, falling back to `tool_call_id` when it is falsy.
+  return msg?.toolCallId || msg?.tool_call_id;
+}
+
+function getAssistantToolCalls(msg: any): any[] { // Collects tool calls from both supported message shapes into one new array.
+  const calls: any[] = [];
+
+  if (Array.isArray(msg?.content)) { // shape 1: content blocks tagged `type: "toolCall"`
+    for (const block of msg.content) {
+      if (block?.type === "toolCall") calls.push(block);
+    }
+  }
+
+  if (Array.isArray(msg?.tool_calls)) { // shape 2: a `tool_calls` array, appended after any content-block calls
+    calls.push(...msg.tool_calls);
+  }
+
+  return calls; // empty when the message has neither shape
+}
+
+function getToolCallId(call: any): string | undefined { // Returns `call.id`; no other field is consulted.
+  return call?.id;
+}
+
+function getToolCallName(call: any): string { // Prefers `name`, then `function.name`; returns "?" when neither is set.
+  return call?.name || call?.function?.name || "?";
+}
+
+function getToolCallArgsText(call: any): string { // Returns the call's arguments as text.
+  const args = call?.arguments ?? call?.function?.arguments ?? ""; // `arguments` first, then `function.arguments`, else ""
+  return typeof args === "string" ? args : JSON.stringify(args); // strings pass through; other values are JSON-stringified
+}
+
+function getMessageText(msg: any): string { // Flattens a message's content to plain text.
+  const content = msg?.content;
+  if (typeof content === "string") return content; // string content is returned unchanged
+  if (!Array.isArray(content)) return ""; // anything that is neither string nor array yields ""
+  return content
+    .map((part: any) => { // per part: `text` wins, then `thinking`; other parts contribute nothing
+      if (typeof part?.text === "string") return part.text;
+      if (typeof part?.thinking === "string") return part.thinking;
+      return "";
+    })
+    .join(""); // parts are concatenated with no separator
+}
+
+function getToolOutputChars(msg: any): number { // Length of the text that getMessageText extracts from the message.
+  return getMessageText(msg).length;
+}
+
+function textContent(text: string): Array<{ type: "text"; text: string }> { // Wraps a string in a one-element text content array.
+  return [{ type: "text", text }];
+}
+
+function prependTextContent(content: unknown, text: string): Array<any> { // Builds a new content array that starts with a text block holding `text`.
+  const prefix = { type: "text", text };
+  if (Array.isArray(content)) return [prefix, ...content]; // array content: existing parts follow the prefix
+  if (typeof content === "string" && content.length > 0) { // non-empty string content becomes a second text block
+    return [prefix, { type: "text", text: content }];
+  }
+  return [prefix]; // empty string or any other content: prefix only
+}
+
+function makeCompressionOutcome( // Builds a "compressed" outcome from before/after message and token counts.
+  originalCount: number,
+  compressedCount: number,
+  originalTokens: number,
+  compressedTokens: number,
+): PruningOutcome {
+  const saved = originalTokens - compressedTokens; // negative when the compressed context is larger
+  const delta = saved >= 0 ? `saved ~${saved.toLocaleString()} tokens` : `added ~${Math.abs(saved).toLocaleString()} tokens`;
+  return {
+    kind: "compressed",
+    message: `compressed Hermes request context: ${originalCount} -> ${compressedCount} messages, ${delta}`,
+    tokensSaved: saved, // signed value; the message text above uses its magnitude
+  };
+}
+
+function markToolPruned(state: DcpState, id: string | undefined): void { // Records a tool id as pruned and counts it once.
+  if (!id || state.prunedToolIds.has(id)) return; // missing or already-recorded ids change nothing
+  state.prunedToolIds.add(id);
+  state.totalPruneCount++;
+}

 export function estimateMessageTokens(msg: any): number {
  if (!msg) return 0;
@@ -17,16 +128,18 @@ export function estimateMessageTokens(msg: any): number {
        if (typeof part.text === "string") text += part.text;
        else if (typeof part.thinking === "string") text += part.thinking;
        else if (part.type === "image") text += "image";
+        else if (part.type === "toolCall") text += getToolCallArgsText(part);
      }
    }
  }
  
  let tokens = Math.round(text.length / 4) + 10;
  
-  const toolCalls = msg.tool_calls || [];
-  for (const tc of toolCalls) {
-    const args = tc?.function?.arguments || "";
-    tokens += Math.round(args.length / 4);
+  if (Array.isArray(msg.tool_calls)) {
+    for (const tc of msg.tool_calls) {
+      const args = getToolCallArgsText(tc);
+      tokens += Math.round(args.length / 4);
+    }
  }
  
  return tokens;
@@ -37,7 +150,7 @@ export function estimateMessagesTokens(messages: any[]): number {
 }

 function alignBoundaryForward(messages: any[], idx: number): number {
-  while (idx < messages.length && messages[idx]?.role === "tool") {
+  while (idx < messages.length && isToolResultMessage(messages[idx])) {
    idx++;
  }
  return idx;
@@ -46,10 +159,10 @@ function alignBoundaryForward(messages: any[], idx: number): number {
 function alignBoundaryBackward(messages: any[], idx: number): number {
  if (idx <= 0 || idx >= messages.length) return idx;
  let check = idx - 1;
-  while (check >= 0 && messages[check]?.role === "tool") {
+  while (check >= 0 && isToolResultMessage(messages[check])) {
    check--;
  }
-  if (check >= 0 && messages[check]?.role === "assistant" && messages[check]?.tool_calls) {
+  if (check >= 0 && messages[check]?.role === "assistant" && getAssistantToolCalls(messages[check]).length > 0) {
    idx = check;
  }
  return idx;
@@ -62,7 +175,7 @@ function findTailCutByTokens(
  charsPerToken: number = 4
 ): number {
  const n = messages.length;
-  const minTail = Math.min(3, n - headEnd - 1);
+  const minTail = Math.min(AUTO_COMPRESS_CONFIG.protectLastN, n - headEnd - 1);
  const softCeiling = Math.floor(tokenBudget * 1.5);
  let accumulated = 0;
  let cutIdx = n;
@@ -106,15 +219,15 @@ function serializeForSummary(turns: any[]): string {
      content = content.slice(0, HEAD_KEEP) + "\n...[truncated]...\n" + content.slice(-TAIL_KEEP);
    }
    
-    if (role === "tool" || role === "toolResult") {
-      const toolId = msg.tool_call_id || msg.toolCallId || "";
+    if (isToolResultMessage(msg)) {
+      const toolId = getToolResultId(msg) || "";
      parts.push(`[TOOL RESULT ${toolId}]: ${content}`);
    } else if (role === "assistant") {
-      const toolCalls = msg.tool_calls || [];
+      const toolCalls = getAssistantToolCalls(msg);
      if (toolCalls.length > 0) {
        const tcParts = toolCalls.map((tc: any) => {
-          const name = tc?.function?.name || "?";
-          const args = tc?.function?.arguments || "";
+          const name = getToolCallName(tc);
+          const args = getToolCallArgsText(tc);
          const argsShort = args.length > 150 ? args.slice(0, 120) + "..." : args;
          return `  ${name}(${argsShort})`;
        });
@@ -128,12 +241,13 @@ function serializeForSummary(turns: any[]): string {
  return parts.join("\n\n");
 }

-async function generateSummary(
+export async function generateHermesSummary(
  turns: any[],
  previousSummary: string | null,
  focusTopic: string | null,
  model: any,
-): Promise<string | null> {
+  auth?: SummaryAuth,
+): Promise<SummaryResult> {
  const contentToSummarize = serializeForSummary(turns);
  
  const summarizerPreamble = 
@@ -213,11 +327,18 @@ Prioritize preserving all information related to the focus topic.`;
  }

  try {
-    if (!model) return null;
+    if (!model) return { ok: false, error: "no model available" };
    const piAi = await import("@mariozechner/pi-ai");
    const response = await piAi.complete(model, {
      messages: [{ role: "user", content: prompt, timestamp: Date.now() }]
-    });
+    }, auth);
+
+    if (response.stopReason !== "stop") {
+      return {
+        ok: false,
+        error: `summary generation stopped with ${response.stopReason}${response.errorMessage ? `: ${response.errorMessage}` : ""}`,
+      };
+    }

    let text = "";
    if (Array.isArray(response.content)) {
@@ -229,10 +350,12 @@ Prioritize preserving all information related to the focus topic.`;
      text = (response as any).content;
    }

-    return text.trim() || null;
+    const summary = text.trim();
+    return summary ? { ok: true, summary } : { ok: false, error: "summary generation returned empty text" };
  } catch (e) {
+    const error = e instanceof Error ? e.message : String(e);
    console.error("Summary generation failed:", e);
-    return null;
+    return { ok: false, error };
  }
 }

@@ -240,101 +363,139 @@ function sanitizeToolPairs(messages: any[]): any[] {
  const assistantToolIds = new Set<string>();
  for (const msg of messages) {
    if (msg.role !== "assistant") continue;
-    for (const tc of msg.tool_calls || []) {
-      const id = tc?.id || tc?.function?.name;
+    for (const tc of getAssistantToolCalls(msg)) {
+      const id = getToolCallId(tc);
      if (id) assistantToolIds.add(id);
    }
  }
  
  const resultToolIds = new Set<string>();
  for (const msg of messages) {
-    if (msg.role !== "tool" && msg.role !== "toolResult") continue;
-    const id = msg.tool_call_id || msg.toolCallId;
+    if (!isToolResultMessage(msg)) continue;
+    const id = getToolResultId(msg);
    if (id) resultToolIds.add(id);
  }
  
  const cleaned = messages.filter((msg) => {
-    if (msg.role !== "tool" && msg.role !== "toolResult") return true;
-    const id = msg.tool_call_id || msg.toolCallId;
+    if (!isToolResultMessage(msg)) return true;
+    const id = getToolResultId(msg);
    return !id || assistantToolIds.has(id);
  });
  
  for (const msg of cleaned) {
-    if (msg.role !== "assistant" || !msg.tool_calls) continue;
-    msg.tool_calls = msg.tool_calls.filter((tc: any) => {
-      const id = tc?.id || tc?.function?.name;
-      return !id || resultToolIds.has(id);
-    });
+    if (msg.role !== "assistant") continue;
+    if (Array.isArray(msg.content)) {
+      msg.content = msg.content.filter((block: any) => {
+        if (block?.type !== "toolCall") return true;
+        const id = getToolCallId(block);
+        return !id || resultToolIds.has(id);
+      });
+    }
+    if (Array.isArray(msg.tool_calls)) {
+      msg.tool_calls = msg.tool_calls.filter((tc: any) => {
+        const id = getToolCallId(tc);
+        return !id || resultToolIds.has(id);
+      });
+    }
  }
  
  return cleaned;
 }

-function applyDeduplication(messages: any[], state: DcpState, config: DcpConfig): void {
+function applyDeduplication(messages: any[], state: DcpState, config: DcpConfig, sweepEnd: number): void {
  if (!config.strategies.deduplication.enabled) return;

  const protectedTools = new Set([
-    ...ALWAYS_PROTECTED_DEDUP,
+    ...ALWAYS_PROTECTED_TOOLS,
    ...(config.strategies.deduplication.protectedTools ?? []),
  ]);

  const fingerprintMap = new Map<string, string[]>();

-  for (const msg of messages) {
-    if (msg.role !== "toolResult") continue;
+  for (let i = AUTO_COMPRESS_CONFIG.protectFirstN; i < sweepEnd; i++) {
+    const msg = messages[i];
+    if (!isToolResultMessage(msg)) continue;
    const toolName: string = msg.toolName ?? "";
    if (protectedTools.has(toolName)) continue;
+    if (getToolOutputChars(msg) < AUTO_COMPRESS_CONFIG.minToolOutputPruneChars) continue;

-    const record = state.toolCalls.get(msg.toolCallId || msg.tool_call_id);
+    const record = state.toolCalls.get(getToolResultId(msg) || "");
    if (!record) continue;

    const fp = record.inputFingerprint;
    if (!fingerprintMap.has(fp)) {
      fingerprintMap.set(fp, []);
    }
-    fingerprintMap.get(fp)!.push(msg.toolCallId || msg.tool_call_id);
+    const id = getToolResultId(msg);
+    if (id) fingerprintMap.get(fp)!.push(id);
  }

  for (const [, ids] of fingerprintMap) {
    if (ids.length <= 1) continue;
    for (let i = 0; i < ids.length - 1; i++) {
-      state.prunedToolIds.add(ids[i]);
-      state.totalPruneCount++;
+      markToolPruned(state, ids[i]);
    }
  }
 }

-function applyErrorPurging(messages: any[], state: DcpState, config: DcpConfig): void {
+function applyErrorPurging(messages: any[], state: DcpState, config: DcpConfig, sweepEnd: number): void {
  if (!config.strategies.purgeErrors.enabled) return;

-  const protectedTools = new Set(config.strategies.purgeErrors.protectedTools ?? []);
+  const protectedTools = new Set([
+    ...ALWAYS_PROTECTED_TOOLS,
+    ...(config.strategies.purgeErrors.protectedTools ?? []),
+  ]);
  const turnsThreshold = config.strategies.purgeErrors.turns ?? 3;

-  for (const msg of messages) {
-    if (msg.role !== "toolResult") continue;
+  for (let i = AUTO_COMPRESS_CONFIG.protectFirstN; i < sweepEnd; i++) {
+    const msg = messages[i];
+    if (!isToolResultMessage(msg)) continue;
    if (!msg.isError) continue;

    const toolName: string = msg.toolName ?? "";
    if (protectedTools.has(toolName)) continue;
+    if (getToolOutputChars(msg) < AUTO_COMPRESS_CONFIG.minToolOutputPruneChars) continue;

-    const record = state.toolCalls.get(msg.toolCallId || msg.tool_call_id);
+    const id = getToolResultId(msg);
+    const record = state.toolCalls.get(id || "");
    if (!record) continue;

    if (state.currentTurn - record.turnIndex >= turnsThreshold) {
-      state.prunedToolIds.add(msg.toolCallId || msg.tool_call_id);
-      state.totalPruneCount++;
+      markToolPruned(state, id);
    }
  }
 }

-function applyToolOutputPruning(messages: any[], state: DcpState): void {
-  for (const msg of messages) {
-    if (msg.role !== "toolResult") continue;
-    if (!state.prunedToolIds.has(msg.toolCallId || msg.tool_call_id)) continue;
+function applyOldToolOutputSweeping(messages: any[], state: DcpState, config: DcpConfig, sweepEnd: number): void { // Marks large tool results at indexes [protectFirstN, sweepEnd) as pruned; message content is not modified here.
+  const protectedTools = new Set([ // built-in protected tools plus both strategies' configured lists
+    ...ALWAYS_PROTECTED_TOOLS,
+    ...(config.strategies.deduplication.protectedTools ?? []),
+    ...(config.strategies.purgeErrors.protectedTools ?? []),
+  ]);
+
+  for (let i = AUTO_COMPRESS_CONFIG.protectFirstN; i < sweepEnd; i++) {
+    const msg = messages[i];
+    if (!isToolResultMessage(msg)) continue;
+    if (protectedTools.has(msg.toolName ?? "")) continue;
+    if (getToolOutputChars(msg) < AUTO_COMPRESS_CONFIG.minToolOutputPruneChars) continue; // small outputs are left alone
+
+    const id = getToolResultId(msg);
+    markToolPruned(state, id); // no-op when the id is missing or already recorded
+  }
+}
+
+function applyToolOutputPruning(messages: any[], state: DcpState, sweepEnd: number): void {
+  for (let i = AUTO_COMPRESS_CONFIG.protectFirstN; i < sweepEnd; i++) {
+    const msg = messages[i];
+    if (!isToolResultMessage(msg)) continue;
+    const id = getToolResultId(msg);
+    if (!state.prunedToolIds.has(id || "")) continue;
+    const chars = getToolOutputChars(msg);
+    const toolName = msg.toolName || "unknown";
    if (msg.isError) {
-      msg.content = [{ type: "text", text: "[Error output removed - tool failed more than N turns ago]" }];
+      msg.content = [{ type: "text", text: `[Tool output swept: ${toolName} ${id || ""}, error output, ${chars.toLocaleString()} chars removed]` }];
    } else {
-      msg.content = [{ type: "text", text: "[Output removed to save context - information superseded or no longer needed]" }];
+      msg.content = [{ type: "text", text: `[Tool output swept: ${toolName} ${id || ""}, ${chars.toLocaleString()} chars removed]` }];
    }
  }
 }
@@ -343,8 +504,7 @@ export async function applyPruning(
  messages: any[],
  state: DcpState,
  config: DcpConfig,
-  model: any
-): Promise<any[]> {
+): Promise<ApplyPruningResult> {
  const msgs = messages.map((m: any) => {
    const clone = { ...m };
    if (Array.isArray(clone.content)) {
@@ -357,101 +517,15 @@ export async function applyPruning(

  state.currentTurn = msgs.filter((m) => m.role === "user").length;

-  applyDeduplication(msgs, state, config);
-  applyErrorPurging(msgs, state, config);
-  applyToolOutputPruning(msgs, state);
-  
-  const totalTokens = estimateMessagesTokens(msgs);
-  const contextLength = (config as any).contextLength || 128000; 
-  const thresholdTokens = Math.max(
-    Math.floor(contextLength * AUTO_COMPRESS_CONFIG.thresholdPercent),
-    AUTO_COMPRESS_CONFIG.minimumContextLength
+  const protectedTailStart = Math.max(
+    AUTO_COMPRESS_CONFIG.protectFirstN,
+    msgs.length - AUTO_COMPRESS_CONFIG.protectLastN,
  );
-  
-  if (
-    state.forceCompressNext || 
-    (totalTokens >= thresholdTokens && msgs.length > AUTO_COMPRESS_CONFIG.protectFirstN + 4)
-  ) {
-    const wasForced = state.forceCompressNext;
-    state.forceCompressNext = false;

-    let tailBudget = Math.floor(thresholdTokens * AUTO_COMPRESS_CONFIG.summaryTargetRatio);
-    if (wasForced) {
-      // Force compression: use a tiny tail budget to ensure we summarize almost everything
-      tailBudget = Math.max(100, Math.floor(totalTokens * 0.05));
-    }
+  applyOldToolOutputSweeping(msgs, state, config, protectedTailStart);
+  applyDeduplication(msgs, state, config, protectedTailStart);
+  applyErrorPurging(msgs, state, config, protectedTailStart);
+  applyToolOutputPruning(msgs, state, protectedTailStart);

-    const compressStart = alignBoundaryForward(msgs, AUTO_COMPRESS_CONFIG.protectFirstN);
-    const compressEnd = findTailCutByTokens(msgs, compressStart, tailBudget);
-
-    // If forced, we MUST compress something if we have any messages after protectFirstN
-    let finalCompressEnd = compressEnd;
-    if (wasForced && finalCompressEnd <= compressStart && msgs.length > compressStart + 1) {
-      finalCompressEnd = msgs.length - 1;
-    }
-
-    if (compressStart < finalCompressEnd) {
-      const middle = msgs.slice(compressStart, finalCompressEnd);
-      const summary = await generateSummary(middle, state.previousSummary, null, model);
-
-      if (summary) {
-        const compressed: any[] = [];
-
-        for (let i = 0; i < compressStart; i++) {
-          compressed.push(msgs[i]);
-        }
-
-        const lastHeadRole = msgs[compressStart - 1]?.role || "user";
-        const firstTailRole = msgs[finalCompressEnd]?.role || "user";
-
-        let summaryRole = lastHeadRole === "assistant" ? "user" : "assistant";
-        if (summaryRole === firstTailRole) {
-          const flipped = summaryRole === "user" ? "assistant" : "user";
-          if (flipped !== lastHeadRole) {
-            summaryRole = flipped;
-          } else {
-            const tailMsg = { ...msgs[finalCompressEnd], timestamp: msgs[finalCompressEnd].timestamp || Date.now() };
-            const originalContent = tailMsg.content || "";
-            tailMsg.content = 
-              "## Goal\n" + summary + "\n\n--- END OF CONTEXT SUMMARY ---\n\n" + 
-              (typeof originalContent === "string" ? originalContent : "");
-            compressed.push(tailMsg);
-            for (let i = finalCompressEnd + 1; i < msgs.length; i++) {
-              compressed.push(msgs[i]);
-            }
-            state.previousSummary = summary;
-            state.compressionCount++;
-            state.tokensSaved += totalTokens - estimateMessagesTokens(compressed);
-            return sanitizeToolPairs(compressed);
-          }
-        }
-
-        const prefix = 
-          "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted into the summary below. " +
-          "This is a handoff from a previous context window — treat it as background reference, " +
-          "NOT as active instructions. Do NOT answer questions or fulfill requests mentioned in this summary; " +
-          "they were already addressed. Respond ONLY to the latest user message that appears AFTER this summary:";
-
-        compressed.push({
-          role: summaryRole,
-          content: prefix + "\n\n" + summary,
-          timestamp: Date.now(),
-        });
-
-        for (let i = finalCompressEnd; i < msgs.length; i++) {
-          compressed.push(msgs[i]);
-        }
-
-        state.previousSummary = summary;
-        state.compressionCount++;
-        state.tokensSaved += totalTokens - estimateMessagesTokens(compressed);
-        return sanitizeToolPairs(compressed);
-      }
-    } else {
-      if (config.debug || wasForced) {
-        console.log(`[ACP] Compression skipped: conversation too short (start: ${compressStart}, end: ${finalCompressEnd})`);
-      }
-    }
-  }  
-  return sanitizeToolPairs(msgs);
+  return { messages: sanitizeToolPairs(msgs) };
 }
@@ -18,6 +18,7 @@ export interface DcpState {
  previousSummary: string | null
  compressionCount: number
  forceCompressNext?: boolean
+  lastCompressionStatus?: string | null
 }

 export function createState(): DcpState {
@@ -30,6 +31,7 @@ export function createState(): DcpState {
    previousSummary: null,
    compressionCount: 0,
    forceCompressNext: false,
+    lastCompressionStatus: null,
  }
 }

@@ -42,6 +44,7 @@ export function resetState(state: DcpState): void {
  state.previousSummary = null
  state.compressionCount = 0
  state.forceCompressNext = false
+  state.lastCompressionStatus = null
 }

 function sortObjectKeys(value: unknown): unknown {