mirror of
https://github.com/wassname/pi-auto-compressor.git
synced 2026-06-27 15:16:08 +08:00
fix: route hermes compression through pi compaction
This commit is contained in:
@@ -2,28 +2,34 @@
|
||||
|
||||
A lightweight, invisible background extension for the Pi coding agent that automatically manages your context window using a Hermes-style "middle-slice" compression strategy.
|
||||
|
||||
## Design References
|
||||
- Hermes context compression and caching: https://hermes-agent.nousresearch.com/docs/developer-guide/context-compression-and-caching
|
||||
- Pi extension API: https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/docs/extensions.md
|
||||
|
||||
## How it works (Hermes-Style)
|
||||
Unlike Pi's built-in `/compact` command (which flattens your entire conversation history in the database), the Auto-Compressor runs entirely in the background during the `context` event before the LLM even sees the prompt.
|
||||
The Auto-Compressor uses Pi's extension hooks for two small jobs: lightweight request-context tool sweeping, and a custom Hermes-style implementation of Pi compaction. If Hermes compaction cannot produce a summary, compaction is cancelled rather than falling back to Pi's default summarizer.
|
||||
|
||||
When your context size exceeds the soft threshold (default 50% of max context window):
|
||||
1. **Middle Slicing:** It safely carves out the "middle" of your conversation using token math, preserving the System Prompt (the head) and your most recent messages (the tail). It never splits `tool_call` and `tool_result` pairs.
|
||||
2. **Background Summarization:** It passes that middle slice to a fast/cheap LLM (like Gemini Flash) to build a structured "Context Checkpoint" (Goals, Progress, Blockers, Key Decisions).
|
||||
3. **Seamless Replacement:** It replaces the raw middle slice in the context window with the generated summary, preceded by a `[CONTEXT COMPACTION — REFERENCE ONLY]` tag.
|
||||
1. **Tool Sweeping:** It replaces old, large tool outputs outside the protected head/tail with tombstones while keeping tool result messages in place.
|
||||
2. **Middle Slicing:** It overrides Pi compaction with a Hermes-style summary of the middle/old context while preserving Pi's recent-tail handling.
|
||||
3. **Background Summarization:** It passes that middle slice to the active model to build a structured "Context Checkpoint" (Goals, Progress, Blockers, Key Decisions).
|
||||
4. **Seamless Replacement:** Pi stores the custom Hermes summary as the compaction entry and reloads the session context safely.
|
||||
|
||||
The main agent never gets bogged down by huge logs, and your API calls stay cheap, but your full history remains intact in Pi's database!
|
||||
|
||||
## Built-in Sweeping
|
||||
The extension also continuously sweeps tool outputs in the background:
|
||||
- **Deduplication:** If a tool is called multiple times with the exact same arguments (e.g. `ls` or `cat` on the same file), it replaces older duplicate outputs with a small placeholder tombstone.
|
||||
- **Error Purging:** If a tool fails, the error stays in context for a few turns so the agent can fix it, but is then purged to keep the context clean of dead failure traces.
|
||||
- **Protected Tail:** Recent messages are left intact so the agent can still reason from fresh evidence.
|
||||
- **Tombstones:** Old, large tool outputs are replaced with compact tombstones instead of deleting tool result messages.
|
||||
- **Pair Safety:** Tool call/result pairs are sanitized so provider invariants stay valid.
|
||||
|
||||
## Commands
|
||||
You don't *need* to use any commands—the extension runs automatically. However, if you want to inspect its behavior or trigger it manually, use the `/acp` command:
|
||||
|
||||
- `/acp` - Show stats on how many tokens have been saved, tools deduplicated, and whether a summary currently exists.
|
||||
- `/acp compress` - Force a context compression on the next turn, regardless of token thresholds.
|
||||
- `/acp` - Show stats on how many tokens have been saved, tool outputs swept, and whether a summary currently exists.
|
||||
- `/acp compress` - Run Hermes middle compaction through Pi's compaction lifecycle. This can only run between turns and reports success or failure.
|
||||
|
||||
## Compatibility with `/compact`
|
||||
This extension **does not conflict** with Pi's built-in `/compact` command.
|
||||
- **`/compact`**: Destructively modifies your actual session branch in the database, squashing history into a single node.
|
||||
- **Auto-Compressor**: Ephemeral modification of the context array sent to the API. It saves tokens dynamically without destroying your local branch history.
|
||||
- **Auto-Compressor**: Sweeps tool outputs ephemerally in request context, and overrides Pi compaction with a Hermes middle-summary when compaction runs.
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
import * as fs from "node:fs"
|
||||
import * as path from "node:path"
|
||||
import * as os from "node:os"
|
||||
import { parse as parseJsonc } from "jsonc-parser"
|
||||
|
||||
export const AUTO_COMPRESS_CONFIG = {
|
||||
thresholdPercent: 0.50, // compress when tokens > 50% of context window
|
||||
minimumContextLength: 64000, // never compress below this threshold
|
||||
protectFirstN: 3, // messages: system prompt + first exchange
|
||||
protectLastN: 20, // keep recent context intact
|
||||
summaryTargetRatio: 0.20, // tail budget = threshold * 0.20
|
||||
charsPerToken: 4, // rough estimate
|
||||
minToolOutputPruneChars: 200,
|
||||
};
|
||||
|
||||
export interface DcpConfig {
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
# Fix Tool Output Pruning
|
||||
|
||||
## Goal
|
||||
Prevent pi-auto-compressor from removing valid Pi tool results from the model context.
|
||||
|
||||
## Scope
|
||||
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status.
|
||||
Out: Changing compression thresholds or persistent session storage.
|
||||
|
||||
## Requirements
|
||||
- R1: Preserve valid tool results. Done means: an assistant `content` block with `type: "toolCall"` and matching `toolResult.toolCallId` survives `applyPruning()`. VERIFY: focused script reports the matching tool result is present.
|
||||
- R2: Preserve pair safety during compression. Done means: boundary alignment recognizes `toolResult` messages. VERIFY: TypeScript compiles and the helper paths use Pi message roles.
|
||||
- R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes.
|
||||
- R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker.
|
||||
- R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept.
|
||||
|
||||
## Tasks
|
||||
- [x] T1 (R1-R3): Patch `pruner.ts`.
|
||||
- verify: `npx tsc --noEmit`
|
||||
- success: no TypeScript errors.
|
||||
- likely_fail: tool result still filtered because assistant IDs are not collected.
|
||||
- sneaky_fail: assistant tool-call block is kept without a corresponding result after pruning.
|
||||
- [x] T2 (R1): Run a focused regression script against `applyPruning()`.
|
||||
- verify: script prints preserved tool result count and content.
|
||||
- success: `toolResults=1` and output text is visible.
|
||||
- likely_fail: `toolResults=0`.
|
||||
- sneaky_fail: result exists but text is the pruning placeholder.
|
||||
- [x] T3 (R4): Move Hermes compression into `session_before_compact`.
|
||||
- verify: `npx tsc --noEmit`; focused scripts for forced too-short and summary-failed paths.
|
||||
- success: `/acp compress` calls Pi compaction and the extension overrides it with Hermes summary.
|
||||
- likely_fail: hook returns nothing and Pi default compaction runs.
|
||||
- sneaky_fail: truncated/error summary is accepted as successful compaction.
|
||||
- [x] T4 (R5): Add protected-tail tool sweeping.
|
||||
- verify: focused old-output and tail-output scripts.
|
||||
- success: old output prints a tombstone; tail output length remains 250.
|
||||
- likely_fail: recent tool output is swept.
|
||||
- sneaky_fail: swept tool result is deleted instead of tombstoned.
|
||||
|
||||
## Log
|
||||
- Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape.
|
||||
- `npx tsc --noEmit` passed.
|
||||
- Regression script output: `toolResults=1`, `assistantCalls=1`, text `README.md\npruner.ts\n`.
|
||||
- Hermes compaction now uses Pi's `session_before_compact` hook. The context hook only performs tool sweeping.
|
||||
- The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction.
|
||||
- Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250.
|
||||
- Subagent review findings, all addressed: reject truncated/error summary responses, avoid tombstoning messages in the currently protected tail even when their ID was previously marked, and wrap the compaction hook with cancel-on-error.
|
||||
@@ -5,7 +5,7 @@ import {
|
||||
resetState,
|
||||
createInputFingerprint,
|
||||
} from "./state.js"
|
||||
import { applyPruning } from "./pruner.js"
|
||||
import { applyPruning, generateHermesSummary } from "./pruner.js"
|
||||
|
||||
export default function (pi: ExtensionAPI) {
|
||||
const config = loadConfig(process.cwd())
|
||||
@@ -65,6 +65,7 @@ export default function (pi: ExtensionAPI) {
|
||||
if (data?.compressionCount) state.compressionCount = data.compressionCount
|
||||
if (data?.tokensSaved) state.tokensSaved = data.tokensSaved
|
||||
if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds)
|
||||
if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus
|
||||
}
|
||||
}
|
||||
})
|
||||
@@ -75,12 +76,102 @@ export default function (pi: ExtensionAPI) {
|
||||
compressionCount: state.compressionCount,
|
||||
tokensSaved: state.tokensSaved,
|
||||
prunedToolIds: Array.from(state.prunedToolIds),
|
||||
lastCompressionStatus: state.lastCompressionStatus,
|
||||
})
|
||||
})
|
||||
|
||||
pi.on("session_before_compact", async (event, ctx) => {
|
||||
try {
|
||||
const model = ctx.model
|
||||
if (!model) {
|
||||
const message = "Hermes compaction cancelled: no model selected."
|
||||
state.lastCompressionStatus = message
|
||||
ctx.ui.notify(message, "warning")
|
||||
return { cancel: true }
|
||||
}
|
||||
|
||||
const auth = await ctx.modelRegistry.getApiKeyAndHeaders(model)
|
||||
if (!auth.ok) {
|
||||
const message = `Hermes compaction cancelled: ${auth.error}`
|
||||
state.lastCompressionStatus = message
|
||||
ctx.ui.notify(message, "warning")
|
||||
return { cancel: true }
|
||||
}
|
||||
|
||||
const { preparation, signal } = event
|
||||
const messagesToSummarize = [
|
||||
...preparation.messagesToSummarize,
|
||||
...preparation.turnPrefixMessages,
|
||||
]
|
||||
|
||||
ctx.ui.notify(
|
||||
`Hermes compaction: summarizing ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)...`,
|
||||
"info",
|
||||
)
|
||||
|
||||
const result = await generateHermesSummary(
|
||||
messagesToSummarize,
|
||||
preparation.previousSummary ?? null,
|
||||
event.customInstructions ?? null,
|
||||
model,
|
||||
{
|
||||
apiKey: auth.apiKey,
|
||||
headers: auth.headers,
|
||||
signal,
|
||||
maxTokens: 8192,
|
||||
},
|
||||
)
|
||||
|
||||
if (!result.ok || !result.summary) {
|
||||
if (!signal.aborted) {
|
||||
const message = `Hermes compaction cancelled: ${result.error ?? "empty summary"}`
|
||||
state.lastCompressionStatus = message
|
||||
ctx.ui.notify(message, "warning")
|
||||
}
|
||||
return { cancel: true }
|
||||
}
|
||||
|
||||
state.previousSummary = result.summary
|
||||
state.lastCompressionStatus =
|
||||
`Hermes compaction ready: summarized ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)`
|
||||
|
||||
return {
|
||||
compaction: {
|
||||
summary: result.summary,
|
||||
firstKeptEntryId: preparation.firstKeptEntryId,
|
||||
tokensBefore: preparation.tokensBefore,
|
||||
details: {
|
||||
kind: "hermes-middle",
|
||||
sweptToolOutputs: state.prunedToolIds.size,
|
||||
},
|
||||
},
|
||||
}
|
||||
} catch (error) {
|
||||
const message = `Hermes compaction cancelled: ${error instanceof Error ? error.message : String(error)}`
|
||||
state.lastCompressionStatus = message
|
||||
try {
|
||||
ctx.ui.notify(message, "warning")
|
||||
} catch {
|
||||
// Ignore UI failures; cancellation is the important safety behavior.
|
||||
}
|
||||
return { cancel: true }
|
||||
}
|
||||
})
|
||||
|
||||
pi.on("session_compact", async (event, ctx) => {
|
||||
if (event.fromExtension) {
|
||||
state.compressionCount++
|
||||
state.lastCompressionStatus = "Hermes compaction completed"
|
||||
if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info")
|
||||
}
|
||||
})
|
||||
|
||||
pi.on("context", async (event, ctx) => {
|
||||
const prunedMessages = await applyPruning(event.messages, state, config, ctx.model)
|
||||
return { messages: prunedMessages }
|
||||
const result = await applyPruning(event.messages, state, config)
|
||||
if (result.outcome) {
|
||||
state.lastCompressionStatus = result.outcome.message
|
||||
}
|
||||
return { messages: result.messages }
|
||||
})
|
||||
|
||||
pi.registerCommand("acp", {
|
||||
@@ -88,8 +179,27 @@ export default function (pi: ExtensionAPI) {
|
||||
async handler(args, ctx) {
|
||||
const argsStr = args.trim().toLowerCase();
|
||||
if (argsStr === "compress") {
|
||||
state.forceCompressNext = true;
|
||||
ctx.ui.notify("Manual compression scheduled. It will run when you send your next message.", "info");
|
||||
if (!ctx.isIdle()) {
|
||||
ctx.ui.notify("Manual Hermes compaction can only run between turns; the agent is currently running.", "warning");
|
||||
return;
|
||||
}
|
||||
if (ctx.hasPendingMessages()) {
|
||||
ctx.ui.notify("Manual Hermes compaction can only run when there are no queued messages.", "warning");
|
||||
return;
|
||||
}
|
||||
state.lastCompressionStatus = "Manual Hermes compaction started"
|
||||
ctx.ui.notify("Manual Hermes compaction started", "info")
|
||||
ctx.compact({
|
||||
onComplete: () => {
|
||||
state.lastCompressionStatus = "Manual Hermes compaction completed"
|
||||
ctx.ui.notify("Manual Hermes compaction completed", "info")
|
||||
},
|
||||
onError: (error) => {
|
||||
const message = `Manual Hermes compaction failed: ${error.message}`
|
||||
state.lastCompressionStatus = message
|
||||
ctx.ui.notify(message, "error")
|
||||
},
|
||||
})
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -112,17 +222,18 @@ export default function (pi: ExtensionAPI) {
|
||||
const lines = [
|
||||
`Auto-Compressor (Hermes) Stats:`,
|
||||
` Total Compressions: ${state.compressionCount}`,
|
||||
` Pending Compression: ${state.forceCompressNext ? "YES (Scheduled for next turn)" : "No"}`,
|
||||
` Pending Compression: No`,
|
||||
` Tokens Saved (Compaction): ~${state.tokensSaved.toLocaleString()}`,
|
||||
` Tokens Saved (Tool Pruning): ~${prunedToolTokens.toLocaleString()}`,
|
||||
` Total Tool Calls Tracked: ${state.toolCalls.size}`,
|
||||
` Pruned Tool Outputs (Deduplication/Errors): ${state.prunedToolIds.size}`,
|
||||
` Swept Tool Outputs: ${state.prunedToolIds.size}`,
|
||||
` Total Tool Tokens Generated: ~${totalToolTokens.toLocaleString()}`,
|
||||
` Current User Turn: ${state.currentTurn}`,
|
||||
` Summary Exists (Has Compressed): ${state.previousSummary !== null ? "Yes" : "No"}`,
|
||||
` Last Compression Status: ${state.lastCompressionStatus ?? "None"}`,
|
||||
` Current Context Tokens: ${tokenStr}`,
|
||||
"",
|
||||
"Type '/acp compress' to force a compression on the next turn."
|
||||
"Type '/acp compress' between turns to run Hermes middle compaction."
|
||||
];
|
||||
ctx.ui.notify(lines.join("\n"), "info");
|
||||
}
|
||||
|
||||
@@ -1,7 +1,118 @@
|
||||
import type { DcpState } from "./state.js"
|
||||
import { type DcpConfig, AUTO_COMPRESS_CONFIG } from "./config.js"
|
||||
|
||||
const ALWAYS_PROTECTED_DEDUP = new Set(["compress", "write", "edit"]);
|
||||
const ALWAYS_PROTECTED_TOOLS = new Set(["compress", "write", "edit"]);
|
||||
|
||||
/**
 * Describes how a pruning/compression pass ended.
 *
 * The `context` hook copies `message` into the extension's
 * `lastCompressionStatus`, so it should read as a one-line status.
 */
export interface PruningOutcome {
  /** Whether the pass compressed the context, was skipped, or failed. */
  kind: "compressed" | "skipped" | "failed";
  /** Human-readable status line for this pass. */
  message: string;
  /**
   * Estimated token difference (original minus compressed), when known.
   * Negative when the pass ended up adding tokens.
   */
  tokensSaved?: number;
}
|
||||
|
||||
/**
 * Return value of `applyPruning()`.
 */
export interface ApplyPruningResult {
  /** Message list to hand back to the `context` hook. */
  messages: any[];
  /** Status of the pass; the caller only records it when present. */
  outcome?: PruningOutcome;
}
|
||||
|
||||
/**
 * Options bag passed as the final argument to the summarizer's `complete()` call.
 *
 * Only the credential fields are named; the index signature lets callers add
 * further request options (the compaction hook also passes `signal` and
 * `maxTokens`).
 */
export interface SummaryAuth {
  /** API key for the selected model, if one is required. */
  apiKey?: string;
  /** Extra HTTP headers to send with the request. */
  headers?: Record<string, string>;
  /** Any additional request options, passed through untouched. */
  [key: string]: unknown;
}
|
||||
|
||||
/**
 * Result of a summary-generation attempt.
 *
 * Producers in this file set `summary` together with `ok: true`, and `error`
 * together with `ok: false`.
 */
export interface SummaryResult {
  /** `true` when a usable summary was produced. */
  ok: boolean;
  /** The generated summary text (present on success). */
  summary?: string;
  /** Why generation failed (present on failure). */
  error?: string;
}
|
||||
|
||||
/**
 * Tells whether a message is a tool result.
 *
 * Both role spellings are accepted: `"toolResult"` and `"tool"`.
 * Nullish input yields `false`.
 */
function isToolResultMessage(msg: any): boolean {
  const role = msg?.role;
  return role === "tool" || role === "toolResult";
}
|
||||
|
||||
/**
 * Reads the tool-call id a tool-result message refers to.
 *
 * `toolCallId` wins when it is truthy; otherwise `tool_call_id` is returned
 * as-is (which may be `undefined`).
 */
function getToolResultId(msg: any): string | undefined {
  const camelCaseId = msg?.toolCallId;
  if (camelCaseId) return camelCaseId;
  return msg?.tool_call_id;
}
|
||||
|
||||
/**
 * Collects every tool call attached to an assistant message.
 *
 * Two shapes are supported and concatenated in this order:
 *  1. `content` blocks whose `type` is `"toolCall"`;
 *  2. entries of a `tool_calls` array.
 *
 * Always returns a fresh array (empty when neither shape is present).
 */
function getAssistantToolCalls(msg: any): any[] {
  const contentCalls: any[] = Array.isArray(msg?.content)
    ? msg.content.filter((block: any) => block?.type === "toolCall")
    : [];
  const legacyCalls: any[] = Array.isArray(msg?.tool_calls) ? msg.tool_calls : [];

  return [...contentCalls, ...legacyCalls];
}
|
||||
|
||||
/**
 * Returns the `id` of a tool call, or `undefined` when the call is nullish.
 */
function getToolCallId(call: any): string | undefined {
  if (call === null || call === undefined) return undefined;
  return call.id;
}
|
||||
|
||||
/**
 * Resolves a tool call's name.
 *
 * Looks at `call.name` first, then `call.function.name`, and falls back to
 * the placeholder `"?"` when neither is truthy.
 */
function getToolCallName(call: any): string {
  const directName = call?.name;
  if (directName) return directName;

  const nestedName = call?.function?.name;
  return nestedName ? nestedName : "?";
}
|
||||
|
||||
/**
 * Renders a tool call's arguments as text.
 *
 * Reads `call.arguments` first, then `call.function.arguments`. String
 * arguments are returned unchanged; anything else is JSON-encoded.
 *
 * The result is always a string, because callers immediately read `.length`
 * and `.slice()` on it:
 *  - `JSON.stringify` yields `undefined` for functions/symbols -> `""`;
 *  - `JSON.stringify` throws on cyclic structures and BigInt values -> a
 *    fixed placeholder instead of propagating the error.
 *
 * @param call Tool call in either supported shape; may be nullish.
 * @returns The argument text, `""` when there are no arguments.
 */
function getToolCallArgsText(call: any): string {
  const args: unknown = call?.arguments ?? call?.function?.arguments ?? "";
  if (typeof args === "string") return args;

  try {
    // JSON.stringify can return undefined at runtime despite its declared type.
    return JSON.stringify(args) ?? "";
  } catch {
    return "[unserializable arguments]";
  }
}
|
||||
|
||||
/**
 * Flattens a message's content into a single string.
 *
 * - String content is returned as-is.
 * - Array content contributes each part's `text`, or else its `thinking`,
 *   when that field is a string; other parts contribute nothing. Pieces are
 *   joined without a separator.
 * - Any other content yields `""`.
 */
function getMessageText(msg: any): string {
  const content = msg?.content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  let combined = "";
  for (const part of content) {
    if (typeof part?.text === "string") {
      combined += part.text;
    } else if (typeof part?.thinking === "string") {
      combined += part.thinking;
    }
  }
  return combined;
}
|
||||
|
||||
/**
 * Measures a tool result by the character length of its flattened text
 * (as produced by `getMessageText`).
 */
function getToolOutputChars(msg: any): number {
  const flattened = getMessageText(msg);
  return flattened.length;
}
|
||||
|
||||
/**
 * Wraps plain text in a one-element content array holding a single
 * `{ type: "text" }` block.
 */
function textContent(text: string): Array<{ type: "text"; text: string }> {
  const block: { type: "text"; text: string } = { type: "text", text };
  return [block];
}
|
||||
|
||||
/**
 * Builds a content array that starts with a text block containing `text`,
 * followed by the existing content.
 *
 * - Array content: its elements are appended unchanged.
 * - Non-empty string content: appended as one extra text block.
 * - Anything else (including an empty string): only the prefix block is returned.
 *
 * The input is never mutated; a new array is returned.
 */
function prependTextContent(content: unknown, text: string): Array<any> {
  const blocks: any[] = [{ type: "text", text }];

  if (Array.isArray(content)) {
    for (const part of content) blocks.push(part);
  } else if (typeof content === "string" && content.length > 0) {
    blocks.push({ type: "text", text: content });
  }

  return blocks;
}
|
||||
|
||||
/**
 * Builds the `"compressed"` outcome for a pass that replaced
 * `originalCount` messages with `compressedCount` messages.
 *
 * The token delta is `originalTokens - compressedTokens`; the message says
 * "saved" when it is non-negative and "added" (with the absolute value)
 * otherwise. The raw, possibly negative delta is returned in `tokensSaved`.
 */
function makeCompressionOutcome(
  originalCount: number,
  compressedCount: number,
  originalTokens: number,
  compressedTokens: number,
): PruningOutcome {
  const saved = originalTokens - compressedTokens;

  let delta: string;
  if (saved >= 0) {
    delta = `saved ~${saved.toLocaleString()} tokens`;
  } else {
    delta = `added ~${Math.abs(saved).toLocaleString()} tokens`;
  }

  const message = `compressed Hermes request context: ${originalCount} -> ${compressedCount} messages, ${delta}`;
  return { kind: "compressed", message, tokensSaved: saved };
}
|
||||
|
||||
/**
 * Records a tool-call id as pruned, at most once.
 *
 * Missing/empty ids and ids already in `state.prunedToolIds` are ignored, so
 * `state.totalPruneCount` only grows when a new id is added.
 */
function markToolPruned(state: DcpState, id: string | undefined): void {
  if (!id) return;
  if (state.prunedToolIds.has(id)) return;

  state.prunedToolIds.add(id);
  state.totalPruneCount += 1;
}
|
||||
|
||||
export function estimateMessageTokens(msg: any): number {
|
||||
if (!msg) return 0;
|
||||
@@ -17,16 +128,18 @@ export function estimateMessageTokens(msg: any): number {
|
||||
if (typeof part.text === "string") text += part.text;
|
||||
else if (typeof part.thinking === "string") text += part.thinking;
|
||||
else if (part.type === "image") text += "image";
|
||||
else if (part.type === "toolCall") text += getToolCallArgsText(part);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let tokens = Math.round(text.length / 4) + 10;
|
||||
|
||||
const toolCalls = msg.tool_calls || [];
|
||||
for (const tc of toolCalls) {
|
||||
const args = tc?.function?.arguments || "";
|
||||
tokens += Math.round(args.length / 4);
|
||||
if (Array.isArray(msg.tool_calls)) {
|
||||
for (const tc of msg.tool_calls) {
|
||||
const args = getToolCallArgsText(tc);
|
||||
tokens += Math.round(args.length / 4);
|
||||
}
|
||||
}
|
||||
|
||||
return tokens;
|
||||
@@ -37,7 +150,7 @@ export function estimateMessagesTokens(messages: any[]): number {
|
||||
}
|
||||
|
||||
function alignBoundaryForward(messages: any[], idx: number): number {
|
||||
while (idx < messages.length && messages[idx]?.role === "tool") {
|
||||
while (idx < messages.length && isToolResultMessage(messages[idx])) {
|
||||
idx++;
|
||||
}
|
||||
return idx;
|
||||
@@ -46,10 +159,10 @@ function alignBoundaryForward(messages: any[], idx: number): number {
|
||||
function alignBoundaryBackward(messages: any[], idx: number): number {
|
||||
if (idx <= 0 || idx >= messages.length) return idx;
|
||||
let check = idx - 1;
|
||||
while (check >= 0 && messages[check]?.role === "tool") {
|
||||
while (check >= 0 && isToolResultMessage(messages[check])) {
|
||||
check--;
|
||||
}
|
||||
if (check >= 0 && messages[check]?.role === "assistant" && messages[check]?.tool_calls) {
|
||||
if (check >= 0 && messages[check]?.role === "assistant" && getAssistantToolCalls(messages[check]).length > 0) {
|
||||
idx = check;
|
||||
}
|
||||
return idx;
|
||||
@@ -62,7 +175,7 @@ function findTailCutByTokens(
|
||||
charsPerToken: number = 4
|
||||
): number {
|
||||
const n = messages.length;
|
||||
const minTail = Math.min(3, n - headEnd - 1);
|
||||
const minTail = Math.min(AUTO_COMPRESS_CONFIG.protectLastN, n - headEnd - 1);
|
||||
const softCeiling = Math.floor(tokenBudget * 1.5);
|
||||
let accumulated = 0;
|
||||
let cutIdx = n;
|
||||
@@ -106,15 +219,15 @@ function serializeForSummary(turns: any[]): string {
|
||||
content = content.slice(0, HEAD_KEEP) + "\n...[truncated]...\n" + content.slice(-TAIL_KEEP);
|
||||
}
|
||||
|
||||
if (role === "tool" || role === "toolResult") {
|
||||
const toolId = msg.tool_call_id || msg.toolCallId || "";
|
||||
if (isToolResultMessage(msg)) {
|
||||
const toolId = getToolResultId(msg) || "";
|
||||
parts.push(`[TOOL RESULT ${toolId}]: ${content}`);
|
||||
} else if (role === "assistant") {
|
||||
const toolCalls = msg.tool_calls || [];
|
||||
const toolCalls = getAssistantToolCalls(msg);
|
||||
if (toolCalls.length > 0) {
|
||||
const tcParts = toolCalls.map((tc: any) => {
|
||||
const name = tc?.function?.name || "?";
|
||||
const args = tc?.function?.arguments || "";
|
||||
const name = getToolCallName(tc);
|
||||
const args = getToolCallArgsText(tc);
|
||||
const argsShort = args.length > 150 ? args.slice(0, 120) + "..." : args;
|
||||
return ` ${name}(${argsShort})`;
|
||||
});
|
||||
@@ -128,12 +241,13 @@ function serializeForSummary(turns: any[]): string {
|
||||
return parts.join("\n\n");
|
||||
}
|
||||
|
||||
async function generateSummary(
|
||||
export async function generateHermesSummary(
|
||||
turns: any[],
|
||||
previousSummary: string | null,
|
||||
focusTopic: string | null,
|
||||
model: any,
|
||||
): Promise<string | null> {
|
||||
auth?: SummaryAuth,
|
||||
): Promise<SummaryResult> {
|
||||
const contentToSummarize = serializeForSummary(turns);
|
||||
|
||||
const summarizerPreamble =
|
||||
@@ -213,11 +327,18 @@ Prioritize preserving all information related to the focus topic.`;
|
||||
}
|
||||
|
||||
try {
|
||||
if (!model) return null;
|
||||
if (!model) return { ok: false, error: "no model available" };
|
||||
const piAi = await import("@mariozechner/pi-ai");
|
||||
const response = await piAi.complete(model, {
|
||||
messages: [{ role: "user", content: prompt, timestamp: Date.now() }]
|
||||
});
|
||||
}, auth);
|
||||
|
||||
if (response.stopReason !== "stop") {
|
||||
return {
|
||||
ok: false,
|
||||
error: `summary generation stopped with ${response.stopReason}${response.errorMessage ? `: ${response.errorMessage}` : ""}`,
|
||||
};
|
||||
}
|
||||
|
||||
let text = "";
|
||||
if (Array.isArray(response.content)) {
|
||||
@@ -229,10 +350,12 @@ Prioritize preserving all information related to the focus topic.`;
|
||||
text = (response as any).content;
|
||||
}
|
||||
|
||||
return text.trim() || null;
|
||||
const summary = text.trim();
|
||||
return summary ? { ok: true, summary } : { ok: false, error: "summary generation returned empty text" };
|
||||
} catch (e) {
|
||||
const error = e instanceof Error ? e.message : String(e);
|
||||
console.error("Summary generation failed:", e);
|
||||
return null;
|
||||
return { ok: false, error };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -240,101 +363,139 @@ function sanitizeToolPairs(messages: any[]): any[] {
|
||||
const assistantToolIds = new Set<string>();
|
||||
for (const msg of messages) {
|
||||
if (msg.role !== "assistant") continue;
|
||||
for (const tc of msg.tool_calls || []) {
|
||||
const id = tc?.id || tc?.function?.name;
|
||||
for (const tc of getAssistantToolCalls(msg)) {
|
||||
const id = getToolCallId(tc);
|
||||
if (id) assistantToolIds.add(id);
|
||||
}
|
||||
}
|
||||
|
||||
const resultToolIds = new Set<string>();
|
||||
for (const msg of messages) {
|
||||
if (msg.role !== "tool" && msg.role !== "toolResult") continue;
|
||||
const id = msg.tool_call_id || msg.toolCallId;
|
||||
if (!isToolResultMessage(msg)) continue;
|
||||
const id = getToolResultId(msg);
|
||||
if (id) resultToolIds.add(id);
|
||||
}
|
||||
|
||||
const cleaned = messages.filter((msg) => {
|
||||
if (msg.role !== "tool" && msg.role !== "toolResult") return true;
|
||||
const id = msg.tool_call_id || msg.toolCallId;
|
||||
if (!isToolResultMessage(msg)) return true;
|
||||
const id = getToolResultId(msg);
|
||||
return !id || assistantToolIds.has(id);
|
||||
});
|
||||
|
||||
for (const msg of cleaned) {
|
||||
if (msg.role !== "assistant" || !msg.tool_calls) continue;
|
||||
msg.tool_calls = msg.tool_calls.filter((tc: any) => {
|
||||
const id = tc?.id || tc?.function?.name;
|
||||
return !id || resultToolIds.has(id);
|
||||
});
|
||||
if (msg.role !== "assistant") continue;
|
||||
if (Array.isArray(msg.content)) {
|
||||
msg.content = msg.content.filter((block: any) => {
|
||||
if (block?.type !== "toolCall") return true;
|
||||
const id = getToolCallId(block);
|
||||
return !id || resultToolIds.has(id);
|
||||
});
|
||||
}
|
||||
if (Array.isArray(msg.tool_calls)) {
|
||||
msg.tool_calls = msg.tool_calls.filter((tc: any) => {
|
||||
const id = getToolCallId(tc);
|
||||
return !id || resultToolIds.has(id);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
function applyDeduplication(messages: any[], state: DcpState, config: DcpConfig): void {
|
||||
function applyDeduplication(messages: any[], state: DcpState, config: DcpConfig, sweepEnd: number): void {
|
||||
if (!config.strategies.deduplication.enabled) return;
|
||||
|
||||
const protectedTools = new Set([
|
||||
...ALWAYS_PROTECTED_DEDUP,
|
||||
...ALWAYS_PROTECTED_TOOLS,
|
||||
...(config.strategies.deduplication.protectedTools ?? []),
|
||||
]);
|
||||
|
||||
const fingerprintMap = new Map<string, string[]>();
|
||||
|
||||
for (const msg of messages) {
|
||||
if (msg.role !== "toolResult") continue;
|
||||
for (let i = AUTO_COMPRESS_CONFIG.protectFirstN; i < sweepEnd; i++) {
|
||||
const msg = messages[i];
|
||||
if (!isToolResultMessage(msg)) continue;
|
||||
const toolName: string = msg.toolName ?? "";
|
||||
if (protectedTools.has(toolName)) continue;
|
||||
if (getToolOutputChars(msg) < AUTO_COMPRESS_CONFIG.minToolOutputPruneChars) continue;
|
||||
|
||||
const record = state.toolCalls.get(msg.toolCallId || msg.tool_call_id);
|
||||
const record = state.toolCalls.get(getToolResultId(msg) || "");
|
||||
if (!record) continue;
|
||||
|
||||
const fp = record.inputFingerprint;
|
||||
if (!fingerprintMap.has(fp)) {
|
||||
fingerprintMap.set(fp, []);
|
||||
}
|
||||
fingerprintMap.get(fp)!.push(msg.toolCallId || msg.tool_call_id);
|
||||
const id = getToolResultId(msg);
|
||||
if (id) fingerprintMap.get(fp)!.push(id);
|
||||
}
|
||||
|
||||
for (const [, ids] of fingerprintMap) {
|
||||
if (ids.length <= 1) continue;
|
||||
for (let i = 0; i < ids.length - 1; i++) {
|
||||
state.prunedToolIds.add(ids[i]);
|
||||
state.totalPruneCount++;
|
||||
markToolPruned(state, ids[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function applyErrorPurging(messages: any[], state: DcpState, config: DcpConfig): void {
|
||||
function applyErrorPurging(messages: any[], state: DcpState, config: DcpConfig, sweepEnd: number): void {
|
||||
if (!config.strategies.purgeErrors.enabled) return;
|
||||
|
||||
const protectedTools = new Set(config.strategies.purgeErrors.protectedTools ?? []);
|
||||
const protectedTools = new Set([
|
||||
...ALWAYS_PROTECTED_TOOLS,
|
||||
...(config.strategies.purgeErrors.protectedTools ?? []),
|
||||
]);
|
||||
const turnsThreshold = config.strategies.purgeErrors.turns ?? 3;
|
||||
|
||||
for (const msg of messages) {
|
||||
if (msg.role !== "toolResult") continue;
|
||||
for (let i = AUTO_COMPRESS_CONFIG.protectFirstN; i < sweepEnd; i++) {
|
||||
const msg = messages[i];
|
||||
if (!isToolResultMessage(msg)) continue;
|
||||
if (!msg.isError) continue;
|
||||
|
||||
const toolName: string = msg.toolName ?? "";
|
||||
if (protectedTools.has(toolName)) continue;
|
||||
if (getToolOutputChars(msg) < AUTO_COMPRESS_CONFIG.minToolOutputPruneChars) continue;
|
||||
|
||||
const record = state.toolCalls.get(msg.toolCallId || msg.tool_call_id);
|
||||
const id = getToolResultId(msg);
|
||||
const record = state.toolCalls.get(id || "");
|
||||
if (!record) continue;
|
||||
|
||||
if (state.currentTurn - record.turnIndex >= turnsThreshold) {
|
||||
state.prunedToolIds.add(msg.toolCallId || msg.tool_call_id);
|
||||
state.totalPruneCount++;
|
||||
markToolPruned(state, id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function applyToolOutputPruning(messages: any[], state: DcpState): void {
|
||||
for (const msg of messages) {
|
||||
if (msg.role !== "toolResult") continue;
|
||||
if (!state.prunedToolIds.has(msg.toolCallId || msg.tool_call_id)) continue;
|
||||
/**
 * Marks old, large tool outputs as pruned so a later step can tombstone them.
 *
 * Scans `messages[protectFirstN .. sweepEnd)` and marks every tool result that
 *  - does not belong to a protected tool (the always-protected set plus the
 *    `protectedTools` lists of both the deduplication and purge-errors
 *    strategies), and
 *  - has at least `minToolOutputPruneChars` characters of text.
 *
 * Only `state` is updated here; message content is left untouched.
 *
 * @param sweepEnd Exclusive end index of the scan. NOTE(review): presumably
 *   the start of the protected tail — confirm against the caller.
 */
function applyOldToolOutputSweeping(messages: any[], state: DcpState, config: DcpConfig, sweepEnd: number): void {
  const exemptTools = new Set([
    ...ALWAYS_PROTECTED_TOOLS,
    ...(config.strategies.deduplication.protectedTools ?? []),
    ...(config.strategies.purgeErrors.protectedTools ?? []),
  ]);

  for (let index = AUTO_COMPRESS_CONFIG.protectFirstN; index < sweepEnd; index += 1) {
    const candidate = messages[index];

    const sweepable =
      isToolResultMessage(candidate) &&
      !exemptTools.has(candidate.toolName ?? "") &&
      getToolOutputChars(candidate) >= AUTO_COMPRESS_CONFIG.minToolOutputPruneChars;

    if (sweepable) {
      markToolPruned(state, getToolResultId(candidate));
    }
  }
}
|
||||
|
||||
/**
 * Replaces the content of pruned tool results with a short text placeholder.
 *
 * Walks indices `AUTO_COMPRESS_CONFIG.protectFirstN` (inclusive) up to `sweepEnd`
 * (exclusive) and, for every tool-result message whose id is in
 * `state.prunedToolIds`, overwrites `msg.content` in place. The message itself
 * stays in the array; only its content changes.
 *
 * @param messages - Conversation messages; matching entries are mutated in place.
 * @param state - Provides the `prunedToolIds` set that selects which results to replace.
 * @param sweepEnd - Exclusive upper index bound of the pass.
 */
function applyToolOutputPruning(messages: any[], state: DcpState, sweepEnd: number): void {
|
||||
for (let i = AUTO_COMPRESS_CONFIG.protectFirstN; i < sweepEnd; i++) {
|
||||
const msg = messages[i];
|
||||
if (!isToolResultMessage(msg)) continue;
|
||||
const id = getToolResultId(msg);
|
||||
// A missing id is looked up as "" in the pruned set.
if (!state.prunedToolIds.has(id || "")) continue;
|
||||
// Measure the output before it is overwritten so the placeholder can report its size.
const chars = getToolOutputChars(msg);
|
||||
const toolName = msg.toolName || "unknown";
|
||||
if (msg.isError) {
|
||||
// NOTE(review): this assignment is overwritten by the very next statement;
// it looks like the pre-change line left over from the diff — confirm and drop.
msg.content = [{ type: "text", text: "[Error output removed - tool failed more than N turns ago]" }];
|
||||
// Effective placeholder for failed tool calls: tool name, id and removed character count.
msg.content = [{ type: "text", text: `[Tool output swept: ${toolName} ${id || ""}, error output, ${chars.toLocaleString()} chars removed]` }];
|
||||
} else {
|
||||
// NOTE(review): this assignment is also overwritten by the next statement;
// likely the pre-change line from the diff — confirm and drop.
msg.content = [{ type: "text", text: "[Output removed to save context - information superseded or no longer needed]" }];
|
||||
// Effective placeholder for successful tool calls.
msg.content = [{ type: "text", text: `[Tool output swept: ${toolName} ${id || ""}, ${chars.toLocaleString()} chars removed]` }];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -343,8 +504,7 @@ export async function applyPruning(
|
||||
messages: any[],
|
||||
state: DcpState,
|
||||
config: DcpConfig,
|
||||
model: any
|
||||
): Promise<any[]> {
|
||||
): Promise<ApplyPruningResult> {
|
||||
const msgs = messages.map((m: any) => {
|
||||
const clone = { ...m };
|
||||
if (Array.isArray(clone.content)) {
|
||||
@@ -357,101 +517,15 @@ export async function applyPruning(
|
||||
|
||||
state.currentTurn = msgs.filter((m) => m.role === "user").length;
|
||||
|
||||
applyDeduplication(msgs, state, config);
|
||||
applyErrorPurging(msgs, state, config);
|
||||
applyToolOutputPruning(msgs, state);
|
||||
|
||||
const totalTokens = estimateMessagesTokens(msgs);
|
||||
const contextLength = (config as any).contextLength || 128000;
|
||||
const thresholdTokens = Math.max(
|
||||
Math.floor(contextLength * AUTO_COMPRESS_CONFIG.thresholdPercent),
|
||||
AUTO_COMPRESS_CONFIG.minimumContextLength
|
||||
const protectedTailStart = Math.max(
|
||||
AUTO_COMPRESS_CONFIG.protectFirstN,
|
||||
msgs.length - AUTO_COMPRESS_CONFIG.protectLastN,
|
||||
);
|
||||
|
||||
if (
|
||||
state.forceCompressNext ||
|
||||
(totalTokens >= thresholdTokens && msgs.length > AUTO_COMPRESS_CONFIG.protectFirstN + 4)
|
||||
) {
|
||||
const wasForced = state.forceCompressNext;
|
||||
state.forceCompressNext = false;
|
||||
|
||||
let tailBudget = Math.floor(thresholdTokens * AUTO_COMPRESS_CONFIG.summaryTargetRatio);
|
||||
if (wasForced) {
|
||||
// Force compression: use a tiny tail budget to ensure we summarize almost everything
|
||||
tailBudget = Math.max(100, Math.floor(totalTokens * 0.05));
|
||||
}
|
||||
applyOldToolOutputSweeping(msgs, state, config, protectedTailStart);
|
||||
applyDeduplication(msgs, state, config, protectedTailStart);
|
||||
applyErrorPurging(msgs, state, config, protectedTailStart);
|
||||
applyToolOutputPruning(msgs, state, protectedTailStart);
|
||||
|
||||
const compressStart = alignBoundaryForward(msgs, AUTO_COMPRESS_CONFIG.protectFirstN);
|
||||
const compressEnd = findTailCutByTokens(msgs, compressStart, tailBudget);
|
||||
|
||||
// If forced, we MUST compress something if we have any messages after protectFirstN
|
||||
let finalCompressEnd = compressEnd;
|
||||
if (wasForced && finalCompressEnd <= compressStart && msgs.length > compressStart + 1) {
|
||||
finalCompressEnd = msgs.length - 1;
|
||||
}
|
||||
|
||||
if (compressStart < finalCompressEnd) {
|
||||
const middle = msgs.slice(compressStart, finalCompressEnd);
|
||||
const summary = await generateSummary(middle, state.previousSummary, null, model);
|
||||
|
||||
if (summary) {
|
||||
const compressed: any[] = [];
|
||||
|
||||
for (let i = 0; i < compressStart; i++) {
|
||||
compressed.push(msgs[i]);
|
||||
}
|
||||
|
||||
const lastHeadRole = msgs[compressStart - 1]?.role || "user";
|
||||
const firstTailRole = msgs[finalCompressEnd]?.role || "user";
|
||||
|
||||
let summaryRole = lastHeadRole === "assistant" ? "user" : "assistant";
|
||||
if (summaryRole === firstTailRole) {
|
||||
const flipped = summaryRole === "user" ? "assistant" : "user";
|
||||
if (flipped !== lastHeadRole) {
|
||||
summaryRole = flipped;
|
||||
} else {
|
||||
const tailMsg = { ...msgs[finalCompressEnd], timestamp: msgs[finalCompressEnd].timestamp || Date.now() };
|
||||
const originalContent = tailMsg.content || "";
|
||||
tailMsg.content =
|
||||
"## Goal\n" + summary + "\n\n--- END OF CONTEXT SUMMARY ---\n\n" +
|
||||
(typeof originalContent === "string" ? originalContent : "");
|
||||
compressed.push(tailMsg);
|
||||
for (let i = finalCompressEnd + 1; i < msgs.length; i++) {
|
||||
compressed.push(msgs[i]);
|
||||
}
|
||||
state.previousSummary = summary;
|
||||
state.compressionCount++;
|
||||
state.tokensSaved += totalTokens - estimateMessagesTokens(compressed);
|
||||
return sanitizeToolPairs(compressed);
|
||||
}
|
||||
}
|
||||
|
||||
const prefix =
|
||||
"[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted into the summary below. " +
|
||||
"This is a handoff from a previous context window — treat it as background reference, " +
|
||||
"NOT as active instructions. Do NOT answer questions or fulfill requests mentioned in this summary; " +
|
||||
"they were already addressed. Respond ONLY to the latest user message that appears AFTER this summary:";
|
||||
|
||||
compressed.push({
|
||||
role: summaryRole,
|
||||
content: prefix + "\n\n" + summary,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
for (let i = finalCompressEnd; i < msgs.length; i++) {
|
||||
compressed.push(msgs[i]);
|
||||
}
|
||||
|
||||
state.previousSummary = summary;
|
||||
state.compressionCount++;
|
||||
state.tokensSaved += totalTokens - estimateMessagesTokens(compressed);
|
||||
return sanitizeToolPairs(compressed);
|
||||
}
|
||||
} else {
|
||||
if (config.debug || wasForced) {
|
||||
console.log(`[ACP] Compression skipped: conversation too short (start: ${compressStart}, end: ${finalCompressEnd})`);
|
||||
}
|
||||
}
|
||||
}
|
||||
return sanitizeToolPairs(msgs);
|
||||
return { messages: sanitizeToolPairs(msgs) };
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ export interface DcpState {
|
||||
previousSummary: string | null
|
||||
compressionCount: number
|
||||
forceCompressNext?: boolean
|
||||
lastCompressionStatus?: string | null
|
||||
}
|
||||
|
||||
export function createState(): DcpState {
|
||||
@@ -30,6 +31,7 @@ export function createState(): DcpState {
|
||||
previousSummary: null,
|
||||
compressionCount: 0,
|
||||
forceCompressNext: false,
|
||||
lastCompressionStatus: null,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +44,7 @@ export function resetState(state: DcpState): void {
|
||||
state.previousSummary = null
|
||||
state.compressionCount = 0
|
||||
state.forceCompressNext = false
|
||||
state.lastCompressionStatus = null
|
||||
}
|
||||
|
||||
function sortObjectKeys(value: unknown): unknown {
|
||||
|
||||
Reference in New Issue
Block a user