From 3f3a90b22b9db7cd336669c04fd3e12febe5ea7b Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Thu, 23 Apr 2026 20:51:13 +0800 Subject: [PATCH] fix: preserve hermes head and tail layout --- .../2026-04-23_fix-tool-output-pruning.md | 11 +- index.ts | 116 ++++++++++++++++-- pruner.ts | 39 ++++++ state.ts | 14 +++ 4 files changed, 168 insertions(+), 12 deletions(-) diff --git a/docs/spec/2026-04-23_fix-tool-output-pruning.md b/docs/spec/2026-04-23_fix-tool-output-pruning.md index 5b87a67..4ed9804 100644 --- a/docs/spec/2026-04-23_fix-tool-output-pruning.md +++ b/docs/spec/2026-04-23_fix-tool-output-pruning.md @@ -4,7 +4,7 @@ Prevent pi-auto-compressor from removing valid Pi tool results from the model context. ## Scope -In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status. +In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status, Hermes head+middle+tail request layout. Out: Changing compression thresholds or persistent session storage. ## Requirements @@ -13,6 +13,7 @@ Out: Changing compression thresholds or persistent session storage. - R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes. - R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker. - R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept. +- R6: Manual Hermes compaction is actual middle compaction. Done means: Pi's natural compaction cut still chooses the tail, but the extension keeps raw head available and the context hook sends `head + summary + tail` to the model. VERIFY: focused script shows a `compactionSummary` moves between raw head and raw tail while middle raw messages are removed. ## Tasks - [x] T1 (R1-R3): Patch `pruner.ts`. @@ -35,6 +36,12 @@ Out: Changing compression thresholds or persistent session storage. - success: old output prints a tombstone; tail output length remains 250. - likely_fail: recent tool output is swept. - sneaky_fail: swept tool result is deleted instead of tombstoned. +- [x] T5 (R6): Shape compacted request context as Hermes middle. + - steps: store compaction layout metadata, expand `firstKeptEntryId` to the first context entry, reshape `context` messages to head/summary/tail before pruning. + - verify: `npx tsc --noEmit`; focused script calling `applyHermesMiddleLayout()`. + - success: output roles are `user, user, compactionSummary, user, user` for a 6-message compacted prefix with head=2/tail=2. + - likely_fail: summary stays first, proving plain Pi `summary + tail`. + - sneaky_fail: middle is summarized but new post-compaction messages are dropped; script includes extra messages after compacted prefix to catch this. ## Log - Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape. @@ -44,3 +51,5 @@ Out: Changing compression thresholds or persistent session storage. - The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction. - Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250. - Subagent review found and fixes addressed: reject truncated/error summary responses, avoid tombstoning currently protected-tail messages even when ID was previously marked, and wrap compaction hook with cancel-on-error. +- Pi `CompactionResult` only persists `summary`, `firstKeptEntryId`, and `tokensBefore`. To get Hermes `head + middle summary + tail`, use Pi's natural `preparation.firstKeptEntryId` only to count the tail, persist layout metadata in `details`, set the actual saved `firstKeptEntryId` to the first context entry, and reshape the model request in the `context` hook. +- Hermes layout script output: `head-1|head-2|summary|tail-1|tail-2|after-new` and message count `6`. diff --git a/index.ts b/index.ts index 277c9fd..6d0d2cc 100644 --- a/index.ts +++ b/index.ts @@ -4,8 +4,44 @@ import { createState, resetState, createInputFingerprint, + type HermesMiddleLayout, } from "./state.js" -import { applyPruning, generateHermesSummary } from "./pruner.js" +import { + applyHermesMiddleLayout, + applyPruning, + estimateMessagesTokens, + generateHermesSummary, +} from "./pruner.js" +import { AUTO_COMPRESS_CONFIG } from "./config.js" + +function isContextEntry(entry: any): boolean { + if (entry?.type === "message" || entry?.type === "custom_message") return true + return entry?.type === "branch_summary" && Boolean(entry.summary) +} + +function findFirstContextEntryId(entries: any[]): string | null { + return entries.find(isContextEntry)?.id ?? entries[0]?.id ?? null +} + +function countContextEntries(entries: any[]): number { + return entries.filter(isContextEntry).length +} + +function countContextEntriesFrom(entries: any[], firstEntryId: string): number { + const start = entries.findIndex((entry) => entry.id === firstEntryId) + if (start < 0) return 0 + return countContextEntries(entries.slice(start)) +} + +function latestHermesLayout(entries: any[]): HermesMiddleLayout | null { + for (let i = entries.length - 1; i >= 0; i--) { + const entry = entries[i] + if (entry?.type !== "compaction") continue + const details = entry.details + return details?.kind === "hermes-middle" ? details as HermesMiddleLayout : null + } + return null +} export default function (pi: ExtensionAPI) { const config = loadConfig(process.cwd()) @@ -58,7 +94,8 @@ export default function (pi: ExtensionAPI) { pi.on("session_start", async (event, ctx) => { resetState(state) - for (const entry of ctx.sessionManager.getBranch()) { + const branch = ctx.sessionManager.getBranch() + for (const entry of branch) { if (entry.type === "custom" && entry.customType === "dcp-state") { const data = entry.data as any if (data?.previousSummary) state.previousSummary = data.previousSummary @@ -66,8 +103,10 @@ export default function (pi: ExtensionAPI) { if (data?.tokensSaved) state.tokensSaved = data.tokensSaved if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds) if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus + if (data?.activeHermesLayout) state.activeHermesLayout = data.activeHermesLayout } } + state.activeHermesLayout = latestHermesLayout(branch) }) pi.on("session_shutdown", async (_event, _ctx) => { @@ -77,6 +116,7 @@ export default function (pi: ExtensionAPI) { tokensSaved: state.tokensSaved, prunedToolIds: Array.from(state.prunedToolIds), lastCompressionStatus: state.lastCompressionStatus, + activeHermesLayout: state.activeHermesLayout, }) }) @@ -99,13 +139,45 @@ export default function (pi: ExtensionAPI) { } const { preparation, signal } = event - const messagesToSummarize = [ + const firstContextEntryId = findFirstContextEntryId(event.branchEntries) + if (!firstContextEntryId) { + const message = "Hermes compaction cancelled: no context entries to keep." + state.lastCompressionStatus = message + ctx.ui.notify(message, "warning") + return { cancel: true } + } + + const compactedMessageCount = countContextEntries(event.branchEntries) + const tailMessageCount = countContextEntriesFrom( + event.branchEntries, + preparation.firstKeptEntryId, + ) + const headMessageCount = Math.min( + AUTO_COMPRESS_CONFIG.protectFirstN, + compactedMessageCount, + ) + const allMessagesBeforeTail = [ ...preparation.messagesToSummarize, ...preparation.turnPrefixMessages, ] + const messagesToSummarize = allMessagesBeforeTail.slice(headMessageCount) + + if (tailMessageCount <= 0) { + const message = "Hermes compaction cancelled: Pi did not identify a tail to keep." + state.lastCompressionStatus = message + ctx.ui.notify(message, "warning") + return { cancel: true } + } + + if (messagesToSummarize.length <= 0) { + const message = "Hermes compaction cancelled: no middle messages to summarize." + state.lastCompressionStatus = message + ctx.ui.notify(message, "warning") + return { cancel: true } + } ctx.ui.notify( - `Hermes compaction: summarizing ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)...`, + `Hermes compaction: keeping ${headMessageCount} head messages and ${tailMessageCount} Pi-tail messages; summarizing ${messagesToSummarize.length} middle messages...`, "info", ) @@ -132,16 +204,32 @@ export default function (pi: ExtensionAPI) { } state.previousSummary = result.summary + const summaryTokens = estimateMessagesTokens([ + { role: "user", content: [{ type: "text", text: result.summary }] }, + ]) + const estimatedTokensSaved = estimateMessagesTokens(messagesToSummarize) - summaryTokens + const estimatedTokensAfter = preparation.tokensBefore - estimatedTokensSaved + const layout: HermesMiddleLayout = { + kind: "hermes-middle", + headMessageCount, + tailMessageCount, + compactedMessageCount, + originalFirstKeptEntryId: preparation.firstKeptEntryId, + expandedFirstKeptEntryId: firstContextEntryId, + estimatedTokensAfter, + estimatedTokensSaved, + } + state.activeHermesLayout = layout state.lastCompressionStatus = - `Hermes compaction ready: summarized ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)` + `Hermes compaction ready: head ${headMessageCount}, middle ${messagesToSummarize.length}, tail ${tailMessageCount}, estimated saved ~${estimatedTokensSaved.toLocaleString()} tokens` return { compaction: { summary: result.summary, - firstKeptEntryId: preparation.firstKeptEntryId, + firstKeptEntryId: firstContextEntryId, tokensBefore: preparation.tokensBefore, details: { - kind: "hermes-middle", + ...layout, sweptToolOutputs: state.prunedToolIds.size, }, }, @@ -161,17 +249,23 @@ export default function (pi: ExtensionAPI) { pi.on("session_compact", async (event, ctx) => { if (event.fromExtension) { state.compressionCount++ + if (state.activeHermesLayout) { + state.tokensSaved += state.activeHermesLayout.estimatedTokensSaved + } state.lastCompressionStatus = "Hermes compaction completed" if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info") } }) pi.on("context", async (event, ctx) => { - const result = await applyPruning(event.messages, state, config) - if (result.outcome) { - state.lastCompressionStatus = result.outcome.message + const layoutResult = applyHermesMiddleLayout(event.messages, state) + const pruneResult = await applyPruning(layoutResult.messages, state, config) + if (pruneResult.outcome) { + state.lastCompressionStatus = pruneResult.outcome.message + } else if (layoutResult.outcome) { + state.lastCompressionStatus = layoutResult.outcome.message } - return { messages: result.messages } + return { messages: pruneResult.messages } }) pi.registerCommand("acp", { diff --git a/pruner.ts b/pruner.ts index d254708..663a6a9 100644 --- a/pruner.ts +++ b/pruner.ts @@ -149,6 +149,45 @@ export function estimateMessagesTokens(messages: any[]): number { return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0); } +export function applyHermesMiddleLayout( + messages: any[], + state: DcpState, +): ApplyPruningResult { + const layout = state.activeHermesLayout; + if (!layout) return { messages }; + + const summaryIdx = messages.findIndex((msg) => msg?.role === "compactionSummary"); + if (summaryIdx < 0) return { messages }; + + const summaryMessage = messages[summaryIdx]; + const rawMessages = messages.filter((_, idx) => idx !== summaryIdx); + const compactedMessageCount = Math.min(layout.compactedMessageCount, rawMessages.length); + const headEnd = Math.min(layout.headMessageCount, compactedMessageCount); + const tailStart = Math.max(headEnd, compactedMessageCount - layout.tailMessageCount); + + const shapedMessages = [ + ...rawMessages.slice(0, headEnd), + summaryMessage, + ...rawMessages.slice(tailStart), + ]; + + const originalTokens = estimateMessagesTokens(messages); + const shapedTokens = estimateMessagesTokens(shapedMessages); + const saved = originalTokens - shapedTokens; + const delta = saved >= 0 + ? `saved ~${saved.toLocaleString()} tokens` + : `added ~${Math.abs(saved).toLocaleString()} tokens`; + + return { + messages: shapedMessages, + outcome: { + kind: "compressed", + message: `Hermes middle layout: ${messages.length} -> ${shapedMessages.length} messages, ${delta}`, + tokensSaved: saved, + }, + }; +} + function alignBoundaryForward(messages: any[], idx: number): number { while (idx < messages.length && isToolResultMessage(messages[idx])) { idx++; diff --git a/state.ts b/state.ts index 049fbf3..163c34c 100644 --- a/state.ts +++ b/state.ts @@ -19,6 +19,18 @@ export interface DcpState { compressionCount: number forceCompressNext?: boolean lastCompressionStatus?: string | null + activeHermesLayout?: HermesMiddleLayout | null +} + +export interface HermesMiddleLayout { + kind: "hermes-middle" + headMessageCount: number + tailMessageCount: number + compactedMessageCount: number + originalFirstKeptEntryId: string + expandedFirstKeptEntryId: string + estimatedTokensAfter: number + estimatedTokensSaved: number } export function createState(): DcpState { @@ -32,6 +44,7 @@ export function createState(): DcpState { compressionCount: 0, forceCompressNext: false, lastCompressionStatus: null, + activeHermesLayout: null, } } @@ -45,6 +58,7 @@ export function resetState(state: DcpState): void { state.compressionCount = 0 state.forceCompressNext = false state.lastCompressionStatus = null + state.activeHermesLayout = null } function sortObjectKeys(value: unknown): unknown {