fix: preserve hermes head and tail layout

This commit is contained in:
wassname
2026-04-23 20:51:13 +08:00
parent e16c804d44
commit 3f3a90b22b
4 changed files with 168 additions and 12 deletions
@@ -4,7 +4,7 @@
Prevent pi-auto-compressor from removing valid Pi tool results from the model context. Prevent pi-auto-compressor from removing valid Pi tool results from the model context.
## Scope ## Scope
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status. In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status, Hermes head+middle+tail request layout.
Out: Changing compression thresholds or persistent session storage. Out: Changing compression thresholds or persistent session storage.
## Requirements ## Requirements
@@ -13,6 +13,7 @@ Out: Changing compression thresholds or persistent session storage.
- R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes. - R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes.
- R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker. - R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker.
- R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept. - R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept.
- R6: Manual Hermes compaction is actual middle compaction. Done means: Pi's natural compaction cut still chooses the tail, but the extension keeps raw head available and the context hook sends `head + summary + tail` to the model. VERIFY: focused script shows a `compactionSummary` moves between raw head and raw tail while middle raw messages are removed.
## Tasks ## Tasks
- [x] T1 (R1-R3): Patch `pruner.ts`. - [x] T1 (R1-R3): Patch `pruner.ts`.
@@ -35,6 +36,12 @@ Out: Changing compression thresholds or persistent session storage.
- success: old output prints a tombstone; tail output length remains 250. - success: old output prints a tombstone; tail output length remains 250.
- likely_fail: recent tool output is swept. - likely_fail: recent tool output is swept.
- sneaky_fail: swept tool result is deleted instead of tombstoned. - sneaky_fail: swept tool result is deleted instead of tombstoned.
- [x] T5 (R6): Shape compacted request context as Hermes middle.
- steps: store compaction layout metadata, expand `firstKeptEntryId` to the first context entry, reshape `context` messages to head/summary/tail before pruning.
- verify: `npx tsc --noEmit`; focused script calling `applyHermesMiddleLayout()`.
- success: output roles are `user, user, compactionSummary, user, user` for a 6-message compacted prefix with head=2/tail=2, followed by any messages added after the compacted prefix.
- likely_fail: summary stays first, proving plain Pi `summary + tail`.
- sneaky_fail: middle is summarized but new post-compaction messages are dropped; script includes extra messages after compacted prefix to catch this.
## Log ## Log
- Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape. - Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape.
@@ -44,3 +51,5 @@ Out: Changing compression thresholds or persistent session storage.
- The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction. - The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction.
- Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250. - Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250.
- Subagent review found and fixes addressed: reject truncated/error summary responses, avoid tombstoning currently protected-tail messages even when ID was previously marked, and wrap compaction hook with cancel-on-error. - Subagent review found and fixes addressed: reject truncated/error summary responses, avoid tombstoning currently protected-tail messages even when ID was previously marked, and wrap compaction hook with cancel-on-error.
- Pi `CompactionResult` only persists `summary`, `firstKeptEntryId`, and `tokensBefore`. To get Hermes `head + middle summary + tail`, use Pi's natural `preparation.firstKeptEntryId` only to count the tail, persist layout metadata in `details`, set the actual saved `firstKeptEntryId` to the first context entry, and reshape the model request in the `context` hook.
- Hermes layout script output: `head-1|head-2|summary|tail-1|tail-2|after-new` and message count `6`.
+105 -11
View File
@@ -4,8 +4,44 @@ import {
createState, createState,
resetState, resetState,
createInputFingerprint, createInputFingerprint,
type HermesMiddleLayout,
} from "./state.js" } from "./state.js"
import { applyPruning, generateHermesSummary } from "./pruner.js" import {
applyHermesMiddleLayout,
applyPruning,
estimateMessagesTokens,
generateHermesSummary,
} from "./pruner.js"
import { AUTO_COMPRESS_CONFIG } from "./config.js"
/**
 * Reports whether a session entry counts as a context entry.
 *
 * `message` and `custom_message` entries always qualify. A `branch_summary`
 * entry qualifies only when its `summary` is truthy. Anything else, including
 * `null`/`undefined`, does not.
 *
 * @param entry - Session entry to classify (may be nullish).
 * @returns `true` when the entry is counted as context.
 */
function isContextEntry(entry: any): boolean {
  switch (entry?.type) {
    case "message":
    case "custom_message":
      return true
    case "branch_summary":
      // An empty or missing summary does not count as context.
      return Boolean(entry.summary)
    default:
      return false
  }
}
/**
 * Picks the id of the first context entry in `entries`.
 *
 * Falls back to the id of the very first entry when no context entry is found
 * (or the one found has no id), and to `null` when neither yields an id.
 *
 * @param entries - Session entries in order.
 * @returns The chosen entry id, or `null` when none is available.
 */
function findFirstContextEntryId(entries: any[]): string | null {
  const firstContext = entries.find((candidate) => isContextEntry(candidate))
  const fallback = entries[0]
  return firstContext?.id ?? fallback?.id ?? null
}
/**
 * Counts how many of the given entries are context entries.
 *
 * @param entries - Session entries to inspect.
 * @returns Number of entries accepted by `isContextEntry`.
 */
function countContextEntries(entries: any[]): number {
  let total = 0
  for (const entry of entries) {
    if (isContextEntry(entry)) total += 1
  }
  return total
}
/**
 * Counts context entries from the entry with id `firstEntryId` (inclusive)
 * through the end of `entries`.
 *
 * @param entries - Session entries in order.
 * @param firstEntryId - Id of the entry at which counting starts.
 * @returns The context-entry count, or `0` when no entry has that id.
 */
function countContextEntriesFrom(entries: any[], firstEntryId: string): number {
  const startIndex = entries.findIndex((candidate) => candidate.id === firstEntryId)
  return startIndex === -1 ? 0 : countContextEntries(entries.slice(startIndex))
}
/**
 * Reads the Hermes layout stored on the most recent `compaction` entry.
 *
 * Only the newest compaction entry is consulted: if its `details` are not
 * tagged `kind: "hermes-middle"`, the result is `null` even when an older
 * compaction entry carries a Hermes layout.
 *
 * @param entries - Session entries, oldest first.
 * @returns The layout from the newest compaction entry, or `null`.
 */
function latestHermesLayout(entries: any[]): HermesMiddleLayout | null {
  let idx = entries.length
  while (idx-- > 0) {
    const candidate = entries[idx]
    if (candidate?.type === "compaction") {
      const payload = candidate.details
      if (payload?.kind === "hermes-middle") return payload as HermesMiddleLayout
      // Newest compaction is not a Hermes one: stop, do not look further back.
      return null
    }
  }
  return null
}
export default function (pi: ExtensionAPI) { export default function (pi: ExtensionAPI) {
const config = loadConfig(process.cwd()) const config = loadConfig(process.cwd())
@@ -58,7 +94,8 @@ export default function (pi: ExtensionAPI) {
pi.on("session_start", async (event, ctx) => { pi.on("session_start", async (event, ctx) => {
resetState(state) resetState(state)
for (const entry of ctx.sessionManager.getBranch()) { const branch = ctx.sessionManager.getBranch()
for (const entry of branch) {
if (entry.type === "custom" && entry.customType === "dcp-state") { if (entry.type === "custom" && entry.customType === "dcp-state") {
const data = entry.data as any const data = entry.data as any
if (data?.previousSummary) state.previousSummary = data.previousSummary if (data?.previousSummary) state.previousSummary = data.previousSummary
@@ -66,8 +103,10 @@ export default function (pi: ExtensionAPI) {
if (data?.tokensSaved) state.tokensSaved = data.tokensSaved if (data?.tokensSaved) state.tokensSaved = data.tokensSaved
if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds) if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds)
if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus
if (data?.activeHermesLayout) state.activeHermesLayout = data.activeHermesLayout
} }
} }
state.activeHermesLayout = latestHermesLayout(branch)
}) })
pi.on("session_shutdown", async (_event, _ctx) => { pi.on("session_shutdown", async (_event, _ctx) => {
@@ -77,6 +116,7 @@ export default function (pi: ExtensionAPI) {
tokensSaved: state.tokensSaved, tokensSaved: state.tokensSaved,
prunedToolIds: Array.from(state.prunedToolIds), prunedToolIds: Array.from(state.prunedToolIds),
lastCompressionStatus: state.lastCompressionStatus, lastCompressionStatus: state.lastCompressionStatus,
activeHermesLayout: state.activeHermesLayout,
}) })
}) })
@@ -99,13 +139,45 @@ export default function (pi: ExtensionAPI) {
} }
const { preparation, signal } = event const { preparation, signal } = event
const messagesToSummarize = [ const firstContextEntryId = findFirstContextEntryId(event.branchEntries)
if (!firstContextEntryId) {
const message = "Hermes compaction cancelled: no context entries to keep."
state.lastCompressionStatus = message
ctx.ui.notify(message, "warning")
return { cancel: true }
}
const compactedMessageCount = countContextEntries(event.branchEntries)
const tailMessageCount = countContextEntriesFrom(
event.branchEntries,
preparation.firstKeptEntryId,
)
const headMessageCount = Math.min(
AUTO_COMPRESS_CONFIG.protectFirstN,
compactedMessageCount,
)
const allMessagesBeforeTail = [
...preparation.messagesToSummarize, ...preparation.messagesToSummarize,
...preparation.turnPrefixMessages, ...preparation.turnPrefixMessages,
] ]
const messagesToSummarize = allMessagesBeforeTail.slice(headMessageCount)
if (tailMessageCount <= 0) {
const message = "Hermes compaction cancelled: Pi did not identify a tail to keep."
state.lastCompressionStatus = message
ctx.ui.notify(message, "warning")
return { cancel: true }
}
if (messagesToSummarize.length <= 0) {
const message = "Hermes compaction cancelled: no middle messages to summarize."
state.lastCompressionStatus = message
ctx.ui.notify(message, "warning")
return { cancel: true }
}
ctx.ui.notify( ctx.ui.notify(
`Hermes compaction: summarizing ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)...`, `Hermes compaction: keeping ${headMessageCount} head messages and ${tailMessageCount} Pi-tail messages; summarizing ${messagesToSummarize.length} middle messages...`,
"info", "info",
) )
@@ -132,16 +204,32 @@ export default function (pi: ExtensionAPI) {
} }
state.previousSummary = result.summary state.previousSummary = result.summary
const summaryTokens = estimateMessagesTokens([
{ role: "user", content: [{ type: "text", text: result.summary }] },
])
const estimatedTokensSaved = estimateMessagesTokens(messagesToSummarize) - summaryTokens
const estimatedTokensAfter = preparation.tokensBefore - estimatedTokensSaved
const layout: HermesMiddleLayout = {
kind: "hermes-middle",
headMessageCount,
tailMessageCount,
compactedMessageCount,
originalFirstKeptEntryId: preparation.firstKeptEntryId,
expandedFirstKeptEntryId: firstContextEntryId,
estimatedTokensAfter,
estimatedTokensSaved,
}
state.activeHermesLayout = layout
state.lastCompressionStatus = state.lastCompressionStatus =
`Hermes compaction ready: summarized ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)` `Hermes compaction ready: head ${headMessageCount}, middle ${messagesToSummarize.length}, tail ${tailMessageCount}, estimated saved ~${estimatedTokensSaved.toLocaleString()} tokens`
return { return {
compaction: { compaction: {
summary: result.summary, summary: result.summary,
firstKeptEntryId: preparation.firstKeptEntryId, firstKeptEntryId: firstContextEntryId,
tokensBefore: preparation.tokensBefore, tokensBefore: preparation.tokensBefore,
details: { details: {
kind: "hermes-middle", ...layout,
sweptToolOutputs: state.prunedToolIds.size, sweptToolOutputs: state.prunedToolIds.size,
}, },
}, },
@@ -161,17 +249,23 @@ export default function (pi: ExtensionAPI) {
pi.on("session_compact", async (event, ctx) => { pi.on("session_compact", async (event, ctx) => {
if (event.fromExtension) { if (event.fromExtension) {
state.compressionCount++ state.compressionCount++
if (state.activeHermesLayout) {
state.tokensSaved += state.activeHermesLayout.estimatedTokensSaved
}
state.lastCompressionStatus = "Hermes compaction completed" state.lastCompressionStatus = "Hermes compaction completed"
if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info") if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info")
} }
}) })
pi.on("context", async (event, ctx) => { pi.on("context", async (event, ctx) => {
const result = await applyPruning(event.messages, state, config) const layoutResult = applyHermesMiddleLayout(event.messages, state)
if (result.outcome) { const pruneResult = await applyPruning(layoutResult.messages, state, config)
state.lastCompressionStatus = result.outcome.message if (pruneResult.outcome) {
state.lastCompressionStatus = pruneResult.outcome.message
} else if (layoutResult.outcome) {
state.lastCompressionStatus = layoutResult.outcome.message
} }
return { messages: result.messages } return { messages: pruneResult.messages }
}) })
pi.registerCommand("acp", { pi.registerCommand("acp", {
+39
View File
@@ -149,6 +149,45 @@ export function estimateMessagesTokens(messages: any[]): number {
return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0); return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
} }
/**
 * Reshapes a request's messages into `head + compaction summary + tail`.
 *
 * The first message whose role is `compactionSummary` is lifted out of the
 * list and re-inserted between the leading `headMessageCount` raw messages and
 * everything from the start of the tail onward. Raw messages between head and
 * tail are dropped. Because the tail slice runs to the end of the array,
 * messages after the compacted prefix are kept.
 *
 * The input is returned untouched (with no outcome) when the state has no
 * active Hermes layout or when no `compactionSummary` message is present.
 *
 * @param messages - Messages about to be sent to the model.
 * @param state - Extension state; only `activeHermesLayout` is read.
 * @returns The reshaped messages plus an outcome describing the message-count
 *   change and the estimated token delta.
 */
export function applyHermesMiddleLayout(
  messages: any[],
  state: DcpState,
): ApplyPruningResult {
  const activeLayout = state.activeHermesLayout;
  if (!activeLayout) return { messages };

  const summaryPosition = messages.findIndex(
    (candidate) => candidate?.role === "compactionSummary",
  );
  if (summaryPosition === -1) return { messages };

  const summaryMessage = messages[summaryPosition];
  const withoutSummary = messages.filter((_msg, position) => position !== summaryPosition);

  // Clamp the recorded counts to what is actually present in this request.
  const { headMessageCount, tailMessageCount } = activeLayout;
  const prefixLength = Math.min(activeLayout.compactedMessageCount, withoutSummary.length);
  const headBoundary = Math.min(headMessageCount, prefixLength);
  // The tail may never start inside the head, so head and tail cannot overlap.
  const tailBoundary = Math.max(headBoundary, prefixLength - tailMessageCount);

  const head = withoutSummary.slice(0, headBoundary);
  const tailAndLater = withoutSummary.slice(tailBoundary);
  const reshaped = [...head, summaryMessage, ...tailAndLater];

  const tokensSaved = estimateMessagesTokens(messages) - estimateMessagesTokens(reshaped);
  let delta: string;
  if (tokensSaved >= 0) {
    delta = `saved ~${tokensSaved.toLocaleString()} tokens`;
  } else {
    delta = `added ~${Math.abs(tokensSaved).toLocaleString()} tokens`;
  }

  return {
    messages: reshaped,
    outcome: {
      kind: "compressed",
      message: `Hermes middle layout: ${messages.length} -> ${reshaped.length} messages, ${delta}`,
      tokensSaved,
    },
  };
}
function alignBoundaryForward(messages: any[], idx: number): number { function alignBoundaryForward(messages: any[], idx: number): number {
while (idx < messages.length && isToolResultMessage(messages[idx])) { while (idx < messages.length && isToolResultMessage(messages[idx])) {
idx++; idx++;
+14
View File
@@ -19,6 +19,18 @@ export interface DcpState {
compressionCount: number compressionCount: number
forceCompressNext?: boolean forceCompressNext?: boolean
lastCompressionStatus?: string | null lastCompressionStatus?: string | null
activeHermesLayout?: HermesMiddleLayout | null
}
/**
 * Layout metadata recorded for a Hermes "head + summary + tail" compaction.
 * It is stored in the compaction result's `details` and kept on
 * `DcpState.activeHermesLayout` so the request context can be reshaped later.
 */
export interface HermesMiddleLayout {
/** Discriminator tag identifying this details payload as a Hermes layout. */
kind: "hermes-middle"
/** Number of leading raw messages kept ahead of the summary. */
headMessageCount: number
/** Number of trailing messages of the compacted prefix kept after the summary. */
tailMessageCount: number
/** Number of context entries that made up the compacted prefix. */
compactedMessageCount: number
/** The `firstKeptEntryId` Pi originally proposed for the compaction. */
originalFirstKeptEntryId: string
/** The `firstKeptEntryId` actually saved with the compaction result. */
expandedFirstKeptEntryId: string
/** Estimated token count after compaction (tokens before minus estimated savings). */
estimatedTokensAfter: number
/** Estimated tokens saved: summarized-message estimate minus summary estimate. */
estimatedTokensSaved: number
} }
export function createState(): DcpState { export function createState(): DcpState {
@@ -32,6 +44,7 @@ export function createState(): DcpState {
compressionCount: 0, compressionCount: 0,
forceCompressNext: false, forceCompressNext: false,
lastCompressionStatus: null, lastCompressionStatus: null,
activeHermesLayout: null,
} }
} }
@@ -45,6 +58,7 @@ export function resetState(state: DcpState): void {
state.compressionCount = 0 state.compressionCount = 0
state.forceCompressNext = false state.forceCompressNext = false
state.lastCompressionStatus = null state.lastCompressionStatus = null
state.activeHermesLayout = null
} }
function sortObjectKeys(value: unknown): unknown { function sortObjectKeys(value: unknown): unknown {