fix: preserve hermes head and tail layout

This commit is contained in:
wassname
2026-04-23 20:51:13 +08:00
parent e16c804d44
commit 3f3a90b22b
4 changed files with 168 additions and 12 deletions
@@ -4,7 +4,7 @@
Prevent pi-auto-compressor from removing valid Pi tool results from the model context.
## Scope
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status.
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status, Hermes head+middle+tail request layout.
Out: Changing compression thresholds or persistent session storage.
## Requirements
@@ -13,6 +13,7 @@ Out: Changing compression thresholds or persistent session storage.
- R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes.
- R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker.
- R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept.
- R6: Manual Hermes compaction is actual middle compaction. Done means: Pi's natural compaction cut still chooses the tail, but the extension keeps raw head available and the context hook sends `head + summary + tail` to the model. VERIFY: focused script shows a `compactionSummary` moves between raw head and raw tail while middle raw messages are removed.
## Tasks
- [x] T1 (R1-R3): Patch `pruner.ts`.
@@ -35,6 +36,12 @@ Out: Changing compression thresholds or persistent session storage.
- success: old output prints a tombstone; tail output length remains 250.
- likely_fail: recent tool output is swept.
- sneaky_fail: swept tool result is deleted instead of tombstoned.
- [x] T5 (R6): Shape compacted request context as Hermes middle.
- steps: store compaction layout metadata, expand `firstKeptEntryId` to the first context entry, reshape `context` messages to head/summary/tail before pruning.
- verify: `npx tsc --noEmit`; focused script calling `applyHermesMiddleLayout()`.
- success: output roles are `user, user, compactionSummary, user, user` for a 6-message compacted prefix with head=2/tail=2.
- likely_fail: summary stays first, proving plain Pi `summary + tail`.
- sneaky_fail: middle is summarized but new post-compaction messages are dropped; script includes extra messages after compacted prefix to catch this.
## Log
- Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape.
@@ -44,3 +51,5 @@ Out: Changing compression thresholds or persistent session storage.
- The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction.
- Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250.
- Subagent review findings, now fixed: reject truncated/error summary responses, avoid tombstoning messages that are currently in the protected tail even when their ID was previously marked, and wrap the compaction hook with cancel-on-error.
- Pi `CompactionResult` only persists `summary`, `firstKeptEntryId`, and `tokensBefore`. To get Hermes `head + middle summary + tail`, use Pi's natural `preparation.firstKeptEntryId` only to count the tail, persist layout metadata in `details`, set the actual saved `firstKeptEntryId` to the first context entry, and reshape the model request in the `context` hook.
- Hermes layout script output: `head-1|head-2|summary|tail-1|tail-2|after-new` and message count `6`.
+105 -11
View File
@@ -4,8 +4,44 @@ import {
createState,
resetState,
createInputFingerprint,
type HermesMiddleLayout,
} from "./state.js"
import { applyPruning, generateHermesSummary } from "./pruner.js"
import {
applyHermesMiddleLayout,
applyPruning,
estimateMessagesTokens,
generateHermesSummary,
} from "./pruner.js"
import { AUTO_COMPRESS_CONFIG } from "./config.js"
/**
 * Tell whether a session entry is counted as a context entry.
 *
 * `message` and `custom_message` entries always count; a `branch_summary`
 * entry counts only when its `summary` is truthy. Anything else, including
 * `null`/`undefined`, does not.
 */
function isContextEntry(entry: any): boolean {
  switch (entry?.type) {
    case "message":
    case "custom_message":
      return true
    case "branch_summary":
      return Boolean(entry.summary)
    default:
      return false
  }
}
/**
 * Pick the ID of the first context entry in `entries`.
 *
 * When no context entry exists (or the one found has no ID), the ID of the
 * very first entry is used instead; `null` is returned only when neither
 * yields an ID.
 */
function findFirstContextEntryId(entries: any[]): string | null {
  const firstContextEntry = entries.find(isContextEntry)
  const preferredId = firstContextEntry?.id ?? entries[0]?.id
  return preferredId ?? null
}
/** Count how many of the given entries qualify as context entries. */
function countContextEntries(entries: any[]): number {
  let total = 0
  for (const entry of entries) {
    if (isContextEntry(entry)) total += 1
  }
  return total
}
/**
 * Count the context entries from the entry whose `id` equals `firstEntryId`
 * (inclusive) to the end of `entries`.
 *
 * Returns 0 when no entry carries that ID.
 */
function countContextEntriesFrom(entries: any[], firstEntryId: string): number {
  const startIndex = entries.findIndex((candidate) => candidate.id === firstEntryId)
  return startIndex === -1 ? 0 : countContextEntries(entries.slice(startIndex))
}
/**
 * Read the Hermes layout stored on the most recent `compaction` entry.
 *
 * Only the newest compaction entry is inspected: if its `details.kind` is not
 * `"hermes-middle"` the result is `null`, even when an older compaction entry
 * carries a Hermes layout. `null` is also returned when there is no
 * compaction entry at all.
 */
function latestHermesLayout(entries: any[]): HermesMiddleLayout | null {
  const newestCompaction = entries
    .slice()
    .reverse()
    .find((candidate) => candidate?.type === "compaction")
  if (!newestCompaction) return null

  const { details } = newestCompaction
  if (details?.kind !== "hermes-middle") return null
  return details as HermesMiddleLayout
}
export default function (pi: ExtensionAPI) {
const config = loadConfig(process.cwd())
@@ -58,7 +94,8 @@ export default function (pi: ExtensionAPI) {
pi.on("session_start", async (event, ctx) => {
resetState(state)
for (const entry of ctx.sessionManager.getBranch()) {
const branch = ctx.sessionManager.getBranch()
for (const entry of branch) {
if (entry.type === "custom" && entry.customType === "dcp-state") {
const data = entry.data as any
if (data?.previousSummary) state.previousSummary = data.previousSummary
@@ -66,8 +103,10 @@ export default function (pi: ExtensionAPI) {
if (data?.tokensSaved) state.tokensSaved = data.tokensSaved
if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds)
if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus
if (data?.activeHermesLayout) state.activeHermesLayout = data.activeHermesLayout
}
}
state.activeHermesLayout = latestHermesLayout(branch)
})
pi.on("session_shutdown", async (_event, _ctx) => {
@@ -77,6 +116,7 @@ export default function (pi: ExtensionAPI) {
tokensSaved: state.tokensSaved,
prunedToolIds: Array.from(state.prunedToolIds),
lastCompressionStatus: state.lastCompressionStatus,
activeHermesLayout: state.activeHermesLayout,
})
})
@@ -99,13 +139,45 @@ export default function (pi: ExtensionAPI) {
}
const { preparation, signal } = event
const messagesToSummarize = [
const firstContextEntryId = findFirstContextEntryId(event.branchEntries)
if (!firstContextEntryId) {
const message = "Hermes compaction cancelled: no context entries to keep."
state.lastCompressionStatus = message
ctx.ui.notify(message, "warning")
return { cancel: true }
}
const compactedMessageCount = countContextEntries(event.branchEntries)
const tailMessageCount = countContextEntriesFrom(
event.branchEntries,
preparation.firstKeptEntryId,
)
const headMessageCount = Math.min(
AUTO_COMPRESS_CONFIG.protectFirstN,
compactedMessageCount,
)
const allMessagesBeforeTail = [
...preparation.messagesToSummarize,
...preparation.turnPrefixMessages,
]
const messagesToSummarize = allMessagesBeforeTail.slice(headMessageCount)
if (tailMessageCount <= 0) {
const message = "Hermes compaction cancelled: Pi did not identify a tail to keep."
state.lastCompressionStatus = message
ctx.ui.notify(message, "warning")
return { cancel: true }
}
if (messagesToSummarize.length <= 0) {
const message = "Hermes compaction cancelled: no middle messages to summarize."
state.lastCompressionStatus = message
ctx.ui.notify(message, "warning")
return { cancel: true }
}
ctx.ui.notify(
`Hermes compaction: summarizing ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)...`,
`Hermes compaction: keeping ${headMessageCount} head messages and ${tailMessageCount} Pi-tail messages; summarizing ${messagesToSummarize.length} middle messages...`,
"info",
)
@@ -132,16 +204,32 @@ export default function (pi: ExtensionAPI) {
}
state.previousSummary = result.summary
const summaryTokens = estimateMessagesTokens([
{ role: "user", content: [{ type: "text", text: result.summary }] },
])
const estimatedTokensSaved = estimateMessagesTokens(messagesToSummarize) - summaryTokens
const estimatedTokensAfter = preparation.tokensBefore - estimatedTokensSaved
const layout: HermesMiddleLayout = {
kind: "hermes-middle",
headMessageCount,
tailMessageCount,
compactedMessageCount,
originalFirstKeptEntryId: preparation.firstKeptEntryId,
expandedFirstKeptEntryId: firstContextEntryId,
estimatedTokensAfter,
estimatedTokensSaved,
}
state.activeHermesLayout = layout
state.lastCompressionStatus =
`Hermes compaction ready: summarized ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)`
`Hermes compaction ready: head ${headMessageCount}, middle ${messagesToSummarize.length}, tail ${tailMessageCount}, estimated saved ~${estimatedTokensSaved.toLocaleString()} tokens`
return {
compaction: {
summary: result.summary,
firstKeptEntryId: preparation.firstKeptEntryId,
firstKeptEntryId: firstContextEntryId,
tokensBefore: preparation.tokensBefore,
details: {
kind: "hermes-middle",
...layout,
sweptToolOutputs: state.prunedToolIds.size,
},
},
@@ -161,17 +249,23 @@ export default function (pi: ExtensionAPI) {
pi.on("session_compact", async (event, ctx) => {
if (event.fromExtension) {
state.compressionCount++
if (state.activeHermesLayout) {
state.tokensSaved += state.activeHermesLayout.estimatedTokensSaved
}
state.lastCompressionStatus = "Hermes compaction completed"
if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info")
}
})
pi.on("context", async (event, ctx) => {
const result = await applyPruning(event.messages, state, config)
if (result.outcome) {
state.lastCompressionStatus = result.outcome.message
const layoutResult = applyHermesMiddleLayout(event.messages, state)
const pruneResult = await applyPruning(layoutResult.messages, state, config)
if (pruneResult.outcome) {
state.lastCompressionStatus = pruneResult.outcome.message
} else if (layoutResult.outcome) {
state.lastCompressionStatus = layoutResult.outcome.message
}
return { messages: result.messages }
return { messages: pruneResult.messages }
})
pi.registerCommand("acp", {
+39
View File
@@ -149,6 +149,45 @@ export function estimateMessagesTokens(messages: any[]): number {
return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
}
/**
 * Reshape a compacted request into Hermes `head + summary + tail` order.
 *
 * The first message whose role is `compactionSummary` is lifted out and the
 * remaining messages are treated as the raw history. Using the counts in
 * `state.activeHermesLayout`, the first `headMessageCount` raw messages stay
 * in front of the summary, the raw messages between the head and the tail of
 * the compacted prefix are dropped, and everything from the start of the tail
 * onward (including messages that follow the compacted prefix) is placed
 * after the summary.
 *
 * The input is returned untouched, with no `outcome`, when there is no active
 * layout, no summary message, or a layout count is not a finite non-negative
 * number.
 *
 * @param messages - Request messages; the array is not mutated.
 * @param state - Extension state; only `activeHermesLayout` is read.
 * @returns The reshaped messages plus an outcome describing the estimated
 *   token delta, or the original messages when no reshaping applies.
 */
export function applyHermesMiddleLayout(
  messages: any[],
  state: DcpState,
): ApplyPruningResult {
  const layout = state.activeHermesLayout;
  if (!layout) return { messages };

  const summaryIdx = messages.findIndex((msg) => msg?.role === "compactionSummary");
  if (summaryIdx < 0) return { messages };

  // The layout can originate from persisted session data, so its counts are
  // not trusted blindly: a negative or NaN value would make `slice` count from
  // the end or from zero, which leaks middle messages, duplicates the head, or
  // drops messages that follow the compacted prefix. Skip reshaping instead.
  const isUsableCount = (value: unknown): boolean =>
    typeof value === "number" && Number.isFinite(value) && value >= 0;
  const layoutCounts = [
    layout.headMessageCount,
    layout.tailMessageCount,
    layout.compactedMessageCount,
  ];
  if (!layoutCounts.every(isUsableCount)) return { messages };

  const summaryMessage = messages[summaryIdx];
  const rawMessages = messages.filter((_, idx) => idx !== summaryIdx);

  // Clamp every boundary to what is actually present in this request.
  // Fractions are truncated, matching how `slice` treats its arguments.
  const compactedMessageCount = Math.min(
    Math.trunc(layout.compactedMessageCount),
    rawMessages.length,
  );
  const headEnd = Math.min(Math.trunc(layout.headMessageCount), compactedMessageCount);
  // Never start the tail before the end of the head, so no message is sent twice.
  const tailStart = Math.max(
    headEnd,
    compactedMessageCount - Math.trunc(layout.tailMessageCount),
  );

  const shapedMessages = [
    ...rawMessages.slice(0, headEnd),
    summaryMessage,
    ...rawMessages.slice(tailStart),
  ];

  const originalTokens = estimateMessagesTokens(messages);
  const shapedTokens = estimateMessagesTokens(shapedMessages);
  const saved = originalTokens - shapedTokens;
  const delta = saved >= 0
    ? `saved ~${saved.toLocaleString()} tokens`
    : `added ~${Math.abs(saved).toLocaleString()} tokens`;

  return {
    messages: shapedMessages,
    outcome: {
      kind: "compressed",
      message: `Hermes middle layout: ${messages.length} -> ${shapedMessages.length} messages, ${delta}`,
      tokensSaved: saved,
    },
  };
}
function alignBoundaryForward(messages: any[], idx: number): number {
while (idx < messages.length && isToolResultMessage(messages[idx])) {
idx++;
+14
View File
@@ -19,6 +19,18 @@ export interface DcpState {
compressionCount: number
forceCompressNext?: boolean
lastCompressionStatus?: string | null
activeHermesLayout?: HermesMiddleLayout | null
}
/**
 * Layout metadata recorded for a Hermes "middle" compaction.
 *
 * It is kept in `DcpState.activeHermesLayout` and describes how a compacted
 * request should be split into raw head, summary, and raw tail.
 */
export interface HermesMiddleLayout {
/** Discriminator identifying compaction details written for a Hermes middle compaction. */
kind: "hermes-middle"
/** Number of leading raw messages kept in front of the summary. */
headMessageCount: number
/** Number of raw messages at the end of the compacted prefix kept after the summary. */
tailMessageCount: number
/** Number of raw messages that made up the compacted prefix when the layout was recorded. */
compactedMessageCount: number
/** The `firstKeptEntryId` Pi's own compaction preparation proposed. */
originalFirstKeptEntryId: string
/** The entry ID actually saved as `firstKeptEntryId`, widened so the raw head stays available. */
expandedFirstKeptEntryId: string
/** Estimated token count after compaction (tokens before minus `estimatedTokensSaved`). */
estimatedTokensAfter: number
/** Estimated tokens removed: size of the summarized middle minus the size of the summary. */
estimatedTokensSaved: number
}
export function createState(): DcpState {
@@ -32,6 +44,7 @@ export function createState(): DcpState {
compressionCount: 0,
forceCompressNext: false,
lastCompressionStatus: null,
activeHermesLayout: null,
}
}
@@ -45,6 +58,7 @@ export function resetState(state: DcpState): void {
state.compressionCount = 0
state.forceCompressNext = false
state.lastCompressionStatus = null
state.activeHermesLayout = null
}
function sortObjectKeys(value: unknown): unknown {