mirror of
https://github.com/wassname/pi-auto-compressor.git
synced 2026-06-27 14:00:32 +08:00
fix: preserve hermes head and tail layout
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
Prevent pi-auto-compressor from removing valid Pi tool results from the model context.
|
||||
|
||||
## Scope
|
||||
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status.
|
||||
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status, Hermes head+middle+tail request layout.
|
||||
Out: Changing compression thresholds or persistent session storage.
|
||||
|
||||
## Requirements
|
||||
@@ -13,6 +13,7 @@ Out: Changing compression thresholds or persistent session storage.
|
||||
- R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes.
|
||||
- R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker.
|
||||
- R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept.
|
||||
- R6: Manual Hermes compaction is actual middle compaction. Done means: Pi's natural compaction cut still chooses the tail, but the extension keeps the raw head available and the context hook sends `head + summary + tail` to the model. VERIFY: focused script shows the `compactionSummary` message placed between the raw head and the raw tail, with the raw middle messages removed.
|
||||
|
||||
## Tasks
|
||||
- [x] T1 (R1-R3): Patch `pruner.ts`.
|
||||
@@ -35,6 +36,12 @@ Out: Changing compression thresholds or persistent session storage.
|
||||
- success: old output prints a tombstone; tail output length remains 250.
|
||||
- likely_fail: recent tool output is swept.
|
||||
- sneaky_fail: swept tool result is deleted instead of tombstoned.
|
||||
- [x] T5 (R6): Shape compacted request context as Hermes middle.
|
||||
- steps: store compaction layout metadata, expand `firstKeptEntryId` to the first context entry, reshape `context` messages to head/summary/tail before pruning.
|
||||
- verify: `npx tsc --noEmit`; focused script calling `applyHermesMiddleLayout()`.
|
||||
- success: output roles are `user, user, compactionSummary, user, user` for a 6-message compacted prefix with head=2/tail=2.
|
||||
- likely_fail: summary stays first, proving plain Pi `summary + tail`.
|
||||
- sneaky_fail: middle is summarized but new post-compaction messages are dropped; script includes extra messages after compacted prefix to catch this.
|
||||
|
||||
## Log
|
||||
- Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape.
|
||||
@@ -44,3 +51,5 @@ Out: Changing compression thresholds or persistent session storage.
|
||||
- The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction.
|
||||
- Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250.
|
||||
- Subagent review findings addressed by fixes: reject truncated/error summary responses, avoid tombstoning messages that are currently in the protected tail even when their ID was previously marked, and wrap the compaction hook with cancel-on-error.
|
||||
- Pi `CompactionResult` only persists `summary`, `firstKeptEntryId`, and `tokensBefore`. To get Hermes `head + middle summary + tail`, use Pi's natural `preparation.firstKeptEntryId` only to count the tail, persist layout metadata in `details`, set the actual saved `firstKeptEntryId` to the first context entry, and reshape the model request in the `context` hook.
|
||||
- Hermes layout script output: `head-1|head-2|summary|tail-1|tail-2|after-new` and message count `6`.
|
||||
|
||||
@@ -4,8 +4,44 @@ import {
|
||||
createState,
|
||||
resetState,
|
||||
createInputFingerprint,
|
||||
type HermesMiddleLayout,
|
||||
} from "./state.js"
|
||||
import { applyPruning, generateHermesSummary } from "./pruner.js"
|
||||
import {
|
||||
applyHermesMiddleLayout,
|
||||
applyPruning,
|
||||
estimateMessagesTokens,
|
||||
generateHermesSummary,
|
||||
} from "./pruner.js"
|
||||
import { AUTO_COMPRESS_CONFIG } from "./config.js"
|
||||
|
||||
/**
 * Decides whether a session entry counts as a context entry.
 *
 * `message` and `custom_message` entries always count. A `branch_summary`
 * entry counts only when its `summary` is truthy. Every other entry type,
 * and a nullish entry, does not count.
 *
 * @param entry - Session entry to classify; may be nullish.
 * @returns `true` when the entry counts as a context entry.
 */
function isContextEntry(entry: any): boolean {
  switch (entry?.type) {
    case "message":
    case "custom_message":
      return true
    case "branch_summary":
      // An empty or missing summary does not count.
      return Boolean(entry.summary)
    default:
      return false
  }
}
|
||||
|
||||
/**
 * Picks the entry id to use as the start of the kept context.
 *
 * Prefers the id of the first entry accepted by `isContextEntry`. When that
 * id is missing (no such entry, or the entry has no id), falls back to the
 * id of the very first entry, and finally to `null`.
 *
 * @param entries - Session entries in order.
 * @returns The chosen entry id, or `null` when none is available.
 */
function findFirstContextEntryId(entries: any[]): string | null {
  const firstContextEntry = entries.find((candidate) => isContextEntry(candidate))
  const chosenId = firstContextEntry?.id ?? entries[0]?.id
  return chosenId ?? null
}
|
||||
|
||||
/**
 * Counts how many of the given entries are accepted by `isContextEntry`.
 *
 * @param entries - Session entries to inspect.
 * @returns Number of context entries; `0` for an empty list.
 */
function countContextEntries(entries: any[]): number {
  let total = 0
  for (const candidate of entries) {
    if (isContextEntry(candidate)) total += 1
  }
  return total
}
|
||||
|
||||
/**
 * Counts context entries from the entry with the given id to the end of the list.
 *
 * The entry whose `id` equals `firstEntryId` is included in the count.
 *
 * @param entries - Session entries in order.
 * @param firstEntryId - Id of the entry where counting starts.
 * @returns Number of context entries from that entry onward, or `0` when no
 *   entry has the given id.
 */
function countContextEntriesFrom(entries: any[], firstEntryId: string): number {
  const startIndex = entries.findIndex((candidate) => candidate.id === firstEntryId)
  return startIndex === -1 ? 0 : countContextEntries(entries.slice(startIndex))
}
|
||||
|
||||
/**
 * Reads the layout stored on the most recent compaction entry.
 *
 * Walks the entries from newest to oldest and stops at the first entry whose
 * type is `"compaction"`. Only that entry is considered: if its `details` are
 * not tagged `kind: "hermes-middle"`, the result is `null` and older
 * compaction entries are not consulted.
 *
 * @param entries - Session entries in order (oldest first).
 * @returns The layout from the newest compaction entry, or `null` when there
 *   is no compaction entry or its details are not a Hermes middle layout.
 */
function latestHermesLayout(entries: any[]): HermesMiddleLayout | null {
  let index = entries.length
  while (index-- > 0) {
    const candidate = entries[index]
    if (candidate?.type === "compaction") {
      const details = candidate.details
      if (details?.kind !== "hermes-middle") return null
      return details as HermesMiddleLayout
    }
  }
  return null
}
|
||||
|
||||
export default function (pi: ExtensionAPI) {
|
||||
const config = loadConfig(process.cwd())
|
||||
@@ -58,7 +94,8 @@ export default function (pi: ExtensionAPI) {
|
||||
|
||||
pi.on("session_start", async (event, ctx) => {
|
||||
resetState(state)
|
||||
for (const entry of ctx.sessionManager.getBranch()) {
|
||||
const branch = ctx.sessionManager.getBranch()
|
||||
for (const entry of branch) {
|
||||
if (entry.type === "custom" && entry.customType === "dcp-state") {
|
||||
const data = entry.data as any
|
||||
if (data?.previousSummary) state.previousSummary = data.previousSummary
|
||||
@@ -66,8 +103,10 @@ export default function (pi: ExtensionAPI) {
|
||||
if (data?.tokensSaved) state.tokensSaved = data.tokensSaved
|
||||
if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds)
|
||||
if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus
|
||||
if (data?.activeHermesLayout) state.activeHermesLayout = data.activeHermesLayout
|
||||
}
|
||||
}
|
||||
state.activeHermesLayout = latestHermesLayout(branch)
|
||||
})
|
||||
|
||||
pi.on("session_shutdown", async (_event, _ctx) => {
|
||||
@@ -77,6 +116,7 @@ export default function (pi: ExtensionAPI) {
|
||||
tokensSaved: state.tokensSaved,
|
||||
prunedToolIds: Array.from(state.prunedToolIds),
|
||||
lastCompressionStatus: state.lastCompressionStatus,
|
||||
activeHermesLayout: state.activeHermesLayout,
|
||||
})
|
||||
})
|
||||
|
||||
@@ -99,13 +139,45 @@ export default function (pi: ExtensionAPI) {
|
||||
}
|
||||
|
||||
const { preparation, signal } = event
|
||||
const messagesToSummarize = [
|
||||
const firstContextEntryId = findFirstContextEntryId(event.branchEntries)
|
||||
if (!firstContextEntryId) {
|
||||
const message = "Hermes compaction cancelled: no context entries to keep."
|
||||
state.lastCompressionStatus = message
|
||||
ctx.ui.notify(message, "warning")
|
||||
return { cancel: true }
|
||||
}
|
||||
|
||||
const compactedMessageCount = countContextEntries(event.branchEntries)
|
||||
const tailMessageCount = countContextEntriesFrom(
|
||||
event.branchEntries,
|
||||
preparation.firstKeptEntryId,
|
||||
)
|
||||
const headMessageCount = Math.min(
|
||||
AUTO_COMPRESS_CONFIG.protectFirstN,
|
||||
compactedMessageCount,
|
||||
)
|
||||
const allMessagesBeforeTail = [
|
||||
...preparation.messagesToSummarize,
|
||||
...preparation.turnPrefixMessages,
|
||||
]
|
||||
const messagesToSummarize = allMessagesBeforeTail.slice(headMessageCount)
|
||||
|
||||
if (tailMessageCount <= 0) {
|
||||
const message = "Hermes compaction cancelled: Pi did not identify a tail to keep."
|
||||
state.lastCompressionStatus = message
|
||||
ctx.ui.notify(message, "warning")
|
||||
return { cancel: true }
|
||||
}
|
||||
|
||||
if (messagesToSummarize.length <= 0) {
|
||||
const message = "Hermes compaction cancelled: no middle messages to summarize."
|
||||
state.lastCompressionStatus = message
|
||||
ctx.ui.notify(message, "warning")
|
||||
return { cancel: true }
|
||||
}
|
||||
|
||||
ctx.ui.notify(
|
||||
`Hermes compaction: summarizing ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)...`,
|
||||
`Hermes compaction: keeping ${headMessageCount} head messages and ${tailMessageCount} Pi-tail messages; summarizing ${messagesToSummarize.length} middle messages...`,
|
||||
"info",
|
||||
)
|
||||
|
||||
@@ -132,16 +204,32 @@ export default function (pi: ExtensionAPI) {
|
||||
}
|
||||
|
||||
state.previousSummary = result.summary
|
||||
const summaryTokens = estimateMessagesTokens([
|
||||
{ role: "user", content: [{ type: "text", text: result.summary }] },
|
||||
])
|
||||
const estimatedTokensSaved = estimateMessagesTokens(messagesToSummarize) - summaryTokens
|
||||
const estimatedTokensAfter = preparation.tokensBefore - estimatedTokensSaved
|
||||
const layout: HermesMiddleLayout = {
|
||||
kind: "hermes-middle",
|
||||
headMessageCount,
|
||||
tailMessageCount,
|
||||
compactedMessageCount,
|
||||
originalFirstKeptEntryId: preparation.firstKeptEntryId,
|
||||
expandedFirstKeptEntryId: firstContextEntryId,
|
||||
estimatedTokensAfter,
|
||||
estimatedTokensSaved,
|
||||
}
|
||||
state.activeHermesLayout = layout
|
||||
state.lastCompressionStatus =
|
||||
`Hermes compaction ready: summarized ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)`
|
||||
`Hermes compaction ready: head ${headMessageCount}, middle ${messagesToSummarize.length}, tail ${tailMessageCount}, estimated saved ~${estimatedTokensSaved.toLocaleString()} tokens`
|
||||
|
||||
return {
|
||||
compaction: {
|
||||
summary: result.summary,
|
||||
firstKeptEntryId: preparation.firstKeptEntryId,
|
||||
firstKeptEntryId: firstContextEntryId,
|
||||
tokensBefore: preparation.tokensBefore,
|
||||
details: {
|
||||
kind: "hermes-middle",
|
||||
...layout,
|
||||
sweptToolOutputs: state.prunedToolIds.size,
|
||||
},
|
||||
},
|
||||
@@ -161,17 +249,23 @@ export default function (pi: ExtensionAPI) {
|
||||
pi.on("session_compact", async (event, ctx) => {
|
||||
if (event.fromExtension) {
|
||||
state.compressionCount++
|
||||
if (state.activeHermesLayout) {
|
||||
state.tokensSaved += state.activeHermesLayout.estimatedTokensSaved
|
||||
}
|
||||
state.lastCompressionStatus = "Hermes compaction completed"
|
||||
if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info")
|
||||
}
|
||||
})
|
||||
|
||||
pi.on("context", async (event, ctx) => {
|
||||
const result = await applyPruning(event.messages, state, config)
|
||||
if (result.outcome) {
|
||||
state.lastCompressionStatus = result.outcome.message
|
||||
const layoutResult = applyHermesMiddleLayout(event.messages, state)
|
||||
const pruneResult = await applyPruning(layoutResult.messages, state, config)
|
||||
if (pruneResult.outcome) {
|
||||
state.lastCompressionStatus = pruneResult.outcome.message
|
||||
} else if (layoutResult.outcome) {
|
||||
state.lastCompressionStatus = layoutResult.outcome.message
|
||||
}
|
||||
return { messages: result.messages }
|
||||
return { messages: pruneResult.messages }
|
||||
})
|
||||
|
||||
pi.registerCommand("acp", {
|
||||
|
||||
@@ -149,6 +149,45 @@ export function estimateMessagesTokens(messages: any[]): number {
|
||||
return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
||||
}
|
||||
|
||||
/**
 * Rearranges a request's messages into `head + summary + tail` order.
 *
 * The first message whose role is `"compactionSummary"` is lifted out. The
 * remaining messages are treated as raw messages: the first
 * `headMessageCount` of them stay in front of the summary, the messages
 * between head and tail are dropped, and everything from the tail start
 * onward follows the summary. That includes any messages beyond the
 * compacted prefix. All counts are clamped to the number of raw messages
 * actually present, and the tail never starts before the head ends.
 *
 * @param messages - Messages about to be sent to the model.
 * @param state - Extension state; only `activeHermesLayout` is read.
 * @returns The input messages untouched (and no outcome) when there is no
 *   active layout or no `compactionSummary` message; otherwise the reshaped
 *   messages plus an outcome describing the message-count and token delta.
 */
export function applyHermesMiddleLayout(
  messages: any[],
  state: DcpState,
): ApplyPruningResult {
  const activeLayout = state.activeHermesLayout;
  if (!activeLayout) return { messages };

  const summaryAt = messages.findIndex((candidate) => candidate?.role === "compactionSummary");
  if (summaryAt === -1) return { messages };

  const summaryMessage = messages[summaryAt];
  const raw = messages.filter((_, position) => position !== summaryAt);

  // Clamp the stored counts to what this request actually contains.
  const prefixLength = Math.min(activeLayout.compactedMessageCount, raw.length);
  const headLength = Math.min(activeLayout.headMessageCount, prefixLength);
  // The tail may not reach back into the head, so head and tail never overlap.
  const tailFrom = Math.max(headLength, prefixLength - activeLayout.tailMessageCount);

  const head = raw.slice(0, headLength);
  // Open-ended slice: keeps the tail and every message after the compacted prefix.
  const tailAndNewer = raw.slice(tailFrom);
  const reshaped = [...head, summaryMessage, ...tailAndNewer];

  const tokensSaved = estimateMessagesTokens(messages) - estimateMessagesTokens(reshaped);
  let delta: string;
  if (tokensSaved >= 0) {
    delta = `saved ~${tokensSaved.toLocaleString()} tokens`;
  } else {
    delta = `added ~${Math.abs(tokensSaved).toLocaleString()} tokens`;
  }

  return {
    messages: reshaped,
    outcome: {
      kind: "compressed",
      message: `Hermes middle layout: ${messages.length} -> ${reshaped.length} messages, ${delta}`,
      tokensSaved,
    },
  };
}
|
||||
|
||||
function alignBoundaryForward(messages: any[], idx: number): number {
|
||||
while (idx < messages.length && isToolResultMessage(messages[idx])) {
|
||||
idx++;
|
||||
|
||||
@@ -19,6 +19,18 @@ export interface DcpState {
|
||||
compressionCount: number
|
||||
forceCompressNext?: boolean
|
||||
lastCompressionStatus?: string | null
|
||||
activeHermesLayout?: HermesMiddleLayout | null
|
||||
}
|
||||
|
||||
/**
 * Metadata describing how a compacted session is laid out as
 * `head + summary + tail` when the request context is rebuilt.
 */
export interface HermesMiddleLayout {
  /** Literal tag identifying this layout shape. */
  kind: "hermes-middle"
  /** Number of leading raw messages kept in front of the summary. */
  headMessageCount: number
  /** Number of raw messages at the end of the compacted prefix kept after the summary. */
  tailMessageCount: number
  /** Number of raw messages that made up the compacted prefix. */
  compactedMessageCount: number
  /** The first-kept entry id originally proposed for the compaction. */
  originalFirstKeptEntryId: string
  /** The first-kept entry id actually saved, widened to the first context entry. */
  expandedFirstKeptEntryId: string
  /** Estimated token count after compaction. */
  estimatedTokensAfter: number
  /** Estimated number of tokens saved by compaction. */
  estimatedTokensSaved: number
}
|
||||
|
||||
export function createState(): DcpState {
|
||||
@@ -32,6 +44,7 @@ export function createState(): DcpState {
|
||||
compressionCount: 0,
|
||||
forceCompressNext: false,
|
||||
lastCompressionStatus: null,
|
||||
activeHermesLayout: null,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,6 +58,7 @@ export function resetState(state: DcpState): void {
|
||||
state.compressionCount = 0
|
||||
state.forceCompressNext = false
|
||||
state.lastCompressionStatus = null
|
||||
state.activeHermesLayout = null
|
||||
}
|
||||
|
||||
function sortObjectKeys(value: unknown): unknown {
|
||||
|
||||
Reference in New Issue
Block a user