mirror of
https://github.com/wassname/pi-auto-compressor.git
synced 2026-06-27 15:16:08 +08:00
fix: preserve hermes head and tail layout
This commit is contained in:
@@ -4,7 +4,7 @@
|
|||||||
Prevent pi-auto-compressor from removing valid Pi tool results from the model context.
|
Prevent pi-auto-compressor from removing valid Pi tool results from the model context.
|
||||||
|
|
||||||
## Scope
|
## Scope
|
||||||
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status.
|
In: Pi native assistant tool-call content blocks, tool-result pairing, token estimates, summary serialization, protected-tail tool sweeping, Hermes compaction override status, Hermes head+middle+tail request layout.
|
||||||
Out: Changing compression thresholds or persistent session storage.
|
Out: Changing compression thresholds or persistent session storage.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
@@ -13,6 +13,7 @@ Out: Changing compression thresholds or persistent session storage.
|
|||||||
- R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes.
|
- R3: Keep legacy compatibility. Done means: existing OpenAI-style `tool_calls` fallback still works when present. VERIFY: helper handles both shapes.
|
||||||
- R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker.
|
- R4: Manual Hermes compaction runs through Pi's compaction lifecycle. Done means: `/acp compress` refuses active/queued runs, calls `ctx.compact()`, and `session_before_compact` supplies a custom Hermes `CompactionResult` or cancels. VERIFY: TypeScript compiles and subagent review finds no blocker.
|
||||||
- R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept.
|
- R5: Tool sweeping is simple and tail-safe. Done means: old, large tool outputs outside the protected tail are tombstoned while recent tail outputs are preserved. VERIFY: focused scripts show an old 250-char result is swept and a tail 250-char result is kept.
|
||||||
|
- R6: Manual Hermes compaction is actual middle compaction. Done means: Pi's natural compaction cut still chooses the tail, but the extension keeps raw head available and the context hook sends `head + summary + tail` to the model. VERIFY: focused script shows a `compactionSummary` moves between raw head and raw tail while middle raw messages are removed.
|
||||||
|
|
||||||
## Tasks
|
## Tasks
|
||||||
- [x] T1 (R1-R3): Patch `pruner.ts`.
|
- [x] T1 (R1-R3): Patch `pruner.ts`.
|
||||||
@@ -35,6 +36,12 @@ Out: Changing compression thresholds or persistent session storage.
|
|||||||
- success: old output prints a tombstone; tail output length remains 250.
|
- success: old output prints a tombstone; tail output length remains 250.
|
||||||
- likely_fail: recent tool output is swept.
|
- likely_fail: recent tool output is swept.
|
||||||
- sneaky_fail: swept tool result is deleted instead of tombstoned.
|
- sneaky_fail: swept tool result is deleted instead of tombstoned.
|
||||||
|
- [x] T5 (R6): Shape compacted request context as Hermes middle.
|
||||||
|
- steps: store compaction layout metadata, expand `firstKeptEntryId` to the first context entry, reshape `context` messages to head/summary/tail before pruning.
|
||||||
|
- verify: `npx tsc --noEmit`; focused script calling `applyHermesMiddleLayout()`.
|
||||||
|
- success: output roles are `user, user, compactionSummary, user, user` for a 6-message compacted prefix with head=2/tail=2.
|
||||||
|
- likely_fail: summary stays first, proving plain Pi `summary + tail`.
|
||||||
|
- sneaky_fail: middle is summarized but new post-compaction messages are dropped; script includes extra messages after compacted prefix to catch this.
|
||||||
|
|
||||||
## Log
|
## Log
|
||||||
- Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape.
|
- Pi messages use assistant `content` blocks with `type: "toolCall"`; they do not use `msg.tool_calls` as the primary shape.
|
||||||
@@ -44,3 +51,5 @@ Out: Changing compression thresholds or persistent session storage.
|
|||||||
- The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction.
|
- The compaction override cancels on no model, auth failure, thrown errors, empty summaries, or non-`stop` summary responses; it does not intentionally fall back to Pi default compaction.
|
||||||
- Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250.
|
- Tool sweep script output: old 250-char `bash` result becomes `[Tool output swept: ...]`; protected-tail 250-char result remains length 250.
|
||||||
- Subagent review findings addressed: reject truncated/error summary responses, avoid tombstoning currently protected-tail messages even when the ID was previously marked, and wrap the compaction hook with cancel-on-error.
|
- Subagent review findings addressed: reject truncated/error summary responses, avoid tombstoning currently protected-tail messages even when the ID was previously marked, and wrap the compaction hook with cancel-on-error.
|
||||||
|
- Pi `CompactionResult` only persists `summary`, `firstKeptEntryId`, and `tokensBefore`. To get Hermes `head + middle summary + tail`, use Pi's natural `preparation.firstKeptEntryId` only to count the tail, persist layout metadata in `details`, set the actual saved `firstKeptEntryId` to the first context entry, and reshape the model request in the `context` hook.
|
||||||
|
- Hermes layout script output: `head-1|head-2|summary|tail-1|tail-2|after-new` and message count `6`.
|
||||||
|
|||||||
@@ -4,8 +4,44 @@ import {
|
|||||||
createState,
|
createState,
|
||||||
resetState,
|
resetState,
|
||||||
createInputFingerprint,
|
createInputFingerprint,
|
||||||
|
type HermesMiddleLayout,
|
||||||
} from "./state.js"
|
} from "./state.js"
|
||||||
import { applyPruning, generateHermesSummary } from "./pruner.js"
|
import {
|
||||||
|
applyHermesMiddleLayout,
|
||||||
|
applyPruning,
|
||||||
|
estimateMessagesTokens,
|
||||||
|
generateHermesSummary,
|
||||||
|
} from "./pruner.js"
|
||||||
|
import { AUTO_COMPRESS_CONFIG } from "./config.js"
|
||||||
|
|
||||||
|
/**
 * Tell whether a session entry counts as a context entry for the layout
 * helpers in this file.
 *
 * `message` and `custom_message` entries always count; a `branch_summary`
 * counts only when its `summary` is truthy. Anything else, including a
 * null/undefined entry, does not.
 */
function isContextEntry(entry: any): boolean {
  switch (entry?.type) {
    case "message":
    case "custom_message":
      return true
    case "branch_summary":
      return Boolean(entry.summary)
    default:
      return false
  }
}
|
||||||
|
|
||||||
|
/**
 * Pick the id of the first context entry in `entries`.
 *
 * Falls back to the id of the very first entry when no context entry is
 * found (or the one found has no id), and to `null` when that is missing too.
 */
function findFirstContextEntryId(entries: any[]): string | null {
  const fallbackId = (): string | null => entries[0]?.id ?? null

  for (const candidate of entries) {
    if (!isContextEntry(candidate)) continue
    // Only the first match is considered, exactly like Array.prototype.find.
    return candidate?.id ?? fallbackId()
  }
  return fallbackId()
}
|
||||||
|
|
||||||
|
/**
 * Count how many of the given entries are context entries according to
 * `isContextEntry`.
 */
function countContextEntries(entries: any[]): number {
  let total = 0
  for (const candidate of entries) {
    if (isContextEntry(candidate)) total += 1
  }
  return total
}
|
||||||
|
|
||||||
|
/**
 * Count the context entries from the entry whose id is `firstEntryId`
 * (inclusive) through the end of `entries`.
 *
 * @param entries Session entries to scan, in order.
 * @param firstEntryId Id of the entry where counting starts.
 * @returns The number of context entries from that entry onward, or 0 when
 *   no entry carries the given id.
 */
function countContextEntriesFrom(entries: any[], firstEntryId: string): number {
  // Skip null/undefined slots instead of dereferencing them: the other
  // helpers here already tolerate such entries, so this lookup should not be
  // the one place that throws on them.
  const start = entries.findIndex(
    (entry) => entry != null && entry.id === firstEntryId,
  )
  if (start < 0) return 0
  return countContextEntries(entries.slice(start))
}
|
||||||
|
|
||||||
|
/**
 * Recover the Hermes layout recorded by the most recent compaction entry.
 *
 * Entries are scanned from newest to oldest and only the latest `compaction`
 * entry is consulted: if it is not a usable Hermes layout the result is
 * `null`, older compactions are not considered.
 *
 * @param entries Session entries, oldest first.
 * @returns The layout stored in that entry's `details`, or `null` when there
 *   is no compaction entry, its details are not tagged `"hermes-middle"`, or
 *   the message counts are missing or invalid.
 */
function latestHermesLayout(entries: any[]): HermesMiddleLayout | null {
  const isCount = (value: unknown): value is number =>
    typeof value === "number" && Number.isInteger(value) && value >= 0

  for (let i = entries.length - 1; i >= 0; i--) {
    const entry = entries[i]
    if (entry?.type !== "compaction") continue

    const details = entry.details
    if (details?.kind !== "hermes-middle") return null

    // The kind tag alone is not proof of a full layout: compaction details
    // can carry the same tag without any message counts. Handing such an
    // object out as a layout would feed undefined counts into the reshaping
    // arithmetic, so require the counts to be real non-negative integers.
    const hasCounts =
      isCount(details.headMessageCount) &&
      isCount(details.tailMessageCount) &&
      isCount(details.compactedMessageCount)
    return hasCounts ? (details as HermesMiddleLayout) : null
  }
  return null
}
|
||||||
|
|
||||||
export default function (pi: ExtensionAPI) {
|
export default function (pi: ExtensionAPI) {
|
||||||
const config = loadConfig(process.cwd())
|
const config = loadConfig(process.cwd())
|
||||||
@@ -58,7 +94,8 @@ export default function (pi: ExtensionAPI) {
|
|||||||
|
|
||||||
pi.on("session_start", async (event, ctx) => {
|
pi.on("session_start", async (event, ctx) => {
|
||||||
resetState(state)
|
resetState(state)
|
||||||
for (const entry of ctx.sessionManager.getBranch()) {
|
const branch = ctx.sessionManager.getBranch()
|
||||||
|
for (const entry of branch) {
|
||||||
if (entry.type === "custom" && entry.customType === "dcp-state") {
|
if (entry.type === "custom" && entry.customType === "dcp-state") {
|
||||||
const data = entry.data as any
|
const data = entry.data as any
|
||||||
if (data?.previousSummary) state.previousSummary = data.previousSummary
|
if (data?.previousSummary) state.previousSummary = data.previousSummary
|
||||||
@@ -66,8 +103,10 @@ export default function (pi: ExtensionAPI) {
|
|||||||
if (data?.tokensSaved) state.tokensSaved = data.tokensSaved
|
if (data?.tokensSaved) state.tokensSaved = data.tokensSaved
|
||||||
if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds)
|
if (data?.prunedToolIds) state.prunedToolIds = new Set(data.prunedToolIds)
|
||||||
if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus
|
if (data?.lastCompressionStatus) state.lastCompressionStatus = data.lastCompressionStatus
|
||||||
|
if (data?.activeHermesLayout) state.activeHermesLayout = data.activeHermesLayout
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
state.activeHermesLayout = latestHermesLayout(branch)
|
||||||
})
|
})
|
||||||
|
|
||||||
pi.on("session_shutdown", async (_event, _ctx) => {
|
pi.on("session_shutdown", async (_event, _ctx) => {
|
||||||
@@ -77,6 +116,7 @@ export default function (pi: ExtensionAPI) {
|
|||||||
tokensSaved: state.tokensSaved,
|
tokensSaved: state.tokensSaved,
|
||||||
prunedToolIds: Array.from(state.prunedToolIds),
|
prunedToolIds: Array.from(state.prunedToolIds),
|
||||||
lastCompressionStatus: state.lastCompressionStatus,
|
lastCompressionStatus: state.lastCompressionStatus,
|
||||||
|
activeHermesLayout: state.activeHermesLayout,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -99,13 +139,45 @@ export default function (pi: ExtensionAPI) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const { preparation, signal } = event
|
const { preparation, signal } = event
|
||||||
const messagesToSummarize = [
|
const firstContextEntryId = findFirstContextEntryId(event.branchEntries)
|
||||||
|
if (!firstContextEntryId) {
|
||||||
|
const message = "Hermes compaction cancelled: no context entries to keep."
|
||||||
|
state.lastCompressionStatus = message
|
||||||
|
ctx.ui.notify(message, "warning")
|
||||||
|
return { cancel: true }
|
||||||
|
}
|
||||||
|
|
||||||
|
const compactedMessageCount = countContextEntries(event.branchEntries)
|
||||||
|
const tailMessageCount = countContextEntriesFrom(
|
||||||
|
event.branchEntries,
|
||||||
|
preparation.firstKeptEntryId,
|
||||||
|
)
|
||||||
|
const headMessageCount = Math.min(
|
||||||
|
AUTO_COMPRESS_CONFIG.protectFirstN,
|
||||||
|
compactedMessageCount,
|
||||||
|
)
|
||||||
|
const allMessagesBeforeTail = [
|
||||||
...preparation.messagesToSummarize,
|
...preparation.messagesToSummarize,
|
||||||
...preparation.turnPrefixMessages,
|
...preparation.turnPrefixMessages,
|
||||||
]
|
]
|
||||||
|
const messagesToSummarize = allMessagesBeforeTail.slice(headMessageCount)
|
||||||
|
|
||||||
|
if (tailMessageCount <= 0) {
|
||||||
|
const message = "Hermes compaction cancelled: Pi did not identify a tail to keep."
|
||||||
|
state.lastCompressionStatus = message
|
||||||
|
ctx.ui.notify(message, "warning")
|
||||||
|
return { cancel: true }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (messagesToSummarize.length <= 0) {
|
||||||
|
const message = "Hermes compaction cancelled: no middle messages to summarize."
|
||||||
|
state.lastCompressionStatus = message
|
||||||
|
ctx.ui.notify(message, "warning")
|
||||||
|
return { cancel: true }
|
||||||
|
}
|
||||||
|
|
||||||
ctx.ui.notify(
|
ctx.ui.notify(
|
||||||
`Hermes compaction: summarizing ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)...`,
|
`Hermes compaction: keeping ${headMessageCount} head messages and ${tailMessageCount} Pi-tail messages; summarizing ${messagesToSummarize.length} middle messages...`,
|
||||||
"info",
|
"info",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -132,16 +204,32 @@ export default function (pi: ExtensionAPI) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
state.previousSummary = result.summary
|
state.previousSummary = result.summary
|
||||||
|
const summaryTokens = estimateMessagesTokens([
|
||||||
|
{ role: "user", content: [{ type: "text", text: result.summary }] },
|
||||||
|
])
|
||||||
|
const estimatedTokensSaved = estimateMessagesTokens(messagesToSummarize) - summaryTokens
|
||||||
|
const estimatedTokensAfter = preparation.tokensBefore - estimatedTokensSaved
|
||||||
|
const layout: HermesMiddleLayout = {
|
||||||
|
kind: "hermes-middle",
|
||||||
|
headMessageCount,
|
||||||
|
tailMessageCount,
|
||||||
|
compactedMessageCount,
|
||||||
|
originalFirstKeptEntryId: preparation.firstKeptEntryId,
|
||||||
|
expandedFirstKeptEntryId: firstContextEntryId,
|
||||||
|
estimatedTokensAfter,
|
||||||
|
estimatedTokensSaved,
|
||||||
|
}
|
||||||
|
state.activeHermesLayout = layout
|
||||||
state.lastCompressionStatus =
|
state.lastCompressionStatus =
|
||||||
`Hermes compaction ready: summarized ${messagesToSummarize.length} messages (${preparation.tokensBefore.toLocaleString()} tokens)`
|
`Hermes compaction ready: head ${headMessageCount}, middle ${messagesToSummarize.length}, tail ${tailMessageCount}, estimated saved ~${estimatedTokensSaved.toLocaleString()} tokens`
|
||||||
|
|
||||||
return {
|
return {
|
||||||
compaction: {
|
compaction: {
|
||||||
summary: result.summary,
|
summary: result.summary,
|
||||||
firstKeptEntryId: preparation.firstKeptEntryId,
|
firstKeptEntryId: firstContextEntryId,
|
||||||
tokensBefore: preparation.tokensBefore,
|
tokensBefore: preparation.tokensBefore,
|
||||||
details: {
|
details: {
|
||||||
kind: "hermes-middle",
|
...layout,
|
||||||
sweptToolOutputs: state.prunedToolIds.size,
|
sweptToolOutputs: state.prunedToolIds.size,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -161,17 +249,23 @@ export default function (pi: ExtensionAPI) {
|
|||||||
pi.on("session_compact", async (event, ctx) => {
|
pi.on("session_compact", async (event, ctx) => {
|
||||||
if (event.fromExtension) {
|
if (event.fromExtension) {
|
||||||
state.compressionCount++
|
state.compressionCount++
|
||||||
|
if (state.activeHermesLayout) {
|
||||||
|
state.tokensSaved += state.activeHermesLayout.estimatedTokensSaved
|
||||||
|
}
|
||||||
state.lastCompressionStatus = "Hermes compaction completed"
|
state.lastCompressionStatus = "Hermes compaction completed"
|
||||||
if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info")
|
if (ctx.hasUI) ctx.ui.notify("Hermes compaction completed", "info")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
pi.on("context", async (event, ctx) => {
|
pi.on("context", async (event, ctx) => {
|
||||||
const result = await applyPruning(event.messages, state, config)
|
const layoutResult = applyHermesMiddleLayout(event.messages, state)
|
||||||
if (result.outcome) {
|
const pruneResult = await applyPruning(layoutResult.messages, state, config)
|
||||||
state.lastCompressionStatus = result.outcome.message
|
if (pruneResult.outcome) {
|
||||||
|
state.lastCompressionStatus = pruneResult.outcome.message
|
||||||
|
} else if (layoutResult.outcome) {
|
||||||
|
state.lastCompressionStatus = layoutResult.outcome.message
|
||||||
}
|
}
|
||||||
return { messages: result.messages }
|
return { messages: pruneResult.messages }
|
||||||
})
|
})
|
||||||
|
|
||||||
pi.registerCommand("acp", {
|
pi.registerCommand("acp", {
|
||||||
|
|||||||
@@ -149,6 +149,45 @@ export function estimateMessagesTokens(messages: any[]): number {
|
|||||||
return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
return messages.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reshape request messages into the Hermes `head + summary + tail` order.
 *
 * The `compactionSummary` message is lifted out of `messages`; the remaining
 * raw messages are split using the counts in `state.activeHermesLayout`. The
 * first `headMessageCount` raw messages stay in front, the summary follows,
 * and everything from the start of the tail onward (the last
 * `tailMessageCount` messages of the compacted prefix plus any messages after
 * that prefix) is appended. Raw messages between head and tail are dropped.
 *
 * @param messages Messages about to be sent to the model.
 * @param state Extension state holding the active layout, if any.
 * @returns The input messages untouched when there is no layout, no summary
 *   message, or the layout counts are unusable; otherwise the reshaped
 *   messages plus an outcome describing the message and token delta.
 */
export function applyHermesMiddleLayout(
  messages: any[],
  state: DcpState,
): ApplyPruningResult {
  const layout = state.activeHermesLayout;
  if (!layout) return { messages };

  // The layout can come from previously persisted data, so do not trust its
  // shape. Missing counts would turn the slice boundaries into NaN, and a
  // negative tail count would push `tailStart` past the compacted prefix and
  // silently drop newer messages. Leaving the request as-is is the safe
  // fallback.
  const isCount = (value: unknown): value is number =>
    typeof value === "number" && Number.isInteger(value) && value >= 0;
  if (
    !isCount(layout.headMessageCount) ||
    !isCount(layout.tailMessageCount) ||
    !isCount(layout.compactedMessageCount)
  ) {
    return { messages };
  }

  const summaryIdx = messages.findIndex((msg) => msg?.role === "compactionSummary");
  if (summaryIdx < 0) return { messages };

  const summaryMessage = messages[summaryIdx];
  const rawMessages = messages.filter((_, idx) => idx !== summaryIdx);

  // Clamp to what is actually present so a short request never yields
  // out-of-range boundaries.
  const compactedMessageCount = Math.min(layout.compactedMessageCount, rawMessages.length);
  const headEnd = Math.min(layout.headMessageCount, compactedMessageCount);
  // The tail can never start before the head ends, so no message is duplicated.
  const tailStart = Math.max(headEnd, compactedMessageCount - layout.tailMessageCount);

  const shapedMessages = [
    ...rawMessages.slice(0, headEnd),
    summaryMessage,
    ...rawMessages.slice(tailStart),
  ];

  const originalTokens = estimateMessagesTokens(messages);
  const shapedTokens = estimateMessagesTokens(shapedMessages);
  const saved = originalTokens - shapedTokens;
  const delta = saved >= 0
    ? `saved ~${saved.toLocaleString()} tokens`
    : `added ~${Math.abs(saved).toLocaleString()} tokens`;

  return {
    messages: shapedMessages,
    outcome: {
      kind: "compressed",
      message: `Hermes middle layout: ${messages.length} -> ${shapedMessages.length} messages, ${delta}`,
      tokensSaved: saved,
    },
  };
}
|
||||||
|
|
||||||
function alignBoundaryForward(messages: any[], idx: number): number {
|
function alignBoundaryForward(messages: any[], idx: number): number {
|
||||||
while (idx < messages.length && isToolResultMessage(messages[idx])) {
|
while (idx < messages.length && isToolResultMessage(messages[idx])) {
|
||||||
idx++;
|
idx++;
|
||||||
|
|||||||
@@ -19,6 +19,18 @@ export interface DcpState {
|
|||||||
compressionCount: number
|
compressionCount: number
|
||||||
forceCompressNext?: boolean
|
forceCompressNext?: boolean
|
||||||
lastCompressionStatus?: string | null
|
lastCompressionStatus?: string | null
|
||||||
|
activeHermesLayout?: HermesMiddleLayout | null
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Metadata describing one Hermes "head + summary + tail" compaction.
 *
 * The compaction hook builds this object, keeps it on
 * `DcpState.activeHermesLayout`, and spreads it into the `details` of the
 * compaction result; `applyHermesMiddleLayout()` reads the three message
 * counts to reshape request messages.
 *
 * - `kind`: literal tag checked when a layout is read back from a compaction
 *   entry's `details`.
 * - `headMessageCount`: leading messages kept raw in front of the summary.
 * - `tailMessageCount`: trailing messages of the compacted prefix kept raw
 *   after the summary.
 * - `compactedMessageCount`: number of context entries on the branch when the
 *   compaction ran.
 * - `originalFirstKeptEntryId`: the `firstKeptEntryId` proposed by Pi's
 *   compaction preparation.
 * - `expandedFirstKeptEntryId`: the first context entry id, which is what the
 *   hook returns as the saved `firstKeptEntryId`.
 * - `estimatedTokensSaved`: token estimate of the summarized messages minus
 *   the token estimate of the summary.
 * - `estimatedTokensAfter`: `tokensBefore` minus `estimatedTokensSaved`.
 */
export interface HermesMiddleLayout {
|
||||||
|
kind: "hermes-middle"
|
||||||
|
headMessageCount: number
|
||||||
|
tailMessageCount: number
|
||||||
|
compactedMessageCount: number
|
||||||
|
originalFirstKeptEntryId: string
|
||||||
|
expandedFirstKeptEntryId: string
|
||||||
|
estimatedTokensAfter: number
|
||||||
|
estimatedTokensSaved: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export function createState(): DcpState {
|
export function createState(): DcpState {
|
||||||
@@ -32,6 +44,7 @@ export function createState(): DcpState {
|
|||||||
compressionCount: 0,
|
compressionCount: 0,
|
||||||
forceCompressNext: false,
|
forceCompressNext: false,
|
||||||
lastCompressionStatus: null,
|
lastCompressionStatus: null,
|
||||||
|
activeHermesLayout: null,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -45,6 +58,7 @@ export function resetState(state: DcpState): void {
|
|||||||
state.compressionCount = 0
|
state.compressionCount = 0
|
||||||
state.forceCompressNext = false
|
state.forceCompressNext = false
|
||||||
state.lastCompressionStatus = null
|
state.lastCompressionStatus = null
|
||||||
|
state.activeHermesLayout = null
|
||||||
}
|
}
|
||||||
|
|
||||||
function sortObjectKeys(value: unknown): unknown {
|
function sortObjectKeys(value: unknown): unknown {
|
||||||
|
|||||||
Reference in New Issue
Block a user