diff --git a/package.json b/package.json index c6cd363..51e5115 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@complexthings/pi-dynamic-context-pruning", - "version": "1.0.0", + "version": "1.0.3", "description": "PI coding agent extension — Dynamic Context Pruning (DCP)", "type": "module", "pi": { diff --git a/pruner.test.ts b/pruner.test.ts new file mode 100644 index 0000000..c67c1b3 --- /dev/null +++ b/pruner.test.ts @@ -0,0 +1,284 @@ +/** + * Minimal self-contained tests for the applyCompressionBlocks logic inside + * applyPruning. No test framework — just assert + console.log. + * + * Run with: bun run pruner.test.ts + */ + +import assert from "assert"; +import { applyPruning } from "./pruner.js"; +import type { DcpState } from "./state.js"; +import type { DcpConfig } from "./config.js"; + +// --------------------------------------------------------------------------- +// Minimal factories +// --------------------------------------------------------------------------- + +function makeConfig(): DcpConfig { + return { + enabled: true, + debug: false, + manualMode: { enabled: false, automaticStrategies: false }, + compress: { + maxContextPercent: 0.8, + minContextPercent: 0.4, + nudgeFrequency: 5, + iterationNudgeThreshold: 15, + nudgeForce: "soft", + protectedTools: [], + protectUserMessages: false, + }, + strategies: { + deduplication: { enabled: false, protectedTools: [] }, + purgeErrors: { enabled: false, turns: 4, protectedTools: [] }, + }, + protectedFilePatterns: [], + pruneNotification: "off", + }; +} + +function makeState(compressionBlocks: DcpState["compressionBlocks"] = []): DcpState { + return { + toolCalls: new Map(), + prunedToolIds: new Set(), + compressionBlocks, + nextBlockId: 1, + messageIdSnapshot: new Map(), + currentTurn: 0, + tokensSaved: 0, + totalPruneCount: 0, + manualMode: false, + nudgeCounter: 0, + lastNudgeTurn: -1, + }; +} + +// Four-message sequence that exercises the bug: +// user(1000) → assistant+toolCall(2000) → toolResult(3000) → user(4000) +function makeMessages(): any[] { + return [ + { + role: "user", + content: [{ type: "text", text: "please read the file" }], + timestamp: 1000, + }, + { + role: "assistant", + content: [{ type: "toolCall", id: "toolu_abc", name: "read", arguments: {} }], + timestamp: 2000, + }, + { + role: "toolResult", + toolCallId: "toolu_abc", + toolName: "read", + content: [{ type: "text", text: "file content" }], + isError: false, + timestamp: 3000, + }, + { + role: "user", + content: [{ type: "text", text: "thanks" }], + timestamp: 4000, + }, + ]; +} + +// --------------------------------------------------------------------------- +// Helper: find the first orphaned tool_use in a result array +// +// An assistant message is "orphaned" if it contains a toolCall block whose +// id does NOT have a matching toolResult as the very next message. +// --------------------------------------------------------------------------- +function findOrphanedToolUse(result: any[]): string | null { + for (let i = 0; i < result.length; i++) { + const msg = result[i]; + if (msg.role !== "assistant") continue; + + const content: any[] = Array.isArray(msg.content) ? msg.content : []; + const toolCallBlocks = content.filter((b: any) => b.type === "toolCall"); + if (toolCallBlocks.length === 0) continue; + + for (const tc of toolCallBlocks) { + const next = result[i + 1]; + const nextIsMatchingResult = + next && + next.role === "toolResult" && + next.toolCallId === tc.id; + + if (!nextIsMatchingResult) { + return ( + `assistant at index ${i} (ts=${msg.timestamp}) has toolCall id="${tc.id}" ` + + `but next message is: ${next ? `role="${next.role}" toolCallId="${next.toolCallId}"` : ""}` + ); + } + } + } + return null; // no orphan found +} + +// --------------------------------------------------------------------------- +// Test 1 — BUG SCENARIO +// +// Compression block covers ONLY the toolResult (startTimestamp=3000, +// endTimestamp=3000). Without the backward-expansion fix, the assistant +// message with the toolCall block survives but its toolResult is gone → +// orphaned tool_use. With the fix the assistant is pulled into the range +// and both messages are removed together. +// --------------------------------------------------------------------------- +{ + console.log("TEST 1: compression block covers only the toolResult (bug scenario)"); + + const messages = makeMessages(); + const state = makeState([ + { + id: 1, + topic: "file read", + summary: "The file was read and contained some data.", + startTimestamp: 3000, + endTimestamp: 3000, + anchorTimestamp: 4000, + active: true, + summaryTokenEstimate: 15, + createdAt: Date.now(), + }, + ]); + const config = makeConfig(); + + const result = applyPruning(messages, state, config); + + console.log(" Result messages (role, timestamp):"); + for (const m of result) { + const ts = m.timestamp; + const preview = + typeof m.content === "string" + ? m.content.slice(0, 60) + : Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? "?").join(" | ").slice(0, 60) + : "?"; + console.log(` role="${m.role}" ts=${ts} content="${preview}"`); + } + + // 1a. No orphaned tool_use + const orphan = findOrphanedToolUse(result); + assert.strictEqual( + orphan, + null, + `FAIL — orphaned tool_use detected: ${orphan}` + ); + console.log(" PASS: no orphaned tool_use in result"); + + // 1b. The assistant message at ts=2000 must NOT survive without its partner + const assistantInResult = result.find( + (m) => m.role === "assistant" && m.timestamp === 2000 + ); + if (assistantInResult) { + // If it survived, its immediate successor must be the matching toolResult + const idx = result.indexOf(assistantInResult); + const successor = result[idx + 1]; + assert.ok( + successor && successor.role === "toolResult" && successor.toolCallId === "toolu_abc", + `FAIL — assistant(ts=2000) survived but successor is not the matching toolResult ` + + `(got role="${successor?.role}" toolCallId="${successor?.toolCallId}")` + ); + console.log(" PASS: assistant survived with its toolResult partner intact"); + } else { + // The preferred outcome: both removed together + const toolResultInResult = result.find( + (m) => m.role === "toolResult" && m.toolCallId === "toolu_abc" + ); + assert.strictEqual( + toolResultInResult, + undefined, + "FAIL — assistant removed but orphaned toolResult still present" + ); + console.log(" PASS: both assistant and toolResult removed together"); + } + + console.log("TEST 1 PASSED\n"); +} + +// --------------------------------------------------------------------------- +// Test 2 — PASSING SCENARIO +// +// Compression block covers BOTH the assistant and the toolResult +// (startTimestamp=2000, endTimestamp=3000). Both messages must be removed +// and no orphaned tool_use must remain. +// --------------------------------------------------------------------------- +{ + console.log("TEST 2: compression block covers both assistant and toolResult (passing scenario)"); + + const messages = makeMessages(); + const state = makeState([ + { + id: 1, + topic: "file read", + summary: "The file was read and contained some data.", + startTimestamp: 2000, + endTimestamp: 3000, + anchorTimestamp: 4000, + active: true, + summaryTokenEstimate: 15, + createdAt: Date.now(), + }, + ]); + const config = makeConfig(); + + const result = applyPruning(messages, state, config); + + console.log(" Result messages (role, timestamp):"); + for (const m of result) { + const ts = m.timestamp; + const preview = + typeof m.content === "string" + ? m.content.slice(0, 60) + : Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? "?").join(" | ").slice(0, 60) + : "?"; + console.log(` role="${m.role}" ts=${ts} content="${preview}"`); + } + + // 2a. No orphaned tool_use + const orphan = findOrphanedToolUse(result); + assert.strictEqual( + orphan, + null, + `FAIL — orphaned tool_use detected: ${orphan}` + ); + console.log(" PASS: no orphaned tool_use in result"); + + // 2b. The assistant at ts=2000 must be absent from the result + const assistantInResult = result.find( + (m) => m.role === "assistant" && m.timestamp === 2000 + ); + assert.strictEqual( + assistantInResult, + undefined, + `FAIL — assistant(ts=2000) should have been removed but is still present` + ); + console.log(" PASS: assistant(ts=2000) removed"); + + // 2c. The toolResult must also be absent + const toolResultInResult = result.find( + (m) => m.role === "toolResult" && m.toolCallId === "toolu_abc" + ); + assert.strictEqual( + toolResultInResult, + undefined, + `FAIL — toolResult(toolCallId="toolu_abc") should have been removed but is still present` + ); + console.log(" PASS: toolResult(toolu_abc) removed"); + + // 2d. A synthetic summary message should be present + const synthetic = result.find( + (m) => m.role === "user" && typeof m.content?.[0]?.text === "string" && m.content[0].text.includes("Compressed section") + ); + assert.ok( + synthetic, + "FAIL — expected a synthetic [Compressed section] user message in result" + ); + console.log(" PASS: synthetic summary message present"); + + console.log("TEST 2 PASSED\n"); +} + +console.log("All tests passed."); diff --git a/pruner.ts b/pruner.ts index 285b4f3..3ef4ac5 100644 --- a/pruner.ts +++ b/pruner.ts @@ -278,12 +278,35 @@ function injectMessageIds(messages: any[], state: DcpState): void { } else if (Array.isArray(msg.content)) { msg.content = [...msg.content, { type: "text", text: idTag }]; } - } else if (role === "assistant" || role === "toolResult" || role === "bashExecution") { + } else if (role === "toolResult" || role === "bashExecution") { if (Array.isArray(msg.content)) { msg.content = [...msg.content, { type: "text", text: idTag }]; } else if (typeof msg.content === "string") { msg.content = msg.content + idTag; } + } else if (role === "assistant") { + if (Array.isArray(msg.content)) { + // Insert the ID tag before any tool_use (toolCall) blocks. + // Anthropic requires: thinking → text → tool_use. + // Appending after tool_use blocks violates that constraint. + const firstToolCallIdx = msg.content.findIndex( + (b: any) => b.type === "toolCall", + ); + const idBlock = { type: "text", text: idTag }; + if (firstToolCallIdx === -1) { + // No tool_use blocks — append as usual + msg.content = [...msg.content, idBlock]; + } else { + // Insert immediately before the first tool_use block + msg.content = [ + ...msg.content.slice(0, firstToolCallIdx), + idBlock, + ...msg.content.slice(firstToolCallIdx), + ]; + } + } else if (typeof msg.content === "string") { + msg.content = msg.content + idTag; + } } if (msg.timestamp !== undefined) {