diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f869201 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,43 @@ +# Changelog + +## [1.0.6] - 2026-04-09 + +### Fixed + +- **Orphaned tool_use/tool_result after compression** — Compression ranges that touched part of an assistant→toolResult group could leave orphaned `tool_use` or `tool_result` blocks, causing Anthropic API 400 errors (`unexpected tool_use_id found in tool_result blocks`). The backward and forward expansion logic now correctly skips PI-internal passthrough roles (`compaction`, `branch_summary`, `custom_message`) when scanning for paired messages, ensuring atomic removal of complete tool groups. +- **Content mutation across context events** — `applyPruning` now copies each message object and each block of its `content` array instead of copying only the outer message array, preventing injected `dcp-id` blocks from accumulating on shared message objects across successive context events. Values nested inside a block are still shared with the original. + +### Added + +- **Post-compression repair function** — `repairOrphanedToolPairs` runs after all compression blocks are applied as a safety net. It removes orphaned `toolResult`/`bashExecution` messages whose `toolCallId` has no matching `toolCall` in any assistant message, and strips orphaned `toolCall` blocks from assistant messages whose results no longer exist. +- **New test cases** — Tests 5–9 covering passthrough role handling (backward and forward expansion), content mutation isolation, multi-block orphan repair, and direct orphan cleanup. + +## [1.0.5] - 2026-04-06 + +### Fixed + +- Prevent orphaned tool_use blocks from compression and harden autocomplete. + +## [1.0.4] - 2026-04-05 + +### Fixed + +- Tool crash on compression. + +## [1.0.3] - 2026-04-04 + +### Fixed + +- Various errors and issues. + +## [1.0.2] - 2026-04-03 + +### Changed + +- Added pi package details to package.json. + +## [1.0.1] - 2026-04-02 + +### Added + +- Initial release. 
diff --git a/README.md b/README.md index 4151e4c..83d147f 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,10 @@ When the LLM calls the `compress` tool it provides one or more `{startId, endId, Message IDs (`m001`, `m042`, etc.) and block IDs (`b1`, `b3`) are injected into every message in the context so the LLM can reference exact boundaries. +### Atomic tool pair removal + +When a compression range touches any part of an assistant→toolResult group, DCP automatically expands the range to include the entire group. This prevents orphaned `tool_use` or `tool_result` blocks that would cause API validation errors. The expansion logic skips over PI-internal passthrough messages (`compaction`, `branch_summary`, `custom_message`) that may sit between an assistant and its tool results. A post-compression repair pass acts as a safety net to catch any orphaned pairs that the expansion heuristics miss. + ### Nudge types | Nudge | Condition | diff --git a/package.json b/package.json index 7fc6f89..3aff46f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@complexthings/pi-dynamic-context-pruning", - "version": "1.0.5", + "version": "1.0.6", "description": "PI coding agent extension — Dynamic Context Pruning (DCP)", "keywords": [ "pi-package", diff --git a/pruner.test.ts b/pruner.test.ts index 2f9f601..321ba90 100644 --- a/pruner.test.ts +++ b/pruner.test.ts @@ -423,4 +423,289 @@ function findOrphanedToolUse(result: any[]): string | null { console.log("TEST 4 PASSED\n"); } +// --------------------------------------------------------------------------- +// Test 5 — PASSTHROUGH ROLE BETWEEN ASSISTANT AND TOOLRESULT (BACKWARD) +// +// A `compaction` message sits between the assistant and the toolResult. +// The compression range covers only the toolResult. Backward expansion +// must skip the compaction to find the assistant and include it atomically. 
+// +// Sequence: +// user(1000) → assistant(2000, toolCall_X) → compaction(2500) +// → toolResult_X(3000) → user(4000) +// Compression block: [3000..3000] +// Expected: assistant + toolResult removed together (no orphans) +// --------------------------------------------------------------------------- +{ + console.log("TEST 5: passthrough role between assistant and toolResult (backward expansion)"); + + const messages: any[] = [ + { role: "user", content: [{ type: "text", text: "read file" }], timestamp: 1000 }, + { role: "assistant", content: [{ type: "toolCall", id: "toolu_X", name: "read", arguments: {} }], timestamp: 2000 }, + { role: "compaction", content: [{ type: "text", text: "compaction summary" }], timestamp: 2500 }, + { role: "toolResult", toolCallId: "toolu_X", toolName: "read", isError: false, content: [{ type: "text", text: "file data" }], timestamp: 3000 }, + { role: "user", content: [{ type: "text", text: "thanks" }], timestamp: 4000 }, + ]; + + const state = makeState([ + { + id: 1, + topic: "file read", + summary: "File was read successfully.", + startTimestamp: 3000, + endTimestamp: 3000, + anchorTimestamp: 4000, + active: true, + summaryTokenEstimate: 10, + createdAt: Date.now(), + }, + ]); + + const result = applyPruning(messages, state, makeConfig()); + + console.log(" Result messages:"); + for (const m of result) { + const preview = Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? 
"?").join(" | ").slice(0, 60) + : String(m.content).slice(0, 60); + console.log(` role="${m.role}" ts=${m.timestamp} content="${preview}"`); + } + + const orphan = findOrphanedToolUse(result); + assert.strictEqual(orphan, null, `FAIL — orphaned tool_use detected: ${orphan}`); + console.log(" PASS: no orphaned tool_use in result"); + + const assistantPresent = result.some((m: any) => m.role === "assistant" && m.timestamp === 2000); + const toolResultPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_X"); + assert.ok(!assistantPresent, "FAIL — assistant should have been removed"); + assert.ok(!toolResultPresent, "FAIL — toolResult should have been removed"); + console.log(" PASS: assistant + toolResult removed atomically despite compaction in between"); + + console.log("TEST 5 PASSED\n"); +} + +// --------------------------------------------------------------------------- +// Test 6 — PASSTHROUGH ROLE BETWEEN TOOLRESULTS (FORWARD EXPANSION) +// +// An assistant has two tool calls. A `branch_summary` message sits between +// the two toolResults. The compression range covers the assistant. +// Forward expansion must skip the branch_summary to find both toolResults. 
+// +// Sequence: +// user(1000) → assistant(2000, toolCall_A + toolCall_B) +// → toolResult_A(3000) → branch_summary(3500) +// → toolResult_B(4000) → user(5000) +// Compression block: [2000..2000] +// Expected: assistant + both toolResults removed together (no orphans) +// --------------------------------------------------------------------------- +{ + console.log("TEST 6: passthrough role between toolResults (forward expansion)"); + + const messages: any[] = [ + { role: "user", content: [{ type: "text", text: "do things" }], timestamp: 1000 }, + { role: "assistant", content: [ + { type: "toolCall", id: "toolu_A", name: "read", arguments: {} }, + { type: "toolCall", id: "toolu_B", name: "write", arguments: {} }, + ], timestamp: 2000 }, + { role: "toolResult", toolCallId: "toolu_A", toolName: "read", isError: false, content: [{ type: "text", text: "A result" }], timestamp: 3000 }, + { role: "branch_summary", content: [{ type: "text", text: "branch summary" }], timestamp: 3500 }, + { role: "toolResult", toolCallId: "toolu_B", toolName: "write", isError: false, content: [{ type: "text", text: "B result" }], timestamp: 4000 }, + { role: "user", content: [{ type: "text", text: "thanks" }], timestamp: 5000 }, + ]; + + const state = makeState([ + { + id: 1, + topic: "two tools", + summary: "Both tools were called.", + startTimestamp: 2000, + endTimestamp: 2000, + anchorTimestamp: 5000, + active: true, + summaryTokenEstimate: 10, + createdAt: Date.now(), + }, + ]); + + const result = applyPruning(messages, state, makeConfig()); + + console.log(" Result messages:"); + for (const m of result) { + const preview = Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? 
"?").join(" | ").slice(0, 60) + : String(m.content).slice(0, 60); + console.log(` role="${m.role}" ts=${m.timestamp} content="${preview}"`); + } + + const orphan = findOrphanedToolUse(result); + assert.strictEqual(orphan, null, `FAIL — orphaned tool_use detected: ${orphan}`); + console.log(" PASS: no orphaned tool_use in result"); + + const assistantPresent = result.some((m: any) => m.role === "assistant" && m.timestamp === 2000); + const toolResultAPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_A"); + const toolResultBPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_B"); + assert.ok(!assistantPresent, "FAIL — assistant should have been removed"); + assert.ok(!toolResultAPresent, "FAIL — toolResult_A should have been removed"); + assert.ok(!toolResultBPresent, "FAIL — toolResult_B should have been removed"); + console.log(" PASS: assistant + both toolResults removed despite branch_summary in between"); + + console.log("TEST 6 PASSED\n"); +} + +// --------------------------------------------------------------------------- +// Test 7 — CONTENT MUTATION ISOLATION +// +// Verifies that applyPruning does not mutate the original message objects. +// After calling applyPruning, the original messages' content arrays should +// remain unchanged (no injected dcp-id blocks). 
+// --------------------------------------------------------------------------- +{ + console.log("TEST 7: content mutation isolation"); + + const messages = makeMessages(); + // Deep-snapshot the original content for comparison + const originalContents = messages.map((m: any) => + JSON.stringify(m.content) + ); + + const state = makeState(); // no compression blocks + const config = makeConfig(); + + // Run applyPruning — this should NOT mutate the originals + applyPruning(messages, state, config); + + let mutated = false; + for (let i = 0; i < messages.length; i++) { + const current = JSON.stringify(messages[i].content); + if (current !== originalContents[i]) { + console.log(` FAIL — message[${i}] content was mutated`); + console.log(` before: ${originalContents[i]}`); + console.log(` after: ${current}`); + mutated = true; + } + } + + assert.ok(!mutated, "FAIL — original message content was mutated by applyPruning"); + console.log(" PASS: original message content unchanged after applyPruning"); + + console.log("TEST 7 PASSED\n"); +} + +// --------------------------------------------------------------------------- +// Test 8 — ORPHANED TOOLRESULT REPAIR +// +// Two compression blocks where the second removes an assistant but forward +// expansion cannot reach its toolResult due to processing order. The repair +// function should clean up the orphan. +// +// Sequence: +// user(1000) → assistant_1(2000, toolCall_X) → toolResult_X(3000) → +// user(4000) → assistant_2(5000, toolCall_Y) → toolResult_Y(6000) → user(7000) +// +// Block 1: [1000..3000] — removes user, assistant_1, toolResult_X +// Block 2: [4000..5000] — removes user, assistant_2 (toolResult_Y is outside) +// Forward expansion from assistant_2 should catch toolResult_Y, but if it +// doesn't (edge case), repair must clean it up. 
+// --------------------------------------------------------------------------- +{ + console.log("TEST 8: orphaned toolResult repair (post-compression safety net)"); + + const messages: any[] = [ + { role: "user", content: [{ type: "text", text: "first" }], timestamp: 1000 }, + { role: "assistant", content: [{ type: "toolCall", id: "toolu_X", name: "read", arguments: {} }], timestamp: 2000 }, + { role: "toolResult", toolCallId: "toolu_X", toolName: "read", isError: false, content: [{ type: "text", text: "X data" }], timestamp: 3000 }, + { role: "user", content: [{ type: "text", text: "second" }], timestamp: 4000 }, + { role: "assistant", content: [{ type: "toolCall", id: "toolu_Y", name: "write", arguments: {} }], timestamp: 5000 }, + { role: "toolResult", toolCallId: "toolu_Y", toolName: "write", isError: false, content: [{ type: "text", text: "Y data" }], timestamp: 6000 }, + { role: "user", content: [{ type: "text", text: "done" }], timestamp: 7000 }, + ]; + + const state = makeState([ + { + id: 1, + topic: "block one", + summary: "First block compressed.", + startTimestamp: 1000, + endTimestamp: 3000, + anchorTimestamp: 4000, + active: true, + summaryTokenEstimate: 10, + createdAt: Date.now(), + }, + { + id: 2, + topic: "block two", + summary: "Second block compressed.", + startTimestamp: 4000, + endTimestamp: 5000, + anchorTimestamp: 7000, + active: true, + summaryTokenEstimate: 10, + createdAt: Date.now(), + }, + ]); + + const result = applyPruning(messages, state, makeConfig()); + + console.log(" Result messages:"); + for (const m of result) { + const preview = Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? 
"?").join(" | ").slice(0, 60) + : String(m.content).slice(0, 60); + console.log(` role="${m.role}" ts=${m.timestamp} content="${preview}"`); + } + + // No orphaned tool_use or tool_result should remain + const orphan = findOrphanedToolUse(result); + assert.strictEqual(orphan, null, `FAIL — orphaned tool_use detected: ${orphan}`); + + const orphanedResults = result.filter( + (m: any) => (m.role === "toolResult" || m.role === "bashExecution") && + !result.some((a: any) => + a.role === "assistant" && + Array.isArray(a.content) && + a.content.some((b: any) => b.type === "toolCall" && b.id === m.toolCallId) + ) + ); + assert.strictEqual(orphanedResults.length, 0, `FAIL — ${orphanedResults.length} orphaned toolResult(s) found`); + console.log(" PASS: no orphaned tool_use or toolResult in result"); + + console.log("TEST 8 PASSED\n"); +} + +// --------------------------------------------------------------------------- +// Test 9 — DIRECT ORPHAN REPAIR (pre-broken state) +// +// Directly construct a message array with an orphaned toolResult (no matching +// assistant toolCall exists). The repair function should remove it. +// --------------------------------------------------------------------------- +{ + console.log("TEST 9: direct orphan repair (pre-broken toolResult)"); + + const messages: any[] = [ + { role: "user", content: [{ type: "text", text: "hello" }], timestamp: 1000 }, + { role: "toolResult", toolCallId: "orphan_id", toolName: "read", isError: false, content: [{ type: "text", text: "orphan data" }], timestamp: 2000 }, + { role: "user", content: [{ type: "text", text: "bye" }], timestamp: 3000 }, + ]; + + const state = makeState(); // no compression blocks — repair runs as safety net + const config = makeConfig(); + + const result = applyPruning(messages, state, config); + + console.log(" Result messages:"); + for (const m of result) { + const preview = Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? 
"?").join(" | ").slice(0, 60) + : String(m.content).slice(0, 60); + console.log(` role="${m.role}" ts=${m.timestamp} content="${preview}"`); + } + + const orphanPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "orphan_id"); + assert.ok(!orphanPresent, "FAIL — orphaned toolResult should have been removed by repair"); + console.log(" PASS: orphaned toolResult removed by repair function"); + + console.log("TEST 9 PASSED\n"); +} + console.log("All tests passed."); diff --git a/pruner.ts b/pruner.ts index e7e09c2..cf69df2 100644 --- a/pruner.ts +++ b/pruner.ts @@ -70,7 +70,7 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] { let scanIdx = lo - 1; while (scanIdx >= 0) { const r = (messages[scanIdx] as any).role as string; - if (r !== "toolResult" && r !== "bashExecution") break; + if (r !== "toolResult" && r !== "bashExecution" && !PASSTHROUGH_ROLES.has(r)) break; scanIdx--; } if (scanIdx < 0 || (messages[scanIdx] as any).role !== "assistant") break; @@ -120,6 +120,8 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] { assistantToolCallIds.has(next.toolCallId) ) { hi++; + } else if (PASSTHROUGH_ROLES.has(next.role)) { + hi++; } else { break; } @@ -171,6 +173,64 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] { return messages; } +/** + * Remove orphaned toolResult/bashExecution messages whose corresponding + * assistant toolCall was removed, and strip orphaned toolCall blocks from + * assistant messages whose toolResult was removed. + * + * This is a safety net that runs after all compression blocks are applied. + */ +function repairOrphanedToolPairs(messages: any[]): void { + // 1. Build set of all toolCall IDs present in assistant messages + const assistantToolCallIds = new Set(); + for (const msg of messages) { + if (msg.role !== "assistant") continue; + const content: any[] = Array.isArray(msg.content) ? 
msg.content : []; + for (const block of content) { + if (block.type === "toolCall" && typeof block.id === "string") { + assistantToolCallIds.add(block.id); + } + } + } + + // 2. Build set of all toolCallIds present in toolResult/bashExecution messages + const resultToolCallIds = new Set(); + for (const msg of messages) { + if (msg.role !== "toolResult" && msg.role !== "bashExecution") continue; + if (typeof msg.toolCallId === "string") { + resultToolCallIds.add(msg.toolCallId); + } + } + + // 3. Remove orphaned toolResult/bashExecution messages (no matching assistant toolCall) + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg.role !== "toolResult" && msg.role !== "bashExecution") continue; + if (typeof msg.toolCallId === "string" && !assistantToolCallIds.has(msg.toolCallId)) { + messages.splice(i, 1); + } + } + + // 4. Strip orphaned toolCall blocks from assistant messages (no matching toolResult) + for (const msg of messages) { + if (msg.role !== "assistant") continue; + const content: any[] = Array.isArray(msg.content) ? msg.content : []; + const hasToolCalls = content.some((b: any) => b.type === "toolCall"); + if (!hasToolCalls) continue; + + const filtered = content.filter((block: any) => { + if (block.type !== "toolCall") return true; + return typeof block.id === "string" && resultToolCallIds.has(block.id); + }); + + // Only update if we actually removed something + if (filtered.length !== content.length) { + // If the assistant has no content left at all, keep at least an empty array + msg.content = filtered.length > 0 ? filtered : []; + } + } +} + /** * Apply deduplication: mark redundant tool outputs for pruning. * Mutates state.prunedToolIds. 
@@ -344,8 +404,17 @@ export function applyPruning( state: DcpState, config: DcpConfig ): any[] { - // Work on a shallow copy of the array (individual message objects may be mutated) - const msgs: any[] = [...messages]; + // Copy each message object and each top-level content block so that later mutations + // do not reach the caller's objects across context events (values nested inside a block stay shared). + const msgs: any[] = messages.map((m: any) => { + const clone = { ...m }; + if (Array.isArray(clone.content)) { + clone.content = clone.content.map((block: any) => + typeof block === "object" && block !== null ? { ...block } : block + ); + } + return clone; + }); // 1. Count user turns → update state.currentTurn state.currentTurn = msgs.filter((m) => m.role === "user").length; @@ -353,6 +422,10 @@ export function applyPruning( // 2. Apply active compression blocks applyCompressionBlocks(msgs, state); + // 2b. Post-compression safety net: remove any orphaned tool pairs that the + // expansion logic could not catch (e.g. multi-block interactions, pre-broken state). + repairOrphanedToolPairs(msgs); + // 3. Apply deduplication applyDeduplication(msgs, state, config);