From 24cf4e5b80c62a333fda0102eb31666f72664eca Mon Sep 17 00:00:00 2001 From: Greg Harvell Date: Mon, 30 Mar 2026 17:30:53 -0400 Subject: [PATCH] Bugfix for tool crash on compression --- AGENTS.md | 136 ++++++++++++++++++++++++++++++++++++++++++++++ package.json | 2 +- pruner.test.ts | 142 +++++++++++++++++++++++++++++++++++++++++++++++++ pruner.ts | 36 ++++++++++--- 4 files changed, 307 insertions(+), 9 deletions(-) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..7348d8d --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,136 @@ +# AGENTS.md — pi-dynamic-context-pruning + +Reference for agentic coding agents operating in this repository. + +--- + +## Project Overview + +A **pi coding agent extension** (TypeScript/ESM) that implements Dynamic Context Pruning (DCP). +Pi loads extension `.ts` files directly — there is no build step and no compiled output. + +**Runtime:** Bun (used to run tests and the extension). +**Package type:** `"type": "module"` — all files are ES modules. + +--- + +## Commands + +| Task | Command | +|------|---------| +| Run tests | `bun run pruner.test.ts` | +| Build | _(none — pi loads `.ts` directly)_ | +| Lint | _(no lint config present)_ | +| Format | _(no formatter config present)_ | + +**Single test:** All tests live in `pruner.test.ts`. There is no test framework — tests use Node.js `assert` and plain `console.log`. To isolate one test scenario, comment out other `{}` blocks in that file. + +--- + +## Module Structure + +| File | Purpose | +|------|---------| +| `index.ts` | Extension entry point; registers all hooks with the pi `ExtensionAPI` | +| `config.ts` | JSONC config loading with 4-layer merge (defaults → global → env → project) | +| `state.ts` | `DcpState` type + `createState` / `resetState` / `createInputFingerprint` | +| `pruner.ts` | `applyPruning`, `injectNudge`, `getNudgeType`, `estimateTokens` | +| `compress-tool.ts` | Registers the `compress` tool with the pi tool registry | +| `commands.ts` | Registers `/dcp` slash commands | +| `prompts.ts` | All system prompt strings and nudge text constants | +| `pruner.test.ts` | Self-contained tests for `applyPruning` | + +--- + +## Imports + +- **Always use `.js` extension** for local imports, even when the source file is `.ts`: + ```ts + import { loadConfig } from "./config.js" + ``` +- **Use `import type`** for type-only imports: + ```ts + import type { DcpState } from "./state.js" + import type { ExtensionAPI } from "@mariozechner/pi-coding-agent" + ``` +- Named imports preferred; default exports only for the extension entry point (`index.ts`). +- Import order: Node built-ins → external packages → local modules. + +--- + +## Code Style + +### Naming +| Kind | Convention | Examples | +|------|-----------|---------| +| Files | kebab-case or camelCase | `compress-tool.ts`, `pruner.ts` | +| Interfaces / Types | PascalCase | `DcpState`, `CompressionBlock`, `ToolRecord` | +| Functions | camelCase | `applyPruning`, `loadConfig`, `createState` | +| Constants (module-level) | UPPER_SNAKE_CASE | `ALWAYS_PROTECTED_DEDUP`, `DEFAULT_CONFIG`, `SYSTEM_PROMPT` | +| Variables / parameters | camelCase | `activeBlocks`, `toolCallId`, `contextPercent` | + +### Section separators +Use the long-dash pattern with a label for logical sections within a file: +```ts +// --------------------------------------------------------------------------- +// Section Name +// --------------------------------------------------------------------------- +``` +Use `// ── Label ──────────...` for subsections within `index.ts` event handler blocks. + +### JSDoc +Add JSDoc comments to all exported functions and non-trivial interfaces. Keep them concise and factual. + +### Type annotations +- Explicit return types on all exported functions. +- Use `unknown` instead of `any` when the shape is genuinely unknown, unless you are working with external API message shapes (message content arrays), where `any` is acceptable at the boundary. +- Prefer `as const` for literal arrays (e.g., `["compress", "write", "edit"] as const`). + +### TypeBox schemas +Use `@sinclair/typebox` `Type.*` helpers for tool input schemas in `compress-tool.ts`. + +--- + +## Error Handling + +Two established patterns — do not mix them: + +1. **Silent/best-effort** (config loading, file I/O): + ```ts + try { + raw = fs.readFileSync(filePath, "utf8") + } catch { + return {} + } + ``` + Use when failure is non-fatal and a safe default can be returned. + +2. **Throw domain errors** (ID resolution, invalid tool args): + ```ts + throw new Error(`Unknown message ID: ${id}`) + ``` + Use when the caller must handle the failure explicitly. + +- No silent swallowing of errors that indicate programming mistakes. +- `console.error` / `console.warn` are not used; errors surface via throws or safe returns. + +--- + +## Key Architectural Constraints + +- **Message timestamps are the stable identifier** for positioning compression blocks. Never use array indices as durable references; always use `timestamp`. +- **`assistant` + `toolResult` pairs must be removed atomically.** If a compression range covers a `toolResult`, the preceding `assistant` message (with the matching `toolCall` block) must be included in the range — see backward-expansion logic in `pruner.ts`. +- **State is mutated in-place** by `resetState` so all module references stay valid; do not replace `state` with a new object. +- **Config is read-only after `loadConfig`.** Never mutate the returned config object. +- **No external runtime dependencies** beyond `jsonc-parser`. Do not add new `dependencies`; prefer `peerDependencies` for pi-ecosystem packages. + +--- + +## Dependencies + +| Package | Role | +|---------|------| +| `jsonc-parser` | Parse JSONC config files | +| `@mariozechner/pi-coding-agent` | Peer — `ExtensionAPI`, event types | +| `@mariozechner/pi-tui` | Peer — `AutocompleteItem`, UI types | +| `@sinclair/typebox` | Peer — `Type.*` schema builders for tool registration | diff --git a/package.json b/package.json index bbab01b..7fc6f89 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@complexthings/pi-dynamic-context-pruning", - "version": "1.0.4", + "version": "1.0.5", "description": "PI coding agent extension — Dynamic Context Pruning (DCP)", "keywords": [ "pi-package", diff --git a/pruner.test.ts b/pruner.test.ts index c67c1b3..2f9f601 100644 --- a/pruner.test.ts +++ b/pruner.test.ts @@ -281,4 +281,146 @@ function findOrphanedToolUse(result: any[]): string | null { console.log("TEST 2 PASSED\n"); } +// --------------------------------------------------------------------------- +// Test 3 — MULTI-TOOLRESULT BACKWARD GAP +// +// assistant has TWO tool_calls (A + B) producing two consecutive toolResult +// messages. The compression range starts at toolResult_B — meaning there is +// a toolResult message (A) sitting between lo and the assistant. +// +// Bug: backward expansion stopped at toolResult_A (not an assistant) and +// never found the assistant → assistant was kept without its toolResult_B. +// Fix: backward scan skips past toolResult messages to reach the assistant. +// +// Sequence: +// user(1000) → assistant(2000, toolCall_A + toolCall_B) +// → toolResult_A(3000) → toolResult_B(4000) → user(5000) +// Compression block: [4000..4000] (only toolResult_B) +// Expected: assistant + toolResult_A + toolResult_B all removed together +// --------------------------------------------------------------------------- +{ + console.log("TEST 3: multi-toolResult backward gap (assistant has 2 tool_calls)"); + + const messages: any[] = [ + { role: "user", content: [{ type: "text", text: "do two things" }], timestamp: 1000 }, + { role: "assistant", content: [ + { type: "toolCall", id: "toolu_A", name: "read", arguments: {} }, + { type: "toolCall", id: "toolu_B", name: "write", arguments: {} }, + ], timestamp: 2000 }, + { role: "toolResult", toolCallId: "toolu_A", toolName: "read", isError: false, content: [{ type: "text", text: "A result" }], timestamp: 3000 }, + { role: "toolResult", toolCallId: "toolu_B", toolName: "write", isError: false, content: [{ type: "text", text: "B result" }], timestamp: 4000 }, + { role: "user", content: [{ type: "text", text: "thanks" }], timestamp: 5000 }, + ]; + + const state = makeState([ + { + id: 1, + topic: "two-tool work", + summary: "Both tools were called successfully.", + startTimestamp: 4000, // only toolResult_B + endTimestamp: 4000, + anchorTimestamp: 5000, + active: true, + summaryTokenEstimate: 10, + createdAt: Date.now(), + }, + ]); + + const result = applyPruning(messages, state, makeConfig()); + + console.log(" Result messages:"); + for (const m of result) { + const preview = Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? "?").join(" | ").slice(0, 60) + : String(m.content).slice(0, 60); + console.log(` role="${m.role}" ts=${m.timestamp} content="${preview}"`); + } + + // Neither the orphaned assistant nor its toolResults should survive unpaired + const assistantPresent = result.some((m: any) => m.role === "assistant" && m.timestamp === 2000); + const toolResultAPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_A"); + const toolResultBPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_B"); + + // All three must be absent (removed atomically) or all three present as a valid group + if (assistantPresent) { + assert.ok(toolResultAPresent, "FAIL — assistant present but toolResult_A missing"); + assert.ok(toolResultBPresent, "FAIL — assistant present but toolResult_B missing"); + // Verify ordering: assistant → toolResult_A → toolResult_B + const aIdx = result.findIndex((m: any) => m.role === "assistant" && m.timestamp === 2000); + const rAIdx = result.findIndex((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_A"); + const rBIdx = result.findIndex((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_B"); + assert.ok(aIdx < rAIdx && rAIdx < rBIdx, "FAIL — assistant + toolResult ordering wrong"); + console.log(" PASS: assistant + both toolResults kept as a coherent group"); + } else { + assert.ok(!toolResultAPresent, "FAIL — assistant removed but orphaned toolResult_A still present"); + assert.ok(!toolResultBPresent, "FAIL — assistant removed but orphaned toolResult_B still present"); + console.log(" PASS: assistant + both toolResults removed atomically"); + } + + console.log("TEST 3 PASSED\n"); +} + +// --------------------------------------------------------------------------- +// Test 4 — BASHEXECUTION FORWARD GAP +// +// An assistant calls a tool whose result is stored as role="bashExecution". +// The compression range covers the assistant but NOT the bashExecution result. +// +// Bug (before fix): forward expansion only checked role==="toolResult", so +// bashExecution was left behind as an orphan. +// Fix: forward expansion now also advances hi over bashExecution messages. +// +// Sequence: +// user(1000) → assistant(2000, toolCall_bash) → bashExecution(3000) → user(4000) +// Compression block: [2000..2000] (only the assistant) +// Expected: assistant + bashExecution removed together +// --------------------------------------------------------------------------- +{ + console.log("TEST 4: bashExecution forward gap"); + + const messages: any[] = [ + { role: "user", content: [{ type: "text", text: "run bash" }], timestamp: 1000 }, + { role: "assistant", content: [{ type: "toolCall", id: "toolu_bash1", name: "bash", arguments: {} }], timestamp: 2000 }, + { role: "bashExecution", toolCallId: "toolu_bash1", toolName: "bash", isError: false, content: [{ type: "text", text: "exit 0" }], timestamp: 3000 }, + { role: "user", content: [{ type: "text", text: "done" }], timestamp: 4000 }, + ]; + + const state = makeState([ + { + id: 1, + topic: "bash run", + summary: "Ran bash command successfully.", + startTimestamp: 2000, + endTimestamp: 2000, + anchorTimestamp: 4000, + active: true, + summaryTokenEstimate: 8, + createdAt: Date.now(), + }, + ]); + + const result = applyPruning(messages, state, makeConfig()); + + console.log(" Result messages:"); + for (const m of result) { + const preview = Array.isArray(m.content) + ? m.content.map((b: any) => b.text ?? b.type ?? "?").join(" | ").slice(0, 60) + : String(m.content).slice(0, 60); + console.log(` role="${m.role}" ts=${m.timestamp} content="${preview}"`); + } + + const assistantPresent = result.some((m: any) => m.role === "assistant" && m.timestamp === 2000); + const bashPresent = result.some((m: any) => m.role === "bashExecution" && m.toolCallId === "toolu_bash1"); + + if (assistantPresent) { + assert.ok(bashPresent, "FAIL — assistant present but bashExecution result missing"); + console.log(" PASS: assistant + bashExecution kept as a coherent group"); + } else { + assert.ok(!bashPresent, "FAIL — assistant removed but orphaned bashExecution still present"); + console.log(" PASS: assistant + bashExecution removed atomically"); + } + + console.log("TEST 4 PASSED\n"); +} + console.log("All tests passed."); diff --git a/pruner.ts b/pruner.ts index 3ef4ac5..e7e09c2 100644 --- a/pruner.ts +++ b/pruner.ts @@ -57,16 +57,32 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] { let lo = Math.min(startIdx, endIdx); let hi = Math.max(startIdx, endIdx); - // Expand lo backward: if the message immediately before lo is an assistant - // whose tool_use blocks have matching tool_results inside [lo..hi], pull - // it into the range so the pair is always removed together. + // Expand lo backward: if there is an assistant before lo whose tool_use + // blocks have matching tool_results inside [lo..hi], pull the entire + // assistant + any intermediate result messages into the range so the + // group is always removed atomically. + // + // Critically we must skip backward past any toolResult / bashExecution + // messages before lo, because an assistant with multiple tool_calls emits + // N consecutive result messages — the assistant itself sits further back. while (lo > 0) { - const prev = messages[lo - 1] as any; - if (prev.role !== "assistant") break; + // Walk backward past tool-result messages to find the preceding assistant + let scanIdx = lo - 1; + while (scanIdx >= 0) { + const r = (messages[scanIdx] as any).role as string; + if (r !== "toolResult" && r !== "bashExecution") break; + scanIdx--; + } + if (scanIdx < 0 || (messages[scanIdx] as any).role !== "assistant") break; + + const prev = messages[scanIdx] as any; const toolCallIdsInRange = new Set(); for (let i = lo; i <= hi; i++) { const m = messages[i] as any; - if (m.role === "toolResult" && typeof m.toolCallId === "string") { + if ( + (m.role === "toolResult" || m.role === "bashExecution") && + typeof m.toolCallId === "string" + ) { toolCallIdsInRange.add(m.toolCallId); } } @@ -75,7 +91,8 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] { (block: any) => block.type === "toolCall" && toolCallIdsInRange.has(block.id) ); if (!hasMatchingToolCalls) break; - lo--; + // Pull assistant + all intermediate result messages into the range + lo = scanIdx; } // Expand hi forward: for every assistant message in [lo..hi] that has @@ -98,7 +115,10 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] { } while (hi + 1 < messages.length) { const next = messages[hi + 1] as any; - if (next.role === "toolResult" && assistantToolCallIds.has(next.toolCallId)) { + if ( + (next.role === "toolResult" || next.role === "bashExecution") && + assistantToolCallIds.has(next.toolCallId) + ) { hi++; } else { break;