Bugfix for tool crash on compression

2026-06-27 15:16:10 +08:00 · 2026-03-30 17:30:53 -04:00
parent cc4ac20aa7
commit 24cf4e5b80
4 changed files with 307 additions and 9 deletions
@@ -0,0 +1,136 @@
 # AGENTS.md — pi-dynamic-context-pruning
 Reference for agentic coding agents operating in this repository.
 ---
 ## Project Overview
 A **pi coding agent extension** (TypeScript/ESM) that implements Dynamic Context Pruning (DCP).
 Pi loads extension `.ts` files directly — there is no build step and no compiled output.
 **Runtime:** Bun (used to run tests and the extension).  
 **Package type:** `"type": "module"` — all files are ES modules.
 ---
 ## Commands
 | Task | Command |
 |------|---------|
 | Run tests | `bun run pruner.test.ts` |
 | Build | _(none — pi loads `.ts` directly)_ |
 | Lint | _(no lint config present)_ |
 | Format | _(no formatter config present)_ |
 **Single test:** All tests live in `pruner.test.ts`. There is no test framework — tests use Node.js `assert` and plain `console.log`. To isolate one test scenario, comment out other `{}` blocks in that file.
 ---
 ## Module Structure
 | File | Purpose |
 |------|---------|
 | `index.ts` | Extension entry point; registers all hooks with the pi `ExtensionAPI` |
 | `config.ts` | JSONC config loading with 4-layer merge (defaults → global → env → project) |
 | `state.ts` | `DcpState` type + `createState` / `resetState` / `createInputFingerprint` |
 | `pruner.ts` | `applyPruning`, `injectNudge`, `getNudgeType`, `estimateTokens` |
 | `compress-tool.ts` | Registers the `compress` tool with the pi tool registry |
 | `commands.ts` | Registers `/dcp` slash commands |
 | `prompts.ts` | All system prompt strings and nudge text constants |
 | `pruner.test.ts` | Self-contained tests for `applyPruning` |
 ---
 ## Imports
 - **Always use `.js` extension** for local imports, even when the source file is `.ts`:
  ```ts
  import { loadConfig } from "./config.js"
  ```
 - **Use `import type`** for type-only imports:
  ```ts
  import type { DcpState } from "./state.js"
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"
  ```
 - Named imports preferred; default exports only for the extension entry point (`index.ts`).
 - Import order: Node built-ins → external packages → local modules.
 ---
 ## Code Style
 ### Naming
 | Kind | Convention | Examples |
 |------|-----------|---------|
 | Files | kebab-case or camelCase | `compress-tool.ts`, `pruner.ts` |
 | Interfaces / Types | PascalCase | `DcpState`, `CompressionBlock`, `ToolRecord` |
 | Functions | camelCase | `applyPruning`, `loadConfig`, `createState` |
 | Constants (module-level) | UPPER_SNAKE_CASE | `ALWAYS_PROTECTED_DEDUP`, `DEFAULT_CONFIG`, `SYSTEM_PROMPT` |
 | Variables / parameters | camelCase | `activeBlocks`, `toolCallId`, `contextPercent` |
 ### Section separators
 Use the long-dash pattern with a label for logical sections within a file:
 ```ts
 // ---------------------------------------------------------------------------
 // Section Name
 // ---------------------------------------------------------------------------
 ```
 Use `// ── Label ──────────...` for subsections within `index.ts` event handler blocks.
 ### JSDoc
 Add JSDoc comments to all exported functions and non-trivial interfaces. Keep them concise and factual.
 ### Type annotations
 - Explicit return types on all exported functions.
 - Use `unknown` instead of `any` when the shape is genuinely unknown, unless you are working with external API message shapes (message content arrays), where `any` is acceptable at the boundary.
 - Prefer `as const` for literal arrays (e.g., `["compress", "write", "edit"] as const`).
 ### TypeBox schemas
 Use `@sinclair/typebox` `Type.*` helpers for tool input schemas in `compress-tool.ts`.
 ---
 ## Error Handling
 Two established patterns — do not mix them:
 1. **Silent/best-effort** (config loading, file I/O):
   ```ts
   try {
     raw = fs.readFileSync(filePath, "utf8")
   } catch {
     return {}
   }
   ```
   Use when failure is non-fatal and a safe default can be returned.
 2. **Throw domain errors** (ID resolution, invalid tool args):
   ```ts
   throw new Error(`Unknown message ID: ${id}`)
   ```
   Use when the caller must handle the failure explicitly.
 - No silent swallowing of errors that indicate programming mistakes.
 - `console.error` / `console.warn` are not used; errors surface via throws or safe returns.
 ---
 ## Key Architectural Constraints
 - **Message timestamps are the stable identifier** for positioning compression blocks. Never use array indices as durable references; always use `timestamp`.
 - **`assistant` + `toolResult` pairs must be removed atomically.** If a compression range covers a `toolResult`, the preceding `assistant` message (with the matching `toolCall` block) must be included in the range — see backward-expansion logic in `pruner.ts`.
 - **State is mutated in-place** by `resetState` so all module references stay valid; do not replace `state` with a new object.
 - **Config is read-only after `loadConfig`.** Never mutate the returned config object.
 - **No external runtime dependencies** beyond `jsonc-parser`. Do not add new `dependencies`; prefer `peerDependencies` for pi-ecosystem packages.
 ---
 ## Dependencies
 | Package | Role |
 |---------|------|
 | `jsonc-parser` | Parse JSONC config files |
 | `@mariozechner/pi-coding-agent` | Peer — `ExtensionAPI`, event types |
 | `@mariozechner/pi-tui` | Peer — `AutocompleteItem`, UI types |
 | `@sinclair/typebox` | Peer — `Type.*` schema builders for tool registration |
@@ -1,6 +1,6 @@
 {
  "name": "@complexthings/pi-dynamic-context-pruning",
-  "version": "1.0.4",
+  "version": "1.0.5",
  "description": "PI coding agent extension — Dynamic Context Pruning (DCP)",
  "keywords": [
    "pi-package",
@@ -281,4 +281,146 @@ function findOrphanedToolUse(result: any[]): string | null {
  console.log("TEST 2 PASSED\n");
 }
 // ---------------------------------------------------------------------------
 // Test 3 — MULTI-TOOLRESULT BACKWARD GAP
 //
 // assistant has TWO tool_calls (A + B) producing two consecutive toolResult
 // messages.  The compression range starts at toolResult_B — meaning there is
 // a toolResult message (A) sitting between lo and the assistant.
 //
 // Bug: backward expansion stopped at toolResult_A (not an assistant) and
 // never found the assistant → assistant was kept without its toolResult_B.
 // Fix: backward scan skips past toolResult messages to reach the assistant.
 //
 // Sequence:
 //   user(1000) → assistant(2000, toolCall_A + toolCall_B)
 //              → toolResult_A(3000) → toolResult_B(4000) → user(5000)
 // Compression block: [4000..4000] (only toolResult_B)
 // Expected: assistant + toolResult_A + toolResult_B all removed together
 // ---------------------------------------------------------------------------
 {
  console.log("TEST 3: multi-toolResult backward gap (assistant has 2 tool_calls)");
  const messages: any[] = [
    { role: "user",        content: [{ type: "text", text: "do two things" }], timestamp: 1000 },
    { role: "assistant",   content: [
        { type: "toolCall", id: "toolu_A", name: "read",  arguments: {} },
        { type: "toolCall", id: "toolu_B", name: "write", arguments: {} },
      ], timestamp: 2000 },
    { role: "toolResult",  toolCallId: "toolu_A", toolName: "read",  isError: false, content: [{ type: "text", text: "A result" }], timestamp: 3000 },
    { role: "toolResult",  toolCallId: "toolu_B", toolName: "write", isError: false, content: [{ type: "text", text: "B result" }], timestamp: 4000 },
    { role: "user",        content: [{ type: "text", text: "thanks" }], timestamp: 5000 },
  ];
  const state = makeState([
    {
      id: 1,
      topic: "two-tool work",
      summary: "Both tools were called successfully.",
      startTimestamp: 4000,  // only toolResult_B
      endTimestamp:   4000,
      anchorTimestamp: 5000,
      active: true,
      summaryTokenEstimate: 10,
      createdAt: Date.now(),
    },
  ]);
  const result = applyPruning(messages, state, makeConfig());
  console.log("  Result messages:");
  for (const m of result) {
    const preview = Array.isArray(m.content)
      ? m.content.map((b: any) => b.text ?? b.type ?? "?").join(" | ").slice(0, 60)
      : String(m.content).slice(0, 60);
    console.log(`    role="${m.role}"  ts=${m.timestamp}  content="${preview}"`);
  }
  // Neither the orphaned assistant nor its toolResults should survive unpaired
  const assistantPresent = result.some((m: any) => m.role === "assistant" && m.timestamp === 2000);
  const toolResultAPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_A");
  const toolResultBPresent = result.some((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_B");
  // All three must be absent (removed atomically) or all three present as a valid group
  if (assistantPresent) {
    assert.ok(toolResultAPresent, "FAIL — assistant present but toolResult_A missing");
    assert.ok(toolResultBPresent, "FAIL — assistant present but toolResult_B missing");
    // Verify ordering: assistant → toolResult_A → toolResult_B
    const aIdx = result.findIndex((m: any) => m.role === "assistant" && m.timestamp === 2000);
    const rAIdx = result.findIndex((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_A");
    const rBIdx = result.findIndex((m: any) => m.role === "toolResult" && m.toolCallId === "toolu_B");
    assert.ok(aIdx < rAIdx && rAIdx < rBIdx, "FAIL — assistant + toolResult ordering wrong");
    console.log("  PASS: assistant + both toolResults kept as a coherent group");
  } else {
    assert.ok(!toolResultAPresent, "FAIL — assistant removed but orphaned toolResult_A still present");
    assert.ok(!toolResultBPresent, "FAIL — assistant removed but orphaned toolResult_B still present");
    console.log("  PASS: assistant + both toolResults removed atomically");
  }
  console.log("TEST 3 PASSED\n");
 }
 // ---------------------------------------------------------------------------
 // Test 4 — BASHEXECUTION FORWARD GAP
 //
 // An assistant calls a tool whose result is stored as role="bashExecution".
 // The compression range covers the assistant but NOT the bashExecution result.
 //
 // Bug (before fix): forward expansion only checked role==="toolResult", so
 // bashExecution was left behind as an orphan.
 // Fix: forward expansion now also advances hi over bashExecution messages.
 //
 // Sequence:
 //   user(1000) → assistant(2000, toolCall_bash) → bashExecution(3000) → user(4000)
 // Compression block: [2000..2000] (only the assistant)
 // Expected: assistant + bashExecution removed together
 // ---------------------------------------------------------------------------
 {
  console.log("TEST 4: bashExecution forward gap");
  const messages: any[] = [
    { role: "user",          content: [{ type: "text", text: "run bash" }], timestamp: 1000 },
    { role: "assistant",     content: [{ type: "toolCall", id: "toolu_bash1", name: "bash", arguments: {} }], timestamp: 2000 },
    { role: "bashExecution", toolCallId: "toolu_bash1", toolName: "bash", isError: false, content: [{ type: "text", text: "exit 0" }], timestamp: 3000 },
    { role: "user",          content: [{ type: "text", text: "done" }], timestamp: 4000 },
  ];
  const state = makeState([
    {
      id: 1,
      topic: "bash run",
      summary: "Ran bash command successfully.",
      startTimestamp: 2000,
      endTimestamp:   2000,
      anchorTimestamp: 4000,
      active: true,
      summaryTokenEstimate: 8,
      createdAt: Date.now(),
    },
  ]);
  const result = applyPruning(messages, state, makeConfig());
  console.log("  Result messages:");
  for (const m of result) {
    const preview = Array.isArray(m.content)
      ? m.content.map((b: any) => b.text ?? b.type ?? "?").join(" | ").slice(0, 60)
      : String(m.content).slice(0, 60);
    console.log(`    role="${m.role}"  ts=${m.timestamp}  content="${preview}"`);
  }
  const assistantPresent   = result.some((m: any) => m.role === "assistant"     && m.timestamp === 2000);
  const bashPresent        = result.some((m: any) => m.role === "bashExecution" && m.toolCallId === "toolu_bash1");
  if (assistantPresent) {
    assert.ok(bashPresent, "FAIL — assistant present but bashExecution result missing");
    console.log("  PASS: assistant + bashExecution kept as a coherent group");
  } else {
    assert.ok(!bashPresent, "FAIL — assistant removed but orphaned bashExecution still present");
    console.log("  PASS: assistant + bashExecution removed atomically");
  }
  console.log("TEST 4 PASSED\n");
 }
 console.log("All tests passed.");
@@ -57,16 +57,32 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] {
    let lo = Math.min(startIdx, endIdx);
    let hi = Math.max(startIdx, endIdx);
-    // Expand lo backward: if the message immediately before lo is an assistant
+    // Expand lo backward: if there is an assistant before lo whose tool_use
-    // whose tool_use blocks have matching tool_results inside [lo..hi], pull
+    // blocks have matching tool_results inside [lo..hi], pull the entire
-    // it into the range so the pair is always removed together.
+    // assistant + any intermediate result messages into the range so the
    // group is always removed atomically.
    //
    // Critically we must skip backward past any toolResult / bashExecution
    // messages before lo, because an assistant with multiple tool_calls emits
    // N consecutive result messages — the assistant itself sits further back.
    while (lo > 0) {
-      const prev = messages[lo - 1] as any;
+      // Walk backward past tool-result messages to find the preceding assistant
-      if (prev.role !== "assistant") break;
+      let scanIdx = lo - 1;
      while (scanIdx >= 0) {
        const r = (messages[scanIdx] as any).role as string;
        if (r !== "toolResult" && r !== "bashExecution") break;
        scanIdx--;
      }
      if (scanIdx < 0 || (messages[scanIdx] as any).role !== "assistant") break;
      const prev = messages[scanIdx] as any;
      const toolCallIdsInRange = new Set<string>();
      for (let i = lo; i <= hi; i++) {
        const m = messages[i] as any;
-        if (m.role === "toolResult" && typeof m.toolCallId === "string") {
+        if (
          (m.role === "toolResult" || m.role === "bashExecution") &&
          typeof m.toolCallId === "string"
        ) {
          toolCallIdsInRange.add(m.toolCallId);
        }
      }
@@ -75,7 +91,8 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] {
        (block: any) => block.type === "toolCall" && toolCallIdsInRange.has(block.id)
      );
      if (!hasMatchingToolCalls) break;
-      lo--;
+      // Pull assistant + all intermediate result messages into the range
      lo = scanIdx;
    }
    // Expand hi forward: for every assistant message in [lo..hi] that has
@@ -98,7 +115,10 @@ function applyCompressionBlocks(messages: any[], state: DcpState): any[] {
      }
      while (hi + 1 < messages.length) {
        const next = messages[hi + 1] as any;
-        if (next.role === "toolResult" && assistantToolCallIds.has(next.toolCallId)) {
+        if (
          (next.role === "toolResult" || next.role === "bashExecution") &&
          assistantToolCallIds.has(next.toolCallId)
        ) {
          hi++;
        } else {
          break;