From 60b4249501ed87019ed427dd3d1b77595bace4e1 Mon Sep 17 00:00:00 2001 From: Greg Harvell Date: Fri, 27 Mar 2026 17:47:33 -0400 Subject: [PATCH] Initial Commit --- .gitignore | 1 + README.md | 146 +++++++++++++++++++ commands.ts | 352 ++++++++++++++++++++++++++++++++++++++++++++++ compress-tool.ts | 208 +++++++++++++++++++++++++++ config.ts | 251 +++++++++++++++++++++++++++++++++ index.ts | 228 ++++++++++++++++++++++++++++++ package-lock.json | 21 +++ package.json | 23 +++ prompts.ts | 228 ++++++++++++++++++++++++++++++ pruner.ts | 321 ++++++++++++++++++++++++++++++++++++++++++ state.ts | 203 ++++++++++++++++++++++++++ 11 files changed, 1982 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 commands.ts create mode 100644 compress-tool.ts create mode 100644 config.ts create mode 100644 index.ts create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 prompts.ts create mode 100644 pruner.ts create mode 100644 state.ts diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dbf0821 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +node_modules/* \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..4151e4c --- /dev/null +++ b/README.md @@ -0,0 +1,146 @@ +# Dynamic Context Pruning (DCP) for Pi + +Automatically reduces token usage in Pi coding agent sessions by managing conversation context through compression, deduplication, and smart nudges. 
+ +## Features + +- **Compress tool** — LLM-callable tool that replaces stale conversation ranges with exhaustive technical summaries, preserving full context fidelity at a fraction of the token cost +- **Deduplication** — automatically removes duplicate tool call outputs (same tool, same args) keeping only the most recent result +- **Error purging** — cleans up failed tool inputs after a configurable number of user turns +- **Context nudges** — injects compression reminders into the context at configurable thresholds: soft housekeeping notices, strong emergency warnings, and iteration reminders after long tool-call chains +- **Manual mode** — disable autonomous compression nudges; trigger compression only via `/dcp compress` or explicit user request +- **Session persistence** — compression blocks and pruning state survive session restarts +- **`/dcp` commands** — inspect context usage, view stats, sweep tool outputs, and manage compression blocks interactively + +## Installation + +### Global (applies to all pi sessions) + +```bash +pi install npm:@complexthings/pi-dynamic-context-pruning +``` + +### Install globally from GitHub + +```bash +pi install https://github.com/complexthings/pi-dynamic-context-pruning +``` + +### Try it without installing + +```bash +pi -e https://github.com/complexthings/pi-dynamic-context-pruning +``` + +## Configuration + +DCP uses a layered configuration system (later layers override earlier ones): + +1. Built-in defaults +2. `~/.config/pi/dcp.jsonc` — global user config (auto-created with defaults on first run) +3. `$PI_CONFIG_DIR/dcp.jsonc` — if the env var is set +4. 
`<project>/.pi/dcp.jsonc` — project-local overrides (walk up from cwd) + +### Example: `~/.config/pi/dcp.jsonc` + +```jsonc +{ + // Disable the extension entirely + // "enabled": false, + + // Start every session in manual mode + // "manualMode": { "enabled": true, "automaticStrategies": true }, + + "compress": { + // Above 80% context: fire a nudge (every nudgeFrequency context events) + "maxContextPercent": 0.8, + // Below 40% context: no nudges + "minContextPercent": 0.4, + // How many context events between nudges + "nudgeFrequency": 5, + // Nudge after this many tool calls since the last user message + "iterationNudgeThreshold": 15, + // "strong" = emergency tone, "soft" = housekeeping tone + "nudgeForce": "soft", + // These tool outputs are never auto-pruned + "protectedTools": ["compress", "write", "edit"] + }, + "strategies": { + "deduplication": { + "enabled": true, + // Additional tools to exclude from dedup + "protectedTools": [] + }, + "purgeErrors": { + "enabled": true, + // Purge failed tool inputs after N user turns + "turns": 4, + "protectedTools": [] + } + }, + // Glob patterns — matching file paths are never pruned + "protectedFilePatterns": [], + // "off" | "minimal" | "detailed" + "pruneNotification": "detailed" +} +``` + +## Commands + +All commands are available in the pi TUI via `/dcp <subcommand>`: + +| Command | Description | +|---|---| +| `/dcp` or `/dcp help` | Show command reference | +| `/dcp context` | Show context window usage and session stats | +| `/dcp stats` | Show pruning statistics (tokens saved, blocks, operations) | +| `/dcp sweep [N]` | Mark last N tool outputs for pruning (default: all since last user message) | +| `/dcp manual` | Show current manual mode status | +| `/dcp manual on` | Enable manual mode — autonomous nudges disabled | +| `/dcp manual off` | Disable manual mode — autonomous nudges re-enabled | +| `/dcp compress` | Trigger LLM compression immediately (sends a followUp message) | +| `/dcp decompress` | List all active compression 
blocks | +| `/dcp decompress N` | Restore compression block `bN` (re-expands it in context) | + +## How It Works + +### Compression blocks + +When the LLM calls the `compress` tool it provides one or more `{startId, endId, summary}` ranges. DCP: + +1. Records the range as a `CompressionBlock` with start/end timestamps +2. On every `context` event, splices out the raw messages in that range +3. Injects a synthetic `[Compressed section: …]` user message containing the summary +4. Keeps the block state in the session so it survives restarts + +Message IDs (`m001`, `m042`, etc.) and block IDs (`b1`, `b3`) are injected into every message in the context so the LLM can reference exact boundaries. + +### Nudge types + +| Nudge | Condition | +|---|---| +| **context-strong** | Above `maxContextPercent`, nudge counter ≥ `nudgeFrequency`, `nudgeForce = "strong"` | +| **context-soft** | Same as above with `nudgeForce = "soft"` | +| **iteration** | Between min/max percent AND ≥ `iterationNudgeThreshold` tool calls since last user message | +| **turn** | Between min/max percent, standard cadence | + +### Deduplication + +Two tool results share the same fingerprint (`toolName::JSON(sorted-args)`) if they were called with identical arguments. All but the last occurrence are replaced with a tombstone message. + +### Error purging + +Tool results that were errors are replaced with a tombstone after `purgeErrors.turns` user turns have passed, keeping the context clean of long-dead failure traces. + +## Status indicator + +A `DCP` badge is shown in the pi status bar. In manual mode it displays `DCP [manual]`. + +## Development + +```bash +npm install +npx tsc --noEmit # type-check without emitting +``` + +The extension is loaded by pi via [jiti](https://github.com/unjs/jiti) so TypeScript is executed directly — no build step required for normal use. 
import type { ExtensionAPI, ExtensionCommandContext } from "@mariozechner/pi-coding-agent"
import type { DcpState } from "./state.js"
import type { DcpConfig } from "./config.js"

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** Tools whose outputs are always protected from sweep regardless of config. */
const ALWAYS_PROTECTED_TOOLS = ["compress", "write", "edit"] as const

/** A block reference as typed by the user: "3", "b3" or "B3". */
const BLOCK_ID_PATTERN = /^b?(\d+)$/i

// ---------------------------------------------------------------------------
// Formatting helpers
// ---------------------------------------------------------------------------

/** Format a number with the runtime locale's digit grouping. */
function fmt(n: number): string {
  return n.toLocaleString()
}

/** Text shown in the status bar for the given manual-mode flag. */
function statusLabel(manualMode: boolean): string {
  return manualMode ? "DCP [manual]" : "DCP"
}

// ---------------------------------------------------------------------------
// Help
// ---------------------------------------------------------------------------

const HELP_TEXT = `DCP — Dynamic Context Pruning

Commands:
  /dcp context — Show context window usage breakdown
  /dcp stats — Show pruning statistics for this session
  /dcp sweep [N] — Prune last N tool outputs (default: all since last user msg)
  /dcp manual — Show manual mode status
  /dcp manual on — Enable manual mode (disable autonomous compression)
  /dcp manual off — Disable manual mode (enable autonomous compression)
  /dcp decompress — List active compression blocks
  /dcp decompress N — Restore compression block N
  /dcp compress — Trigger compression (sends compress tool invocation to LLM)`

/** `/dcp` and `/dcp help`: show the command reference. */
function handleHelp(ctx: ExtensionCommandContext): void {
  ctx.ui.notify(HELP_TEXT, "info")
}

// ---------------------------------------------------------------------------
// Context usage
// ---------------------------------------------------------------------------

/** `/dcp context`: report context-window usage followed by session counters. */
function handleContext(ctx: ExtensionCommandContext, state: DcpState): void {
  const usage = ctx.getContextUsage()

  const lines: string[] = []

  if (usage) {
    if (usage.tokens !== null) {
      const pct = ((usage.tokens / usage.contextWindow) * 100).toFixed(1)
      lines.push(
        `Context Usage: ${pct}% (${fmt(usage.tokens)} / ${fmt(usage.contextWindow)} tokens)`,
      )
    } else {
      lines.push(`Context Usage: unknown / ${fmt(usage.contextWindow)} tokens`)
    }
  } else {
    lines.push("Context Usage: unavailable")
  }

  lines.push("")
  lines.push("Session Stats:")
  lines.push(`  Tool calls tracked: ${fmt(state.toolCalls.size)}`)
  lines.push(`  Pruned tools: ${fmt(state.prunedToolIds.size)}`)
  lines.push(`  Compression blocks: ${state.compressionBlocks.filter((b) => b.active).length}`)
  lines.push(`  Tokens saved (estimated): ${fmt(state.tokensSaved)}`)

  ctx.ui.notify(lines.join("\n"), "info")
}

// ---------------------------------------------------------------------------
// Stats
// ---------------------------------------------------------------------------

/** `/dcp stats`: report pruning counters and the manual-mode flag. */
function handleStats(ctx: ExtensionCommandContext, state: DcpState): void {
  const activeBlocks = state.compressionBlocks.filter((b) => b.active).length
  const totalBlocks = state.compressionBlocks.length

  const lines: string[] = [
    "DCP Session Statistics:",
    `  Tokens saved (estimated): ${fmt(state.tokensSaved)}`,
    `  Total pruning operations: ${fmt(state.totalPruneCount)}`,
    `  Compression blocks active: ${activeBlocks} / ${totalBlocks} total`,
    `  Manual mode: ${state.manualMode ? "on" : "off"}`,
  ]

  ctx.ui.notify(lines.join("\n"), "info")
}

// ---------------------------------------------------------------------------
// Sweep
// ---------------------------------------------------------------------------

/** The fields of a branch message that the sweep reads. */
interface BranchMessage {
  role?: string
  toolCallId?: string
  toolName?: string
}

/**
 * `/dcp sweep [N]`: mark tool outputs as pruned.
 *
 * With `n > 0` the last `n` tool results of the branch are candidates;
 * otherwise every tool result after the last user message (or all of them
 * when the branch has no user message). Already-pruned ids and protected
 * tools are skipped.
 *
 * @param n Number of trailing tool results to sweep; `0` selects the default.
 */
async function handleSweep(
  ctx: ExtensionCommandContext,
  state: DcpState,
  config: DcpConfig,
  n: number,
): Promise<void> {
  await ctx.waitForIdle()

  const branch = ctx.sessionManager.getBranch()

  // `compress.protectedTools` is the list documented as "always protected
  // from pruning", so it has to be honoured here as well.
  const protectedTools = new Set<string>([
    ...ALWAYS_PROTECTED_TOOLS,
    ...config.compress.protectedTools,
    ...config.strategies.deduplication.protectedTools,
  ])

  // Single root → leaf walk: remember every tool result (with the tool name
  // carried on the message) and where the last user message sits.
  const results: Array<{ id: string; toolName: string | undefined; index: number }> = []
  let lastUserIndex = -1

  branch.forEach((entry, index) => {
    if (entry.type !== "message") return
    const msg: BranchMessage | undefined = (entry as any).message
    if (!msg) return
    if (msg.role === "user") {
      lastUserIndex = index
    } else if (msg.role === "toolResult" && typeof msg.toolCallId === "string") {
      results.push({ id: msg.toolCallId, toolName: msg.toolName, index })
    }
  })

  const candidates =
    n > 0
      ? results.slice(-n)
      : lastUserIndex >= 0
        ? results.filter((r) => r.index > lastUserIndex)
        : results

  const toAdd = candidates.filter((candidate) => {
    if (state.prunedToolIds.has(candidate.id)) return false

    // Prefer the tracked tool-call record; fall back to the name on the
    // result message so untracked calls cannot bypass protection.
    const toolName = state.toolCalls.get(candidate.id)?.toolName ?? candidate.toolName
    return toolName === undefined || !protectedTools.has(toolName)
  })

  for (const { id } of toAdd) {
    state.prunedToolIds.add(id)
  }

  const count = toAdd.length
  ctx.ui.notify(`Swept ${count} tool output${count === 1 ? "" : "s"}`, "info")
}

// ---------------------------------------------------------------------------
// Manual mode
// ---------------------------------------------------------------------------

/**
 * `/dcp manual [on|off]`: toggle or display manual mode.
 *
 * Toggling also refreshes the status-bar badge so it matches the new mode.
 */
function handleManual(
  ctx: ExtensionCommandContext,
  state: DcpState,
  subArg: string | undefined,
): void {
  if (subArg === "on") {
    state.manualMode = true
    ctx.ui.setStatus("dcp", statusLabel(true))
    ctx.ui.notify(
      "Manual mode: on\nAutonomous compression is disabled. Use /dcp compress to trigger manually.",
      "info",
    )
  } else if (subArg === "off") {
    state.manualMode = false
    ctx.ui.setStatus("dcp", statusLabel(false))
    ctx.ui.notify("Manual mode: off\nAutonomous compression is enabled.", "info")
  } else {
    // Status display (no argument).
    const status = state.manualMode ? "on" : "off"
    ctx.ui.notify(
      `Manual mode: ${status}\nWhen on: compress tool only fires when you explicitly request it.`,
      "info",
    )
  }
}

// ---------------------------------------------------------------------------
// Decompress
// ---------------------------------------------------------------------------

/**
 * `/dcp decompress [N]`: list active blocks, or deactivate block `N`.
 *
 * `N` may be given as a bare number or in the `bN` form used by the listing.
 */
function handleDecompress(
  ctx: ExtensionCommandContext,
  state: DcpState,
  nArg: string | undefined,
): void {
  if (nArg === undefined) {
    const activeBlocks = state.compressionBlocks.filter((b) => b.active)

    if (activeBlocks.length === 0) {
      ctx.ui.notify("No active compression blocks.", "info")
      return
    }

    const lines: string[] = ["Active compression blocks:"]
    for (const block of activeBlocks) {
      lines.push(
        `  b${block.id} — "${block.topic}" (est. ${fmt(block.summaryTokenEstimate)} tokens)`,
      )
    }
    lines.push("")
    lines.push("Run /dcp decompress N to restore a block.")

    ctx.ui.notify(lines.join("\n"), "info")
    return
  }

  // Strict parse: the whole argument must be digits with an optional "b".
  const match = BLOCK_ID_PATTERN.exec(nArg.trim())
  if (!match) {
    ctx.ui.notify(`Invalid block ID: "${nArg}". Usage: /dcp decompress N`, "error")
    return
  }
  const id = Number(match[1])

  const block = state.compressionBlocks.find((b) => b.id === id)

  if (!block) {
    ctx.ui.notify(`No compression block found with id ${id}.`, "error")
    return
  }

  if (!block.active) {
    ctx.ui.notify(`Compression block b${id} is already decompressed.`, "info")
    return
  }

  block.active = false
  ctx.ui.notify(`Decompressed block b${id}: "${block.topic}"`, "info")
}

// ---------------------------------------------------------------------------
// Compress (trigger)
// ---------------------------------------------------------------------------

/** `/dcp compress`: wait for idle, then send a hidden follow-up asking the LLM to compress. */
async function handleCompress(pi: ExtensionAPI, ctx: ExtensionCommandContext): Promise<void> {
  await ctx.waitForIdle()

  pi.sendMessage(
    {
      customType: "dcp-compress-trigger",
      content:
        "Please compress stale conversation sections using the compress tool now.",
      display: false,
    },
    { triggerTurn: true, deliverAs: "followUp" },
  )

  ctx.ui.notify("Triggered compression", "info")
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Register the `/dcp` command and dispatch its subcommands.
 *
 * @param pi Extension API used for registration and for sending messages.
 * @param state Shared DCP runtime state mutated by the subcommands.
 * @param config Loaded DCP configuration (read by `/dcp sweep`).
 */
export function registerCommands(
  pi: ExtensionAPI,
  state: DcpState,
  config: DcpConfig,
): void {
  pi.registerCommand("dcp", {
    description: "Dynamic Context Pruning — manage context window usage",
    getArgumentCompletions(prefix: string) {
      const subcommands = [
        { label: "context", description: "Show context window usage breakdown" },
        { label: "stats", description: "Show pruning statistics" },
        { label: "sweep", description: "Prune tool outputs" },
        { label: "manual", description: "Toggle manual mode" },
        { label: "decompress", description: "List or restore compression blocks" },
        { label: "compress", description: "Trigger LLM compression" },
        { label: "help", description: "Show help" },
      ]
      const matched = subcommands.filter((s) => s.label.startsWith(prefix))
      return matched.length > 0 ? matched : null
    },

    async handler(args: string, ctx: ExtensionCommandContext): Promise<void> {
      const parts = args.trim().split(/\s+/).filter(Boolean)
      const sub = parts[0] ?? ""

      switch (sub) {
        case "":
        case "help":
          handleHelp(ctx)
          break

        case "context":
          handleContext(ctx, state)
          break

        case "stats":
          handleStats(ctx, state)
          break

        case "sweep": {
          // Missing, non-numeric or negative N falls back to the default sweep.
          const rawN = parts[1] !== undefined ? parseInt(parts[1], 10) : 0
          const n = isNaN(rawN) || rawN < 0 ? 0 : rawN
          await handleSweep(ctx, state, config, n)
          break
        }

        case "manual":
          handleManual(ctx, state, parts[1])
          break

        case "decompress":
          handleDecompress(ctx, state, parts[1])
          break

        case "compress":
          await handleCompress(pi, ctx)
          break

        default:
          ctx.ui.notify(
            `Unknown DCP command: "${sub}". Run /dcp help for available commands.`,
            "error",
          )
          break
      }
    },
  })
}
// ---------------------------------------------------------------------------
// Dynamic Context Pruning (DCP) — compress tool registration
// ---------------------------------------------------------------------------

import { Type } from "@sinclair/typebox"
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"
import type { CompressionBlock, DcpState } from "./state.js"
import type { DcpConfig } from "./config.js"
import { COMPRESS_RANGE_DESCRIPTION } from "./prompts.js"
import { estimateTokens } from "./pruner.js"

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Substitute every `(bN)` token in `summary` with the topic and summary of
 * the active compression block whose id is N. Tokens that do not name an
 * active block are returned untouched.
 */
function expandBlockPlaceholders(summary: string, state: DcpState): string {
  const substitute = (token: string, digits: string): string => {
    const wantedId = parseInt(digits, 10)
    for (const candidate of state.compressionBlocks) {
      if (candidate.id === wantedId && candidate.active) {
        return `[Previously compressed: ${candidate.topic}]\n${candidate.summary}`
      }
    }
    return token
  }

  return summary.replace(/\(b(\d+)\)/g, substitute)
}
/**
 * Resolve a user-supplied ID string (e.g. "m001" or "b3") to a message
 * timestamp.
 *
 * - `bN` ids are matched (case-insensitively) against active entries of
 *   `state.compressionBlocks`; `field` selects the block's start or end
 *   timestamp depending on which side of the range the id is used for.
 * - Any other id is looked up in `state.messageIdSnapshot`, first exactly as
 *   given and then lower-cased, so "M001" resolves like "m001" — the same
 *   tolerance block ids already get.
 *
 * @throws Error(`Unknown message ID: <id>`) when the id cannot be resolved.
 */
function resolveIdToTimestamp(
  rawId: string,
  field: "startTimestamp" | "endTimestamp",
  state: DcpState,
): number {
  const id = rawId.trim()

  // Block ID: b1, b2, b10, …
  const blockMatch = /^b(\d+)$/i.exec(id)
  if (blockMatch) {
    const blockId = Number(blockMatch[1])
    const block = state.compressionBlocks.find((b) => b.id === blockId && b.active)
    if (!block) throw new Error(`Unknown message ID: ${id}`)
    return block[field]
  }

  // Message ID: m001, m042, …
  const ts = state.messageIdSnapshot.get(id) ?? state.messageIdSnapshot.get(id.toLowerCase())
  if (ts === undefined) throw new Error(`Unknown message ID: ${id}`)
  return ts
}

/**
 * Find the anchor timestamp for a compression block: the smallest timestamp
 * in `state.messageIdSnapshot` that is strictly greater than `endTimestamp`.
 *
 * @returns The anchor, or `Infinity` when no snapshot entry lies after
 *   `endTimestamp`.
 */
function resolveAnchorTimestamp(endTimestamp: number, state: DcpState): number {
  let anchor = Infinity
  for (const ts of state.messageIdSnapshot.values()) {
    if (ts > endTimestamp && ts < anchor) {
      anchor = ts
    }
  }
  return anchor
}
// ---------------------------------------------------------------------------
// Tool registration
// ---------------------------------------------------------------------------

/**
 * Register the LLM-callable `compress` tool.
 *
 * Each `{startId, endId, summary}` range becomes a `CompressionBlock` stored
 * in `state.compressionBlocks`. Execution is two-phase:
 *
 * 1. Every range is resolved and validated before anything is written, so a
 *    bad range later in the list cannot leave earlier ranges half-applied.
 * 2. Only then are superseded blocks deactivated and new blocks appended.
 *
 * An active block that lies entirely inside a new range is superseded
 * (deactivated) provided the summary references it as `(bN)`, so that its
 * content is carried into the new summary. This is what makes `bN` ids
 * usable as range boundaries. Partial overlaps are still rejected.
 *
 * NOTE(review): superseding assumes the pruner ignores blocks with
 * `active === false`, as `/dcp decompress` already relies on — confirm
 * against pruner.ts.
 *
 * @throws Error for unknown ids, inverted ranges, overlapping ranges, or a
 *   contained block that the summary does not reference.
 */
export function registerCompressTool(
  pi: ExtensionAPI,
  state: DcpState,
  config: DcpConfig,
): void {
  pi.registerTool({
    name: "compress",
    label: "Compress Context",
    description: COMPRESS_RANGE_DESCRIPTION,
    promptSnippet: "Compress ranges of conversation into summaries to manage context",
    parameters: Type.Object({
      topic: Type.String({
        description:
          "Short label (3-5 words) for display - e.g., 'Auth System Exploration'",
      }),
      ranges: Type.Array(
        Type.Object({
          startId: Type.String({
            description:
              "Message ID marking start of range (e.g. m001, b2)",
          }),
          endId: Type.String({
            description:
              "Message ID marking end of range (e.g. m042, b5)",
          }),
          summary: Type.String({
            description:
              "Complete technical summary replacing all content in range",
          }),
        }),
        { description: "One or more ranges to compress" },
      ),
    }),

    async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
      // ── Phase 1: resolve and validate every range, writing nothing ──────
      const planned: Array<{
        startTimestamp: number
        endTimestamp: number
        anchorTimestamp: number
        summary: string
      }> = []
      const superseded = new Set<CompressionBlock>()

      for (const { startId, endId, summary } of params.ranges) {
        const startTimestamp = resolveIdToTimestamp(startId, "startTimestamp", state)
        const endTimestamp = resolveIdToTimestamp(endId, "endTimestamp", state)

        if (startTimestamp > endTimestamp) {
          throw new Error(
            `Range start "${startId}" must appear before end "${endId}" in the conversation`,
          )
        }

        for (const existing of state.compressionBlocks) {
          if (!existing.active) continue
          const overlaps =
            startTimestamp <= existing.endTimestamp &&
            existing.startTimestamp <= endTimestamp
          if (!overlaps) continue

          const contained =
            startTimestamp <= existing.startTimestamp &&
            existing.endTimestamp <= endTimestamp
          if (!contained) {
            throw new Error(
              `Overlapping compression ranges are not supported. ` +
                `New range (${startId}..${endId}) overlaps existing block ` +
                `b${existing.id} "${existing.topic}"`,
            )
          }
          // Refuse to drop a block's content silently.
          if (!summary.includes(`(b${existing.id})`)) {
            throw new Error(
              `New range (${startId}..${endId}) contains existing block ` +
                `b${existing.id} "${existing.topic}". Reference it as ` +
                `(b${existing.id}) in the summary so its content is carried over.`,
            )
          }
          superseded.add(existing)
        }

        // Ranges of the same call must not overlap each other either.
        for (const earlier of planned) {
          if (startTimestamp <= earlier.endTimestamp && earlier.startTimestamp <= endTimestamp) {
            throw new Error(
              `Overlapping compression ranges are not supported. ` +
                `New range (${startId}..${endId}) overlaps another range in the same call`,
            )
          }
        }

        planned.push({
          startTimestamp,
          endTimestamp,
          // Anchor: first raw message after the range.
          anchorTimestamp: resolveAnchorTimestamp(endTimestamp, state),
          // Expanded while the referenced blocks are still active.
          summary: expandBlockPlaceholders(summary, state),
        })
      }

      // ── Phase 2: commit ──────────────────────────────────────────────────
      for (const old of superseded) {
        old.active = false
      }

      const newBlocks: CompressionBlock[] = planned.map((plan) => ({
        id: state.nextBlockId++,
        topic: params.topic,
        summary: plan.summary,
        startTimestamp: plan.startTimestamp,
        endTimestamp: plan.endTimestamp,
        anchorTimestamp: plan.anchorTimestamp,
        active: true,
        summaryTokenEstimate: estimateTokens(plan.summary),
        createdAt: Date.now(),
      }))
      state.compressionBlocks.push(...newBlocks)
      const newBlockIds = newBlocks.map((block) => block.id)

      // ── Notification ─────────────────────────────────────────────────────
      if (config.pruneNotification !== "off") {
        const count = params.ranges.length
        const rangeWord = count === 1 ? "range" : "ranges"

        if (config.pruneNotification === "detailed") {
          const totalTokens = newBlocks.reduce(
            (sum, block) => sum + block.summaryTokenEstimate,
            0,
          )
          ctx.ui.notify(
            `Compressed: ${params.topic} (${count} ${rangeWord}, ~${totalTokens} tokens in summaries)`,
            "info",
          )
        } else {
          // "minimal"
          ctx.ui.notify(`Compressed: ${params.topic}`, "info")
        }
      }

      // ── Return result ────────────────────────────────────────────────────
      return {
        content: [
          {
            type: "text",
            text: `Compressed ${params.ranges.length} range(s): ${params.topic}`,
          },
        ],
        details: {
          blockIds: newBlockIds,
          topic: params.topic,
        },
      }
    },
  })
}
import * as fs from "node:fs"
import * as path from "node:path"
import * as os from "node:os"
import { parse as parseJsonc } from "jsonc-parser"

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** Fully-resolved DCP configuration, after all config layers are merged. */
export interface DcpConfig {
  enabled: boolean
  debug: boolean
  manualMode: {
    enabled: boolean
    automaticStrategies: boolean // run dedup/purge even in manual mode
  }
  compress: {
    maxContextPercent: number // 0-1, e.g. 0.8 — above this, aggressive nudges
    minContextPercent: number // 0-1, e.g. 0.4 — below this, no nudges
    nudgeFrequency: number // inject nudge every N context events (default: 5)
    iterationNudgeThreshold: number // nudge after N tool calls since last user msg (default: 15)
    nudgeForce: "strong" | "soft"
    protectedTools: string[] // these tool outputs always protected from pruning
    protectUserMessages: boolean
  }
  strategies: {
    deduplication: {
      enabled: boolean
      protectedTools: string[]
    }
    purgeErrors: {
      enabled: boolean
      turns: number // prune error inputs after N user turns (default: 4)
      protectedTools: string[]
    }
  }
  protectedFilePatterns: string[]
  pruneNotification: "off" | "minimal" | "detailed"
}

// ---------------------------------------------------------------------------
// Defaults
// ---------------------------------------------------------------------------

/** Built-in defaults: the first (lowest-priority) configuration layer. */
const DEFAULT_CONFIG: DcpConfig = {
  enabled: true,
  debug: false,
  manualMode: {
    enabled: false,
    automaticStrategies: true,
  },
  compress: {
    maxContextPercent: 0.8,
    minContextPercent: 0.4,
    nudgeFrequency: 5,
    iterationNudgeThreshold: 15,
    nudgeForce: "soft",
    protectedTools: ["compress", "write", "edit"],
    protectUserMessages: false,
  },
  strategies: {
    deduplication: {
      enabled: true,
      protectedTools: [],
    },
    purgeErrors: {
      enabled: true,
      turns: 4,
      protectedTools: [],
    },
  },
  protectedFilePatterns: [],
  pruneNotification: "detailed",
}

/**
 * Template written to the global config file when it does not exist yet.
 * Every setting is commented out, so the file parses to an empty object.
 */
const DEFAULT_CONFIG_FILE_CONTENT = `{
  // Dynamic Context Pruning (DCP) configuration
  // Full schema reference: https://github.com/complexthings/pi-dynamic-context-pruning
  //
  // "$schema": "...",
  //
  // Uncomment and edit properties you want to override:
  //
  // "enabled": true,
  // "debug": false,
  // "manualMode": {
  //   "enabled": false,
  //   "automaticStrategies": true
  // },
  // "compress": {
  //   "maxContextPercent": 0.8,
  //   "minContextPercent": 0.4,
  //   "nudgeFrequency": 5,
  //   "iterationNudgeThreshold": 15,
  //   "nudgeForce": "soft",
  //   "protectedTools": ["compress", "write", "edit"],
  //   "protectUserMessages": false
  // },
  // "strategies": {
  //   "deduplication": { "enabled": true, "protectedTools": [] },
  //   "purgeErrors": { "enabled": true, "turns": 4, "protectedTools": [] }
  // },
  // "protectedFilePatterns": [],
  // "pruneNotification": "detailed"
}
`

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Keys that must never be copied from parsed config data onto an object. */
const UNSAFE_MERGE_KEYS: ReadonlySet<string> = new Set(["__proto__", "constructor", "prototype"])

/** True for non-null, non-array objects. */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === "object" && !Array.isArray(value)
}

/**
 * Recursively merge `override` into `base`. Arrays are union-merged (deduped).
 * Returns a new object; does not mutate inputs.
 *
 * - A `null`/`undefined` override returns `base` unchanged.
 * - A non-object (or `null`) base is replaced by `override`.
 * - `undefined` override values are ignored.
 * - `__proto__`, `constructor` and `prototype` keys are skipped, so config
 *   file content cannot replace the result's prototype.
 */
function deepMerge<T>(base: T, override: Partial<T>): T {
  if (override === null || override === undefined) return base
  if (base === null || typeof base !== "object" || typeof override !== "object") {
    return override as T
  }

  const baseRecord = base as Record<string, unknown>
  const result: Record<string, unknown> = { ...baseRecord }

  for (const [key, overVal] of Object.entries(override as Record<string, unknown>)) {
    if (UNSAFE_MERGE_KEYS.has(key)) continue
    const baseVal = baseRecord[key]

    if (Array.isArray(baseVal) && Array.isArray(overVal)) {
      // Union merge: combine and deduplicate by value
      result[key] = [...new Set([...baseVal, ...overVal])]
    } else if (isPlainObject(baseVal) && isPlainObject(overVal)) {
      result[key] = deepMerge(baseVal, overVal)
    } else if (overVal !== undefined) {
      result[key] = overVal
    }
  }

  return result as T
}
/**
 * Parse a JSONC file and return a plain object.
 *
 * Returns `{}` when the file cannot be read or when the parsed root is not a
 * plain object. Parse errors are deliberately non-fatal: whatever the parser
 * could recover is returned.
 */
function readJsoncFile(filePath: string): Record<string, unknown> {
  let raw: string
  try {
    raw = fs.readFileSync(filePath, "utf8")
  } catch {
    return {}
  }

  const errors: unknown[] = []
  const parsed: unknown = parseJsonc(raw, errors)
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return {}
  }
  return parsed as Record<string, unknown>
}

/**
 * Ensure the global config file exists, creating it with defaults if missing.
 *
 * The file is opened with the exclusive-create flag (`wx`), so an existing
 * file is never overwritten even if it appears between check and write.
 * Best-effort: any filesystem error (including "already exists") is ignored.
 */
function ensureGlobalConfig(filePath: string): void {
  try {
    fs.mkdirSync(path.dirname(filePath), { recursive: true })
    fs.writeFileSync(filePath, DEFAULT_CONFIG_FILE_CONTENT, { encoding: "utf8", flag: "wx" })
  } catch {
    // Best-effort; do not crash if we cannot write
  }
}

/**
 * Walk up from `startDir` looking for `.pi/dcp.jsonc`.
 * Returns the path if found, otherwise null.
 */
function findProjectConfig(startDir: string): string | null {
  let dir = path.resolve(startDir)

  while (true) {
    const candidate = path.join(dir, ".pi", "dcp.jsonc")
    if (fs.existsSync(candidate)) return candidate

    // At the filesystem root `dirname` returns its input: stop there.
    const parent = path.dirname(dir)
    if (parent === dir) return null
    dir = parent
  }
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Load the DCP configuration by merging (in order):
 *   1. Built-in defaults
 *   2. ~/.config/pi/dcp.jsonc (global; auto-created if missing)
 *   3. $PI_CONFIG_DIR/dcp.jsonc (if env var is set)
 *   4. <project>/.pi/dcp.jsonc (walked up from projectDir)
 *
 * Later layers override earlier ones. Files that are missing or parse to an
 * empty object are skipped.
 *
 * @param projectDir Directory from which the project-local config is searched.
 * @returns A config object that shares no nested object or array with
 *   `DEFAULT_CONFIG`, so callers may mutate it freely.
 */
export function loadConfig(projectDir: string): DcpConfig {
  // Layer 1: defaults. A real deep copy — merging with `{}` only copies the
  // top level and would leave nested objects aliased to the constant.
  let config: DcpConfig = structuredClone(DEFAULT_CONFIG)

  const layerPaths: string[] = []

  // Layer 2: global config
  const globalConfigPath = path.join(os.homedir(), ".config", "pi", "dcp.jsonc")
  ensureGlobalConfig(globalConfigPath)
  layerPaths.push(globalConfigPath)

  // Layer 3: $PI_CONFIG_DIR/dcp.jsonc
  const piConfigDir = process.env["PI_CONFIG_DIR"]
  if (piConfigDir) {
    layerPaths.push(path.join(piConfigDir, "dcp.jsonc"))
  }

  // Layer 4: project-local config (walk up from projectDir)
  const projectConfigPath = findProjectConfig(projectDir)
  if (projectConfigPath) {
    layerPaths.push(projectConfigPath)
  }

  for (const layerPath of layerPaths) {
    const raw = readJsoncFile(layerPath)
    if (Object.keys(raw).length > 0) {
      config = deepMerge(config, raw as Partial<DcpConfig>)
    }
  }

  return config
}
registerCompressTool } from "./compress-tool.js" +import { registerCommands } from "./commands.js" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Persist the current DCP runtime state as a custom session entry so it + * survives session restarts and pi process restarts. + */ +function saveState(pi: ExtensionAPI, state: DcpState): void { + pi.appendEntry("dcp-state", { + compressionBlocks: state.compressionBlocks, + nextBlockId: state.nextBlockId, + prunedToolIds: Array.from(state.prunedToolIds), + tokensSaved: state.tokensSaved, + totalPruneCount: state.totalPruneCount, + manualMode: state.manualMode, + }) +} + +// --------------------------------------------------------------------------- +// Extension entry point +// --------------------------------------------------------------------------- + +export default function (pi: ExtensionAPI) { + // ── 1. Load config ──────────────────────────────────────────────────────── + const config = loadConfig(process.cwd()) + + if (!config.enabled) return + + // ── 2. Create state ─────────────────────────────────────────────────────── + const state = createState() + + // Apply config baseline for manual mode before any session events fire. + if (config.manualMode.enabled) { + state.manualMode = true + } + + // ── 3. Register compress tool ───────────────────────────────────────────── + registerCompressTool(pi, state, config) + + // ── 4. Register /dcp commands ───────────────────────────────────────────── + registerCommands(pi, state, config) + + // ── 5. session_start: restore state from session entries ────────────────── + pi.on("session_start", async (event, ctx) => { + // Reset to a clean slate first. + resetState(state) + + // Re-apply config baseline so manual mode survives a session_start reset. 
+ if (config.manualMode.enabled) { + state.manualMode = true + } + + // Walk the branch looking for the most-recent persisted dcp-state entry. + for (const entry of ctx.sessionManager.getBranch()) { + if (entry.type === "custom" && entry.customType === "dcp-state") { + const data = entry.data as any + + if (data?.compressionBlocks) { + state.compressionBlocks = data.compressionBlocks + state.nextBlockId = data.nextBlockId ?? state.compressionBlocks.length + state.tokensSaved = data.tokensSaved ?? 0 + state.totalPruneCount = data.totalPruneCount ?? 0 + } + + if (data?.prunedToolIds) { + state.prunedToolIds = new Set(data.prunedToolIds) + } + + // Saved manualMode takes precedence over config baseline so the user's + // last /dcp manual on|off choice is honoured across restarts. + if (data?.manualMode !== undefined) { + state.manualMode = data.manualMode + } + } + } + + // Show a status indicator in the pi TUI. + ctx.ui.setStatus("dcp", state.manualMode ? "DCP [manual]" : "DCP") + }) + + // ── 6. session_shutdown: save state ─────────────────────────────────────── + pi.on("session_shutdown", async (_event, _ctx) => { + saveState(pi, state) + }) + + // ── 7. before_agent_start: inject system prompt ─────────────────────────── + pi.on("before_agent_start", async (event, _ctx) => { + const promptAddition = state.manualMode + ? MANUAL_MODE_SYSTEM_PROMPT + : SYSTEM_PROMPT + + return { + systemPrompt: event.systemPrompt + "\n\n" + promptAddition, + } + }) + + // ── 8. tool_call: record input args for dedup / purge fingerprinting ─────── + pi.on("tool_call", async (event, _ctx) => { + // Only create a record if we haven't seen this toolCallId yet. The + // tool_result handler may also create one if the tool_call event was + // somehow missed. 
+ if (!state.toolCalls.has(event.toolCallId)) { + state.toolCalls.set(event.toolCallId, { + toolCallId: event.toolCallId, + toolName: event.toolName, + inputArgs: event.input as Record, + inputFingerprint: createInputFingerprint( + event.toolName, + event.input as Record, + ), + isError: false, + turnIndex: state.currentTurn, + timestamp: 0, // filled in by the tool_result handler + tokenEstimate: 0, + }) + } + }) + + // ── 9. tool_result: finalise tool record with result info ───────────────── + pi.on("tool_result", async (event, _ctx) => { + const record = state.toolCalls.get(event.toolCallId) + + const outputText = event.content + .map((c: any) => (c.type === "text" ? c.text : "")) + .join("") + const tokenEstimate = Math.round(outputText.length / 4) + + if (record) { + // Update the record created in tool_call. + record.isError = event.isError + record.timestamp = Date.now() + record.tokenEstimate = tokenEstimate + } else { + // Fallback: create a record even when tool_call event was not observed. + state.toolCalls.set(event.toolCallId, { + toolCallId: event.toolCallId, + toolName: event.toolName, + inputArgs: {}, + inputFingerprint: createInputFingerprint(event.toolName, {}), + isError: event.isError, + turnIndex: state.currentTurn, + timestamp: Date.now(), + tokenEstimate, + }) + } + }) + + // ── 10. context: apply pruning and inject nudges ────────────────────────── + pi.on("context", async (event, ctx) => { + // Apply all pruning transforms (compression blocks, dedup, error purge, + // tool output replacement, message ID injection). + const prunedMessages = applyPruning(event.messages, state, config) + + // In manual mode we still apply pruning strategies (if + // automaticStrategies is on) but skip autonomous nudge injection. 
+ const usage = ctx.getContextUsage() + if (usage && usage.tokens !== null && !state.manualMode) { + const contextPercent = usage.tokens / usage.contextWindow + + // Count tool calls since the last user message (used for iteration nudge). + let toolCallsSinceLastUser = 0 + for (let i = prunedMessages.length - 1; i >= 0; i--) { + const msg = prunedMessages[i] as any + if (msg.role === "user") break + if (msg.role === "toolResult") toolCallsSinceLastUser++ + } + + const nudgeType = getNudgeType( + contextPercent, + state, + config, + toolCallsSinceLastUser, + ) + + if (nudgeType) { + let nudgeText: string + + if (nudgeType === "context-strong") { + nudgeText = CONTEXT_LIMIT_NUDGE_STRONG + } else if (nudgeType === "context-soft") { + nudgeText = CONTEXT_LIMIT_NUDGE_SOFT + } else if (nudgeType === "iteration") { + nudgeText = ITERATION_NUDGE + } else { + // "turn" + nudgeText = TURN_NUDGE + } + + injectNudge(prunedMessages, nudgeText) + state.nudgeCounter = 0 + } else { + state.nudgeCounter++ + } + } + + return { messages: prunedMessages } + }) + + // ── 11. 
agent_end: persist state after each agent run ──────────────────── + pi.on("agent_end", async (_event, _ctx) => { + saveState(pi, state) + }) +} diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..2f2d17e --- /dev/null +++ b/package-lock.json @@ -0,0 +1,21 @@ +{ + "name": "pi-dynamic-context-pruning", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "pi-dynamic-context-pruning", + "version": "1.0.0", + "dependencies": { + "jsonc-parser": "^3.3.1" + } + }, + "node_modules/jsonc-parser": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", + "integrity": "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==", + "license": "MIT" + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..cda71d9 --- /dev/null +++ b/package.json @@ -0,0 +1,23 @@ +{ + "name": "@complexthings/pi-dynamic-context-pruning", + "version": "1.0.0", + "description": "PI coding agent extension — Dynamic Context Pruning (DCP)", + "type": "module", + "pi": { + "extensions": [ + "./index.ts" + ] + }, + "author": { + "name": "Greg", + "email": "greg.harvell@complexthings.com", + "url": "https://github.com/complexthings" + }, + "repository": { + "type": "git", + "url": "" + }, + "dependencies": { + "jsonc-parser": "^3.3.1" + } +} diff --git a/prompts.ts b/prompts.ts new file mode 100644 index 0000000..630f09e --- /dev/null +++ b/prompts.ts @@ -0,0 +1,228 @@ +// --------------------------------------------------------------------------- +// Dynamic Context Pruning (DCP) — PI extension prompts +// --------------------------------------------------------------------------- +// All prompt text is exported as plain strings so the extension index can +// reference them by name without executing any logic here. 
+// --------------------------------------------------------------------------- + +/** + * Appended to the existing system prompt when DCP is enabled (automatic mode). + */ +export const SYSTEM_PROMPT = ` +You operate in a context-constrained environment. Manage context continuously to avoid buildup and preserve retrieval quality. Efficient context management is paramount for your agentic performance. + +The ONLY tool you have for context management is \`compress\`. It replaces older conversation content with technical summaries you produce. + +\`\` and \`\` tags are environment-injected metadata. Do not output them. + +THE PHILOSOPHY OF COMPRESS +\`compress\` transforms conversation content into dense, high-fidelity summaries. This is not cleanup — it is crystallization. Your summary becomes the authoritative record of what transpired. + +Think of compression as phase transitions: raw exploration becomes refined understanding. The original context served its purpose; your summary now carries that understanding forward. + +OPERATING STANCE +Prefer short, closed, summary-safe compressions. +When multiple independent stale sections exist, prefer several focused compressions (in parallel when possible) over one broad compression. + +Use \`compress\` as steady housekeeping while you work. 
+ +CADENCE, SIGNALS, AND LATENCY + +- No fixed threshold mandates compression +- Prioritize closedness and independence over raw size +- Prefer smaller, regular compressions over infrequent massive compressions for better latency and summary quality +- When multiple independent stale sections are ready, batch compressions in parallel + +COMPRESS WHEN + +A section is genuinely closed and the raw conversation has served its purpose: + +- Research concluded and findings are clear +- Implementation finished and verified +- Exploration exhausted and patterns understood +- Dead-end noise can be discarded without waiting for a whole chapter to close + +DO NOT COMPRESS IF + +- Raw context is still relevant and needed for edits or precise references +- The target content is still actively in progress +- You may need exact code, error messages, or file contents in the immediate next steps + +Before compressing, ask: _"Is this section closed enough to become summary-only right now?"_ + +Evaluate conversation signal-to-noise REGULARLY. Use \`compress\` deliberately with quality-first summaries. Prioritize stale content intelligently to maintain a high-signal context window that supports your agency. + +It is your responsibility to keep a sharp, high-quality context window for optimal performance. +`.trim() + +/** + * Used as the \`description\` field when registering the \`compress\` tool. + * + * Tool signature: + * { + * topic: string // 3-5 word label for this compression + * ranges: Array<{ + * startId: string // mNNN or bN + * endId: string // mNNN or bN + * summary: string // exhaustive technical summary + * }> + * } + */ +export const COMPRESS_RANGE_DESCRIPTION = `Collapse one or more ranges of the conversation into detailed summaries. + +THE SUMMARY +Your summary must be EXHAUSTIVE. Capture file paths, function signatures, decisions made, constraints discovered, key findings... EVERYTHING that maintains context integrity. 
This is not a brief note — it is an authoritative record so faithful that the original conversation adds no value. + +USER INTENT FIDELITY +When the compressed range includes user messages, preserve the user's intent with extra care. Do not change scope, constraints, priorities, acceptance criteria, or requested outcomes. +Directly quote user messages when they are short enough to include safely. Direct quotes are preferred when they best preserve exact meaning. + +Yet be LEAN. Strip away the noise: failed attempts that led nowhere, verbose tool outputs, back-and-forth exploration. What remains should be pure signal — golden nuggets of detail that preserve full understanding with zero ambiguity. + +COMPRESSED BLOCK PLACEHOLDERS +When the selected range includes previously compressed blocks, use this exact placeholder format when referencing one: + +- \`(bN)\` + +Compressed block sections in context are clearly marked with a header: + +- \`[Compressed conversation section]\` + +Compressed block IDs always use the \`bN\` form (never \`mNNN\`) and are represented in the same XML metadata tag format. + +Rules: + +- Include every required block placeholder exactly once. +- Do not invent placeholders for blocks outside the selected range. +- Treat \`(bN)\` placeholders as RESERVED TOKENS. Do not emit \`(bN)\` text anywhere except intentional placeholders. +- If you need to mention a block in prose, use plain text like \`compressed bN\` (not as a placeholder). +- Preflight check before finalizing: the set of \`(bN)\` placeholders in your summary must exactly match the required set, with no duplicates. + +These placeholders are semantic references. They will be replaced with the full stored compressed block content when the tool processes your output. + +FLOW PRESERVATION WITH PLACEHOLDERS +When you use compressed block placeholders, write the surrounding summary text so it still reads correctly AFTER placeholder expansion. 
+ +- Treat each placeholder as a stand-in for a full conversation segment, not as a short label. +- Ensure transitions before and after each placeholder preserve chronology and causality. +- Do not write text that depends on the placeholder staying literal (for example, "as noted in \`(b2)\`"). +- Your final meaning must be coherent once each placeholder is replaced with its full compressed block content. + +BOUNDARY IDS +You specify boundaries by ID using the injected IDs visible in the conversation: + +- \`mNNN\` IDs identify raw messages (3 digits, zero-padded, e.g. \`m001\`, \`m042\`) +- \`bN\` IDs identify previously compressed blocks + +Each message has an ID inside XML metadata tags like \`...\`. +The ID tag appears at the end of the message it belongs to — it identifies the message above it, not the one below it. +Treat these tags as boundary metadata only, not as tool result content. + +Rules: + +- Pick \`startId\` and \`endId\` directly from injected IDs in context. +- IDs must exist in the current visible context. +- \`startId\` must appear before \`endId\`. +- Do not invent IDs. Use only IDs that are present in context. + +BATCHING +When multiple independent ranges are ready and their boundaries do not overlap, include all of them as separate entries in the \`ranges\` array of a single tool call. Each entry must have its own \`startId\`, \`endId\`, and \`summary\`.` + +/** + * Injected into messages when context usage exceeds maxContextPercent. + * nudgeForce = "strong" — emergency recovery tone. + */ +export const CONTEXT_LIMIT_NUDGE_STRONG = ` +CRITICAL WARNING: MAX CONTEXT LIMIT REACHED + +You are at or beyond the configured max context threshold. This is an emergency context-recovery moment. + +You MUST use the \`compress\` tool now. Do not continue normal exploration until compression is handled. + +If you are in the middle of a critical atomic operation, finish that atomic step first, then compress immediately. 
+ +RANGE STRATEGY (MANDATORY) +Prioritize one large, closed, high-yield compression range first. +This overrides the normal preference for many small compressions. +Only split into multiple compressions if one large range would reduce summary quality or make boundary selection unsafe. + +RANGE SELECTION +Start from older, resolved history and capture as much stale context as safely possible in one pass. +Avoid the newest active working slice unless it is clearly closed. +Use visible injected boundary IDs for compression (\`mNNN\` for messages, \`bN\` for compressed blocks), and ensure \`startId\` appears before \`endId\`. + +SUMMARY REQUIREMENTS +Your summary must cover all essential details from the selected range so work can continue without reopening raw messages. +If the compressed range includes user messages, preserve user intent exactly. Prefer direct quotes for short user messages to avoid semantic drift. +` + +/** + * Injected into messages when context usage exceeds maxContextPercent. + * nudgeForce = "soft" — steady housekeeping tone. + */ +export const CONTEXT_LIMIT_NUDGE_SOFT = ` +NOTICE: Context usage is high. + +Look for a closed, self-contained range that no longer needs to stay raw and compress it now. + +RANGE SELECTION +Prefer older, resolved history. Avoid the newest active working slice unless it is clearly done. +Use visible boundary IDs (\`mNNN\` for messages, \`bN\` for compressed blocks) and ensure \`startId\` appears before \`endId\`. + +If multiple independent ranges are ready, batch them in a single \`compress\` call. +If nothing is cleanly closed yet, continue — but compress at the earliest opportunity. +` + +/** + * Injected as a lightweight reminder between minContextPercent and maxContextPercent + * at the configured nudgeFrequency cadence. + */ +export const TURN_NUDGE = ` +Evaluate the conversation for compressible ranges. + +If any range is cleanly closed and unlikely to be needed again, use the compress tool on it. 
+If direction has shifted, compress earlier ranges that are now less relevant. + +Prefer small, closed-range compressions over one broad compression. +The goal is to filter noise and distill key information so context accumulation stays under control. +Keep active context uncompressed. +` + +/** + * Injected after iterationNudgeThreshold tool calls since the last user message. + */ +export const ITERATION_NUDGE = ` +You've been iterating for a while after the last user message. + +If there is a closed portion that is unlikely to be referenced immediately (for example, finished research before implementation), use the compress tool on it now. + +Prefer multiple short, closed ranges over one large range when several independent slices are ready. +` + +/** + * Replaces SYSTEM_PROMPT when manualMode.enabled = true. + * The agent should NOT proactively compress — only compress when explicitly + * requested by the user or when a context-limit nudge fires. + */ +export const MANUAL_MODE_SYSTEM_PROMPT = ` +You are operating in DCP manual mode for context management. + +\`\` and \`\` tags are environment-injected metadata. Do not output them. + +In manual mode you do NOT proactively compress conversation content. Compression is a deliberate, user-directed action. + +WHEN TO COMPRESS +- Only when the user explicitly asks you to compress +- Only when a \`\` nudge instructs you to (context-limit emergency) +- Never as background housekeeping or on your own initiative + +WHEN YOU DO COMPRESS +Apply the same quality standards as always: + +- Summaries must be EXHAUSTIVE — file paths, decisions, findings, exact constraints +- Preserve user intent precisely; prefer direct quotes for short user messages +- Use only boundary IDs visible in context (\`mNNN\` for messages, \`bN\` for compressed blocks) +- Batch independent ranges in a single \`compress\` call when possible + +Do not compress active, still-needed context. 
Only compress ranges that are genuinely closed and whose raw form is no longer required. +`.trim() diff --git a/pruner.ts b/pruner.ts new file mode 100644 index 0000000..31b2cad --- /dev/null +++ b/pruner.ts @@ -0,0 +1,321 @@ +import type { DcpState } from "./state.js"; +import type { DcpConfig } from "./config.js"; + +// Always-protected tool names for deduplication +const ALWAYS_PROTECTED_DEDUP = new Set(["compress", "write", "edit"]); + +// Roles that get message IDs injected +const ID_ELIGIBLE_ROLES = new Set(["user", "assistant", "toolResult", "bashExecution"]); + +// Roles that are PI-internal and should pass through unchanged +const PASSTHROUGH_ROLES = new Set(["compaction", "branch_summary", "custom_message"]); + +/** + * Simple token estimator: chars / 4, rounded. + */ +export function estimateTokens(text: string): number { + return Math.round(text.length / 4); +} + +/** + * Estimate tokens from a message's content, whatever shape it takes. + */ +function estimateMessageTokens(msg: any): number { + if (!msg) return 0; + const content = msg.content; + if (!content) return 0; + if (typeof content === "string") return estimateTokens(content); + if (Array.isArray(content)) { + let total = 0; + for (const part of content) { + if (part && typeof part === "object") { + if (typeof part.text === "string") total += estimateTokens(part.text); + else if (typeof part.thinking === "string") total += estimateTokens(part.thinking); + else if (part.type === "image") total += 500; // rough estimate for images + } + } + return total; + } + return 0; +} + +/** + * Apply active compression blocks to the message array. + * Mutates messages in place (via splice/sort) and returns it. 
/**
 * Apply active compression blocks to the message array.
 *
 * For each active block, the inclusive range between the messages whose
 * `timestamp` equals `startTimestamp` and `endTimestamp` is removed and one
 * synthetic user message carrying the summary is inserted where the range
 * used to be. Blocks whose boundary messages cannot be found are skipped.
 *
 * Mutates `messages` in place and returns it. The estimated net saving of each
 * applied block is added to `state.tokensSaved`.
 */
function applyCompressionBlocks(messages: any[], state: DcpState): any[] {
  for (const block of state.compressionBlocks) {
    if (!block.active) continue;

    // Find start and end indices by timestamp
    const startIdx = messages.findIndex((m) => m.timestamp === block.startTimestamp);
    const endIdx = messages.findIndex((m) => m.timestamp === block.endTimestamp);
    if (startIdx === -1 || endIdx === -1) continue;

    const lo = Math.min(startIdx, endIdx);
    const hi = Math.max(startIdx, endIdx);

    // The anchor is Infinity for a range that reached the end of the
    // conversation. NOTE(review): if persisted state is stored as JSON, that
    // Infinity would come back as null — anything non-finite is treated alike.
    const anchor = block.anchorTimestamp;
    const syntheticMsg = {
      role: "user",
      content: [
        {
          type: "text",
          text:
            "[Compressed section: " +
            block.topic +
            "]\n\n" +
            block.summary +
            "\n\nb" +
            block.id +
            "",
        },
      ],
      timestamp: typeof anchor === "number" && Number.isFinite(anchor) ? anchor - 0.5 : Infinity,
    };

    // Swap the range for the summary in place. Positioning by index (instead
    // of push + global sort) keeps the summary where the range was even when
    // the anchor is not a usable number, and never reorders other messages.
    const removed = messages.splice(lo, hi - lo + 1, syntheticMsg);

    let removedTokens = 0;
    for (const msg of removed) {
      removedTokens += estimateMessageTokens(msg);
    }
    const saved = removedTokens - estimateMessageTokens(syntheticMsg);
    if (saved > 0) state.tokensSaved += saved;
  }

  return messages;
}

/**
 * Record `toolCallId` as pruned. `totalPruneCount` only grows the first time
 * an id is marked, so repeated context passes do not inflate the statistic.
 */
function markToolPruned(state: DcpState, toolCallId: string): void {
  if (state.prunedToolIds.has(toolCallId)) return;
  state.prunedToolIds.add(toolCallId);
  state.totalPruneCount++;
}

/**
 * Apply deduplication: mark redundant tool outputs for pruning.
 *
 * Tool results that share an input fingerprint are grouped in message order;
 * every result except the last of each group is marked. Results of protected
 * tools, and results with no recorded tool call, are ignored.
 * Mutates state.prunedToolIds and state.totalPruneCount.
 */
function applyDeduplication(messages: any[], state: DcpState, config: DcpConfig): void {
  if (!config.strategies.deduplication.enabled) return;
  if (state.manualMode && !config.manualMode.automaticStrategies) return;

  const protectedTools = new Set<string>([
    ...ALWAYS_PROTECTED_DEDUP,
    ...(config.strategies.deduplication.protectedTools ?? []),
  ]);

  // fingerprint → toolCallIds in the order their results appear
  const idsByFingerprint = new Map<string, string[]>();

  for (const msg of messages) {
    if (msg.role !== "toolResult") continue;
    const toolName: string = msg.toolName ?? "";
    if (protectedTools.has(toolName)) continue;

    // Look up the fingerprint from the recorded tool call
    const record = state.toolCalls.get(msg.toolCallId);
    if (!record) continue;

    const ids = idsByFingerprint.get(record.inputFingerprint);
    if (ids) {
      ids.push(msg.toolCallId);
    } else {
      idsByFingerprint.set(record.inputFingerprint, [msg.toolCallId]);
    }
  }

  // Keep the last result of each group; prune the earlier ones.
  for (const ids of idsByFingerprint.values()) {
    for (const staleId of ids.slice(0, -1)) {
      markToolPruned(state, staleId);
    }
  }
}

/**
 * Apply error purging: mark old error tool outputs for pruning.
 *
 * An errored tool result is marked once at least `purgeErrors.turns`
 * (default 3) user turns separate `state.currentTurn` from the turn recorded
 * for the call. Mutates state.prunedToolIds and state.totalPruneCount.
 */
function applyErrorPurging(messages: any[], state: DcpState, config: DcpConfig): void {
  if (!config.strategies.purgeErrors.enabled) return;
  if (state.manualMode && !config.manualMode.automaticStrategies) return;

  const protectedTools = new Set<string>(config.strategies.purgeErrors.protectedTools ?? []);
  const turnsThreshold = config.strategies.purgeErrors.turns ?? 3;

  for (const msg of messages) {
    if (msg.role !== "toolResult" || !msg.isError) continue;

    const toolName: string = msg.toolName ?? "";
    if (protectedTools.has(toolName)) continue;

    const record = state.toolCalls.get(msg.toolCallId);
    if (!record) continue;

    if (state.currentTurn - record.turnIndex >= turnsThreshold) {
      markToolPruned(state, msg.toolCallId);
    }
  }
}

/**
 * Apply explicit tool output pruning from state.prunedToolIds.
 * Replaces content of matching toolResult messages in place.
 */
/**
 * Blank out pruned tool results.
 *
 * Every `toolResult` message whose `toolCallId` is in `state.prunedToolIds`
 * has its content replaced, in place, by a one-line placeholder; errored
 * results get a different placeholder from successful ones.
 */
function applyToolOutputPruning(messages: any[], state: DcpState): void {
  const errorPlaceholder = "[Error output removed - tool failed more than N turns ago]";
  const outputPlaceholder =
    "[Output removed to save context - information superseded or no longer needed]";

  messages
    .filter((m) => m.role === "toolResult" && state.prunedToolIds.has(m.toolCallId))
    .forEach((m) => {
      m.content = [{ type: "text", text: m.isError ? errorPlaceholder : outputPlaceholder }];
    });
}

/**
 * Number the visible messages and append each number to its message.
 *
 * Eligible messages receive sequential IDs `m001`, `m002`, … in array order.
 * The ID is appended to string content (after a blank line for user messages,
 * after a single newline otherwise) or added as a trailing text part to array
 * content. `state.messageIdSnapshot` is rebuilt from scratch, mapping each ID
 * to the message's timestamp when it has one.
 */
function injectMessageIds(messages: any[], state: DcpState): void {
  const snapshot = state.messageIdSnapshot;
  snapshot.clear();

  let assigned = 0;

  messages.forEach((msg) => {
    const role: string = msg.role ?? "";

    // PI-internal passthrough roles and any non-eligible role get no ID.
    if (PASSTHROUGH_ROLES.has(role) || !ID_ELIGIBLE_ROLES.has(role)) return;

    assigned += 1;
    const id = `m${String(assigned).padStart(3, "0")}`;

    const isUser = role === "user";
    const isTaggable =
      isUser || role === "assistant" || role === "toolResult" || role === "bashExecution";

    if (isTaggable) {
      if (Array.isArray(msg.content)) {
        msg.content = [...msg.content, { type: "text", text: `\n${id}` }];
      } else if (typeof msg.content === "string") {
        msg.content += isUser ? `\n\n${id}` : `\n${id}`;
      }
    }

    // An ID is still consumed when the message has no timestamp; it is just
    // not recorded in the snapshot.
    if (msg.timestamp !== undefined) {
      snapshot.set(id, msg.timestamp);
    }
  });
}

/**
 * Main transform: applies all pruning and returns modified message array.
 * Called from the `context` event handler.
 */
/**
 * Main transform: applies all pruning and returns the modified message array.
 * Called from the `context` event handler.
 *
 * The input array itself is not modified (a shallow copy is used), but the
 * message objects inside it are shared with the copy and may be mutated.
 *
 * @param messages Messages of the current context event.
 * @param state    Runtime state; `currentTurn`, `prunedToolIds`, statistics and
 *                 `messageIdSnapshot` are updated as a side effect.
 * @param config   Active DCP configuration.
 * @returns The pruned copy of `messages`.
 */
export function applyPruning(
  messages: any[],
  state: DcpState,
  config: DcpConfig
): any[] {
  const working: any[] = [...messages];

  // The current turn is the number of user messages seen before any pruning.
  state.currentTurn = working.reduce(
    (turns: number, m: any) => (m.role === "user" ? turns + 1 : turns),
    0
  );

  // Order matters: compression first, then the strategies that mark tool
  // outputs, then the pass that blanks marked outputs, and IDs last so they
  // number exactly the messages that remain visible.
  applyCompressionBlocks(working, state);
  applyDeduplication(working, state, config);
  applyErrorPurging(working, state, config);
  applyToolOutputPruning(working, state);
  injectMessageIds(working, state); // also rebuilds state.messageIdSnapshot

  return working;
}

/**
 * Append a context nudge as a synthetic user message at the end of `messages`.
 * Mutates `messages` in place; the new message is stamped with `Date.now()`.
 */
export function injectNudge(messages: any[], nudgeText: string): void {
  messages.push({
    role: "user",
    content: nudgeText,
    timestamp: Date.now(),
  });
}

/**
 * Decide whether a nudge should fire and, if so, which one.
 *
 * - Above `maxContextPercent`: a context-limit nudge (`"context-strong"` or
 *   `"context-soft"` according to `nudgeForce`), but only once
 *   `state.nudgeCounter` has reached `nudgeFrequency`; otherwise nothing.
 * - Above `minContextPercent` up to and including `maxContextPercent`:
 *   `"iteration"` as soon as `toolCallsSinceLastUser` reaches
 *   `iterationNudgeThreshold`; otherwise `"turn"`, again only once the
 *   counter has reached `nudgeFrequency`, so the lightweight reminder follows
 *   the configured cadence instead of firing on every context event.
 * - At or below `minContextPercent`: nothing.
 *
 * @param contextPercent         Context usage as a fraction of the window.
 * @param state                  Runtime state (only `nudgeCounter` is read).
 * @param config                 Active configuration (only `compress` is read).
 * @param toolCallsSinceLastUser Tool results seen since the last user message.
 * @returns The nudge type to inject, or null for none.
 */
export function getNudgeType(
  contextPercent: number,
  state: DcpState,
  config: DcpConfig,
  toolCallsSinceLastUser: number
): "context-strong" | "context-soft" | "turn" | "iteration" | null {
  const { maxContextPercent, minContextPercent, nudgeFrequency, nudgeForce, iterationNudgeThreshold } =
    config.compress;

  const cadenceReached = state.nudgeCounter >= nudgeFrequency;

  if (contextPercent > maxContextPercent) {
    if (!cadenceReached) return null;
    return nudgeForce === "strong" ? "context-strong" : "context-soft";
  }

  if (contextPercent > minContextPercent) {
    if (toolCallsSinceLastUser >= iterationNudgeThreshold) return "iteration";
    return cadenceReached ? "turn" : null;
  }

  return null;
}

// ── Patch file boundary: header of the next file, kept verbatim as comments ──
// diff --git a/state.ts b/state.ts
// new file mode 100644
// index 0000000..0f6c6f8
// --- /dev/null
// +++ b/state.ts
// @@ -0,0 +1,203 @@

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/**
 * A record of a single tool call, keyed by toolCallId in DcpState.toolCalls.
 */
export interface ToolRecord {
  /** Matches ToolResultMessage.toolCallId */
  toolCallId: string
  /** Matches ToolResultMessage.toolName */
  toolName: string
  /** The arguments passed to the tool (from the corresponding ToolCall) */
  inputArgs: Record<string, unknown>
  /**
   * Deduplication fingerprint: `toolName::JSON(sortedArgs)`
   * Two calls with the same name + identical args share the same fingerprint.
   */
  inputFingerprint: string
  /** Whether the tool result was an error */
  isError: boolean
  /** Value of `DcpState.currentTurn` at the moment the record was created. */
  turnIndex: number
  /**
   * Wall-clock time (`Date.now()`) at which the tool result was observed;
   * 0 while only the tool call has been seen.
   */
  timestamp: number
  /** Rough token estimate: sum of result text content lengths divided by 4 */
  tokenEstimate: number
}

/**
 * A compression block created by the `compress` tool.
 * Tracks the range of messages that were summarised and where to inject the
 * summary back into the context.
 */
/**
 * A compression block created by the `compress` tool: one summarised range of
 * the conversation plus the information needed to re-insert its summary.
 */
export interface CompressionBlock {
  /** Auto-incrementing integer ID */
  id: number
  /** Short human-readable topic label */
  topic: string
  /** LLM-generated summary text */
  summary: string
  /** Timestamp of the first message in the compressed range */
  startTimestamp: number
  /** Timestamp of the last message in the compressed range */
  endTimestamp: number
  /**
   * Timestamp of the first message *after* the range — the summary is injected
   * immediately before this message. Set to `Infinity` when the range extends
   * to the end of the conversation.
   */
  anchorTimestamp: number
  /** Whether this block is still being applied (false = soft-deleted) */
  active: boolean
  /** Token estimate for the summary text itself */
  summaryTokenEstimate: number
  /** Wall-clock time the block was created (Date.now()) */
  createdAt: number
}

/**
 * Full runtime state for the DCP extension.
 */
export interface DcpState {
  // ── Tool tracking ──────────────────────────────────────────────────────────
  /** toolCallId → ToolRecord for every tool call/result observed so far */
  toolCalls: Map<string, ToolRecord>
  /** Set of toolCallIds whose result messages should be suppressed in context */
  prunedToolIds: Set<string>

  // ── Compression ────────────────────────────────────────────────────────────
  /** All compression blocks (both active and soft-deleted) */
  compressionBlocks: CompressionBlock[]
  /** Monotonically increasing counter used to assign CompressionBlock.id */
  nextBlockId: number

  // ── Message ID snapshot ────────────────────────────────────────────────────
  /**
   * Maps the short LLM-visible message IDs (e.g. "m001") to the actual
   * `timestamp` of that message as seen in the last `context` event.
   *
   * The `compress` tool receives ID strings from the LLM; this map lets us
   * translate them back to real timestamps so compression blocks can reference
   * message positions by timestamp (which is stable across pruning passes).
   */
  messageIdSnapshot: Map<string, number>

  // ── Turn tracking ──────────────────────────────────────────────────────────
  /**
   * Number of user messages present in the most recently processed `context`
   * event; recomputed on every pruning pass.
   */
  currentTurn: number

  // ── Statistics ─────────────────────────────────────────────────────────────
  /** Running total of tokens estimated to have been saved by pruning/compression */
  tokensSaved: number
  /** Number of discrete pruning operations performed */
  totalPruneCount: number

  // ── Mode ───────────────────────────────────────────────────────────────────
  /**
   * When true, the extension will not autonomously emit compress nudges.
   * Automatic deduplication/error-purge strategies may still run depending on
   * the `manualMode.automaticStrategies` config flag.
   */
  manualMode: boolean

  // ── Nudge state ────────────────────────────────────────────────────────────
  /**
   * How many `context` events have fired since the last compress nudge was
   * emitted. Reset to 0 after each nudge.
   */
  nudgeCounter: number
  /**
   * The value of `currentTurn` at the time the last nudge was emitted.
   * Used to avoid nudging more than once per user turn when nudgeFrequency is
   * satisfied within the same turn.
   */
  lastNudgeTurn: number
}

// ---------------------------------------------------------------------------
// Factory functions
// ---------------------------------------------------------------------------

/**
 * Build a brand-new DcpState: empty collections, zeroed counters, block IDs
 * starting at 1, manual mode off, and no nudge recorded yet (-1).
 */
export function createState(): DcpState {
  const fresh: DcpState = {
    // tool tracking
    toolCalls: new Map(),
    prunedToolIds: new Set(),
    // compression
    compressionBlocks: [],
    nextBlockId: 1,
    // message IDs and turns
    messageIdSnapshot: new Map(),
    currentTurn: 0,
    // statistics
    tokensSaved: 0,
    totalPruneCount: 0,
    // mode and nudges
    manualMode: false,
    nudgeCounter: 0,
    lastNudgeTurn: -1,
  }
  return fresh
}

/**
 * Reset `state` back to its initial values **in-place**.
 */
+ * Preserves the object reference so other modules holding a reference see the + * reset immediately. + */ +export function resetState(state: DcpState): void { + state.toolCalls.clear() + state.prunedToolIds.clear() + state.compressionBlocks = [] + state.nextBlockId = 1 + state.messageIdSnapshot.clear() + state.currentTurn = 0 + state.tokensSaved = 0 + state.totalPruneCount = 0 + state.manualMode = false + state.nudgeCounter = 0 + state.lastNudgeTurn = -1 +} + +// --------------------------------------------------------------------------- +// Fingerprinting +// --------------------------------------------------------------------------- + +/** + * Recursively sort the keys of a plain object so that two argument objects + * with the same entries in different key-insertion order produce the same JSON. + */ +function sortObjectKeys(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map(sortObjectKeys) + } + if (value !== null && typeof value === "object") { + const obj = value as Record + const sorted: Record = {} + for (const key of Object.keys(obj).sort()) { + sorted[key] = sortObjectKeys(obj[key]) + } + return sorted + } + return value +} + +/** + * Create a stable deduplication fingerprint for a tool call. + * + * Two calls with the same `toolName` and semantically identical `args` + * (regardless of key ordering) will produce the same fingerprint. + * + * Format: `::` + */ +export function createInputFingerprint( + toolName: string, + args: Record, +): string { + const sorted = sortObjectKeys(args) + return `${toolName}::${JSON.stringify(sorted)}` +}