Files
pi-lgtm/test/subagent-integration.test.ts
T
tintinweb 578b1c9a6b fix nudge/sysprompt cc like
description in tools
fix widget refresh flickering
2026-03-16 21:54:02 +01:00

702 lines
23 KiB
TypeScript

/**
* Tests for task-subagent integration: TaskExecute tool, completion listener,
* auto-cascade, and widget agent ID display.
*/
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import { TaskStore } from "../src/task-store.js";
import { TaskWidget, type UICtx, type Theme } from "../src/ui/task-widget.js";
import initExtension from "../src/index.js";
// Force in-memory task store for all integration tests — prevents file-backed
// store from loading stale tasks across test instances.
beforeEach(() => { process.env.PI_TASKS = "off"; });
afterEach(() => { delete process.env.PI_TASKS; });
// ---- Mock pi ----
/** Minimal mock of ExtensionAPI with events, tool capture, and event hooks. */
function mockPi() {
const tools = new Map<string, any>();
const commands = new Map<string, any>();
const eventHandlers = new Map<string, ((data: unknown) => void)[]>();
const lifecycleHandlers = new Map<string, ((...args: any[]) => any)[]>();
const pi = {
registerTool(def: any) { tools.set(def.name, def); },
registerCommand(name: string, def: any) { commands.set(name, def); },
on(event: string, handler: any) {
if (!lifecycleHandlers.has(event)) lifecycleHandlers.set(event, []);
lifecycleHandlers.get(event)!.push(handler);
},
events: {
emit(channel: string, data: unknown) {
for (const h of eventHandlers.get(channel) ?? []) h(data);
},
on(channel: string, handler: (data: unknown) => void) {
if (!eventHandlers.has(channel)) eventHandlers.set(channel, []);
eventHandlers.get(channel)!.push(handler);
return () => {
const arr = eventHandlers.get(channel);
if (arr) eventHandlers.set(channel, arr.filter(h => h !== handler));
};
},
},
sendUserMessage: vi.fn(),
};
return {
pi,
tools,
commands,
/** Execute a registered tool by name. */
async executeTool(name: string, params: any, ctx?: any) {
const tool = tools.get(name);
if (!tool) throw new Error(`Tool ${name} not registered`);
return tool.execute("call-1", params, undefined, undefined, ctx ?? mockCtx());
},
/** Fire lifecycle event handlers (turn_start, tool_result, etc.) */
async fireLifecycle(event: string, ...args: any[]) {
for (const h of lifecycleHandlers.get(event) ?? []) {
await h(...args);
}
},
/** Emit an event on pi.events (simulates subagent extension). */
emitEvent(channel: string, data: unknown) {
pi.events.emit(channel, data);
},
};
}
/** Minimal mock ExtensionContext. */
function mockCtx() {
return {
model: { id: "test-model", name: "Test" },
modelRegistry: {},
ui: {
setWidget: vi.fn(),
setStatus: vi.fn(),
},
};
}
// ---- Mock subagents extension (RPC responders) ----
/** Simulates the @tintinweb/pi-subagents extension: responds to ping + spawn RPCs and emits ready. */
function installSubagentsMock(pi: { events: { on: Function; emit: Function } }) {
let idCounter = 0;
const spawned: Array<{ id: string; type: string; prompt: string; options: any }> = [];
// Respond to ping — reply on scoped channel
const unsubPing = pi.events.on("subagents:rpc:ping", (data: unknown) => {
const { requestId } = data as { requestId: string };
pi.events.emit(`subagents:rpc:ping:reply:${requestId}`, {});
});
// Respond to spawn — reply on scoped channel
const unsubSpawn = pi.events.on("subagents:rpc:spawn", (data: unknown) => {
const { requestId, type, prompt, options } = data as {
requestId: string; type: string; prompt: string; options?: any;
};
const id = `agent-${++idCounter}`;
spawned.push({ id, type, prompt, options });
pi.events.emit(`subagents:rpc:spawn:reply:${requestId}`, { id });
});
// Broadcast readiness
pi.events.emit("subagents:ready", {});
return {
spawned,
unsub() { unsubPing(); unsubSpawn(); },
};
}
// ---- Tests ----
describe("TaskExecute", () => {
let mock: ReturnType<typeof mockPi>;
let rpc: ReturnType<typeof installSubagentsMock>;
beforeEach(() => {
mock = mockPi();
// Install mock BEFORE init so ping reply is received during extension init
rpc = installSubagentsMock(mock.pi);
initExtension(mock.pi as any);
});
afterEach(() => {
rpc.unsub();
});
it("is registered as a tool", () => {
expect(mock.tools.has("TaskExecute")).toBe(true);
});
it("returns error when subagent extension is not loaded", async () => {
// Re-init without mock to simulate missing extension
const freshMock = mockPi();
initExtension(freshMock.pi as any);
await freshMock.executeTool("TaskCreate", {
subject: "Test task",
description: "Do something",
agentType: "general-purpose",
});
const result = await freshMock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("requires the @tintinweb/pi-subagents extension");
});
it("rejects non-existent tasks", async () => {
const result = await mock.executeTool("TaskExecute", { task_ids: ["999"] });
expect(result.content[0].text).toContain("#999: not found");
});
it("rejects tasks without agentType", async () => {
await mock.executeTool("TaskCreate", {
subject: "No agent type",
description: "Plain task",
});
const result = await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("#1: no agentType set");
});
it("rejects non-pending tasks", async () => {
await mock.executeTool("TaskCreate", {
subject: "Already started",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskUpdate", { taskId: "1", status: "in_progress" });
const result = await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("#1: not pending");
});
it("rejects tasks with unresolved blockers", async () => {
await mock.executeTool("TaskCreate", {
subject: "Blocker",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskCreate", {
subject: "Blocked",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskUpdate", { taskId: "2", addBlockedBy: ["1"] });
const result = await mock.executeTool("TaskExecute", { task_ids: ["2"] });
expect(result.content[0].text).toContain("#2: blocked by #1");
});
it("spawns agent for valid task and updates metadata", async () => {
await mock.executeTool("TaskCreate", {
subject: "Run tests",
description: "Run the test suite",
agentType: "general-purpose",
});
const result = await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("Launched 1 agent");
expect(result.content[0].text).toContain("#1 → agent agent-1");
// Verify the RPC responder was called
expect(rpc.spawned).toHaveLength(1);
expect(rpc.spawned[0].type).toBe("general-purpose");
expect(rpc.spawned[0].prompt).toContain("Run the test suite");
expect(rpc.spawned[0].options.isBackground).toBe(true);
});
it("passes additional_context and max_turns to spawned agents", async () => {
await mock.executeTool("TaskCreate", {
subject: "Explore codebase",
description: "Find all API endpoints",
agentType: "Explore",
});
await mock.executeTool("TaskExecute", {
task_ids: ["1"],
additional_context: "Focus on REST endpoints only",
max_turns: 10,
});
expect(rpc.spawned[0].prompt).toContain("Focus on REST endpoints only");
expect(rpc.spawned[0].options.maxTurns).toBe(10);
});
it("allows executing tasks whose blockers are all completed", async () => {
await mock.executeTool("TaskCreate", {
subject: "Blocker",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskCreate", {
subject: "Dependent",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskUpdate", { taskId: "2", addBlockedBy: ["1"] });
await mock.executeTool("TaskUpdate", { taskId: "1", status: "completed" });
const result = await mock.executeTool("TaskExecute", { task_ids: ["2"] });
expect(result.content[0].text).toContain("Launched 1 agent");
});
it("handles mixed valid and invalid tasks in one call", async () => {
await mock.executeTool("TaskCreate", {
subject: "Valid",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskCreate", {
subject: "No agent type",
description: "Desc",
});
const result = await mock.executeTool("TaskExecute", { task_ids: ["1", "2", "999"] });
const text = result.content[0].text;
expect(text).toContain("Launched 1 agent");
expect(text).toContain("#2: no agentType set");
expect(text).toContain("#999: not found");
});
});
describe("TaskExecute via ready broadcast", () => {
it("detects subagents when ready fires after tasks init", async () => {
// Init tasks WITHOUT the mock — subagents not available yet
const mock = mockPi();
initExtension(mock.pi as any);
// Now install the mock (simulates subagents loading later) and broadcast ready
const rpc = installSubagentsMock(mock.pi);
// Create a task and execute — should work because ready was received
await mock.executeTool("TaskCreate", {
subject: "Late-loaded test",
description: "Desc",
agentType: "general-purpose",
});
const result = await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("Launched 1 agent");
expect(rpc.spawned).toHaveLength(1);
rpc.unsub();
});
});
describe("Completion listener", () => {
let mock: ReturnType<typeof mockPi>;
let rpc: ReturnType<typeof installSubagentsMock>;
beforeEach(() => {
mock = mockPi();
rpc = installSubagentsMock(mock.pi);
initExtension(mock.pi as any);
});
afterEach(() => {
rpc.unsub();
});
it("marks task completed on subagents:completed event", async () => {
await mock.executeTool("TaskCreate", {
subject: "Agent task",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskExecute", { task_ids: ["1"] });
// Simulate agent completion
mock.emitEvent("subagents:completed", { id: "agent-1" });
const result = await mock.executeTool("TaskGet", { taskId: "1" });
expect(result.content[0].text).toContain("Status: completed");
});
it("reverts task to pending on subagents:failed event", async () => {
await mock.executeTool("TaskCreate", {
subject: "Failing task",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskExecute", { task_ids: ["1"] });
// Simulate agent failure
mock.emitEvent("subagents:failed", { id: "agent-1", error: "Out of turns", status: "error" });
const result = await mock.executeTool("TaskGet", { taskId: "1" });
expect(result.content[0].text).toContain("Status: pending");
});
it("ignores events for unknown agent IDs", async () => {
await mock.executeTool("TaskCreate", {
subject: "Unrelated",
description: "Desc",
});
// Should not throw or modify anything
mock.emitEvent("subagents:completed", { id: "unknown-agent" });
mock.emitEvent("subagents:failed", { id: "unknown-agent", error: "boom", status: "error" });
const result = await mock.executeTool("TaskGet", { taskId: "1" });
expect(result.content[0].text).toContain("Status: pending");
});
});
describe("Auto-cascade", () => {
let mock: ReturnType<typeof mockPi>;
let rpc: ReturnType<typeof installSubagentsMock>;
beforeEach(() => {
mock = mockPi();
rpc = installSubagentsMock(mock.pi);
initExtension(mock.pi as any);
});
afterEach(() => {
rpc.unsub();
});
it("does NOT cascade when auto-cascade is off (default)", async () => {
// Create A → B chain
await mock.executeTool("TaskCreate", {
subject: "Task A",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskCreate", {
subject: "Task B",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskUpdate", { taskId: "2", addBlockedBy: ["1"] });
// Execute A
await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(rpc.spawned).toHaveLength(1);
// Complete A
mock.emitEvent("subagents:completed", { id: "agent-1" });
// B should NOT have been auto-started
expect(rpc.spawned).toHaveLength(1);
// B should still be pending
const result = await mock.executeTool("TaskGet", { taskId: "2" });
expect(result.content[0].text).toContain("Status: pending");
});
it("does NOT cascade on failure (branch stops)", async () => {
await mock.executeTool("TaskCreate", {
subject: "Task A",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskCreate", {
subject: "Task B",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskUpdate", { taskId: "2", addBlockedBy: ["1"] });
await mock.executeTool("TaskExecute", { task_ids: ["1"] });
mock.emitEvent("subagents:failed", { id: "agent-1", error: "crashed", status: "error" });
// B should not start
expect(rpc.spawned).toHaveLength(1);
const result = await mock.executeTool("TaskGet", { taskId: "2" });
expect(result.content[0].text).toContain("Status: pending");
});
it("tasks without agentType are not cascaded even if unblocked", async () => {
await mock.executeTool("TaskCreate", {
subject: "Agent task",
description: "Desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskCreate", {
subject: "Manual task",
description: "Desc",
// No agentType — manual
});
await mock.executeTool("TaskUpdate", { taskId: "2", addBlockedBy: ["1"] });
await mock.executeTool("TaskExecute", { task_ids: ["1"] });
mock.emitEvent("subagents:completed", { id: "agent-1" });
// Manual task should stay pending
expect(rpc.spawned).toHaveLength(1);
});
});
describe("Standalone operation (no subagents extension)", () => {
let mock: ReturnType<typeof mockPi>;
beforeEach(() => {
// Init WITHOUT installSubagentsMock — no subagents extension present
mock = mockPi();
initExtension(mock.pi as any);
});
it("all core task tools are registered", () => {
for (const name of ["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "TaskExecute"]) {
expect(mock.tools.has(name)).toBe(true);
}
});
it("TaskCreate works without subagents", async () => {
const result = await mock.executeTool("TaskCreate", {
subject: "Write tests",
description: "Add unit tests for the parser",
});
expect(result.content[0].text).toContain("Write tests");
});
it("TaskList works without subagents", async () => {
await mock.executeTool("TaskCreate", { subject: "A", description: "desc" });
await mock.executeTool("TaskCreate", { subject: "B", description: "desc" });
const result = await mock.executeTool("TaskList", {});
expect(result.content[0].text).toContain("#1");
expect(result.content[0].text).toContain("#2");
});
it("TaskGet works without subagents", async () => {
await mock.executeTool("TaskCreate", { subject: "Read me", description: "details here" });
const result = await mock.executeTool("TaskGet", { taskId: "1" });
expect(result.content[0].text).toContain("Read me");
expect(result.content[0].text).toContain("details here");
});
it("TaskUpdate works without subagents", async () => {
await mock.executeTool("TaskCreate", { subject: "Update me", description: "desc" });
await mock.executeTool("TaskUpdate", { taskId: "1", status: "in_progress" });
const result = await mock.executeTool("TaskGet", { taskId: "1" });
expect(result.content[0].text).toContain("in_progress");
});
it("TaskExecute gracefully refuses without subagents", async () => {
await mock.executeTool("TaskCreate", {
subject: "Agent task",
description: "desc",
agentType: "general-purpose",
});
const result = await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("requires the @tintinweb/pi-subagents extension");
});
it("subagents lifecycle events are silently ignored without mapped agents", () => {
// These should not throw even though no subagents extension is loaded
mock.emitEvent("subagents:completed", { id: "ghost-agent", result: "done" });
mock.emitEvent("subagents:failed", { id: "ghost-agent", error: "boom", status: "error" });
// No crash = pass
});
it("task dependencies work without subagents", async () => {
await mock.executeTool("TaskCreate", { subject: "First", description: "desc" });
await mock.executeTool("TaskCreate", { subject: "Second", description: "desc" });
await mock.executeTool("TaskUpdate", { taskId: "2", addBlockedBy: ["1"] });
const result = await mock.executeTool("TaskGet", { taskId: "2" });
expect(result.content[0].text).toContain("Blocked by");
expect(result.content[0].text).toContain("#1");
});
});
describe("RPC protocol correctness", () => {
it("ping uses scoped reply channel (not shared channel)", () => {
const mock = mockPi();
const emitted: Array<{ channel: string; data: unknown }> = [];
const origEmit = mock.pi.events.emit.bind(mock.pi.events);
mock.pi.events.emit = (channel: string, data: unknown) => {
emitted.push({ channel, data });
origEmit(channel, data);
};
initExtension(mock.pi as any);
// Find the ping emit
const pingEmit = emitted.find(e => e.channel === "subagents:rpc:ping");
expect(pingEmit).toBeDefined();
const pingData = pingEmit!.data as { requestId: string };
expect(pingData.requestId).toBeDefined();
expect(typeof pingData.requestId).toBe("string");
});
it("spawn reply cleans up listener and timer on success", async () => {
const mock = mockPi();
const rpc = installSubagentsMock(mock.pi);
initExtension(mock.pi as any);
await mock.executeTool("TaskCreate", {
subject: "Test",
description: "desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(rpc.spawned).toHaveLength(1);
// Second spawn should get a fresh requestId (not conflict with first)
await mock.executeTool("TaskCreate", {
subject: "Test 2",
description: "desc",
agentType: "general-purpose",
});
await mock.executeTool("TaskExecute", { task_ids: ["2"] });
expect(rpc.spawned).toHaveLength(2);
expect(rpc.spawned[0].id).not.toBe(rpc.spawned[1].id);
rpc.unsub();
});
it("spawn RPC rejects on timeout when no responder exists", async () => {
const mock = mockPi();
initExtension(mock.pi as any);
// Emit ready AFTER init so the listener is registered — marks subagents
// as available, but there's no spawn handler installed
mock.pi.events.emit("subagents:ready", {});
await mock.executeTool("TaskCreate", {
subject: "Timeout test",
description: "desc",
agentType: "general-purpose",
});
// spawnSubagent has a 30s timeout — we'll advance timers
vi.useFakeTimers();
const execPromise = mock.executeTool("TaskExecute", { task_ids: ["1"] });
await vi.advanceTimersByTimeAsync(31000);
const result = await execPromise;
expect(result.content[0].text).toContain("timeout");
vi.useRealTimers();
});
it("ready broadcast sets subagentsAvailable even after init", async () => {
const mock = mockPi();
initExtension(mock.pi as any);
// Initially no subagents
await mock.executeTool("TaskCreate", {
subject: "Test",
description: "desc",
agentType: "general-purpose",
});
let result = await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("requires the @tintinweb/pi-subagents extension");
// Reset task status
await mock.executeTool("TaskUpdate", { taskId: "1", status: "pending" });
// Late subagents extension broadcasts ready
const rpc = installSubagentsMock(mock.pi);
result = await mock.executeTool("TaskExecute", { task_ids: ["1"] });
expect(result.content[0].text).toContain("Launched 1 agent");
rpc.unsub();
});
});
describe("Widget agent ID display", () => {
let store: TaskStore;
let widget: TaskWidget;
let ui: ReturnType<typeof mockUICtx>;
function mockUICtx() {
const state = {
widgets: new Map<string, any>(),
statuses: new Map<string, string | undefined>(),
};
const ctx: UICtx = {
setWidget(key, content, options) { state.widgets.set(key, { content, options }); },
setStatus(key, text) { state.statuses.set(key, text); },
};
return { ctx, state };
}
function mockTheme(): Theme {
return {
fg: (_color: string, text: string) => text,
bold: (text: string) => text,
strikethrough: (text: string) => `~~${text}~~`,
};
}
function renderWidget(state: ReturnType<typeof mockUICtx>["state"]): string[] {
const entry = state.widgets.get("tasks");
if (!entry?.content) return [];
const theme = mockTheme();
const tui = { terminal: { columns: 200 } };
return entry.content(tui, theme).render();
}
beforeEach(() => {
vi.useFakeTimers();
store = new TaskStore();
widget = new TaskWidget(store);
ui = mockUICtx();
widget.setUICtx(ui.ctx);
});
afterEach(() => {
widget.dispose();
vi.useRealTimers();
});
it("shows agent ID for active agent-backed tasks", () => {
store.create("Agent task", "Desc", "Running tests", { agentType: "general-purpose", agentId: "abc1234567890" });
store.update("1", { status: "in_progress" });
widget.setActiveTask("1", true);
const lines = renderWidget(ui.state);
expect(lines[1]).toContain("agent abc12");
expect(lines[1]).toContain("Running tests");
});
it("shows agent ID for non-active in_progress agent-backed tasks", () => {
store.create("Agent task", "Desc", undefined, { agentType: "general-purpose", agentId: "xyz9876543210" });
store.update("1", { status: "in_progress" });
// NOT calling setActiveTask — simulates external agent management
widget.update();
const lines = renderWidget(ui.state);
expect(lines[1]).toContain("agent xyz98");
expect(lines[1]).toContain("Agent task");
});
it("does not show agent ID for tasks without agentId", () => {
store.create("Manual task", "Desc");
store.update("1", { status: "in_progress" });
widget.update();
const lines = renderWidget(ui.state);
expect(lines[1]).not.toContain("agent");
expect(lines[1]).toContain("Manual task");
});
it("does not show agent ID for pending tasks", () => {
store.create("Pending agent task", "Desc", undefined, { agentType: "general-purpose", agentId: "abc12345" });
widget.update();
const lines = renderWidget(ui.state);
expect(lines[1]).not.toContain("agent abc");
});
it("does not show agent ID for completed tasks", () => {
store.create("Done", "Desc", undefined, { agentType: "general-purpose", agentId: "abc12345" });
store.update("1", { status: "completed" });
widget.update();
const lines = renderWidget(ui.state);
expect(lines[1]).not.toContain("agent abc");
});
});