Files
pi-lgtm/src/index.ts
T
wassname 8ea225d119 feat: pi-lgtm -- LGTM sign-off layer on pi-tasks
- Strip: TaskExecute, TaskOutput, TaskStop, process-tracker, subagent RPC, settings menu
- Add done_criterion (required, falsifiable) to TaskCreate
- Block status=completed in TaskUpdate -- must use /lgtm
- Add lgtm_ask tool: evidence + 2 failure modes + evidence_vs_failures + remaining_uncertainty
- Add /lgtm command: human-only sign-off with stored evidence review
- Persist all lgtm_ask fields in task.metadata for async review
- Widget shows 👀 for pending_approval tasks
- Update README, package.json author

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 05:41:18 +08:00

592 lines
25 KiB
TypeScript

/**
* pi-lgtm — Task tracking with structured human sign-off for pi coding agent.
*
* Tools:
* TaskCreate — Create a task with done_criterion
* TaskList — List all tasks with status
* TaskGet — Get full task details
* TaskUpdate — Update task fields (completion requires /lgtm)
* lgtm_ask — Present evidence + failure modes for sign-off
*
* Commands:
* /tasks — Interactive task management menu
* /lgtm <id> — Human signs off on a task (only way to complete)
*/
import { existsSync } from "node:fs";
import { join, resolve } from "node:path";
import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import { AutoClearManager } from "./auto-clear.js";
import { TaskStore } from "./task-store.js";
import { loadTasksConfig } from "./tasks-config.js";
import { TaskWidget, type UICtx } from "./ui/task-widget.js";
const DEBUG = !!process.env.PI_TASKS_DEBUG;
function debug(...args: unknown[]) {
if (DEBUG) console.error("[pi-lgtm]", ...args);
}
function textResult(msg: string) {
return { content: [{ type: "text" as const, text: msg }], details: undefined as any };
}
const TASK_TOOL_NAMES = new Set(["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "lgtm_ask"]);
const REMINDER_INTERVAL = 4;
const AUTO_CLEAR_DELAY = 4;
const SYSTEM_REMINDER = `<system-reminder>
The task tools haven't been used recently. If working on tasks, use TaskCreate (requires done_criterion), TaskUpdate for status, and lgtm_ask when ready for human sign-off. Tasks can only be completed via /lgtm after calling lgtm_ask. Ignore if not applicable. Never mention this reminder to the user.
</system-reminder>`;
export default function (pi: ExtensionAPI) {
const cfg = loadTasksConfig();
const piTasks = process.env.PI_TASKS;
const taskScope = cfg.taskScope ?? "session";
function resolveStorePath(sessionId?: string): string | undefined {
if (piTasks === "off") return undefined;
if (piTasks?.startsWith("/")) return piTasks;
if (piTasks?.startsWith(".")) return resolve(piTasks);
if (piTasks) return piTasks;
if (taskScope === "memory") return undefined;
if (taskScope === "session" && sessionId) {
return join(process.cwd(), ".pi", "tasks", `tasks-${sessionId}.json`);
}
if (taskScope === "session") return undefined;
return join(process.cwd(), ".pi", "tasks", "tasks.json");
}
let store = new TaskStore(resolveStorePath());
const widget = new TaskWidget(store);
const autoClear = new AutoClearManager(() => store, () => cfg.autoClearCompleted ?? "on_list_complete", AUTO_CLEAR_DELAY);
let storeUpgraded = false;
let persistedTasksShown = false;
function upgradeStoreIfNeeded(ctx: ExtensionContext) {
if (storeUpgraded) return;
if (taskScope === "session" && !piTasks) {
const sessionId = ctx.sessionManager.getSessionId();
const path = resolveStorePath(sessionId);
store = new TaskStore(path);
widget.setStore(store);
}
storeUpgraded = true;
}
function showPersistedTasks(isResume = false) {
if (persistedTasksShown) return;
persistedTasksShown = true;
const tasks = store.list();
if (tasks.length > 0) {
if (!isResume && tasks.every(t => t.status === "completed")) {
store.clearCompleted();
if (taskScope === "session") store.deleteFileIfEmpty();
} else {
widget.update();
}
}
}
let currentTurn = 0;
let lastTaskToolUseTurn = 0;
let reminderInjectedThisCycle = false;
pi.on("turn_start", async (_event, ctx) => {
currentTurn++;
widget.setUICtx(ctx.ui as UICtx);
upgradeStoreIfNeeded(ctx);
if (autoClear.onTurnStart(currentTurn)) widget.update();
});
pi.on("turn_end", async (event) => {
const msg = event.message as any;
if (msg?.role === "assistant" && msg.usage) {
widget.addTokenUsage(msg.usage.input ?? 0, msg.usage.output ?? 0);
}
});
pi.on("tool_result", async (event) => {
if (TASK_TOOL_NAMES.has(event.toolName)) {
lastTaskToolUseTurn = currentTurn;
reminderInjectedThisCycle = false;
return {};
}
if (currentTurn - lastTaskToolUseTurn < REMINDER_INTERVAL) return {};
if (reminderInjectedThisCycle) return {};
const tasks = store.list();
if (tasks.length === 0) return {};
reminderInjectedThisCycle = true;
lastTaskToolUseTurn = currentTurn;
return { content: [...event.content, { type: "text" as const, text: SYSTEM_REMINDER }] };
});
pi.on("before_agent_start", async (_event, ctx) => {
widget.setUICtx(ctx.ui as UICtx);
upgradeStoreIfNeeded(ctx);
showPersistedTasks();
});
pi.on("session_switch" as any, async (event: any, ctx: ExtensionContext) => {
widget.setUICtx(ctx.ui as UICtx);
const isResume = event?.reason === "resume";
storeUpgraded = false;
persistedTasksShown = false;
currentTurn = 0;
lastTaskToolUseTurn = 0;
reminderInjectedThisCycle = false;
autoClear.reset();
if (!isResume && taskScope === "memory") store.clearAll();
upgradeStoreIfNeeded(ctx);
showPersistedTasks(isResume);
});
// ──────────────────────────────────────────────────
// Tool 1: TaskCreate
// ──────────────────────────────────────────────────
pi.registerTool({
name: "TaskCreate",
label: "TaskCreate",
description: `Create a task with a clear done_criterion.
## When to Use
- Complex multi-step tasks (3+ steps)
- When user provides a list of things to do
## Task Fields
- **subject**: Brief actionable title
- **description**: Detailed description with context
- **done_criterion**: REQUIRED. Falsifiable observation that distinguishes done from fail/null/incomplete/silent-fail. State expected AND wrong-case observations (e.g., "All 92 tests pass. If wrong: type errors in build or test failures in task-store.test.ts")
- **activeForm** (optional): Present continuous for spinner
Tasks are completed only via /lgtm after calling lgtm_ask with evidence.`,
promptGuidelines: [
"Use TaskCreate for complex tasks. Include a specific done_criterion.",
"Mark tasks in_progress before starting. Use lgtm_ask when done.",
"Tasks cannot be marked completed directly — human must /lgtm them.",
],
parameters: Type.Object({
subject: Type.String({ description: "Brief task title" }),
description: Type.String({ description: "Detailed description" }),
done_criterion: Type.String({ description: "Falsifiable observation that distinguishes DONE from fail, null result, incomplete, or silent failure. State what you expect to see AND what you'd see if it's wrong." }),
activeForm: Type.Optional(Type.String({ description: "Present continuous for spinner" })),
metadata: Type.Optional(Type.Record(Type.String(), Type.Any())),
}),
execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
autoClear.resetBatchCountdown();
const task = store.create(params.subject, params.description, params.done_criterion, params.activeForm, params.metadata);
widget.update();
return Promise.resolve(textResult(`Task #${task.id} created: ${task.subject}\nDone criterion: ${task.done_criterion}`));
},
});
// ──────────────────────────────────────────────────
// Tool 2: TaskList
// ──────────────────────────────────────────────────
pi.registerTool({
name: "TaskList",
label: "TaskList",
description: `List all tasks. Tasks with 👀 are pending human sign-off via /lgtm.`,
parameters: Type.Object({}),
execute(_toolCallId, _params, _signal, _onUpdate, _ctx) {
const tasks = store.list();
if (tasks.length === 0) return Promise.resolve(textResult("No tasks found"));
const statusOrder: Record<string, number> = { pending: 0, in_progress: 1, completed: 2 };
const sorted = [...tasks].sort((a, b) => {
const so = (statusOrder[a.status] ?? 0) - (statusOrder[b.status] ?? 0);
if (so !== 0) return so;
return Number(a.id) - Number(b.id);
});
const lines = sorted.map(task => {
let line = `#${task.id} [${task.status}] ${task.subject}`;
if (task.pending_approval && task.status !== "completed") line += " 👀";
if (task.owner) line += ` (${task.owner})`;
if (task.blockedBy.length > 0) {
const openBlockers = task.blockedBy.filter(bid => {
const blocker = store.get(bid);
return blocker && blocker.status !== "completed";
});
if (openBlockers.length > 0) line += ` [blocked by ${openBlockers.map(id => "#" + id).join(", ")}]`;
}
return line;
});
return Promise.resolve(textResult(lines.join("\n")));
},
});
// ──────────────────────────────────────────────────
// Tool 3: TaskGet
// ──────────────────────────────────────────────────
pi.registerTool({
name: "TaskGet",
label: "TaskGet",
description: `Get full task details including done_criterion and approval state.`,
parameters: Type.Object({
taskId: Type.String({ description: "Task ID to retrieve" }),
}),
execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const task = store.get(params.taskId);
if (!task) return Promise.resolve(textResult("Task not found"));
const desc = task.description.replace(/\\n/g, "\n");
const lines: string[] = [
`Task #${task.id}: ${task.subject}`,
`Status: ${task.status}${task.pending_approval && task.status !== "completed" ? " 👀 (pending sign-off)" : ""}`,
`Done criterion: ${task.done_criterion}`,
];
if (task.owner) lines.push(`Owner: ${task.owner}`);
lines.push(`Description: ${desc}`);
if (task.blockedBy.length > 0) {
const openBlockers = task.blockedBy.filter(bid => {
const blocker = store.get(bid);
return blocker && blocker.status !== "completed";
});
if (openBlockers.length > 0) lines.push(`Blocked by: ${openBlockers.map(id => "#" + id).join(", ")}`);
}
if (task.blocks.length > 0) lines.push(`Blocks: ${task.blocks.map(id => "#" + id).join(", ")}`);
const metaKeys = Object.keys(task.metadata);
if (metaKeys.length > 0) lines.push(`Metadata: ${JSON.stringify(task.metadata)}`);
return Promise.resolve(textResult(lines.join("\n")));
},
});
// ──────────────────────────────────────────────────
// Tool 4: TaskUpdate
// ──────────────────────────────────────────────────
pi.registerTool({
name: "TaskUpdate",
label: "TaskUpdate",
description: `Update task fields or status.
Status: pending -> in_progress -> (call lgtm_ask) -> /lgtm -> completed
Cannot set status=completed here. Use lgtm_ask then /lgtm <id>.`,
parameters: Type.Object({
taskId: Type.String({ description: "Task ID to update" }),
status: Type.Optional(Type.Unsafe<"pending" | "in_progress" | "deleted">({
anyOf: [
{ type: "string", enum: ["pending", "in_progress"] },
{ type: "string", const: "deleted" },
],
description: "New status. Cannot set completed — use /lgtm after lgtm_ask.",
})),
subject: Type.Optional(Type.String()),
description: Type.Optional(Type.String()),
done_criterion: Type.Optional(Type.String()),
activeForm: Type.Optional(Type.String()),
owner: Type.Optional(Type.String()),
metadata: Type.Optional(Type.Record(Type.String(), Type.Any())),
addBlocks: Type.Optional(Type.Array(Type.String())),
addBlockedBy: Type.Optional(Type.Array(Type.String())),
}),
execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const { taskId, ...fields } = params;
let task: any, changedFields: string[], warnings: string[];
try {
({ task, changedFields, warnings } = store.update(taskId, fields));
} catch (err: any) {
return Promise.resolve(textResult(err.message));
}
if (changedFields.length === 0 && !task) {
return Promise.resolve(textResult(`Task #${taskId} not found`));
}
if (fields.status === "in_progress") {
widget.setActiveTask(taskId);
autoClear.resetBatchCountdown();
} else if (fields.status === "pending") {
autoClear.resetBatchCountdown();
} else if (fields.status === "deleted") {
widget.setActiveTask(taskId, false);
}
widget.update();
let msg = `Updated task #${taskId}: ${changedFields.join(", ")}`;
if (warnings.length > 0) msg += ` (warning: ${warnings.join("; ")})`;
return Promise.resolve(textResult(msg));
},
});
// ──────────────────────────────────────────────────
// Tool 5: lgtm_ask
// ──────────────────────────────────────────────────
pi.registerTool({
name: "lgtm_ask",
label: "lgtm_ask",
description: `Present evidence that a task meets its done_criterion and request human sign-off.
Forces structured thinking about failure modes. All text fields required.
After this, task enters pending sign-off state — only completable via /lgtm <id>.
## Fields
- **evidence**: Auditable proof — command output, table, file path, link
- **failure_mode_1**: Most likely way this could be wrong despite evidence
- **failure_mode_2**: Second most likely failure mode
- **evidence_vs_failures**: How would evidence look different if failure modes were true?
- **evidence_files** (optional): File paths human should inspect -- must exist
- **remaining_uncertainty** (optional): What's NOT tested, known limitations, deferred edge cases`,
parameters: Type.Object({
taskId: Type.String({ description: "Task ID to submit for sign-off" }),
evidence: Type.String({ description: "Auditable proof with full reproducibility: exact command run and its output, commit hash, config/seeds used, output file paths. Must be re-runnable by the human. 'I wrote X' is not evidence -- 'I ran X and got Y' is. Include counts, snippets, test output." }),
failure_mode_1: Type.String({ description: "Most likely way this could be wrong despite evidence" }),
failure_mode_2: Type.String({ description: "Second most likely failure mode" }),
evidence_vs_failures: Type.String({ description: "How would evidence differ if failure modes were true?" }),
evidence_files: Type.Optional(Type.Array(Type.String(), { description: "File paths to inspect (must exist)" })),
remaining_uncertainty: Type.Optional(Type.String({ description: "What's NOT tested, known limitations, edge cases deferred. Be honest about scope boundaries." })),
}),
execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const task = store.get(params.taskId);
if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`));
if (task.status === "completed") return Promise.resolve(textResult(`Task #${params.taskId} already completed`));
if (params.evidence_files?.length) {
for (const f of params.evidence_files) {
if (!existsSync(f)) return Promise.resolve(textResult(`Evidence file not found: ${f}`));
}
}
store.update(params.taskId, {
pending_approval: true,
metadata: {
lgtm_evidence: params.evidence,
lgtm_failure_mode_1: params.failure_mode_1,
lgtm_failure_mode_2: params.failure_mode_2,
lgtm_evidence_vs_failures: params.evidence_vs_failures,
lgtm_evidence_files: params.evidence_files ?? [],
lgtm_remaining_uncertainty: params.remaining_uncertainty ?? "",
lgtm_submitted_at: new Date().toISOString(),
},
});
widget.update();
const filesSection = params.evidence_files?.length
? `\n### Evidence files\n${params.evidence_files.map(f => `- ${f}`).join("\n")}`
: "";
const uncertaintySection = params.remaining_uncertainty
? `\n### Remaining uncertainty\n${params.remaining_uncertainty}`
: "";
const result =
`## Task #${task.id}: ${task.subject}\n` +
`Done criterion: ${task.done_criterion}\n\n` +
`### Evidence\n${params.evidence}\n\n` +
`### Failure mode 1\n${params.failure_mode_1}\n\n` +
`### Failure mode 2\n${params.failure_mode_2}\n\n` +
`### Evidence vs failure modes\n${params.evidence_vs_failures}` +
filesSection +
uncertaintySection +
`\n\n---\n` +
`Task #${task.id} is now pending human sign-off via \`/lgtm ${task.id}\`.\n\n` +
`**Self-check (non-blocking):** Look at this as the human will see it. ` +
`Does the evidence directly address the done_criterion "${task.done_criterion}"? ` +
`Would a skeptical reviewer find this convincing, or would they immediately ask ` +
`"but what about..."? If evidence feels thin, call lgtm_ask again with stronger evidence.`;
return Promise.resolve(textResult(result));
},
});
// ──────────────────────────────────────────────────
// /tasks command
// ──────────────────────────────────────────────────
pi.registerCommand("tasks", {
description: "Manage tasks — view, create, clear completed",
handler: async (_args: string, ctx: ExtensionCommandContext) => {
const ui = ctx.ui;
const mainMenu = async (): Promise<void> => {
const tasks = store.list();
const taskCount = tasks.length;
const completedCount = tasks.filter(t => t.status === "completed").length;
const choices: string[] = [`View all tasks (${taskCount})`, "Create task"];
if (completedCount > 0) choices.push(`Clear completed (${completedCount})`);
if (taskCount > 0) choices.push(`Clear all (${taskCount})`);
const choice = await ui.select("Tasks", choices);
if (!choice) return;
if (choice.startsWith("View")) await viewTasks();
else if (choice === "Create task") await createTask();
else if (choice.startsWith("Clear completed")) {
store.clearCompleted();
if (taskScope === "session") store.deleteFileIfEmpty();
widget.update();
await mainMenu();
} else if (choice.startsWith("Clear all")) {
store.clearAll();
if (taskScope === "session") store.deleteFileIfEmpty();
widget.update();
await mainMenu();
}
};
const viewTasks = async (): Promise<void> => {
const tasks = store.list();
if (tasks.length === 0) {
await ui.select("No tasks", ["← Back"]);
return mainMenu();
}
const statusIcon = (t: (typeof tasks)[0]) => {
if (t.status === "completed") return "✔";
if (t.pending_approval) return "👀";
if (t.status === "in_progress") return "◼";
return "◻";
};
const choices = tasks.map(t => `${statusIcon(t)} #${t.id} [${t.status}] ${t.subject}`);
choices.push("← Back");
const selected = await ui.select("Tasks", choices);
if (!selected || selected === "← Back") return mainMenu();
const match = selected.match(/#(\d+)/);
if (match) await viewTaskDetail(match[1]);
else return viewTasks();
};
const viewTaskDetail = async (taskId: string): Promise<void> => {
const task = store.get(taskId);
if (!task) return viewTasks();
const actions: string[] = [];
if (task.status === "pending") actions.push("▸ Start (in_progress)");
if (task.pending_approval && task.status !== "completed") {
actions.push(`(type /lgtm ${taskId} to sign off)`);
}
actions.push("✗ Delete");
actions.push("← Back");
const pendingNote = task.pending_approval && task.status !== "completed" ? "\n👀 Pending /lgtm sign-off" : "";
const em = task.metadata;
let evidenceNote = "";
if (em.lgtm_evidence) {
const parts = [`\n\nEvidence (${em.lgtm_submitted_at ?? "?"}):\n${em.lgtm_evidence}`];
parts.push(`FM1: ${em.lgtm_failure_mode_1}`);
parts.push(`FM2: ${em.lgtm_failure_mode_2}`);
if (em.lgtm_remaining_uncertainty) parts.push(`Uncertainty: ${em.lgtm_remaining_uncertainty}`);
if (em.lgtm_evidence_files?.length) parts.push(`Files: ${em.lgtm_evidence_files.join(", ")}`);
evidenceNote = parts.join("\n");
}
const title = `#${task.id} [${task.status}] ${task.subject}\nDone: ${task.done_criterion}${pendingNote}\n${task.description}${evidenceNote}`;
const action = await ui.select(title, actions);
if (action === "▸ Start (in_progress)") {
store.update(taskId, { status: "in_progress" });
widget.setActiveTask(taskId);
widget.update();
return viewTasks();
} else if (action === "✗ Delete") {
store.update(taskId, { status: "deleted" });
widget.setActiveTask(taskId, false);
widget.update();
return viewTasks();
}
return viewTasks();
};
const createTask = async (): Promise<void> => {
const subject = await ui.input("Task subject");
if (!subject) return mainMenu();
const description = await ui.input("Task description");
if (!description) return mainMenu();
const done_criterion = await ui.input("Done criterion (what does done look like?)");
if (!done_criterion) return mainMenu();
store.create(subject, description, done_criterion);
widget.update();
return mainMenu();
};
await mainMenu();
},
});
// ──────────────────────────────────────────────────
// /lgtm command — human sign-off only
// ──────────────────────────────────────────────────
async function signOff(taskId: string, ctx: ExtensionCommandContext): Promise<void> {
const task = store.get(taskId);
if (!task) { ctx.ui.notify(`Task #${taskId} not found`, "error"); return; }
if (task.status === "completed") { ctx.ui.notify(`Task #${taskId} already completed`, "info"); return; }
if (!task.pending_approval) {
ctx.ui.notify(`Task #${taskId} not ready. Agent must call lgtm_ask first.`, "error");
return;
}
// Show stored evidence for review before sign-off
const m = task.metadata;
const evidenceParts: string[] = [];
if (m.lgtm_evidence) {
evidenceParts.push(`Evidence:\n${m.lgtm_evidence}`);
evidenceParts.push(`FM1: ${m.lgtm_failure_mode_1}`);
evidenceParts.push(`FM2: ${m.lgtm_failure_mode_2}`);
evidenceParts.push(`Evidence vs failures: ${m.lgtm_evidence_vs_failures}`);
if (m.lgtm_remaining_uncertainty) evidenceParts.push(`Remaining uncertainty: ${m.lgtm_remaining_uncertainty}`);
if (m.lgtm_evidence_files?.length) evidenceParts.push(`Files: ${m.lgtm_evidence_files.join(", ")}`);
evidenceParts.push(`Submitted: ${m.lgtm_submitted_at}`);
}
const evidenceSummary = evidenceParts.length > 0 ? evidenceParts.join("\n\n") : "(no stored evidence)";
const confirm = await ctx.ui.select(
`Sign off #${taskId}: ${task.subject}\nDone criterion: ${task.done_criterion}\n\n${evidenceSummary}`,
["✓ LGTM — sign off", "✗ Cancel"],
);
if (confirm !== "✓ LGTM — sign off") return;
try {
store.complete(taskId);
} catch (err: any) {
ctx.ui.notify(err.message, "error");
return;
}
autoClear.trackCompletion(taskId, currentTurn);
widget.setActiveTask(taskId, false);
widget.update();
ctx.ui.notify(`Task #${taskId} signed off. ✓`, "info");
}
pi.registerCommand("lgtm", {
description: "Sign off on a task — /lgtm <id>",
handler: async (args: string, ctx: ExtensionCommandContext) => {
const taskId = args.trim();
if (!taskId) {
const pending = store.list().filter(t => t.pending_approval && t.status !== "completed");
if (pending.length === 0) {
ctx.ui.notify("No tasks pending sign-off. Agent must call lgtm_ask first.", "info");
return;
}
const choice = await ctx.ui.select(
"Sign off on:",
pending.map(t => `#${t.id} ${t.subject}`).concat(["← Cancel"]),
);
if (!choice || choice === "← Cancel") return;
const match = choice.match(/#(\d+)/);
if (match) signOff(match[1], ctx);
return;
}
signOff(taskId, ctx);
},
});
}