diff --git a/src/index.ts b/src/index.ts index 2f0fd29..45b8be4 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,67 +1,56 @@ /** - * pi-proof-tasks — Hermes-style evidence + judge task list for pi coding agent. + * pi-lgtm — Lean task list with evidence sign-off. * - * Two-tier model: - * - Subtasks: agent self-manages. Checklist work completes via TaskUpdate. - * - Top-level tasks: goals. TaskClaimDone submits a compact proof/UAT packet, - * a fresh judge gives an independent perspective, and explicit rejection keeps - * the task open for a stronger retry. + * Three kinds of items: + * - Goal: has done_criterion + failure_mode. Completes via TaskComplete with evidence. + * - Subtask: has parentId. Just subject. Completes via TaskUpdate. + * - Task: no parentId, no done_criterion. Plain checklist item. Completes via TaskUpdate. * * Tools: - * TaskCreate — Create a task with done_criterion - * TaskList — List tasks grouped by status - * TaskGet — Get full task details - * TaskUpdate — Update task fields/status (gated for top-level proof goals) - * TaskClaimDone — Present evidence + failure modes for proof review - * robot_review_ask — Attach observational review from a fresh-perspective agent - * robot_review_run — Re-run the automatic robot reviewer + * TaskCreate — Create goal/task/subtask + * TaskList — One line per item + * TaskGet — Readable detail card + * TaskUpdate — Update fields, mark tasks/subtasks done + * TaskComplete — Sign off a goal with evidence * * Commands: - * /tasks — Interactive task management menu - * /lgtm — View the proof log for one or more tasks - * /lgtm * — View all open task proof logs - * /lgtm — Pick a task to inspect proof logs + * /tasks — Interactive task management menu + * /lgtm — View proof log */ -import { spawn } from "node:child_process"; -import { createHash } from "node:crypto"; +import { randomUUID } from "node:crypto"; import { readFileSync } from "node:fs"; -import { join, resolve } from "node:path"; -import type { - ExtensionAPI, - ExtensionCommandContext, - ExtensionContext, -} from "@mariozechner/pi-coding-agent"; +import { join } from "node:path"; +import type { ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; import { Type } from "@sinclair/typebox"; import { AutoClearManager } from "./auto-clear.js"; import { - type CompletionMode, - getCompletionMode, - getDisplayStatus, - getGateStatus, - getReviewState, - type ReviewState, -} from "./review-badges.js"; -import { - appendRobotReviewMetadata, - getLatestRobotReview, - getRobotReviews, - type RobotReviewRecord, - relaxAdvisoryVerificationHints, - shouldCompleteAfterAcceptedReview, -} from "./robot-review.js"; + createCadenceState, + drainReminderForContext, + evaluateToolResult, + type CadenceConfig, + type CadenceState, +} from "./reminder-cadence.js"; import { TaskStore } from "./task-store.js"; import { loadTasksConfig } from "./tasks-config.js"; import type { Task } from "./types.js"; import { TaskWidget, type UICtx } from "./ui/task-widget.js"; +// ---- Helpers ---- + function textResult(msg: string) { - return { - content: [{ type: "text" as const, text: msg }], - details: undefined as any, - }; + return { content: [{ type: "text" as const, text: msg }], details: undefined as any }; } +const TASK_TOOL_NAMES = new Set([ + "TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "TaskComplete", +]); + +const REMINDER_INTERVAL = 4; +const SYSTEM_REMINDER = `You have active tasks. Check TaskList and keep working toward them. Mark in_progress before starting, and TaskComplete goals with evidence when done.`; + +// ---- /lgtm command parser ---- + export type LgtmCommandSpec = | { kind: "menu" } | { kind: "view_all" } @@ -72,1628 +61,269 @@ export function parseLgtmArgs(args: string): LgtmCommandSpec { const trimmed = args.trim(); if (!trimmed) return { kind: "menu" }; if (trimmed === "*") return { kind: "view_all" }; - - const tokens = trimmed - .split(/[\s,]+/) - .map((token) => token.trim()) - .filter(Boolean); - if (["clear", "delete"].includes(tokens[0])) { - return { - kind: "error", - message: "Task management lives in /tasks now. /lgtm is viewer-only.", - }; - } - - return { - kind: "view", - ids: tokens.map((token) => token.replace(/^#/, "")).filter(Boolean), - }; + const tokens = trimmed.split(/[\s,]+/).map((t) => t.trim()).filter(Boolean); + return { kind: "view", ids: tokens.map((t) => t.replace(/^#/, "")).filter(Boolean) }; } -const TASK_TOOL_NAMES = new Set([ - "TaskCreate", - "TaskList", - "TaskGet", - "TaskUpdate", - "TaskClaimDone", - "lgtm_supersede", - "robot_review_ask", - "robot_review_run", -]); -const REMINDER_INTERVAL = 4; -const AUTO_CLEAR_DELAY = 4; -export const DEFAULT_ROBOT_REVIEW_TIMEOUT_MS = 120_000; +// ---- RPC to pi-subagents ---- -type CommandResult = { - stdout: string; - stderr: string; - exitCode: number | null; -}; - -export function getPiInvocation( - args: string[], - env: NodeJS.ProcessEnv = process.env, -): { command: string; args: string[] } { - const configured = env.PI_PROOF_TASKS_PI_BIN?.trim(); - return { command: configured || "pi", args }; -} - -export function getRobotReviewTimeoutMs( - env: NodeJS.ProcessEnv = process.env, -): number { - const configured = Number.parseInt( - env.PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS ?? "", - 10, - ); - return Number.isFinite(configured) && configured > 0 - ? configured - : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS; -} - -/** Format pi's current model object as the CLI's provider/model reference. */ -export function getCurrentModelRef(model: unknown): string | undefined { - if (!model || typeof model !== "object") return undefined; - const provider = - typeof (model as any).provider === "string" - ? (model as any).provider - : typeof (model as any).providerId === "string" - ? (model as any).providerId - : undefined; - const id = - typeof (model as any).id === "string" - ? (model as any).id - : typeof (model as any).modelId === "string" - ? (model as any).modelId - : undefined; - return provider && id ? `${provider}/${id}` : undefined; -} - -function getAssistantTextFromPiEvent(event: any): string | undefined { - if ( - event?.type !== "message_end" || - event.message?.role !== "assistant" || - !Array.isArray(event.message.content) - ) { - return undefined; - } - const text = event.message.content.find( - (part: any) => part?.type === "text", - )?.text; - return typeof text === "string" ? text : undefined; -} - -export function extractFinalAssistantTextFromPiJsonl(output: string): string { - let buffer = ""; - let finalAssistantText = ""; - const lines = output.split("\n"); - for (const line of lines) { - if (!line.trim()) continue; - buffer = line; - try { - const text = getAssistantTextFromPiEvent(JSON.parse(line)); - if (text) finalAssistantText = text; - buffer = ""; - } catch { - // ignore malformed line noise from the child process - } - } - if (buffer.trim()) { - try { - const text = getAssistantTextFromPiEvent(JSON.parse(buffer)); - if (text) finalAssistantText = text; - } catch { - // ignore malformed trailing line - } - } - return finalAssistantText; -} - -export async function runRobotReviewCommand( - invocation: { command: string; args: string[] }, - signal?: AbortSignal, - timeoutMs = getRobotReviewTimeoutMs(), -): Promise { - return new Promise((resolve, reject) => { - const child = spawn(invocation.command, invocation.args, { - shell: false, - stdio: ["ignore", "pipe", "pipe"], - }); - const stdoutChunks: Buffer[] = []; - const stderrChunks: Buffer[] = []; - let settled = false; - - const finish = (fn: () => void) => { - if (settled) return; - settled = true; - fn(); - }; - - const killTimer = setTimeout(() => { - child.kill("SIGTERM"); - finish(() => - reject(new Error(`Robot reviewer timed out after ${timeoutMs}ms.`)), - ); - }, timeoutMs); - - child.stdout.on("data", (data) => stdoutChunks.push(data)); - child.stderr.on("data", (data) => stderrChunks.push(data)); - child.on("error", (err) => { - clearTimeout(killTimer); - finish(() => reject(err)); - }); - const onAbort = () => { - clearTimeout(killTimer); - child.kill("SIGTERM"); - }; - signal?.addEventListener("abort", onAbort, { once: true }); - child.on("close", (exitCode) => { - clearTimeout(killTimer); - signal?.removeEventListener("abort", onAbort); - if (signal?.aborted) { - finish(() => reject(new Error("aborted"))); - return; - } - const stdout = Buffer.concat(stdoutChunks).toString("utf-8"); - finish(() => - resolve({ - stdout: extractFinalAssistantTextFromPiJsonl(stdout) || stdout, - stderr: Buffer.concat(stderrChunks).toString("utf-8"), - exitCode, - }), - ); +function rpcCall(events: any, channel: string, params: Record, timeoutMs: number): Promise { + const requestId = randomUUID(); + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { unsub(); reject(new Error(`RPC timeout: ${channel}`)); }, timeoutMs); + const unsub = events.on(`${channel}:reply:${requestId}`, (raw: unknown) => { + unsub(); clearTimeout(timer); + const reply = raw as { success: boolean; data?: T; error?: string }; + if (reply.success) resolve(reply.data as T); + else reject(new Error(reply.error ?? "RPC failed")); }); + events.emit(channel, { requestId, ...params }); }); } -function summarizeRawOutput(output: string, maxChars = 400): string { - const singleLine = output.replace(/\s+/g, " ").trim(); - if (singleLine.length <= maxChars) return singleLine; - return `${singleLine.slice(0, maxChars)}...`; -} +let subagentsAvailable = false; -function stripMarkdownCodeFence(text: string): string { - const trimmed = text.trim(); - const fence = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); - return fence ? fence[1].trim() : trimmed; -} - -function extractBalancedJsonObject(text: string): string | undefined { - let start = -1; - let depth = 0; - let inString = false; - let escaped = false; - - for (let index = 0; index < text.length; index++) { - const char = text[index]; - if (escaped) { - escaped = false; - continue; - } - if (char === "\\") { - escaped = true; - continue; - } - if (char === '"') { - inString = !inString; - continue; - } - if (inString) continue; - if (char === "{") { - if (depth === 0) start = index; - depth++; - continue; - } - if (char === "}") { - if (depth === 0) continue; - depth--; - if (depth === 0 && start >= 0) return text.slice(start, index + 1); - } - } - return undefined; -} - -interface EvidenceCommandRecord { - cmd: string; - exit_code: number; - stdout_path?: string; - stderr_path?: string; -} - -interface EvidenceArtifactRecord { - path: string; - sha256: string; - bytes: number; -} - -interface EvidenceIterationRecord { - iteration: number; - submitted_at: string; - superseded_at?: string; - supersede_reason?: string; - evidence: string; - failure_likely: string; - failure_sneaky: string; - failure_unknown: string; - falsification_test: string; - evidence_reasoning: string; - verification_hints: string[]; - remaining_uncertainty: string; - commands: EvidenceCommandRecord[]; - evidence_artifacts: EvidenceArtifactRecord[]; - falsification_artifacts: EvidenceArtifactRecord[]; - robot_reviews: RobotReviewRecord[]; - automatic_review_failure?: { message: string; raw_output?: string }; -} - -const AUTOMATIC_REVIEW_ERROR_KEYS = [ - "robot_review_last_error", - "robot_review_last_error_output", - "robot_review_last_error_at", -] as const; - -const ROBOT_REVIEW_KEYS = [ - "robot_reviews", - "robot_review_reviewer", - "robot_review_scope", - "robot_review_observations", - "robot_review_concerns", - "robot_review_suggestions", - "robot_review_blind_spots", - "robot_review_accepted", - "robot_review_evidence_complete", - "robot_review_evidence_convincing", - "robot_review_missing_evidence", - "robot_review_submitted_at", - "robot_review_mode", - "robot_review_raw_output", - "robot_review_reason", - "robot_review_requires_followup", - "robot_review_iteration_count", -] as const; - -const CURRENT_EVIDENCE_KEYS = [ - "lgtm_evidence", - "lgtm_failure_likely", - "lgtm_failure_sneaky", - "lgtm_failure_unknown", - "lgtm_falsification_test", - "lgtm_evidence_reasoning", - "lgtm_verification_hints", - "lgtm_remaining_uncertainty", - "lgtm_submitted_at", - "lgtm_commands", - "lgtm_evidence_artifacts", - "lgtm_falsification_artifacts", -] as const; - -const RESERVED_METADATA_PREFIXES = ["lgtm_", "robot_review"]; - -function assertNoReservedMetadata( - metadata: Record | undefined, -): string | null { - if (!metadata) return null; - for (const key of Object.keys(metadata)) { - if (RESERVED_METADATA_PREFIXES.some((prefix) => key.startsWith(prefix))) { - return `Metadata key ${key} is reserved for proof/review internals. Use TaskClaimDone or robot_review_run instead.`; - } - } - return null; -} - -function requiredTextError( - fields: Record, - names: string[], -): string | null { - for (const name of names) { - const value = fields[name]; - if (typeof value !== "string" || value.trim().length === 0) - return `${name} is required and cannot be blank.`; - } - return null; -} - -function nullRecord(keys: readonly string[]): Record { - return Object.fromEntries(keys.map((key) => [key, null])); -} - -function getAutomaticReviewFailureMetadata( - message: string, - rawOutput?: string, -): Record { - return { - robot_review_last_error: message, - robot_review_last_error_output: rawOutput ?? null, - robot_review_last_error_at: new Date().toISOString(), - }; -} - -function clearAutomaticReviewFailureMetadata(): Record { - return nullRecord(AUTOMATIC_REVIEW_ERROR_KEYS); -} - -function clearRobotReviewMetadata(): Record { - return nullRecord(ROBOT_REVIEW_KEYS); -} - -function clearCurrentEvidenceMetadata(): Record { - return nullRecord(CURRENT_EVIDENCE_KEYS); -} - -function normalizeCommandRecords(value: unknown): EvidenceCommandRecord[] { - return Array.isArray(value) - ? value.flatMap((entry) => { - if (!entry || typeof entry !== "object") return []; - const command = entry as Record; - if ( - typeof command.cmd !== "string" || - typeof command.exit_code !== "number" - ) - return []; - return [ - { - cmd: command.cmd, - exit_code: command.exit_code, - stdout_path: - typeof command.stdout_path === "string" - ? command.stdout_path - : undefined, - stderr_path: - typeof command.stderr_path === "string" - ? command.stderr_path - : undefined, - }, - ]; - }) - : []; -} - -function normalizeArtifactRecords(value: unknown): EvidenceArtifactRecord[] { - return Array.isArray(value) - ? value.flatMap((entry) => { - if (!entry || typeof entry !== "object") return []; - const artifact = entry as Record; - if ( - typeof artifact.path !== "string" || - typeof artifact.sha256 !== "string" || - typeof artifact.bytes !== "number" - ) - return []; - return [ - { - path: artifact.path, - sha256: artifact.sha256, - bytes: artifact.bytes, - }, - ]; - }) - : []; -} - -export function buildArtifactRecords( - paths?: string[], -): EvidenceArtifactRecord[] { - return (paths ?? []).map((path) => { - const resolvedPath = resolve(path); - const content = readFileSync(resolvedPath); - return { - path: resolvedPath, - sha256: createHash("sha256").update(content).digest("hex"), - bytes: content.length, - }; +function checkSubagents(events: any): void { + const requestId = randomUUID(); + const timer = setTimeout(() => { unsub(); }, 5_000); + const unsub = events.on(`subagents:rpc:ping:reply:${requestId}`, (raw: unknown) => { + unsub(); clearTimeout(timer); + if ((raw as any)?.success) subagentsAvailable = true; }); + events.emit("subagents:rpc:ping", { requestId }); } -export function getEvidenceHistory(task: Task): EvidenceIterationRecord[] { - return Array.isArray(task.metadata?.lgtm_history) - ? task.metadata.lgtm_history.filter( - (entry: unknown): entry is EvidenceIterationRecord => - !!entry && typeof entry === "object", - ) - : []; -} - -export function getCurrentEvidenceIteration( - task: Task, -): EvidenceIterationRecord | undefined { - const metadata = task.metadata ?? {}; - if (typeof metadata.lgtm_evidence !== "string") return undefined; - return { - iteration: getEvidenceHistory(task).length + 1, - submitted_at: - typeof metadata.lgtm_submitted_at === "string" - ? metadata.lgtm_submitted_at - : new Date(0).toISOString(), - evidence: metadata.lgtm_evidence, - failure_likely: - typeof metadata.lgtm_failure_likely === "string" - ? metadata.lgtm_failure_likely - : "", - failure_sneaky: - typeof metadata.lgtm_failure_sneaky === "string" - ? metadata.lgtm_failure_sneaky - : "", - failure_unknown: - typeof metadata.lgtm_failure_unknown === "string" - ? metadata.lgtm_failure_unknown - : "", - falsification_test: - typeof metadata.lgtm_falsification_test === "string" - ? metadata.lgtm_falsification_test - : "", - evidence_reasoning: - typeof metadata.lgtm_evidence_reasoning === "string" - ? metadata.lgtm_evidence_reasoning - : "", - verification_hints: Array.isArray(metadata.lgtm_verification_hints) - ? metadata.lgtm_verification_hints.filter( - (hint: unknown): hint is string => typeof hint === "string", - ) - : [], - remaining_uncertainty: - typeof metadata.lgtm_remaining_uncertainty === "string" - ? metadata.lgtm_remaining_uncertainty - : "", - commands: normalizeCommandRecords(metadata.lgtm_commands), - evidence_artifacts: normalizeArtifactRecords( - metadata.lgtm_evidence_artifacts, - ), - falsification_artifacts: normalizeArtifactRecords( - metadata.lgtm_falsification_artifacts, - ), - robot_reviews: getRobotReviews(task), - automatic_review_failure: - typeof metadata.robot_review_last_error === "string" - ? { - message: metadata.robot_review_last_error, - raw_output: - typeof metadata.robot_review_last_error_output === "string" - ? metadata.robot_review_last_error_output - : undefined, - } - : undefined, - }; -} - -export function getEvidenceIterationCount(task: Task): number { - return ( - getEvidenceHistory(task).length + - (getCurrentEvidenceIteration(task) ? 1 : 0) - ); -} - -export function archiveCurrentEvidence( - task: Task, - reason: string, -): Record { - const current = getCurrentEvidenceIteration(task); - if (!current) return {}; - return { - lgtm_history: [ - ...getEvidenceHistory(task), - { - ...current, - superseded_at: new Date().toISOString(), - supersede_reason: reason, - }, - ], - }; -} - -function presentOrMissing(value: string | undefined): string { - return value && value.trim().length > 0 ? value : "(missing)"; -} - -function formatBulletList( - title: string, - items: string[], - empty = "(none)", -): string { - return `### ${title}\n${items.length > 0 ? items.map((item) => `- ${item}`).join("\n") : `- ${empty}`}`; -} - -function formatCommandRecords( - commands: EvidenceCommandRecord[], -): string | undefined { - if (commands.length === 0) return undefined; - return `### Commands\n${commands.map((command) => `- \`${command.cmd}\` (exit ${command.exit_code})${command.stdout_path ? ` stdout: ${command.stdout_path}` : ""}${command.stderr_path ? ` stderr: ${command.stderr_path}` : ""}`).join("\n")}`; -} - -function formatArtifactRecords( - title: string, - artifacts: EvidenceArtifactRecord[], -): string | undefined { - if (artifacts.length === 0) return undefined; - return `### ${title}\n${artifacts.map((artifact) => `- ${artifact.path} (${artifact.bytes} bytes, sha256 ${artifact.sha256})`).join("\n")}`; -} - -const MAX_INLINE_PROOF_LINES = 16; -const MAX_INLINE_TOOL_LINES = 8; -const MAX_INLINE_REVIEW_ITEMS = 3; - -function truncateProofBlock( - body: string, - maxLines = MAX_INLINE_PROOF_LINES, -): { - preview: string; - truncated: boolean; - totalLines: number; - headLines: number; - tailLines: number; -} { - const lines = body.split("\n"); - if (lines.length <= maxLines) { - return { - preview: body, - truncated: false, - totalLines: lines.length, - headLines: lines.length, - tailLines: 0, - }; - } - const headLines = Math.ceil(maxLines / 2); - const tailLines = Math.floor(maxLines / 2); - const omitted = lines.length - headLines - tailLines; - return { - preview: [ - ...lines.slice(0, headLines), - `[... ${omitted} middle lines omitted ...]`, - ...lines.slice(lines.length - tailLines), - ].join("\n"), - truncated: true, - totalLines: lines.length, - headLines, - tailLines, - }; -} - -function summarizeList( - items: string[], - maxItems = MAX_INLINE_REVIEW_ITEMS, -): string[] { - if (items.length <= maxItems) return items; - return [ - ...items.slice(0, maxItems), - `(${items.length - maxItems} more omitted)`, - ]; -} - -function getEvidenceOverflowPath( - entry: EvidenceIterationRecord, -): string | undefined { - return ( - entry.evidence_artifacts[0]?.path ?? - entry.commands.find((command) => typeof command.stdout_path === "string") - ?.stdout_path ?? - entry.commands.find((command) => typeof command.stderr_path === "string") - ?.stderr_path - ); -} - -function formatReviewTextBlock( - title: string, - body: string, - options?: { maxLines?: number; overflowPath?: string }, -): string { - const truncated = options?.maxLines - ? truncateProofBlock(body, options.maxLines) - : { - preview: body, - truncated: false, - totalLines: body.split("\n").length, - headLines: body.split("\n").length, - tailLines: 0, - }; - const overflowNote = truncated.truncated - ? `\n\n[truncated at ${options?.maxLines ?? MAX_INLINE_PROOF_LINES} lines from ${truncated.totalLines}; showing first ${truncated.headLines} and last ${truncated.tailLines}; full text: ${options?.overflowPath ?? "(no stored artifact path)"}]` - : ""; - return `### ${title}\n\n\`\`\`text\n${truncated.preview}${overflowNote}\n\`\`\``; -} - -function formatTaskStatusLine(task: Task): string { - return `Status: ${task.status}`; -} - -function formatTaskToolMetadata( - task: Task, - options?: { updatedFields?: string[] }, -): string { - const current = getCurrentEvidenceIteration(task); - const metadataKeys = Object.keys(getNonReviewMetadata(task)); - return [ - "### Metadata", - `- Completion mode: ${getCompletionMode(task)}`, - `- Review state: ${getReviewState(task)}`, - `- Gate status: ${getGateStatus(task)}`, - options?.updatedFields?.length - ? `- Updated fields: ${options.updatedFields.join(", ")}` - : undefined, - `- Metadata keys: ${metadataKeys.length}`, - `- Proof iterations: ${getEvidenceIterationCount(task)}`, - `- Robot reviews: ${getRobotReviews(task).length}`, - current?.submitted_at - ? `- Submitted at: ${current.submitted_at}` - : undefined, - `- Updated at: ${new Date(task.updatedAt).toISOString()}`, - ] - .filter(Boolean) - .join("\n"); -} - -function renderTaskToolResult( - title: string, - task: Task, - body: string, - options?: { updatedFields?: string[] }, -): string { - return [ - `## ${title} -> Task #${task.id}: ${task.subject}`, - formatTaskStatusLine(task), - formatTaskToolMetadata(task, options), - body, - ].join("\n\n"); -} - -function renderTaskSnapshot( - task: Task, - options?: { - includeDescription?: boolean; - includeDoneCriterion?: boolean; - includeProgressLabel?: boolean; - includeMetadata?: boolean; - }, -): string { - const sections: string[] = []; - if (options?.includeDoneCriterion !== false) { - sections.push( - formatReviewTextBlock( - "Done criterion", - presentOrMissing(task.done_criterion), - { maxLines: MAX_INLINE_TOOL_LINES }, - ), - ); - } - if (options?.includeDescription) { - sections.push( - formatReviewTextBlock("Description", presentOrMissing(task.description), { - maxLines: MAX_INLINE_TOOL_LINES, - }), - ); - } - if (options?.includeProgressLabel && task.progress_label) { - sections.push( - formatReviewTextBlock("Progress label", task.progress_label, { - maxLines: MAX_INLINE_TOOL_LINES, - }), - ); - } - if (options?.includeMetadata) { - const metadata = getNonReviewMetadata(task); - if (Object.keys(metadata).length > 0) { - sections.push( - formatReviewTextBlock( - "Metadata preview", - JSON.stringify(metadata, null, 2), - { maxLines: MAX_INLINE_TOOL_LINES }, - ), - ); - } - } - return sections.join("\n\n"); -} - -function renderTaskUpdateSummary( - before: Task | undefined, - task: Task, - changedFields: string[], - metadataPatch?: Record, -): string { - const lines = ["### Changes"]; - for (const field of changedFields) { - if (field === "status") { - lines.push( - `- status: ${before?.status ?? "(missing)"} -> ${task.status}`, - ); - continue; - } - if (field === "subject") { - lines.push( - `- subject: ${before?.subject ?? "(missing)"} -> ${task.subject}`, - ); - continue; - } - if (field === "progress_label") { - lines.push( - `- progress_label: ${before?.progress_label ?? "(missing)"} -> ${task.progress_label ?? "(missing)"}`, - ); - continue; - } - if (field === "description") { - lines.push( - formatReviewTextBlock( - "Description", - presentOrMissing(task.description), - { maxLines: MAX_INLINE_TOOL_LINES }, - ), - ); - continue; - } - if (field === "done_criterion") { - lines.push( - formatReviewTextBlock( - "Done criterion", - presentOrMissing(task.done_criterion), - { maxLines: MAX_INLINE_TOOL_LINES }, - ), - ); - continue; - } - if (field === "metadata") { - const metadata = metadataPatch ?? getNonReviewMetadata(task); - lines.push( - formatReviewTextBlock( - "Metadata patch", - JSON.stringify(metadata, null, 2), - { maxLines: MAX_INLINE_TOOL_LINES }, - ), - ); - continue; - } - if (field === "blocks") { - lines.push( - `- blocks: ${task.blocks.length > 0 ? task.blocks.map((id) => `#${id}`).join(", ") : "(none)"}`, - ); - continue; - } - if (field === "blockedBy") { - lines.push( - `- blockedBy: ${task.blockedBy.length > 0 ? task.blockedBy.map((id) => `#${id}`).join(", ") : "(none)"}`, - ); - continue; - } - lines.push(`- ${field}`); - } - return lines.join("\n"); -} - -function renderCompactRobotReview(review: RobotReviewRecord): string { - const verdict = review.accepted ? "Accepted" : "Refused"; - const lines = [`${verdict} by ${review.reviewer}.`]; - if (review.reason) { - lines.push(review.reason); - } else if (review.observations.length > 0) { - lines.push(review.observations[0]); - } - if (review.blind_spots) lines.push(`Blind spots: ${review.blind_spots}`); - if (!review.accepted && review.missing_evidence.length > 0) { - lines.push(`Needs: ${review.missing_evidence.join("; ")}`); - } - if (!review.accepted && review.suggestions.length > 0) { - lines.push(`Next: ${review.suggestions.join("; ")}`); - } - return lines.join(" "); -} - -function renderCurrentProofSummary(task: Task): string { - const sections = [renderEvidencePacket(task)]; - const latestReview = getLatestRobotReview(task); - if (latestReview) sections.push(renderCompactRobotReview(latestReview)); - const automaticReviewFailure = renderAutomaticReviewFailure(task); - if (automaticReviewFailure) sections.push(automaticReviewFailure); - return sections.join("\n\n"); -} - -function renderPlannedEvidence( - entry: EvidenceIterationRecord, - options?: { truncateFalsification?: boolean }, -): string { - return [ - "### Verify", - formatBulletList( - "Verification hints", - entry.verification_hints, - "(missing)", - ), - formatReviewTextBlock( - "Falsification test", - presentOrMissing(entry.falsification_test), - options?.truncateFalsification === false - ? undefined - : { - maxLines: MAX_INLINE_PROOF_LINES, - overflowPath: entry.falsification_artifacts[0]?.path, - }, - ), - ].join("\n\n"); -} - -function summarizeJudgement(entry: EvidenceIterationRecord): { - title: string; - body: string; - observations: string[]; - concerns: string[]; - suggestions: string[]; - missingEvidence: string[]; -} { - const latestReview = entry.robot_reviews[entry.robot_reviews.length - 1]; - if (latestReview) { - return { - title: latestReview.accepted ? "Accepted" : "Refused", - body: `${latestReview.accepted ? "Accepted" : "Refused"} by ${latestReview.reviewer} on ${latestReview.submitted_at}.`, - observations: latestReview.observations, - concerns: latestReview.concerns, - suggestions: - latestReview.suggestions.length > 0 - ? latestReview.suggestions - : latestReview.accepted - ? [] - : latestReview.missing_evidence.map( - (item) => `Strengthen the proof for: ${item}`, - ), - missingEvidence: latestReview.missing_evidence, - }; - } - if (entry.automatic_review_failure) { - return { - title: "Reviewer unavailable", - body: entry.automatic_review_failure.message, - observations: [], - concerns: [], - suggestions: [ - "Autonomy continued without blocking completion.", - "Inspect the reviewer failure note if you want a fresh external perspective later.", - ], - missingEvidence: [], - }; - } - return { - title: "Pending review", - body: "No judge result recorded yet.", - observations: [], - concerns: [], - suggestions: [], - missingEvidence: [], - }; -} - -function renderAttempt( - entry: EvidenceIterationRecord, - options?: { truncateEvidence?: boolean; truncateFalsification?: boolean }, -): string { - const judgement = summarizeJudgement(entry); - const evidenceBlock = - options?.truncateEvidence === false - ? formatReviewTextBlock("Evidence", presentOrMissing(entry.evidence)) - : formatReviewTextBlock("Evidence", presentOrMissing(entry.evidence), { - maxLines: MAX_INLINE_PROOF_LINES, - overflowPath: getEvidenceOverflowPath(entry), - }); - return [ - `## Attempt ${entry.iteration}`, - evidenceBlock, - renderPlannedEvidence(entry, options), - "### Check notes", - `- likely wrong: ${presentOrMissing(entry.failure_likely)}`, - `- sneaky wrong: ${presentOrMissing(entry.failure_sneaky)}`, - `- unknown left: ${presentOrMissing(entry.failure_unknown)}`, - `- why this counts: ${presentOrMissing(entry.evidence_reasoning)}`, - `- remaining uncertainty: ${presentOrMissing(entry.remaining_uncertainty)}`, - `### Judgement\n${judgement.title}`, - judgement.body, - judgement.observations.length > 0 ? judgement.observations[0] : "", - judgement.concerns.length > 0 ? `Concerns: ${judgement.concerns.join("; ")}` : "", - judgement.missingEvidence.length > 0 ? `Needs: ${judgement.missingEvidence.join("; ")}` : "", - judgement.suggestions.length > 0 ? `Next: ${judgement.suggestions.join("; ")}` : "", - ] - .filter(Boolean) - .join("\n\n"); -} - -export function renderEvidencePacket( - task: Task, - options?: { truncateEvidence?: boolean; truncateFalsification?: boolean }, -): string { - const current = getCurrentEvidenceIteration(task); - if (!current) - return "(No current proof claim. The agent never called TaskClaimDone, or the prior claim was superseded.)"; - - return [ - "## Goal", - `Task #${task.id}: ${task.subject}`, - `Done criterion: ${presentOrMissing(task.done_criterion)}`, - renderAttempt(current, options), - formatCommandRecords(current.commands), - formatArtifactRecords("Evidence artifacts", current.evidence_artifacts), - formatArtifactRecords( - "Falsification artifacts", - current.falsification_artifacts, - ), - ] - .filter( - (section): section is string => - typeof section === "string" && section.length > 0, - ) - .join("\n\n"); -} - -function renderAutomaticReviewFailure(task: Task): string | undefined { - if (typeof task.metadata?.robot_review_last_error !== "string") - return undefined; - const sections = [ - `### Automatic robot review failure\n${task.metadata.robot_review_last_error}`, - ]; - if ( - typeof task.metadata?.robot_review_last_error_output === "string" && - task.metadata.robot_review_last_error_output.trim() - ) { - sections.push( - formatReviewTextBlock( - "Reviewer raw output", - task.metadata.robot_review_last_error_output, - { maxLines: MAX_INLINE_PROOF_LINES }, - ), - ); - } - return sections.join("\n\n"); -} - -export function renderProofLog(task: Task): string { - const history = getEvidenceHistory(task); - const attempts = history.map((entry) => renderAttempt(entry)); - const current = getCurrentEvidenceIteration(task); - const lines = [ - `# Task #${task.id}: ${task.subject}`, - `Status: ${task.status}`, - `Gate status: ${getGateStatus(task)}`, - "", - "## Goal", - `Done criterion: ${presentOrMissing(task.done_criterion)}`, - ]; - if (current) { - lines.push("", ...attempts, renderAttempt(current)); - } else if (attempts.length > 0) { - lines.push("", ...attempts); - } else { - lines.push("", "(No current proof claim.)"); - } - return lines.join("\n"); -} - -function getNonReviewMetadata(task: Task): Record { - return Object.fromEntries( - Object.entries(task.metadata ?? {}).filter( - ([key]) => - !key.startsWith("lgtm_") && - !key.startsWith("robot_review_") && - key !== "lgtm_history" && - key !== "robot_reviews", - ), - ); -} - -function formatHistorySummary(task: Task): string | undefined { - const history = getEvidenceHistory(task); - if (history.length === 0) return undefined; - return `Superseded evidence:\n${history.map((entry) => `- #${entry.iteration} superseded ${entry.superseded_at ?? "?"}: ${entry.supersede_reason ?? "(no reason recorded)"}`).join("\n")}`; -} - -export function extractRobotReviewJson( - output: string, -): Record { - const match = output.match( - /ROBOT_REVIEW_JSON_START\s*([\s\S]*?)\s*ROBOT_REVIEW_JSON_END/, - ); - const source = match ? match[1] : output; - const candidates = [ - source.trim(), - stripMarkdownCodeFence(source), - extractBalancedJsonObject(source) ?? "", - extractBalancedJsonObject(stripMarkdownCodeFence(source)) ?? "", - ].filter(Boolean); - - let lastError: unknown; - for (const candidate of [...new Set(candidates)]) { - try { - return JSON.parse(candidate) as Record; - } catch (error) { - lastError = error; - } - } - - const prefix = match - ? "Robot reviewer returned invalid JSON" - : "Robot reviewer did not return the expected JSON markers or a parseable JSON object"; - const detail = lastError instanceof Error ? `: ${lastError.message}` : ""; - throw new Error( - `${prefix}${detail}. Raw output: ${summarizeRawOutput(output)}`, - ); -} - -export function buildRobotReviewPrompt(task: Task): string { - return [ - "You are a fresh validation judge for a Hermes-style proof log.", - "Question: does this packet prove the exact user-visible success condition in the done criterion?", - "If the done criterion asks for a specific output or direction of change, check that the quoted output actually shows that result, not merely that a command ran.", - "If not, say no and explain what concrete output is still missing. Suggestions are advisory guidance, not a separate gate.", - "", - "## Critical: Evidence must be verbatim", - "", - "Evidence should contain literal output, exact log lines, markdown block quotes, table rows, and URLs, not summaries or interpretations.", - "A human must be able to inspect the evidence alone without re-running anything.", - "", - "## Rubric (rate each item pass/fail)", - "", - "1. evidence_covers_done_criterion: Does the packet show the concrete observable thing the done criterion asks for, in the right direction or state?", - "2. falsification_test_runnable: Is there a concrete check with literal output that would come out differently if the claim were wrong?", - "3. failure_modes_addressed: Are the likely, sneaky, and unknown failure modes plausible enough to guide what evidence matters? Advisory.", - "4. evidence_distinguishes_success: Does the packet explain, at least briefly, why the shown evidence rules out the main failure modes? Advisory.", - "5. verification_hints_actionable: Can a human inspect the claim without re-running everything? Advisory.", - "", - "Set evidence_complete=true only if items 1 and 2 pass.", - "Set evidence_convincing=true if items 1 and 2 pass and you do not see a concrete contradiction in the packet.", - "Set accepted=true if items 1 and 2 pass and you do not see a concrete contradiction in the packet. Do not reject solely because items 3, 4, or 5 are weak if the verbatim evidence already proves the done criterion.", - "", - "reason: 1-3 sentence summary of why you accepted or rejected. Be concrete: cite specific test counts, file paths, or output lines you checked. Example: 'Pass, I checked the evidence it shows all 142 tests pass and HEAD=origin/main.'", - "observations: kept for audit only. One line max, not a repeat of the evidence.", - "When rejecting, prefer missing outputs like 'nll_val never decreases in the quoted log' over process complaints like 'too much text'.", - "concerns: kept for audit only. One line max when rejecting, empty when accepting.", - "suggestions: what the agent should do next if rejected. 1-3 bullets max.", - "missing_evidence: concrete missing artifacts or outputs that block acceptance. Only when rejecting.", - "blind_spots: what you could not check. Always include this. Example: 'only reviewed the verbatim packet, did not inspect the actual artifact files.'", - "", - "Return exactly one JSON object between the markers ROBOT_REVIEW_JSON_START and ROBOT_REVIEW_JSON_END.", - "JSON schema:", - '{"reviewer":"string","scope":"string","rubric":{"evidence_covers_done_criterion":{"reason":"...","pass":true},"falsification_test_runnable":{"reason":"...","pass":true},"failure_modes_addressed":{"reason":"...","pass":true},"evidence_distinguishes_success":{"reason":"...","pass":true},"verification_hints_actionable":{"reason":"...","pass":true}},"reason":"1-3 sentence summary of why you accepted or rejected","observations":["string"],"concerns":["string"],"suggestions":["string"],"blind_spots":"string","missing_evidence":["string"],"evidence_complete":true,"evidence_convincing":true,"accepted":true}', - "", - "You are reviewing exactly the same proof packet shown by TaskGet and /lgtm. Do not assume hidden context beyond this packet.", - "", - renderEvidencePacket(task, { truncateEvidence: false }), - "Output format:", - "ROBOT_REVIEW_JSON_START", - '{"reviewer":"...","scope":"...","rubric":{...},"reason":"Pass, I checked the evidence it shows all 142 tests pass and HEAD=origin/main.","observations":["..."],"concerns":["..."],"suggestions":["..."],"blind_spots":"...","missing_evidence":["..."],"evidence_complete":true,"evidence_convincing":true,"accepted":true}', - "ROBOT_REVIEW_JSON_END", +async function spawnSanityCheck( + events: any, taskId: string, subject: string, done_criterion: string, evidence: string, failure_likely: string, +): Promise { + if (!subagentsAvailable) return "(sanity check skipped: pi-subagents not available)"; + const prompt = [ + `Verify task #${taskId} is actually done.`, + `Subject: ${subject}`, + `Done when: ${done_criterion}`, + `Evidence: ${evidence}`, + `Likely failure: ${failure_likely}`, + ``, + `Read the actual files mentioned in the evidence. Run the actual commands if possible.`, + `Answer: Is the evidence real and does it match the done criterion? One paragraph.`, ].join("\n"); -} - -async function runAutomaticRobotReview( - task: any, - signal?: AbortSignal, - currentModelRef?: string, -): Promise<{ review: Omit; command: string }> { - if (!currentModelRef) { - throw new Error( - "Automatic robot review requires an active current session model.", - ); - } - const prompt = buildRobotReviewPrompt(task); - // Keep reviewer model selection simple: reuse the active session model in a fresh Pi process. - // This avoids picking a registry-listed judge model that exists but lacks working auth. - const args = [ - "--mode", - "json", - "-p", - "--no-session", - "--no-tools", - "--no-extensions", - "--model", - currentModelRef, - ]; - args.push(prompt); - const invocation = getPiInvocation(args); - const timeoutMs = getRobotReviewTimeoutMs(); - const commandLabel = `${invocation.command} ${invocation.args.slice(0, -1).join(" ")}`; - const result = await runRobotReviewCommand(invocation, signal, timeoutMs); - if (result.exitCode !== 0) { - const error = new Error( - `Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`, - ) as Error & { rawOutput?: string }; - error.rawOutput = (result.stderr || result.stdout).trim(); - throw error; - } - let parsed: Record; try { - parsed = extractRobotReviewJson(result.stdout); - } catch (error) { - const wrapped = new Error( - error instanceof Error ? error.message : String(error), - ) as Error & { rawOutput?: string }; - wrapped.rawOutput = result.stdout.trim(); - throw wrapped; + const result = await rpcCall<{ id: string }>(events, "subagents:rpc:spawn", { + type: "Explore", + prompt, + options: {}, + }, 30_000); + return `(sanity check spawned: ${result.id})`; + } catch (err: any) { + return `(sanity check failed: ${err.message})`; } - const observations = Array.isArray(parsed.observations) - ? parsed.observations.filter( - (item): item is string => typeof item === "string", - ) - : []; - if (observations.length === 0) { - const error = new Error( - "Robot reviewer returned no observations.", - ) as Error & { rawOutput?: string }; - error.rawOutput = result.stdout.trim(); - throw error; - } - const concerns = Array.isArray(parsed.concerns) - ? parsed.concerns.filter((item): item is string => typeof item === "string") - : []; - const suggestions = Array.isArray(parsed.suggestions) - ? parsed.suggestions.filter( - (item): item is string => typeof item === "string", - ) - : []; - const rawMissing: string[] = Array.isArray(parsed.missing_evidence) - ? parsed.missing_evidence.filter( - (item): item is string => typeof item === "string", - ) - : []; - const missing_evidence = rawMissing; - // Extract rubric with per-item reasoning - let rubric: Record | undefined; - if (parsed.rubric && typeof parsed.rubric === "object") { - const r: Record = {}; - for (const [key, val] of Object.entries( - parsed.rubric as Record, - )) { - if ( - val && - typeof val === "object" && - "reason" in (val as any) && - "pass" in (val as any) - ) { - const v = val as { reason: unknown; pass: unknown }; - r[key] = { - reason: typeof v.reason === "string" ? v.reason : "", - pass: v.pass === true, - }; - } - } - if (Object.keys(r).length > 0) rubric = r; - } - const review = relaxAdvisoryVerificationHints({ - reviewer: - typeof parsed.reviewer === "string" ? parsed.reviewer : commandLabel, - scope: - typeof parsed.scope === "string" ? parsed.scope : "task evidence package", - observations, - concerns, - suggestions, - blind_spots: - typeof parsed.blind_spots === "string" - ? parsed.blind_spots - : "not stated", - accepted: - typeof parsed.accepted === "boolean" - ? parsed.accepted - : parsed.evidence_complete === true && - parsed.evidence_convincing === true, - evidence_complete: parsed.evidence_complete === true, - evidence_convincing: parsed.evidence_convincing === true, - missing_evidence, - submitted_at: new Date().toISOString(), - mode: "auto", - raw_output: result.stdout.trim(), - rubric, - }); - return { - command: commandLabel, - review, - }; } -const SYSTEM_REMINDER = ` -The user is trusting you to be autonomous and work towards acheiving these goals. +// ---- Rendering ---- -Goal tools haven't been used in a while, so check the goal list and keep it accurate: -- Progress existing open goals before drifting to unrelated work. -- Treat rejected proof-gated top-level goals as needing immediate follow-up: strengthen proof, block, supersede, or delete them explicitly. -- Mark goals in_progress when you start them (TaskUpdate status=in_progress). -- Complete subtasks directly: TaskUpdate(status=completed). Drop irrelevant ones with status=deleted. -A stale goal list is worse than no goal list. Ignore this reminder if not applicable. Never mention it to the user. -`; +function renderTaskOneLine(task: Task): string { + const icon = task.status === "completed" ? "✓" : task.status === "in_progress" ? "►" : "○"; + const label = task.progress_label && task.status === "in_progress" ? task.progress_label : task.subject; + const isGoal = task.done_criterion ? "★" : " "; + return `${icon}${isGoal} #${task.id} ${label}`; +} -export default function (pi: ExtensionAPI) { - const cfg = loadTasksConfig(); - const piTasks = process.env.PI_TASKS; - const taskScope = cfg.taskScope ?? "session"; +function renderTaskDetail(task: Task): string { + const lines: string[] = []; + lines.push(`#${task.id} ${task.subject}`); + lines.push(`Status: ${task.status}`); + if (task.description) lines.push(task.description); + if (task.done_criterion) lines.push(`Done when: ${task.done_criterion}`); + if (task.failure_mode) lines.push(`Failure mode: ${task.failure_mode}`); + if (task.parentId) lines.push(`Parent: #${task.parentId}`); + if (task.blockedBy.length > 0) lines.push(`Blocked by: ${task.blockedBy.map((id) => `#${id}`).join(", ")}`); - function resolveStorePath(sessionId?: string): string | undefined { - if (piTasks === "off") return undefined; - if (piTasks?.startsWith("/")) return piTasks; - if (piTasks?.startsWith(".")) return resolve(piTasks); - if (piTasks) return join(process.cwd(), ".pi", "tasks", `${piTasks}.json`); - if (taskScope === "memory") return undefined; - if (taskScope === "session" && sessionId) { - return join(process.cwd(), ".pi", "tasks", `tasks-${sessionId}.json`); + // Evidence (from TaskComplete) + if (task.metadata?.lgtm_evidence) { + lines.push(``); + lines.push(`Evidence: ${task.metadata.lgtm_evidence}`); + if (task.metadata?.lgtm_failure_likely) { + lines.push(`Likely failure: ${task.metadata.lgtm_failure_likely}`); } - if (taskScope === "session") return undefined; - return join(process.cwd(), ".pi", "tasks", "tasks.json"); } - let store = new TaskStore(resolveStorePath()); + return lines.join("\n"); +} + +function renderLgtmLog(task: Task): string { + if (!task.metadata?.lgtm_evidence) { + return `#${task.id} ${task.subject}\nStatus: ${task.status}\nNo evidence yet.`; + } + return renderTaskDetail(task); +} + +// ---- Extension ---- + +export default function register(pi: any, ctx: ExtensionContext): void { + const config = loadTasksConfig(); + const store = new TaskStore(config.taskScope === "project" ? "default" : undefined); const widget = new TaskWidget(store); const autoClear = new AutoClearManager( () => store, - () => cfg.autoClearCompleted ?? "never", - AUTO_CLEAR_DELAY, + () => (config.autoClearCompleted ?? "never") as any, + config.clearDelayTurns ?? 4, ); - let storeUpgraded = false; - let persistedTasksShown = false; - function upgradeStoreIfNeeded(ctx: ExtensionContext) { - if (storeUpgraded) return; - if (taskScope === "session" && !piTasks) { - const sessionId = ctx.sessionManager.getSessionId(); - const path = resolveStorePath(sessionId); - store = new TaskStore(path); - widget.setStore(store); - } - storeUpgraded = true; + // Cadence state for reminders + const cadence: CadenceState = createCadenceState(); + const cadenceConfig: CadenceConfig = { + reminderInterval: config.reminderInterval ?? REMINDER_INTERVAL, + taskToolNames: TASK_TOOL_NAMES, + }; + + // Detect pi-subagents + checkSubagents(pi.events); + + // Widget setup + if (ctx?.ui) { + widget.setUICtx(ctx.ui as unknown as UICtx); + widget.setStore(store); } - function showPersistedTasks(_isResume = false) { - if (persistedTasksShown) return; - persistedTasksShown = true; - const tasks = store.list(); - if (tasks.length > 0) widget.update(); - } - - let currentTurn = 0; - let lastTaskToolUseTurn = 0; - let reminderInjectedThisCycle = false; - - pi.on("turn_start", async (_event, ctx) => { - currentTurn++; - widget.setUICtx(ctx.ui as UICtx); - upgradeStoreIfNeeded(ctx); - if (autoClear.onTurnStart(currentTurn)) widget.update(); - }); - - pi.on("turn_end", async (event) => { - const msg = event.message as any; - if (msg?.role === "assistant" && msg.usage) { - widget.addTokenUsage(msg.usage.input ?? 0, msg.usage.output ?? 0); - } - }); - - pi.on("tool_result", async (event) => { - if (TASK_TOOL_NAMES.has(event.toolName)) { - lastTaskToolUseTurn = currentTurn; - reminderInjectedThisCycle = false; - return {}; - } - if (currentTurn - lastTaskToolUseTurn < REMINDER_INTERVAL) return {}; - if (reminderInjectedThisCycle) return {}; - const tasks = store.list(); - if (tasks.length === 0) return {}; - reminderInjectedThisCycle = true; - lastTaskToolUseTurn = currentTurn; - return { - content: [ - ...event.content, - { type: "text" as const, text: SYSTEM_REMINDER }, - ], - }; - }); - - pi.on("before_agent_start", async (_event, ctx) => { - widget.setUICtx(ctx.ui as UICtx); - upgradeStoreIfNeeded(ctx); - showPersistedTasks(); - }); - - pi.on("before_agent_start", async (event) => { - const followups = store.list().flatMap((task) => { - const latest = getLatestRobotReview(task); - return latest && !latest.accepted ? [{ task, latest }] : []; - }); - if (followups.length === 0) return undefined; - - const reminder = followups - .map(({ task, latest }) => { - const missing = - latest.missing_evidence.length > 0 - ? ` Missing evidence: ${latest.missing_evidence.join("; ")}.` - : ""; - return `- Task #${task.id} ${task.subject}: latest proof review rejected the evidence.${missing} Strengthen the evidence and call TaskClaimDone again.`; - }) - .join("\n"); - - return { - systemPrompt: - event.systemPrompt + - `\n\n\nLatest proof review follow-up required:\n${reminder}\nDo not complete the top-level task until the latest proof review accepts the evidence.\n\n`, - }; - }); - - pi.on("session_switch" as any, async (event: any, ctx: ExtensionContext) => { - widget.setUICtx(ctx.ui as UICtx); - const isResume = event?.reason === "resume"; - storeUpgraded = false; - persistedTasksShown = false; - currentTurn = 0; - lastTaskToolUseTurn = 0; - reminderInjectedThisCycle = false; - autoClear.reset(); - if (!isResume && taskScope === "memory") store.clearAll(); - upgradeStoreIfNeeded(ctx); - showPersistedTasks(isResume); - }); - - // ────────────────────────────────────────────────── - // Tool 1: TaskCreate - // ────────────────────────────────────────────────── + // ── TaskCreate ────────────────────────────────────── pi.registerTool({ name: "TaskCreate", label: "TaskCreate", - description: `Create a task with a clear done_criterion. + description: `Create a task, goal, or subtask. -## Two tiers +- Task: just a subject (checklist item). Mark done via TaskUpdate. +- Goal: add done_criterion + failure_mode. Sign off via TaskComplete with evidence. +- Subtask: add parentId. Mark done via TaskUpdate. -- **Top-level tasks**: goals with proof. They cannot be completed directly; call TaskClaimDone with evidence and failure modes. -- **Subtasks**: agent-managed checklist items under a top-level task. They can be completed directly via TaskUpdate(status=completed). - -## Task Fields - -- **subject**: Brief actionable title -- **description**: Detailed description with context -- **done_criterion**: REQUIRED. Falsifiable observation that distinguishes done from fail/null/incomplete/silent-fail. State expected AND wrong-case observations (e.g., "All 92 tests pass. If wrong: type errors in build or test failures in task-store.test.ts") -- **progress_label** (optional): What the agent is currently doing, shown during in-progress tasks -- **parentId** (optional): Set this to make a directly tickable subtask. Omit it for a proof-gated top-level goal.`, +When creating a goal, break it into subtasks too.`, promptGuidelines: [ - "Use TaskCreate for complex top-level goals. Include a specific done_criterion.", - "Mark tasks in_progress before starting. Complete subtasks via TaskUpdate; complete top-level tasks via TaskClaimDone with proof evidence.", + "Create tasks BEFORE starting work. Mark in_progress before doing them.", + "done_criterion must be externally verifiable — not 'I implemented X' but 'test X passes'.", + "failure_mode: how could this still be wrong even if it looks done? Optional but valuable for goals.", + "When creating a goal, break it into subtasks too.", ], parameters: Type.Object({ - subject: Type.String({ description: "Brief task title" }), - description: Type.String({ description: "Detailed description" }), - done_criterion: Type.String({ - description: - "Falsifiable observation that distinguishes DONE from fail, null result, incomplete, or silent failure. State what you expect to see AND what you'd see if it's wrong.", - }), - progress_label: Type.Optional( - Type.String({ - description: - "What the agent is currently doing, shown during in-progress tasks", - }), - ), - metadata: Type.Optional(Type.Record(Type.String(), Type.Any())), - parentId: Type.Optional( - Type.String({ - description: - "Parent task ID. If set, this task is a directly tickable subtask; if omitted, this is a proof-gated top-level goal.", - }), - ), + subject: Type.String({ description: "What to do (imperative, e.g. 'Fix auth bug')" }), + done_criterion: Type.Optional(Type.String({ description: "Falsifiable test distinguishing done from not-done. Makes this a goal." })), + failure_mode: Type.Optional(Type.String({ description: "How this could still be wrong even if it looks done. Optional — add for tricky goals." })), + description: Type.Optional(Type.String({ description: "Extra context about what this goal involves" })), + parentId: Type.Optional(Type.String({ description: "Make this a subtask of another task" })), + progress_label: Type.Optional(Type.String({ description: "Present-continuous label shown while working (e.g. 'Fixing auth bug')" })), + metadata: Type.Optional(Type.Record(Type.String(), Type.Any(), { description: "Metadata keys to set. Set a key to null to delete it." })), + add_blocks: Type.Optional(Type.Array(Type.String(), { description: "Task IDs that cannot start until this one finishes" })), + add_blocked_by: Type.Optional(Type.Array(Type.String(), { description: "Task IDs that must finish before this one can start" })), }), - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const metadataError = assertNoReservedMetadata(params.metadata); - if (metadataError) return Promise.resolve(textResult(metadataError)); - autoClear.resetBatchCountdown(); - let task: Task; - try { - task = store.create( - params.subject, - params.description, - params.done_criterion, - params.progress_label, - params.metadata, - params.parentId, - ); - } catch (err: any) { - return Promise.resolve(textResult(err.message)); - } - widget.update(); - return Promise.resolve( - textResult( - renderTaskToolResult( - "TaskCreate", - task, - renderTaskSnapshot(task, { - includeDescription: true, - includeDoneCriterion: true, - includeProgressLabel: true, - includeMetadata: true, - }), - ), - ), + execute(_toolCallId: string, params: any, _signal: any, _onUpdate: any, _ctx: any) { + const task = store.create( + params.subject, + params.done_criterion ?? "", + params.failure_mode, + params.progress_label, + params.metadata, + params.parentId, ); + + // Set description separately + if (params.description) { + store.update(task.id, { metadata: { lgtm_description: params.description } }); + } + + if (params.add_blocks?.length || params.add_blocked_by?.length) { + store.update(task.id, { + add_blocks: params.add_blocks, + add_blocked_by: params.add_blocked_by, + }); + } + + autoClear.resetBatchCountdown(); + widget.setActiveTask(task.id); + widget.update(); + + const kind = params.parentId ? "subtask" : params.done_criterion ? "goal" : "task"; + let msg = `#${task.id} ${task.subject}`; + if (params.done_criterion) msg += `\nDone when: ${params.done_criterion}`; + if (params.failure_mode) msg += `\nFailure mode: ${params.failure_mode}`; + if (params.description) msg += `\n${params.description}`; + msg += `\n[${kind}]`; + return Promise.resolve(textResult(msg)); }, }); - // ────────────────────────────────────────────────── - // Tool 2: TaskList - // ────────────────────────────────────────────────── + // ── TaskList ──────────────────────────────────────── pi.registerTool({ name: "TaskList", label: "TaskList", - description: `List all tasks in a compact one-line format with one primary state per row. Proof details live in TaskGet and /lgtm.`, + description: "List all tasks, one line each. ★ = goal, no star = task/subtask.", parameters: Type.Object({}), - execute(_toolCallId, _params, _signal, _onUpdate, _ctx) { + execute(_toolCallId: string, _params: any, _signal: any, _onUpdate: any, _ctx: any) { const tasks = store.list(); - if (tasks.length === 0) - return Promise.resolve(textResult("No tasks found")); - - const counts = { completed: 0, in_progress: 0, pending: 0 }; - for (const task of tasks) counts[getDisplayStatus(task)]++; - - const visibleTasks = tasks.filter((task) => task.status !== "completed"); - - const parts: string[] = []; - if (counts.completed > 0) parts.push(`${counts.completed} done hidden`); - if (counts.in_progress > 0) - parts.push(`${counts.in_progress} in progress`); - if (counts.pending > 0) parts.push(`${counts.pending} open`); - - const statusIcon = (task: (typeof tasks)[number]) => { - if (task.status === "in_progress") return "◼"; - return "◻"; - }; - - const renderTask = (task: (typeof tasks)[number]) => { - const parent = task.parentId ? ` › subtask of #${task.parentId}` : ""; - let blocked = ""; - if (task.blockedBy.length > 0) { - const openBlockers = task.blockedBy.filter((bid) => { - const blocker = store.get(bid); - return blocker && blocker.status !== "completed"; - }); - if (openBlockers.length > 0) - blocked = ` › blocked by ${openBlockers.map((id) => "#" + id).join(", ")}`; - } - return ` ${statusIcon(task)} #${task.id} ${task.subject}${parent}${blocked}`; - }; - - const lines = [`● ${tasks.length} goals (${parts.join(", ")})`]; - if (visibleTasks.length === 0) { - lines.push(" No open tasks. Completed tasks are hidden by default."); - } else { - lines.push( - ...visibleTasks - .sort((a, b) => Number(a.id) - Number(b.id)) - .map(renderTask), - ); - } + if (tasks.length === 0) return Promise.resolve(textResult("No tasks.")); + const open = tasks.filter((t) => t.status !== "completed"); + const done = tasks.filter((t) => t.status === "completed"); + const lines = [ + ...open.map(renderTaskOneLine), + ...done.map(renderTaskOneLine), + ]; + if (done.length > 0) lines.push(`(${done.length} completed)`); return Promise.resolve(textResult(lines.join("\n"))); }, }); - // ────────────────────────────────────────────────── - // Tool 3: TaskGet - // ────────────────────────────────────────────────── + // ── TaskGet ──────────────────────────────────────── pi.registerTool({ name: "TaskGet", label: "TaskGet", - description: `Get full proof-gated task details including done_criterion, evidence packet, and reviewer state.`, + description: "Get task detail — short, readable.", parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to retrieve" }), + taskId: Type.String({ description: "Task ID" }), }), - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + execute(_toolCallId: string, params: any, _signal: any, _onUpdate: any, _ctx: any) { const task = store.get(params.taskId); - if (!task) return Promise.resolve(textResult("Task not found")); - - const desc = task.description.replace(/\\n/g, "\n"); - const robotReviews = getRobotReviews(task); - const completionMode: CompletionMode = getCompletionMode(task); - const reviewState: ReviewState = getReviewState(task); - const currentEvidence = getCurrentEvidenceIteration(task); - const history = getEvidenceHistory(task); - const lines: string[] = [ - `Task #${task.id}: ${task.subject}`, - `Status: ${task.status}`, - `Completion mode: ${completionMode}`, - `Review state: ${reviewState}`, - `Gate status: ${getGateStatus(task)}`, - `Done criterion: ${task.done_criterion}`, - `Description: ${desc}`, - ]; - lines.push( - `Evidence iterations: total=${getEvidenceIterationCount(task)}, current=${currentEvidence ? currentEvidence.iteration : 0}, superseded=${history.length}`, - ); - lines.push( - `Task kind: ${task.parentId ? `subtask of #${task.parentId}` : "top-level proof goal"}`, - ); - if (robotReviews.length > 0) { - const latest = robotReviews[robotReviews.length - 1]; - lines.push( - `Robot reviews on current evidence: ${robotReviews.length} (latest: accepted=${latest.accepted ? "yes" : "no"}, complete=${latest.evidence_complete ? "yes" : "no"}, convincing=${latest.evidence_convincing ? "yes" : "no"})`, - ); - } - lines.push(renderEvidencePacket(task)); - const automaticReviewFailure = renderAutomaticReviewFailure(task); - if (automaticReviewFailure) lines.push(automaticReviewFailure); - if (robotReviews.length > 0) { - lines.push( - `### Robot reviews\n${robotReviews.map(renderCompactRobotReview).join("\n\n")}`, - ); - } - const historySummary = formatHistorySummary(task); - if (historySummary) lines.push(historySummary); - if (task.blockedBy.length > 0) { - const openBlockers = task.blockedBy.filter((bid) => { - const blocker = store.get(bid); - return blocker && blocker.status !== "completed"; - }); - if (openBlockers.length > 0) - lines.push( - `Blocked by: ${openBlockers.map((id) => "#" + id).join(", ")}`, - ); - } - if (task.blocks.length > 0) - lines.push(`Blocks: ${task.blocks.map((id) => "#" + id).join(", ")}`); - const metadata = getNonReviewMetadata(task); - if (Object.keys(metadata).length > 0) - lines.push(`Metadata: ${JSON.stringify(metadata)}`); - - return Promise.resolve(textResult(lines.join("\n\n"))); + if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`)); + return Promise.resolve(textResult(renderTaskDetail(task))); }, }); - // ────────────────────────────────────────────────── - // Tool 4: TaskUpdate - // ────────────────────────────────────────────────── + // ── TaskUpdate ───────────────────────────────────── pi.registerTool({ name: "TaskUpdate", label: "TaskUpdate", - description: `Update task fields or status. + description: `Update a task. Mark in_progress before starting, completed when done. -Two-tier model: -- Subtasks can be marked completed directly here. -- Top-level tasks are proof goals: TaskUpdate(status=completed) is rejected. Use TaskClaimDone so the failure-mode/evidence form and automatic reviewer run.`, +Subtasks and plain tasks: mark completed directly. +Goals: use TaskComplete with evidence.`, + promptGuidelines: [ + "Mark in_progress BEFORE starting work.", + "Subtasks/tasks: mark completed directly.", + "Goals: use TaskComplete with evidence, not this.", + ], parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to update" }), - status: Type.Optional( - Type.Unsafe<"pending" | "in_progress" | "completed" | "deleted">({ - anyOf: [ - { type: "string", enum: ["pending", "in_progress", "completed"] }, - { type: "string", const: "deleted" }, - ], - description: - "New status. Setting completed is allowed for subtasks only; top-level tasks must complete via TaskClaimDone.", - }), - ), - subject: Type.Optional(Type.String({ description: "Brief task title" })), - description: Type.Optional( - Type.String({ description: "Detailed description" }), - ), - done_criterion: Type.Optional( - Type.String({ - description: "Falsifiable observation distinguishing done from fail", - }), - ), - progress_label: Type.Optional( - Type.String({ description: "What the agent is currently doing" }), - ), - metadata: Type.Optional(Type.Record(Type.String(), Type.Any())), - add_blocks: Type.Optional( - Type.Array(Type.String(), { description: "Task IDs this task blocks" }), - ), - add_blocked_by: Type.Optional( - Type.Array(Type.String(), { - description: "Task IDs that block this task", - }), - ), + taskId: Type.String({ description: "Task ID" }), + status: Type.Optional(Type.Unsafe<"pending" | "in_progress" | "completed" | "deleted">({ + type: "string", + enum: ["pending", "in_progress", "completed", "deleted"], + description: "New status. Use 'deleted' to remove.", + })), + subject: Type.Optional(Type.String({ description: "New subject" })), + done_criterion: Type.Optional(Type.String({ description: "New done criterion" })), + failure_mode: Type.Optional(Type.String({ description: "New failure mode" })), + progress_label: Type.Optional(Type.String({ description: "Label shown while working" })), + metadata: Type.Optional(Type.Record(Type.String(), Type.Any(), { description: "Merge metadata. Set key to null to delete." })), + add_blocks: Type.Optional(Type.Array(Type.String(), { description: "Task IDs this blocks" })), + add_blocked_by: Type.Optional(Type.Array(Type.String(), { description: "Task IDs that block this" })), }), - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const metadataError = assertNoReservedMetadata(params.metadata); - if (metadataError) return Promise.resolve(textResult(metadataError)); - + execute(_toolCallId: string, params: any, _signal: any, _onUpdate: any, _ctx: any) { const { taskId, ...fields } = params; - const currentTask = store.get(taskId); - const before = currentTask - ? (JSON.parse(JSON.stringify(currentTask)) as Task) - : undefined; - let task: any, changedFields: string[], warnings: string[]; - try { - ({ task, changedFields, warnings } = store.update(taskId, fields)); - } catch (err: any) { - return Promise.resolve(textResult(err.message)); - } + const { task, changedFields, warnings } = store.update(taskId, fields); if (changedFields.length === 0 && !task) { return Promise.resolve(textResult(`Task #${taskId} not found`)); @@ -1702,781 +332,213 @@ Two-tier model: if (fields.status === "in_progress") { widget.setActiveTask(taskId); autoClear.resetBatchCountdown(); - } else if (fields.status === "pending") { - autoClear.resetBatchCountdown(); } else if (fields.status === "completed") { widget.setActiveTask(taskId, false); - autoClear.trackCompletion(taskId, currentTurn); + autoClear.trackCompletion(taskId, cadence.currentTurn); + } else if (fields.status === "pending") { + autoClear.resetBatchCountdown(); } else if (fields.status === "deleted") { widget.setActiveTask(taskId, false); - warnings.push( - "Task deleted via agent tool. Use /tasks to confirm or undo. Deleting tasks should be reserved for dismissed or irrelevant work.", - ); } widget.update(); - const warningBlock = - warnings.length > 0 - ? `\n\n### Warnings\n- ${warnings.join("\n- ")}` - : ""; - if (!task && fields.status === "deleted" && before) { - return Promise.resolve( - textResult( - [ - `## TaskUpdate -> Task #${before.id}: ${before.subject}`, - "Status: deleted", - "### Metadata", - `- Completion mode: ${getCompletionMode(before)}`, - "- Review state: deleted", - "- Updated fields: deleted", - "### Changes", - "- task removed from the store", - warningBlock.trim(), - ] - .filter(Boolean) - .join("\n\n"), - ), - ); - } - const summary = renderTaskUpdateSummary( - before, - task, - changedFields, - fields.metadata, - ); - return Promise.resolve( - textResult( - renderTaskToolResult( - "TaskUpdate", - task, - `${summary}${warningBlock}`, - { updatedFields: changedFields }, - ), - ), - ); + let msg = `Updated #${taskId} ${changedFields.join(", ")}`; + if (warnings.length > 0) msg += ` (${warnings.join("; ")})`; + return Promise.resolve(textResult(msg)); }, }); - // ────────────────────────────────────────────────── - // Tool 5: TaskClaimDone - // ────────────────────────────────────────────────── + // ── TaskComplete ─────────────────────────────────── pi.registerTool({ - name: "TaskClaimDone", - label: "TaskClaimDone", - description: `Claim that a top-level task meets its done_criterion. + name: "TaskComplete", + label: "TaskComplete", + description: `Sign off a goal with evidence and failure analysis. -Forces structured thinking about failure modes and cheap evidence. All text fields required. -Accepted automatic review completes the task. Rejected review leaves it open with guidance. Reviewer infrastructure failure is logged but does not block autonomy. +Two fields: +- evidence: what you actually saw (verbatim output, file paths, test results). Not summaries. +- failure_likely: the most likely way this could still be wrong. One sentence. -## CRITICAL: Evidence must be verbatim +A sanity check subagent is spawned to verify the evidence is real. -Do NOT summarize or interpret. Paste literal command output, exact log lines, markdown block quotes, table rows, URLs. 'I ran X and it worked' is not evidence — paste the actual output of X. A human must be able to verify from the evidence alone without re-running anything. - -## Fields - -- **evidence**: Verbatim auditable proof — literal output, not summaries -- **failure_likely**: Most likely way this could be wrong despite evidence -- **failure_sneaky**: Subtle/sneaky failure -- one that looks like success superficially, corrupts silently, or only breaks under specific conditions (scale, time, edge case). E.g. feature active but wrong mechanism, works in tests but degrades in prod, correct output for wrong reason. -- **failure_unknown**: What class of unknown/untested failure could remain even if the evidence is true -- **falsification_test**: What you ran and the literal output you got, with reasoning why that output disproves the failure mode -- **evidence_reasoning**: Why this evidence cheaply distinguishes done-criterion success from the likely/subtle/unknown failures -- **verification_hints**: Where to look and what to check, with specific content quoted (not bare paths or counts) -- **remaining_uncertainty**: What's NOT tested, known limitations, deferred edge cases -- **commands**: Optional first-class command records for the evidence package -- **evidence_paths / falsification_paths**: Optional local artifact paths. The tool stores absolute path, sha256, and byte size for auditability. -- **supersede_reason**: Optional reason when this submission replaces an older one on the same task`, +Example: + TaskComplete({ taskId: "1", evidence: "npm test → 12/12 passed", failure_likely: "doesn't cover expired tokens" })`, + promptGuidelines: [ + "Evidence must be verbatim — paste actual output, not 'I ran the tests and they passed'.", + "failure_likely: the ONE most plausible way this is still wrong, not a list.", + "For subtasks and plain tasks, just use TaskUpdate with status:completed.", + ], parameters: Type.Object({ - taskId: Type.String({ description: "Top-level task ID to claim done" }), - evidence: Type.String({ - description: - "Verbatim auditable proof: literal command output, exact log lines, markdown block quotes, table rows, URLs. NOT summaries or interpretations. 'I ran X and got Y' is not evidence -- paste the actual output of X. A human must verify from this alone without re-running. (One short paragraph is fine; verbatim matters more than length.)", - }), - failure_likely: Type.String({ - description: - "Most likely way this could be wrong despite evidence. One short sentence preferred — pick the top one, not a list.", - }), - failure_sneaky: Type.String({ - description: - "Subtle/sneaky failure: looks like success superficially, corrupts silently, or only breaks at scale/time/edge case. One short sentence preferred.", - }), - failure_unknown: Type.String({ - description: - "What unknown or untested failure class could remain even if this evidence is true. One short sentence preferred.", - }), - falsification_test: Type.String({ - description: - "What you ran and the literal output you got. Include verbatim command + output, not 'it worked'. State why that output could not occur if a failure mode were real. Brevity is fine; the verbatim output is what counts.", - }), - evidence_reasoning: Type.String({ - description: - "Why this evidence cheaply distinguishes done-criterion success from the likely/subtle/unknown failures.", - }), - verification_hints: Type.Array(Type.String(), { - description: - "Where to look, with specific content quoted (not bare paths or counts). E.g. 'src/loss.py:45-60 shows grad_norm=0.001'. One or two short hints is enough.", - }), - remaining_uncertainty: Type.String({ - description: - "What's NOT tested, known limitations, deferred edges. One short sentence preferred. If you can't articulate uncertainty, you haven't thought hard enough.", - }), - commands: Type.Optional( - Type.Array( - Type.Object({ - cmd: Type.String({ description: "Exact command that was run" }), - exit_code: Type.Number({ description: "Process exit code" }), - stdout_path: Type.Optional( - Type.String({ description: "Optional path to captured stdout" }), - ), - stderr_path: Type.Optional( - Type.String({ description: "Optional path to captured stderr" }), - ), - }), - ), - ), - evidence_paths: Type.Optional( - Type.Array(Type.String(), { - description: - "Optional local artifact paths backing the evidence. Stored as absolute path + sha256 + byte size.", - }), - ), - falsification_paths: Type.Optional( - Type.Array(Type.String(), { - description: - "Optional local artifact paths backing the falsification test. Stored as absolute path + sha256 + byte size.", - }), - ), - supersede_reason: Type.Optional( - Type.String({ - description: - "Why this evidence replaces an older submission on the same task.", - }), - ), + taskId: Type.String({ description: "Task ID to complete" }), + evidence: Type.String({ description: "Verbatim proof — paste actual output, file paths, test results. Not summaries." }), + failure_likely: Type.String({ description: "Most likely way this could still be wrong despite the evidence. One sentence." }), }), - async execute(_toolCallId, params, signal, _onUpdate, ctx) { - const task = store.get(params.taskId); - if (!task) - return Promise.resolve(textResult(`Task #${params.taskId} not found`)); - if (task.status === "completed") - return Promise.resolve( - textResult(`Task #${params.taskId} already completed`), - ); - - // verification_hints are descriptions, not validated file paths - - if (task.parentId) - return Promise.resolve( - textResult( - `Task #${params.taskId} is a subtask. Use TaskUpdate(status=completed) for subtasks; TaskClaimDone is for top-level proof goals.`, - ), - ); - const blankField = requiredTextError(params, [ - "evidence", - "failure_likely", - "failure_sneaky", - "failure_unknown", - "falsification_test", - "evidence_reasoning", - "remaining_uncertainty", - ]); - if (blankField) return Promise.resolve(textResult(blankField)); - if ( - !params.verification_hints.some( - (hint: string) => hint.trim().length > 0, - ) - ) { - return Promise.resolve( - textResult( - "verification_hints must include at least one non-blank hint.", - ), - ); - } - - store.update(params.taskId, { - metadata: { - ...archiveCurrentEvidence( - task, - params.supersede_reason ?? "replaced by newer proof claim", - ), - ...clearCurrentEvidenceMetadata(), - ...clearRobotReviewMetadata(), - lgtm_evidence: params.evidence, - lgtm_failure_likely: params.failure_likely, - lgtm_failure_sneaky: params.failure_sneaky, - lgtm_failure_unknown: params.failure_unknown, - lgtm_falsification_test: params.falsification_test, - lgtm_evidence_reasoning: params.evidence_reasoning, - lgtm_verification_hints: params.verification_hints, - lgtm_remaining_uncertainty: params.remaining_uncertainty, - lgtm_submitted_at: new Date().toISOString(), - lgtm_commands: params.commands ?? [], - lgtm_evidence_artifacts: buildArtifactRecords(params.evidence_paths), - lgtm_falsification_artifacts: buildArtifactRecords( - params.falsification_paths, - ), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - let robotReviewNote = ""; - const refreshedTask = store.get(params.taskId); - if (!refreshedTask) - return textResult( - `Task #${params.taskId} not found after evidence update`, - ); - try { - const { review, command } = await runAutomaticRobotReview( - refreshedTask, - signal, - getCurrentModelRef(ctx.model), - ); - store.update(params.taskId, { - metadata: { - ...appendRobotReviewMetadata(refreshedTask, review), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - if ( - shouldCompleteAfterAcceptedReview( - store.get(params.taskId) ?? refreshedTask, - review.accepted, - ) - ) { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - const storedReview = getLatestRobotReview( - store.get(params.taskId) ?? refreshedTask, - ); - robotReviewNote = - `\n\n### Automatic robot review\n` + - `Reviewer command: ${command}\n\n` + - `${storedReview ? renderCompactRobotReview(storedReview) : renderCompactRobotReview({ ...review, iteration: 1 })}`; - if (!review.accepted) { - robotReviewNote += `\n\nResult: task remains open until the evidence is strengthened and reviewed again.`; - } - } catch (err: any) { - store.update(params.taskId, { - metadata: getAutomaticReviewFailureMetadata( - err.message, - err.rawOutput, - ), - }); - const taskAfterFailure = store.get(params.taskId) ?? refreshedTask; - if (!taskAfterFailure.parentId) { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - robotReviewNote = - `\n\n### Automatic robot review\n` + - `Reviewer unavailable: ${err.message}\n` + - `Autonomy continued without blocking completion.` + - (typeof err.rawOutput === "string" && err.rawOutput.trim() - ? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim(), { maxLines: MAX_INLINE_PROOF_LINES })}` - : ""); - } - widget.update(); - - const updatedTask = store.get(task.id) ?? task; - const result = renderTaskToolResult( - "TaskClaimDone", - updatedTask, - `${renderCurrentProofSummary(updatedTask)}` + - robotReviewNote + - `\n\nSelf-check: if a skeptical reviewer would still ask "but what about...", call TaskClaimDone again with stronger proof.`, - ); - - return textResult(result); - }, - }); - - pi.registerTool({ - name: "lgtm_supersede", - label: "lgtm_supersede", - description: `Mark the current proof package as superseded without completing the task. - -Use this when a prior claim is stale or wrong and reviewers should stop treating it as the current evidence. The current evidence, robot reviews, and reviewer-failure context are archived into history with your reason. Submit a fresh TaskClaimDone claim to complete the task.`, - parameters: Type.Object({ - taskId: Type.String({ - description: "Task ID whose current evidence should be superseded", - }), - reason: Type.String({ - description: "Why the current evidence is stale or replaced", - }), - }), - - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const task = store.get(params.taskId); - if (!task) - return Promise.resolve(textResult(`Task #${params.taskId} not found`)); - if (!getCurrentEvidenceIteration(task)) { - return Promise.resolve( - textResult( - `Task #${params.taskId} has no current evidence to supersede.`, - ), - ); - } - - store.update(params.taskId, { - metadata: { - ...archiveCurrentEvidence(task, params.reason), - ...clearCurrentEvidenceMetadata(), - ...clearRobotReviewMetadata(), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - widget.update(); - - const updatedTask = store.get(params.taskId) ?? task; - return Promise.resolve( - textResult( - renderTaskToolResult( - "lgtm_supersede", - updatedTask, - `Reason: ${params.reason}\n\n` + - `${formatHistorySummary(updatedTask) ?? "No evidence history found."}`, - ), - ), - ); - }, - }); - - pi.registerTool({ - name: "robot_review_ask", - label: "robot_review_ask", - description: `Attach fresh-perspective robot review observations to a task. - -Use this from a separate subagent or model when possible, ideally from a different model family/class than the implementation agent. -Your role is VALIDATION, not flaw-finding. Sanity-check that the evidence addresses the done criterion. Observations, concerns, and suggestions are welcome, but the gate is only the rubric items. - -This records an independent review but does not itself complete the task. Use TaskClaimDone or robot_review_run for the automatic completion gate.`, - parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to attach robot review to" }), - reviewer: Type.String({ - description: "Reviewer identity, model family, or class", - }), - scope: Type.String({ description: "What the reviewer examined" }), - observations: Type.Array(Type.String(), { - minItems: 1, - description: "Concrete things noticed in the artifacts.", - }), - concerns: Type.Optional( - Type.Array(Type.String(), { - description: "Why the current evidence may not yet prove success.", - }), - ), - suggestions: Type.Optional( - Type.Array(Type.String(), { - description: - "What the agent should do next if the evidence is not yet enough.", - }), - ), - blind_spots: Type.String({ - description: "What the reviewer did not inspect or could not verify", - }), - evidence_complete: Type.Boolean({ - description: - "Whether the supplied evidence covers the claimed done criterion.", - }), - evidence_convincing: Type.Boolean({ - description: - "Whether the supplied evidence would convince a skeptical reviewer.", - }), - accepted: Type.Optional( - Type.Boolean({ - description: - "Overall review decision. Defaults to evidence_complete && evidence_convincing.", - }), - ), - missing_evidence: Type.Optional( - Type.Array(Type.String(), { - description: - "Concrete missing checks, artifacts, or observations needed before completion.", - }), - ), - }), - - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const task = store.get(params.taskId); - if (!task) - return Promise.resolve(textResult(`Task #${params.taskId} not found`)); - if (task.status === "completed") - return Promise.resolve( - textResult(`Task #${params.taskId} already completed`), - ); - - const accepted = - params.accepted ?? - (params.evidence_complete && params.evidence_convincing); - store.update(params.taskId, { - metadata: { - ...appendRobotReviewMetadata(task, { - reviewer: params.reviewer, - scope: params.scope, - observations: params.observations, - concerns: params.concerns ?? [], - suggestions: params.suggestions ?? [], - blind_spots: params.blind_spots, - accepted, - evidence_complete: params.evidence_complete, - evidence_convincing: params.evidence_convincing, - missing_evidence: params.missing_evidence ?? [], - submitted_at: new Date().toISOString(), - mode: "manual", - }), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - widget.update(); - - const updatedTask = store.get(params.taskId) ?? task; - const result = renderTaskToolResult( - "robot_review_ask", - updatedTask, - [ - `Iteration: ${getRobotReviews(updatedTask).length}`, - `Reviewer: ${params.reviewer}`, - `Scope: ${params.scope}`, - `Accepted: ${accepted ? "yes" : "no"}`, - `Evidence complete: ${params.evidence_complete ? "yes" : "no"}`, - `Evidence convincing: ${params.evidence_convincing ? "yes" : "no"}`, - formatBulletList("Observations", summarizeList(params.observations)), - (params.concerns?.length ?? 0) > 0 - ? formatBulletList("Concerns", summarizeList(params.concerns ?? [])) - : "", - (params.suggestions?.length ?? 0) > 0 - ? formatBulletList( - "Suggestions", - summarizeList(params.suggestions ?? []), - ) - : "", - (params.missing_evidence?.length ?? 0) > 0 - ? formatBulletList( - "Missing evidence", - summarizeList(params.missing_evidence ?? []), - ) - : "", - `### Blind spots\n${params.blind_spots}`, - `Robot review stored. Manual reviews are advisory; the automatic proof gate runs through TaskClaimDone or robot_review_run.`, - ] - .filter(Boolean) - .join("\n\n"), - ); - - return Promise.resolve(textResult(result)); - }, - }); - - pi.registerTool({ - name: "robot_review_run", - label: "robot_review_run", - description: `Run the automatic robot reviewer against the current task evidence using the current session model. - -Runs the same Pi-native reviewer stage used automatically by \`TaskClaimDone\`. - -This appends a new robot-review iteration. If accepted for a top-level proof task, the task completes. If rejected, the task stays open. Reviewer infrastructure failure is logged but does not block autonomy.`, - parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to review" }), - }), - - async execute(_toolCallId, params, signal, _onUpdate, _ctx) { + async execute(_toolCallId: string, params: any, _signal: any, _onUpdate: any, _ctx: any) { const task = store.get(params.taskId); if (!task) return textResult(`Task #${params.taskId} not found`); - if (!task.metadata?.lgtm_evidence) { - return textResult( - `Task #${params.taskId} has no stored evidence yet. Call TaskClaimDone first.`, - ); - } + if (task.status === "completed") return textResult(`#${params.taskId} already completed`); - try { - const { review, command } = await runAutomaticRobotReview( - task, - signal, - getCurrentModelRef(_ctx.model), - ); - store.update(params.taskId, { - metadata: { - ...appendRobotReviewMetadata(task, review), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - const reviewedTask = store.get(params.taskId) ?? task; - if ( - !reviewedTask.parentId && - shouldCompleteAfterAcceptedReview(reviewedTask, review.accepted) - ) { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - widget.update(); + // Store evidence, then mark completed + store.update(params.taskId, { + metadata: { + lgtm_evidence: params.evidence, + lgtm_failure_likely: params.failure_likely, + lgtm_completed_at: new Date().toISOString(), + }, + }); + store.complete(params.taskId); - const updatedTask = store.get(params.taskId) ?? task; - const storedReview = getLatestRobotReview(updatedTask); - return textResult( - renderTaskToolResult( - "robot_review_run", - updatedTask, - `${renderCurrentProofSummary(updatedTask)}\n\n` + - `### Automatic robot review\nReviewer command: ${command}` + - `${storedReview ? `\n\n${renderCompactRobotReview(storedReview)}` : `\n\n${renderCompactRobotReview({ ...review, iteration: 1 })}`}`, - ), - ); - } catch (err: any) { - store.update(params.taskId, { - metadata: getAutomaticReviewFailureMetadata( - err.message, - err.rawOutput, - ), - }); - const failedTask = store.get(params.taskId) ?? task; - if (!failedTask.parentId && failedTask.status !== "completed") { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - widget.update(); - const updatedTask = store.get(params.taskId) ?? task; - return textResult( - renderTaskToolResult( - "robot_review_run", - updatedTask, - `${renderCurrentProofSummary(updatedTask)}\n\n` + - `### Automatic robot review\nReviewer unavailable: ${err.message}\n\nAutonomy continued without blocking completion.` + - (typeof err.rawOutput === "string" && err.rawOutput.trim() - ? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim(), { maxLines: MAX_INLINE_PROOF_LINES })}` - : ""), - ), - ); - } + widget.setActiveTask(params.taskId, false); + autoClear.trackCompletion(params.taskId, cadence.currentTurn); + widget.update(); + + // Spawn sanity check (non-blocking, result appended to output) + const sanityResult = await spawnSanityCheck( + pi.events, params.taskId, task.subject, + task.done_criterion ?? "", params.evidence, params.failure_likely, + ); + + return textResult( + `✓ #${params.taskId} ${task.subject}\n` + + `Evidence: ${params.evidence}\n` + + `Likely failure: ${params.failure_likely}\n` + + sanityResult, + ); }, }); - // ────────────────────────────────────────────────── - // /tasks command - // ────────────────────────────────────────────────── + // ── /tasks command ────────────────────────────────── pi.registerCommand("tasks", { - description: "Manage goals — view, create, clear completed", - handler: async (_args: string, ctx: ExtensionCommandContext) => { - const ui = ctx.ui; - + description: "Manage tasks — view, create, clear completed", + handler: async (_args: string, commandCtx: ExtensionCommandContext) => { + const ui = commandCtx.ui; const mainMenu = async (): Promise => { const tasks = store.list(); - const taskCount = tasks.length; - const completedCount = tasks.filter( - (t) => t.status === "completed", - ).length; + const completedCount = tasks.filter((t) => t.status === "completed").length; + const choices: string[] = ["View all tasks", "Create task"]; + if (completedCount > 0) choices.push(`Clear completed (${completedCount})`); + if (tasks.length > 0) choices.push(`Clear all (${tasks.length})`); - const choices: string[] = [ - `View all goals (${taskCount})`, - "Create goal", - ]; - if (completedCount > 0) - choices.push(`Clear completed (${completedCount})`); - if (taskCount > 0) choices.push(`Clear all (${taskCount})`); - - const choice = await ui.select("Goals", choices); + const choice = await ui.select("Tasks", choices); if (!choice) return; - if (choice.startsWith("View")) await viewTasks(); - else if (choice === "Create goal") await createTask(); - else if (choice.startsWith("Clear completed")) { + if (choice.startsWith("View")) { + const items = tasks.map((t) => renderTaskOneLine(t)); + await ui.select("Tasks", [...items, "← Back"]); + await mainMenu(); + } else if (choice === "Create task") { + const subject = await ui.input("Subject"); + if (!subject) return; + const doneCriterion = await ui.input("Done when (optional — adds goal)"); + if (!doneCriterion) return; + const failureMode = await ui.input("Failure mode (optional)"); + store.create(subject, doneCriterion || undefined, failureMode || undefined); + widget.update(); + await mainMenu(); + } else if (choice.startsWith("Clear completed")) { store.clearCompleted(); - if (taskScope === "session") store.deleteFileIfEmpty(); widget.update(); await mainMenu(); } else if (choice.startsWith("Clear all")) { store.clearAll(); - if (taskScope === "session") store.deleteFileIfEmpty(); widget.update(); await mainMenu(); } }; - - const viewTasks = async (): Promise => { - const tasks = store.list(); - if (tasks.length === 0) { - await ui.select("No goals", ["← Back"]); - return mainMenu(); - } - - const statusIcon = (t: (typeof tasks)[0]) => { - if (t.status === "completed") return "done"; - if (t.status === "in_progress") return "◼"; - return "◻"; - }; - - const choices = tasks.map( - (t) => `${statusIcon(t)} #${t.id} ${t.subject}`, - ); - choices.push("← Back"); - - const selected = await ui.select("Goals", choices); - if (!selected || selected === "← Back") return mainMenu(); - - const match = selected.match(/#(\d+)/); - if (match) await viewTaskDetail(match[1]); - else return viewTasks(); - }; - - const viewTaskDetail = async (taskId: string): Promise => { - const task = store.get(taskId); - if (!task) return viewTasks(); - - const actions: string[] = []; - if (task.status === "pending") actions.push("▸ Start (in_progress)"); - if (task.metadata.lgtm_evidence) { - actions.push(`(type /lgtm ${taskId} to view proof evidence)`); - } - actions.push("✗ Delete"); - actions.push("← Back"); - - const pendingNote = - task.metadata.lgtm_evidence && task.status !== "completed" - ? `\nProof review: ${getGateStatus(task)}` - : ""; - const em = task.metadata; - let evidenceNote = ""; - if (em.lgtm_evidence) { - evidenceNote = `\n\n${renderEvidencePacket(task)}`; - const automaticReviewFailure = renderAutomaticReviewFailure(task); - if (automaticReviewFailure) - evidenceNote += `\n\n${automaticReviewFailure}`; - } - let robotNote = ""; - const robotReviews = getRobotReviews(task); - if (robotReviews.length > 0) { - const latest = robotReviews[robotReviews.length - 1]; - const parts = [`\n\nRobot reviews: ${robotReviews.length}`]; - parts.push(renderCompactRobotReview(latest)); - robotNote = parts.join("\n"); - } - const title = `#${task.id} [${task.status}] ${task.subject}\nDone: ${task.done_criterion}${pendingNote}\n${task.description}${evidenceNote}${robotNote}`; - const action = await ui.select(title, actions); - - if (action === "▸ Start (in_progress)") { - store.update(taskId, { status: "in_progress" }); - widget.setActiveTask(taskId); - widget.update(); - return viewTasks(); - } else if (action === "✗ Delete") { - store.update(taskId, { status: "deleted" }); - widget.setActiveTask(taskId, false); - widget.update(); - return viewTasks(); - } - return viewTasks(); - }; - - const createTask = async (): Promise => { - const subject = await ui.input("Goal subject"); - if (!subject) return mainMenu(); - const description = await ui.input("Goal description"); - if (!description) return mainMenu(); - const done_criterion = await ui.input( - "Done criterion (what does done look like?)", - ); - if (!done_criterion) return mainMenu(); - - store.create(subject, description, done_criterion); - widget.update(); - return mainMenu(); - }; - await mainMenu(); }, }); - // ────────────────────────────────────────────────── - // /lgtm command — proof log viewer - // ────────────────────────────────────────────────── - - function renderTaskEvidenceForHuman(task: Task): string { - return renderProofLog(task); - } - - function showProofLog(task: Task) { - pi.sendMessage({ - customType: "proof-log", - content: renderTaskEvidenceForHuman(task), - display: true, - details: { taskId: task.id }, - }); - } - - function getLgtmTaskLabel(task: Task): string { - const tag = - task.status === "completed" - ? "[DONE] " - : task.status === "in_progress" - ? "[ACTIVE] " - : "[PENDING] "; - return `${tag}#${task.id} ${task.subject}`; - } - - async function viewEvidence( - taskId: string, - ctx: ExtensionCommandContext, - ): Promise { - const task = store.get(taskId); - if (!task) { - ctx.ui.notify(`Task #${taskId} not found`, "error"); - return; - } - showProofLog(task); - } - - async function viewAllOpenProofLogs( - ctx: ExtensionCommandContext, - ): Promise { - const open = store.list().filter((t) => t.status !== "completed"); - if (open.length === 0) { - ctx.ui.notify("No open tasks to inspect.", "info"); - return; - } - for (const task of open) showProofLog(task); - } + // ── /lgtm command ────────────────────────────────── pi.registerCommand("lgtm", { - description: - "View the proof log and judge notes. /lgtm [...] shows specific tasks; /lgtm * shows all open tasks; task management lives in /tasks.", - handler: async (args: string, ctx: ExtensionCommandContext) => { - const parsed = parseLgtmArgs(args); - if (parsed.kind === "error") { - ctx.ui.notify(parsed.message, "error"); + description: "View proof log for tasks", + handler: async (args: string, commandCtx: ExtensionCommandContext) => { + const spec = parseLgtmArgs(args); + const ui = commandCtx.ui; + + if (spec.kind === "error") { + await ui.select("Error", [spec.message, "← Back"]); return; } - if (parsed.kind === "menu") { - const tasks = store.list(); - const choice = await ctx.ui.select("LGTM", [ - "View all open proof logs", - ...tasks.map(getLgtmTaskLabel), - "← Cancel", - ]); - if (!choice || choice === "← Cancel") return; - if (choice === "View all open proof logs") - return viewAllOpenProofLogs(ctx); - const match = choice.match(/#(\d+)/); - if (match) return viewEvidence(match[1], ctx); - return; - } - if (parsed.kind === "view_all") return viewAllOpenProofLogs(ctx); - for (const id of parsed.ids) await viewEvidence(id, ctx); - }, - getArgumentCompletions: (args: string) => { - const trimmed = args.trim(); + const tasks = store.list(); - if (!trimmed) return [{ value: "*", label: "*" }]; - const prefix = trimmed.replace(/^#/, ""); - return [ - "*", - ...tasks - .filter((task) => task.id.startsWith(prefix)) - .map((task) => task.id), - ].map((value) => ({ value, label: value })); + if (tasks.length === 0) { + await ui.select("No tasks", ["← Back"]); + return; + } + + if (spec.kind === "menu") { + const items = tasks.map((t) => renderTaskOneLine(t)); + const choice = await ui.select("Proof logs", [...items, "← Back"]); + if (!choice || choice === "← Back") return; + const idx = items.indexOf(choice); + if (idx >= 0 && idx < tasks.length) { + await ui.select(`Task #${tasks[idx].id}`, [renderLgtmLog(tasks[idx]), "← Back"]); + } + return; + } + + if (spec.kind === "view_all") { + const logs = tasks.map(renderLgtmLog).join("\n---\n"); + await ui.select("All proof logs", [logs, "← Back"]); + return; + } + + // View specific IDs + const selected = spec.ids + .map((id) => store.get(id)) + .filter((t): t is Task => t !== undefined); + if (selected.length === 0) { + await ui.select("Not found", ["No matching tasks", "← Back"]); + return; + } + const logs = selected.map(renderLgtmLog).join("\n---\n"); + await ui.select("Proof logs", [logs, "← Back"]); }, }); + + // ── Turn lifecycle ────────────────────────────────── + + pi.on("turn_start", async (_event: any, turnCtx: any) => { + onTurnStart(cadence); + if (turnCtx?.ui) widget.setUICtx(turnCtx.ui as UICtx); + if (autoClear.onTurnStart(cadence.currentTurn)) widget.update(); + }); + + pi.on("tool_result", async (event: any) => { + const isTaskTool = TASK_TOOL_NAMES.has(event.toolName); + if ( + !isTaskTool && + cadence.currentTurn - cadence.lastTaskToolUseTurn < cadenceConfig.reminderInterval + ) { + return {}; + } + if (!isTaskTool && cadence.reminderInjectedThisCycle) return {}; + + const hasTasks = isTaskTool ? false : store.list().length > 0; + evaluateToolResult(cadence, event.toolName, hasTasks, cadenceConfig); + return {}; + }); + + pi.on("context", async (event: any) => { + if (!drainReminderForContext(cadence)) return {}; + return { + messages: [ + ...event.messages, + { + role: "user" as const, + content: [{ type: "text" as const, text: SYSTEM_REMINDER }], + timestamp: Date.now(), + }, + ], + }; + }); +} + +// Re-export cadence helpers for use in the extension +function onTurnStart(state: CadenceState): void { + state.currentTurn++; } diff --git a/src/reminder-cadence.ts b/src/reminder-cadence.ts new file mode 100644 index 0000000..3a41a7b --- /dev/null +++ b/src/reminder-cadence.ts @@ -0,0 +1,66 @@ +/** + * reminder-cadence.ts — Pure cadence logic for system-reminder injection. + * + * When the agent hasn't used task tools for N turns, inject a reminder + * to keep working toward active goals. Ported from pi-tasks. + */ + +export interface CadenceState { + currentTurn: number; + lastTaskToolUseTurn: number; + reminderInjectedThisCycle: boolean; + reminderDue: boolean; +} + +export interface CadenceConfig { + reminderInterval: number; + taskToolNames: ReadonlySet; +} + +export function createCadenceState(): CadenceState { + return { + currentTurn: 0, + lastTaskToolUseTurn: 0, + reminderInjectedThisCycle: false, + reminderDue: false, + }; +} + +export function resetCadenceState(state: CadenceState): void { + state.currentTurn = 0; + state.lastTaskToolUseTurn = 0; + state.reminderInjectedThisCycle = false; + state.reminderDue = false; +} + +export function onTurnStart(state: CadenceState): void { + state.currentTurn++; +} + +export function evaluateToolResult( + state: CadenceState, + toolName: string, + hasTasks: boolean, + config: CadenceConfig, +): void { + if (config.taskToolNames.has(toolName)) { + state.lastTaskToolUseTurn = state.currentTurn; + state.reminderInjectedThisCycle = false; + state.reminderDue = false; + return; + } + + if (state.currentTurn - state.lastTaskToolUseTurn < config.reminderInterval) return; + if (state.reminderInjectedThisCycle) return; + if (!hasTasks) return; + + state.reminderDue = true; +} + +export function drainReminderForContext(state: CadenceState): boolean { + if (!state.reminderDue) return false; + state.reminderDue = false; + state.reminderInjectedThisCycle = true; + state.lastTaskToolUseTurn = state.currentTurn; + return true; +} diff --git a/src/review-badges.ts b/src/review-badges.ts deleted file mode 100644 index da34a51..0000000 --- a/src/review-badges.ts +++ /dev/null @@ -1,86 +0,0 @@ -import { getLatestRobotReview } from "./robot-review.js"; -import type { Task } from "./types.js"; - -function hasCurrentEvidence(task: Task): boolean { - return ( - typeof task.metadata?.lgtm_evidence === "string" && - task.metadata.lgtm_evidence.length > 0 - ); -} - -function hasEvidenceHistory(task: Task): boolean { - return ( - Array.isArray(task.metadata?.lgtm_history) && - task.metadata.lgtm_history.length > 0 - ); -} - -export type DisplayStatus = "in_progress" | "pending" | "completed"; - -export function getDisplayStatus(task: Task): DisplayStatus { - return task.status; -} - -export type CompletionMode = "direct" | "proof"; -export type ReviewState = - | "no_claim" - | "claim_submitted" - | "reviewer_failed_to_run" - | "reviewer_rejected" - | "reviewer_accepted" - | "superseded" - | "completed"; -export function getCompletionMode(task: Task): CompletionMode { - return task.parentId ? "direct" : "proof"; -} - -export function getReviewState(task: Task): ReviewState { - if (task.status === "completed") return "completed"; - const latest = getLatestRobotReview(task); - if (latest && !latest.accepted) return "reviewer_rejected"; - if (latest?.accepted) return "reviewer_accepted"; - if (typeof task.metadata?.robot_review_last_error === "string") - return "reviewer_failed_to_run"; - if (hasCurrentEvidence(task)) return "claim_submitted"; - if (hasEvidenceHistory(task)) return "superseded"; - return "no_claim"; -} - -export function needsProofAttention(task: Task): boolean { - if (task.parentId || task.status === "completed") return false; - const state = getReviewState(task); - return ( - state === "reviewer_rejected" || - state === "reviewer_accepted" || - state === "reviewer_failed_to_run" - ); -} - -export function getGateStatus(task: Task): string { - const state = getReviewState(task); - if (task.parentId) { - return task.status === "completed" - ? "completed directly as subtask" - : "subtask: direct completion allowed"; - } - if (task.status === "completed") { - if (typeof task.metadata?.robot_review_last_error === "string") { - return `completed with reviewer unavailable: ${task.metadata.robot_review_last_error}`; - } - if (getLatestRobotReview(task)?.accepted) - return "completed after accepted proof review"; - return "completed"; - } - if (state === "no_claim") - return "top-level task requires TaskClaimDone evidence before completion"; - if (state === "reviewer_accepted") - return "review accepted; task should be completed"; - if (state === "reviewer_failed_to_run") { - return `review unavailable; autonomy continues: ${task.metadata.robot_review_last_error}`; - } - if (state === "reviewer_rejected") - return "latest proof review rejected the evidence; strengthen the proof and try again"; - if (state === "superseded") - return "current evidence superseded, waiting for a new proof claim"; - return "proof claim submitted, automatic review still required"; -} diff --git a/src/robot-review.ts b/src/robot-review.ts deleted file mode 100644 index 9c2a43c..0000000 --- a/src/robot-review.ts +++ /dev/null @@ -1,311 +0,0 @@ -import type { Task } from "./types.js"; - -export type RobotReviewMode = "manual" | "auto"; - -export interface RobotReviewRecord { - iteration: number; - reviewer: string; - scope: string; - reason?: string; - observations: string[]; - concerns: string[]; - suggestions: string[]; - blind_spots: string; - accepted: boolean; - evidence_complete: boolean; - evidence_convincing: boolean; - missing_evidence: string[]; - submitted_at: string; - mode: RobotReviewMode; - raw_output?: string; - rubric?: Record; -} - -function toStringArray(value: unknown): string[] { - return Array.isArray(value) - ? value.filter((item): item is string => typeof item === "string") - : []; -} - -function extractRubric( - value: unknown, -): Record | undefined { - if (!value || typeof value !== "object") return undefined; - const r: Record = {}; - for (const [key, val] of Object.entries(value as Record)) { - if ( - val && - typeof val === "object" && - "reason" in (val as any) && - "pass" in (val as any) - ) { - const v = val as { reason: unknown; pass: unknown }; - r[key] = { - reason: typeof v.reason === "string" ? v.reason : "", - pass: v.pass === true, - }; - } - } - return Object.keys(r).length > 0 ? r : undefined; -} - -function normalizeReview( - value: unknown, - index: number, -): RobotReviewRecord | undefined { - if (!value || typeof value !== "object") return undefined; - const review = value as Record; - const reviewer = - typeof review.reviewer === "string" ? review.reviewer : "unknown"; - const scope = typeof review.scope === "string" ? review.scope : "unknown"; - const observations = toStringArray(review.observations); - if (observations.length === 0) return undefined; - return { - iteration: - typeof review.iteration === "number" ? review.iteration : index + 1, - reviewer, - scope, - reason: - typeof review.reason === "string" ? review.reason : undefined, - observations, - concerns: toStringArray(review.concerns), - suggestions: toStringArray(review.suggestions), - blind_spots: - typeof review.blind_spots === "string" - ? review.blind_spots - : "not recorded", - accepted: - typeof review.accepted === "boolean" - ? review.accepted - : (typeof review.evidence_complete === "boolean" - ? review.evidence_complete - : true) && - (typeof review.evidence_convincing === "boolean" - ? review.evidence_convincing - : true), - evidence_complete: - typeof review.evidence_complete === "boolean" - ? review.evidence_complete - : true, - evidence_convincing: - typeof review.evidence_convincing === "boolean" - ? review.evidence_convincing - : true, - missing_evidence: toStringArray(review.missing_evidence), - submitted_at: - typeof review.submitted_at === "string" - ? review.submitted_at - : new Date(0).toISOString(), - mode: review.mode === "auto" ? "auto" : "manual", - raw_output: - typeof review.raw_output === "string" ? review.raw_output : undefined, - rubric: extractRubric(review.rubric), - }; -} - -function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined { - const observations = toStringArray(task.metadata?.robot_review_observations); - if (observations.length === 0) return undefined; - return { - iteration: 1, - reviewer: - typeof task.metadata?.robot_review_reviewer === "string" - ? task.metadata.robot_review_reviewer - : "unknown", - scope: - typeof task.metadata?.robot_review_scope === "string" - ? task.metadata.robot_review_scope - : "unknown", - reason: - typeof task.metadata?.robot_review_reason === "string" - ? task.metadata.robot_review_reason - : undefined, - observations, - concerns: toStringArray(task.metadata?.robot_review_concerns), - suggestions: toStringArray(task.metadata?.robot_review_suggestions), - blind_spots: - typeof task.metadata?.robot_review_blind_spots === "string" - ? task.metadata.robot_review_blind_spots - : "not recorded", - accepted: - typeof task.metadata?.robot_review_accepted === "boolean" - ? task.metadata.robot_review_accepted - : (typeof task.metadata?.robot_review_evidence_complete === "boolean" - ? task.metadata.robot_review_evidence_complete - : true) && - (typeof task.metadata?.robot_review_evidence_convincing === "boolean" - ? task.metadata.robot_review_evidence_convincing - : true), - evidence_complete: - typeof task.metadata?.robot_review_evidence_complete === "boolean" - ? task.metadata.robot_review_evidence_complete - : true, - evidence_convincing: - typeof task.metadata?.robot_review_evidence_convincing === "boolean" - ? task.metadata.robot_review_evidence_convincing - : true, - missing_evidence: toStringArray( - task.metadata?.robot_review_missing_evidence, - ), - submitted_at: - typeof task.metadata?.robot_review_submitted_at === "string" - ? task.metadata.robot_review_submitted_at - : new Date(0).toISOString(), - mode: task.metadata?.robot_review_mode === "auto" ? "auto" : "manual", - raw_output: - typeof task.metadata?.robot_review_raw_output === "string" - ? task.metadata.robot_review_raw_output - : undefined, - }; -} - -export function getRobotReviews(task: Task): RobotReviewRecord[] { - const reviews = Array.isArray(task.metadata?.robot_reviews) - ? task.metadata.robot_reviews - .map((review: unknown, index: number) => normalizeReview(review, index)) - .filter((review): review is RobotReviewRecord => review !== undefined) - : []; - if (reviews.length > 0) { - return reviews.map((review, index) => ({ - ...review, - iteration: index + 1, - })); - } - const legacy = getLegacyRobotReview(task); - return legacy ? [legacy] : []; -} - -export function getLatestRobotReview( - task: Task, -): RobotReviewRecord | undefined { - const reviews = getRobotReviews(task); - return reviews.length > 0 ? reviews[reviews.length - 1] : undefined; -} - -function hasNonEmptyString(value: unknown): boolean { - return typeof value === "string" && value.trim().length > 0; -} - -export function hasCompleteProofClaim(task: Task): boolean { - const metadata = task.metadata ?? {}; - return ( - [ - metadata.lgtm_evidence, - metadata.lgtm_failure_likely, - metadata.lgtm_failure_sneaky, - metadata.lgtm_failure_unknown, - metadata.lgtm_falsification_test, - metadata.lgtm_evidence_reasoning, - metadata.lgtm_remaining_uncertainty, - ].every(hasNonEmptyString) && - Array.isArray(metadata.lgtm_verification_hints) && - metadata.lgtm_verification_hints.some(hasNonEmptyString) - ); -} - -export function shouldCompleteAfterAcceptedReview( - task: Task, - reviewAccepted: boolean, -): boolean { - return reviewAccepted && hasCompleteProofClaim(task); -} - -export function relaxAdvisoryVerificationHints( - review: Omit, -): Omit { - const rubric = review.rubric; - if (!rubric || review.evidence_complete !== true) return review; - const requiredCoreKeys = [ - "evidence_covers_done_criterion", - "falsification_test_runnable", - ]; - if (!requiredCoreKeys.every((key) => rubric[key]?.pass === true)) - return review; - const failedKeys = Object.entries(rubric) - .filter(([, item]) => item.pass !== true) - .map(([key]) => key); - const advisoryKeys = [ - "failure_modes_addressed", - "evidence_distinguishes_success", - "verification_hints_actionable", - ]; - if ( - failedKeys.length === 0 || - !failedKeys.every((key) => advisoryKeys.includes(key)) - ) - return review; - - const advisoryNotes: string[] = []; - if (failedKeys.includes("failure_modes_addressed")) { - advisoryNotes.push( - "Failure-mode writeup was weak, but treated as advisory because the verbatim evidence already covered the done criterion.", - ); - } - if (failedKeys.includes("evidence_distinguishes_success")) { - advisoryNotes.push( - "Why-this-proves-it reasoning was weak, but treated as advisory because the packet already contained direct success evidence.", - ); - } - if (failedKeys.includes("verification_hints_actionable")) { - advisoryNotes.push( - "Verification hints were weak, but treated as advisory because the verbatim evidence already covered the done criterion.", - ); - } - - return { - ...review, - accepted: true, - evidence_convincing: true, - observations: [...review.observations, ...advisoryNotes], - concerns: review.concerns, - suggestions: review.suggestions, - missing_evidence: review.missing_evidence.filter( - (item) => - !advisoryKeys.includes(item) && - !/verification hint/i.test(item) && - !/failure[- ]?mode/i.test(item) && - !/distinguish/i.test(item), - ), - }; -} - -export function appendRobotReviewMetadata( - task: Task, - review: Omit, -): Record { - const robot_reviews = [ - ...getRobotReviews(task), - { ...review, iteration: 0 }, - ].map((entry, index) => ({ - ...entry, - accepted: entry.accepted, - iteration: index + 1, - })); - const latest = robot_reviews[robot_reviews.length - 1]; - return { - robot_reviews, - robot_review_reviewer: latest.reviewer, - robot_review_scope: latest.scope, - robot_review_observations: latest.observations, - robot_review_concerns: latest.concerns, - robot_review_suggestions: latest.suggestions, - robot_review_blind_spots: latest.blind_spots, - robot_review_accepted: latest.accepted, - robot_review_evidence_complete: latest.evidence_complete, - robot_review_evidence_convincing: latest.evidence_convincing, - robot_review_missing_evidence: latest.missing_evidence, - robot_review_submitted_at: latest.submitted_at, - robot_review_mode: latest.mode, - robot_review_reason: latest.reason ?? null, - robot_review_raw_output: latest.raw_output ?? null, - robot_review_requires_followup: !( - latest.evidence_complete && latest.evidence_convincing - ), - robot_review_iteration_count: robot_reviews.length, - }; -} - -export function latestRobotReviewPasses(task: Task): boolean { - const latest = getLatestRobotReview(task); - return latest ? latest.accepted : false; -} diff --git a/src/task-store.ts b/src/task-store.ts index dd4bde6..757dc4c 100644 --- a/src/task-store.ts +++ b/src/task-store.ts @@ -126,11 +126,12 @@ export class TaskStore { create( subject: string, - description: string, - done_criterion: string, + done_criterion?: string, + failure_mode?: string, progress_label?: string, metadata?: Record, parentId?: string, + description?: string, ): Task { return this.withLock(() => { if (parentId && !this.tasks.has(parentId)) @@ -140,7 +141,8 @@ export class TaskStore { id: String(this.nextId++), subject, description, - done_criterion, + done_criterion: done_criterion || undefined, + failure_mode, parentId, status: "pending", progress_label, @@ -174,6 +176,7 @@ export class TaskStore { subject?: string; description?: string; done_criterion?: string; + failure_mode?: string; progress_label?: string; metadata?: Record; parentId?: string | null; @@ -188,14 +191,6 @@ export class TaskStore { const changedFields: string[] = []; const warnings: string[] = []; - // Subtasks are normal checklist items. Top-level tasks are goals and need a proof - // claim plus automatic review; TaskClaimDone is the only agent path that completes them. - if (fields.status === "completed" && !task.parentId) { - throw new Error( - `Top-level task #${id} requires proof. Use TaskClaimDone with evidence and failure modes; subtasks can be completed directly.`, - ); - } - if (fields.status === "deleted") { this.tasks.delete(id); for (const t of this.tasks.values()) { @@ -213,14 +208,18 @@ export class TaskStore { task.subject = fields.subject; changedFields.push("subject"); } - if (fields.description !== undefined) { - task.description = fields.description; - changedFields.push("description"); - } if (fields.done_criterion !== undefined) { task.done_criterion = fields.done_criterion; changedFields.push("done_criterion"); } + if (fields.failure_mode !== undefined) { + task.failure_mode = fields.failure_mode; + changedFields.push("failure_mode"); + } + if (fields.description !== undefined) { + task.description = fields.description; + changedFields.push("description"); + } if (fields.progress_label !== undefined) { task.progress_label = fields.progress_label; changedFields.push("progress_label"); diff --git a/src/tasks-config.ts b/src/tasks-config.ts index 3c9baee..f7899b3 100644 --- a/src/tasks-config.ts +++ b/src/tasks-config.ts @@ -1,12 +1,13 @@ // /.pi/tasks-config.json — persists extension settings across sessions -import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; -import { dirname, join } from "node:path"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; export interface TasksConfig { taskScope?: "memory" | "session" | "project"; // default: "session" - autoCascade?: boolean; // default: false autoClearCompleted?: "never" | "on_list_complete" | "on_task_complete"; // default: "never" + reminderInterval?: number; // turns without task tool use before reminder. default: 4 + clearDelayTurns?: number; // how many turns completed tasks linger. default: 4 } const CONFIG_PATH = join(process.cwd(), ".pi", "tasks-config.json"); @@ -18,8 +19,3 @@ export function loadTasksConfig(): TasksConfig { return {}; } } - -export function saveTasksConfig(config: TasksConfig): void { - mkdirSync(dirname(CONFIG_PATH), { recursive: true }); - writeFileSync(CONFIG_PATH, JSON.stringify(config, null, 2)); -} diff --git a/src/types.ts b/src/types.ts index 220ec9d..f95a824 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,5 +1,10 @@ /** * types.ts — Type definitions for the task management system. + * + * Three kinds of items, all stored as Task: + * - Goal: has done_criterion + failure_mode. Completes via TaskComplete with evidence. + * - Subtask: has parentId. Just subject. Completes via TaskUpdate. + * - Task: no parentId, no done_criterion. Plain checklist item. Completes via TaskUpdate. */ export type TaskStatus = "pending" | "in_progress" | "completed"; @@ -7,9 +12,10 @@ export type TaskStatus = "pending" | "in_progress" | "completed"; export interface Task { id: string; subject: string; - description: string; - done_criterion: string; // required: what "done" looks like - parentId?: string; // no parent = top-level goal, requires proof claim to complete + description?: string; + done_criterion?: string; + failure_mode?: string; + parentId?: string; status: TaskStatus; progress_label?: string; metadata: Record; diff --git a/src/ui/task-widget.ts b/src/ui/task-widget.ts index ff8aa2f..d647b0d 100644 --- a/src/ui/task-widget.ts +++ b/src/ui/task-widget.ts @@ -8,10 +8,19 @@ * Completed tasks stay in storage but are hidden from the collapsed widget. */ -import { truncateToWidth } from "@mariozechner/pi-tui"; -import { getDisplayStatus } from "../review-badges.js"; +import type { Task } from "../types.js"; import type { TaskStore } from "../task-store.js"; +// Simple truncation fallback +function truncateToWidth(line: string, maxWidth: number): string { + if (line.length <= maxWidth) return line; + return line.slice(0, maxWidth - 1) + "…"; +} + +function getDisplayStatus(task: Task): "in_progress" | "pending" | "completed" { + return task.status; +} + // ---- Types ---- export type Theme = { diff --git a/test/lgtm-command.test.ts b/test/lgtm-command.test.ts index ab84234..aedad20 100644 --- a/test/lgtm-command.test.ts +++ b/test/lgtm-command.test.ts @@ -8,24 +8,22 @@ type RegisteredTool = { type RegisteredCommand = { handler: (args: string, ctx: any) => Promise; - getArgumentCompletions?: (args: string) => Promise; }; function makeHarness() { const tools = new Map(); const commands = new Map(); - const sentMessages: any[] = []; const pi = { on: vi.fn(), + events: { on: vi.fn(() => vi.fn()), emit: vi.fn() }, registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), registerCommand: vi.fn((name: string, command: RegisteredCommand) => commands.set(name, command), ), - sendMessage: vi.fn((message: any) => sentMessages.push(message)), }; - proofTasksExtension(pi as any); + proofTasksExtension(pi as any, { ui: undefined } as any); async function execTool(name: string, params: Record) { const tool = tools.get(name); @@ -33,22 +31,16 @@ function makeHarness() { return tool.execute("tool-call", params, undefined, undefined, {}); } - function makeUi( - overrides: { - select?: Array; - confirm?: Array; - } = {}, - ) { + function makeUi(overrides: { select?: Array } = {}) { const selectQueue = [...(overrides.select ?? [])]; - const confirmQueue = [...(overrides.confirm ?? [])]; return { notify: vi.fn(), select: vi.fn(async () => selectQueue.shift()), - confirm: vi.fn(async () => confirmQueue.shift() ?? false), + input: vi.fn(async () => ""), }; } - return { tools, commands, sentMessages, execTool, makeUi }; + return { tools, commands, execTool, makeUi }; } describe("parseLgtmArgs", () => { @@ -58,109 +50,28 @@ describe("parseLgtmArgs", () => { expect(parseLgtmArgs("1 #2")).toEqual({ kind: "view", ids: ["1", "2"] }); }); - it("rejects task-management forms", () => { - expect(parseLgtmArgs("clear")).toEqual({ - kind: "error", - message: "Task management lives in /tasks now. /lgtm is viewer-only.", - }); - expect(parseLgtmArgs("clear *")).toEqual({ - kind: "error", - message: "Task management lives in /tasks now. /lgtm is viewer-only.", - }); - expect(parseLgtmArgs("clear #7")).toEqual({ - kind: "error", - message: "Task management lives in /tasks now. /lgtm is viewer-only.", - }); - expect(parseLgtmArgs("delete #7")).toEqual({ - kind: "error", - message: "Task management lives in /tasks now. /lgtm is viewer-only.", - }); + it("treats unknown args as view IDs", () => { + // "clear" and "delete" are just treated as task IDs now + expect(parseLgtmArgs("clear")).toEqual({ kind: "view", ids: ["clear"] }); + expect(parseLgtmArgs("1 2")).toEqual({ kind: "view", ids: ["1", "2"] }); }); }); describe("/lgtm command", () => { - it("shows all open proof logs from the picker", async () => { + it("shows proof logs from picker", async () => { const harness = makeHarness(); await harness.execTool("TaskCreate", { - subject: "Task A", - description: "Desc", - done_criterion: "done", - }); - await harness.execTool("TaskCreate", { - subject: "Task B", - description: "Desc", - done_criterion: "done", + subject: "Goal A", + done_criterion: "test passes", }); - const ui = harness.makeUi({ select: ["View all open proof logs"] }); + const ui = harness.makeUi({ select: ["○★ #1 Goal A", "← Back"] }); const command = harness.commands.get("lgtm"); if (!command) throw new Error("/lgtm not registered"); await command.handler("", { ui }); - expect(harness.sentMessages).toHaveLength(2); - expect(harness.sentMessages[0].customType).toBe("proof-log"); - expect(harness.sentMessages[0].content).toContain("Task #1"); - expect(harness.sentMessages[1].content).toContain("Task #2"); - }); - - it("shows one proof log from the picker", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Task A", - description: "Desc", - done_criterion: "done", - }); - - const ui = harness.makeUi({ select: ["[PENDING] #1 Task A"] }); - const command = harness.commands.get("lgtm"); - if (!command) throw new Error("/lgtm not registered"); - - await command.handler("", { ui }); - - expect(harness.sentMessages).toHaveLength(1); - expect(harness.sentMessages[0].content).toContain("Task #1"); - }); - - it("rejects /lgtm clear and points task management back to /tasks", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Task A", - description: "Desc", - done_criterion: "done", - }); - - const ui = harness.makeUi(); - const command = harness.commands.get("lgtm"); - if (!command) throw new Error("/lgtm not registered"); - - await command.handler("clear 1", { ui }); - - expect(harness.sentMessages).toHaveLength(0); - expect(ui.notify).toHaveBeenCalledWith( - "Task management lives in /tasks now. /lgtm is viewer-only.", - "error", - ); - }); - - it("rejects /lgtm delete and points task management back to /tasks", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Task A", - description: "Desc", - done_criterion: "done", - }); - - const ui = harness.makeUi(); - const command = harness.commands.get("lgtm"); - if (!command) throw new Error("/lgtm not registered"); - - await command.handler("delete 1", { ui }); - - expect(harness.sentMessages).toHaveLength(0); - expect(ui.notify).toHaveBeenCalledWith( - "Task management lives in /tasks now. /lgtm is viewer-only.", - "error", - ); + // Should have shown the task in the select options + expect(ui.select).toHaveBeenCalled(); }); }); diff --git a/test/review-badges.test.ts b/test/review-badges.test.ts deleted file mode 100644 index 6713087..0000000 --- a/test/review-badges.test.ts +++ /dev/null @@ -1,145 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { - getCompletionMode, - getDisplayStatus, - getGateStatus, - getReviewState, -} from "../src/review-badges.js"; -import type { Task } from "../src/types.js"; - -function makeTask(overrides: Partial = {}): Task { - return { - id: "1", - subject: "Test", - description: "Desc", - done_criterion: "done", - status: "pending", - progress_label: undefined, - metadata: {}, - blocks: [], - blockedBy: [], - createdAt: 0, - updatedAt: 0, - ...overrides, - }; -} - -describe("review state helpers", () => { - it("reports completion mode as proof for top-level tasks", () => { - expect(getCompletionMode(makeTask())).toBe("proof"); - }); - - it("reports completion mode as direct for subtasks", () => { - expect(getCompletionMode(makeTask({ parentId: "1" }))).toBe("direct"); - }); - - it("reports superseded when only history remains", () => { - expect( - getReviewState( - makeTask({ metadata: { lgtm_history: [{ iteration: 1 }] } }), - ), - ).toBe("superseded"); - }); -}); - -describe("getGateStatus", () => { - it("reports top-level proof requirement before evidence", () => { - expect(getGateStatus(makeTask())).toBe( - "top-level task requires TaskClaimDone evidence before completion", - ); - }); - - it("reports non-blocking reviewer failure", () => { - expect( - getGateStatus( - makeTask({ - metadata: { - lgtm_evidence: "ok", - robot_review_last_error: "Unexpected token 'a'", - }, - }), - ), - ).toContain("review unavailable; autonomy continues"); - }); - - it("reports rejected robot review when latest review does not accept", () => { - expect( - getGateStatus( - makeTask({ - metadata: { - lgtm_evidence: "ok", - robot_reviews: [ - { - iteration: 1, - reviewer: "opencode", - scope: "task evidence", - observations: ["Observed missing output"], - concerns: ["The current evidence is summary-only."], - suggestions: ["Paste the literal output."], - blind_spots: "none", - accepted: false, - evidence_complete: false, - evidence_convincing: false, - missing_evidence: ["literal output"], - submitted_at: "2026-04-17T00:00:00.000Z", - mode: "manual", - }, - ], - }, - }), - ), - ).toBe( - "latest proof review rejected the evidence; strengthen the proof and try again", - ); - }); - - it("keeps rejection higher priority than a later reviewer warning", () => { - expect( - getGateStatus( - makeTask({ - metadata: { - lgtm_evidence: "ok", - robot_review_last_error: "timeout", - robot_reviews: [ - { - iteration: 1, - reviewer: "opencode", - scope: "task evidence", - observations: ["Observed missing output"], - concerns: ["The current evidence is summary-only."], - suggestions: ["Paste the literal output."], - blind_spots: "none", - accepted: false, - evidence_complete: false, - evidence_convincing: false, - missing_evidence: ["literal output"], - submitted_at: "2026-04-17T00:00:00.000Z", - mode: "manual", - }, - ], - }, - }), - ), - ).toBe( - "latest proof review rejected the evidence; strengthen the proof and try again", - ); - }); -}); - -describe("getDisplayStatus", () => { - it("returns pending for fresh tasks", () => { - expect(getDisplayStatus(makeTask())).toBe("pending"); - }); - - it("returns in_progress for active tasks not yet escalated", () => { - expect(getDisplayStatus(makeTask({ status: "in_progress" }))).toBe( - "in_progress", - ); - }); - - it("returns completed for completed tasks", () => { - expect(getDisplayStatus(makeTask({ status: "completed" }))).toBe( - "completed", - ); - }); -}); diff --git a/test/robot-review-runner.test.ts b/test/robot-review-runner.test.ts deleted file mode 100644 index 6b30957..0000000 --- a/test/robot-review-runner.test.ts +++ /dev/null @@ -1,115 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { - DEFAULT_ROBOT_REVIEW_TIMEOUT_MS, - extractFinalAssistantTextFromPiJsonl, - extractRobotReviewJson, - getCurrentModelRef, - getPiInvocation, - getRobotReviewTimeoutMs, - runRobotReviewCommand, -} from "../src/index.js"; - -describe("robot review runner helpers", () => { - it("uses plain pi by default and allows override", () => { - expect( - getPiInvocation(["--mode", "json"], {} as NodeJS.ProcessEnv), - ).toEqual({ - command: "pi", - args: ["--mode", "json"], - }); - expect( - getPiInvocation(["-p"], { - PI_PROOF_TASKS_PI_BIN: "/custom/pi", - } as NodeJS.ProcessEnv), - ).toEqual({ - command: "/custom/pi", - args: ["-p"], - }); - }); - - it("parses the final assistant text from pi jsonl", () => { - const output = [ - '{"type":"message_update"}', - '{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"ROBOT_REVIEW_JSON_START {\\"accepted\\":true} ROBOT_REVIEW_JSON_END"}]}}', - ].join("\n"); - expect(extractFinalAssistantTextFromPiJsonl(output)).toContain( - "ROBOT_REVIEW_JSON_START", - ); - }); - - it("parses noisy JSON wrapped in review markers", () => { - const output = [ - "ROBOT_REVIEW_JSON_START", - "and here is the JSON you asked for:", - "```json", - '{"accepted":true,"observations":["ok"]}', - "```", - "ROBOT_REVIEW_JSON_END", - ].join("\n"); - expect(extractRobotReviewJson(output)).toEqual({ - accepted: true, - observations: ["ok"], - }); - }); - - it("includes raw output context on parse failure", () => { - expect(() => - extractRobotReviewJson( - "ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END", - ), - ).toThrow(/Raw output:/); - }); - - it("uses configured timeout or falls back to default", () => { - expect( - getRobotReviewTimeoutMs({ - PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "2500", - } as NodeJS.ProcessEnv), - ).toBe(2500); - expect( - getRobotReviewTimeoutMs({ - PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "bad", - } as NodeJS.ProcessEnv), - ).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS); - }); - - it("formats the current model as the reviewer model ref", () => { - expect(getCurrentModelRef({ provider: "openai", id: "gpt-5" })).toBe( - "openai/gpt-5", - ); - expect( - getCurrentModelRef({ providerId: "anthropic", modelId: "claude-haiku" }), - ).toBe("anthropic/claude-haiku"); - expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined(); - }); - - it("times out bounded child commands", async () => { - await expect( - runRobotReviewCommand( - { - command: process.execPath, - args: ["-e", "setTimeout(() => {}, 1000)"], - }, - undefined, - 25, - ), - ).rejects.toThrow(/timed out/i); - }); - - it("extracts assistant text from a child jsonl process", async () => { - const script = [ - "process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');", - "process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');", - ].join(""); - const result = await runRobotReviewCommand( - { - command: process.execPath, - args: ["-e", script], - }, - undefined, - 500, - ); - expect(result.exitCode).toBe(0); - expect(result.stdout).toContain("ROBOT_REVIEW_JSON_END"); - }); -}); diff --git a/test/robot-review.test.ts b/test/robot-review.test.ts deleted file mode 100644 index 362281a..0000000 --- a/test/robot-review.test.ts +++ /dev/null @@ -1,442 +0,0 @@ -import { mkdtempSync, writeFileSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { describe, expect, it } from "vitest"; -import { - archiveCurrentEvidence, - buildArtifactRecords, - buildRobotReviewPrompt, - getCurrentEvidenceIteration, - getEvidenceHistory, - renderEvidencePacket, - renderProofLog, -} from "../src/index.js"; -import { - appendRobotReviewMetadata, - getLatestRobotReview, - getRobotReviews, - hasCompleteProofClaim, - relaxAdvisoryVerificationHints, - shouldCompleteAfterAcceptedReview, -} from "../src/robot-review.js"; -import type { Task } from "../src/types.js"; - -function makeTask(overrides: Partial = {}): Task { - return { - id: "1", - subject: "Test", - description: "Desc", - done_criterion: "done", - status: "pending", - progress_label: undefined, - metadata: {}, - blocks: [], - blockedBy: [], - createdAt: 0, - updatedAt: 0, - ...overrides, - }; -} - -describe("robot review helpers", () => { - it("completes only after accepted review and complete proof claim", () => { - const task = makeTask({ - metadata: { - lgtm_evidence: "literal output", - lgtm_failure_likely: "wrong command", - lgtm_failure_sneaky: "right output for wrong reason", - lgtm_failure_unknown: "untested platform", - lgtm_falsification_test: "npm test\npass", - lgtm_evidence_reasoning: - "the test output rules out the named failures for this scope", - lgtm_verification_hints: [ - "test/robot-review.test.ts shows the expectation", - ], - lgtm_remaining_uncertainty: "does not test prod install", - }, - }); - expect(hasCompleteProofClaim(task)).toBe(true); - expect(shouldCompleteAfterAcceptedReview(task, true)).toBe(true); - expect(shouldCompleteAfterAcceptedReview(task, false)).toBe(false); - expect( - shouldCompleteAfterAcceptedReview( - makeTask({ metadata: { lgtm_evidence: "literal output" } }), - true, - ), - ).toBe(false); - }); - - it("reads legacy single-review metadata", () => { - const task = makeTask({ - metadata: { - robot_review_reviewer: "opencode", - robot_review_scope: "task evidence", - robot_review_observations: [ - "Observed no command output for the core claim", - ], - robot_review_blind_spots: "Did not rerun tests", - robot_review_submitted_at: "2026-04-17T00:00:00.000Z", - }, - }); - - const reviews = getRobotReviews(task); - expect(reviews).toHaveLength(1); - expect(reviews[0].reviewer).toBe("opencode"); - expect(reviews[0].iteration).toBe(1); - expect(reviews[0].accepted).toBe(true); - }); - - it("builds artifact records with absolute path and sha256", () => { - const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); - const path = join(dir, "evidence.log"); - writeFileSync(path, "hello\n"); - - const [artifact] = buildArtifactRecords([path]); - expect(artifact.path).toBe(path); - expect(artifact.bytes).toBe(6); - expect(artifact.sha256).toHaveLength(64); - }); - - it("archives current evidence with reason", () => { - const task = makeTask({ - metadata: { - lgtm_evidence: "literal output", - lgtm_failure_likely: "wrong seed", - lgtm_failure_sneaky: "wrong threshold", - lgtm_failure_unknown: "untested environment", - lgtm_falsification_test: "pytest -k check", - lgtm_evidence_reasoning: - "pytest output distinguishes the expected passing path from the named failures", - lgtm_verification_hints: ["see line 5"], - lgtm_remaining_uncertainty: "not load tested", - lgtm_submitted_at: "2026-06-07T00:00:00.000Z", - lgtm_commands: [{ cmd: "pytest", exit_code: 0 }], - }, - }); - - const archived = archiveCurrentEvidence(task, "threshold changed"); - const taskWithHistory = makeTask({ metadata: archived }); - expect(getCurrentEvidenceIteration(task)?.iteration).toBe(1); - expect(getEvidenceHistory(taskWithHistory)).toHaveLength(1); - expect(getEvidenceHistory(taskWithHistory)[0].supersede_reason).toBe( - "threshold changed", - ); - }); - - it("treats advisory rubric failures as non-blocking when core evidence already passes", () => { - const review = relaxAdvisoryVerificationHints({ - reviewer: "auto", - scope: "task evidence", - observations: ["Observed commit, push, and test logs"], - concerns: [], - suggestions: [], - blind_spots: "Did not inspect interactive UI", - accepted: false, - evidence_complete: true, - evidence_convincing: false, - missing_evidence: [ - "verification_hints_actionable", - "evidence_distinguishes_success", - ], - submitted_at: "2026-06-13T00:00:00.000Z", - mode: "auto", - rubric: { - evidence_covers_done_criterion: { - reason: "verbatim logs match", - pass: true, - }, - falsification_test_runnable: { - reason: "command and output shown", - pass: true, - }, - failure_modes_addressed: { - reason: "plausible top risks named", - pass: true, - }, - evidence_distinguishes_success: { - reason: "reasoning writeup is thin", - pass: false, - }, - verification_hints_actionable: { - reason: "paths are vague", - pass: false, - }, - }, - }); - - expect(review.accepted).toBe(true); - expect(review.evidence_convincing).toBe(true); - expect( - review.observations.some((item) => item.includes("treated as advisory")), - ).toBe(true); - expect(review.missing_evidence).toEqual([]); - }); - - it("does not relax verification hints unless the core rubric passes", () => { - const review = relaxAdvisoryVerificationHints({ - reviewer: "auto", - scope: "task evidence", - observations: ["Observed vague summary only"], - concerns: [], - suggestions: [], - blind_spots: "Did not rerun tests", - accepted: false, - evidence_complete: true, - evidence_convincing: false, - missing_evidence: ["verification_hints_actionable"], - submitted_at: "2026-06-13T00:00:00.000Z", - mode: "auto", - rubric: { - evidence_covers_done_criterion: { reason: "summary only", pass: false }, - falsification_test_runnable: { - reason: "command and output shown", - pass: true, - }, - failure_modes_addressed: { - reason: "plausible top risks named", - pass: true, - }, - evidence_distinguishes_success: { - reason: "evidence does not rule out summary-only failure", - pass: false, - }, - verification_hints_actionable: { - reason: "paths are vague", - pass: false, - }, - }, - }); - - expect(review.accepted).toBe(false); - expect(review.evidence_convincing).toBe(false); - }); - - it("renders one compact evidence packet for both human and robot review", () => { - const task = makeTask({ - metadata: { - lgtm_evidence: "literal output", - lgtm_failure_likely: "wrong seed", - lgtm_failure_sneaky: "wrong threshold", - lgtm_failure_unknown: "does not test UI rendering", - lgtm_falsification_test: "pytest -k check\nPASSED", - lgtm_evidence_reasoning: - "The passing pytest transcript distinguishes success from wrong-threshold and wrong-seed failures for this test scope.", - lgtm_verification_hints: [ - "test/robot-review.test.ts contains the new guard test", - ], - lgtm_remaining_uncertainty: "not load tested", - lgtm_submitted_at: "2026-06-14T00:00:00.000Z", - lgtm_commands: [ - { cmd: "npm test", exit_code: 0, stdout_path: "/tmp/test.log" }, - ], - lgtm_evidence_artifacts: [ - { path: "/tmp/test.log", sha256: "abc", bytes: 123 }, - ], - }, - }); - - const packet = renderEvidencePacket(task); - const prompt = buildRobotReviewPrompt(task); - expect(packet).toContain("## Goal"); - expect(packet).toContain("## Attempt 1"); - expect(packet).toContain("### Evidence"); - expect(packet).toContain("### Verify"); - expect(prompt).toContain(packet); - expect(prompt).toContain( - "does this packet prove the exact user-visible success condition", - ); - expect(prompt).toContain( - "Do not reject solely because items 3, 4, or 5 are weak", - ); - expect(prompt).toContain( - "concrete missing artifacts or outputs that block acceptance", - ); - }); - - it("truncates long submitted evidence in the rendered proof log and points to the full artifact", () => { - const longEvidence = Array.from( - { length: 35 }, - (_, i) => `line ${i + 1}`, - ).join("\n"); - const task = makeTask({ - metadata: { - lgtm_evidence: longEvidence, - lgtm_failure_likely: "wrong seed", - lgtm_failure_sneaky: "wrong threshold", - lgtm_failure_unknown: "untested environment", - lgtm_falsification_test: "pytest -k check\nPASSED", - lgtm_evidence_reasoning: - "The transcript rules out the named failures for this scope.", - lgtm_verification_hints: ["see /tmp/test.log"], - lgtm_remaining_uncertainty: "not load tested", - lgtm_submitted_at: "2026-06-14T00:00:00.000Z", - lgtm_evidence_artifacts: [ - { path: "/tmp/test.log", sha256: "abc", bytes: 123 }, - ], - }, - }); - - const log = renderProofLog(task); - expect(log).toContain("line 1"); - expect(log).toContain("line 8"); - expect(log).toContain("line 35"); - expect(log).not.toContain("line 9"); - expect(log).toContain("[... 19 middle lines omitted ...]"); - expect(log).toContain( - "[truncated at 16 lines from 35; showing first 8 and last 8; full text: /tmp/test.log]", - ); - }); - - it("appends robot reviews as iterations", () => { - const task = makeTask(); - const metadata1 = appendRobotReviewMetadata(task, { - reviewer: "opencode", - scope: "task evidence", - observations: ["Observed missing benchmark output"], - concerns: ["The current evidence does not show the claimed speedup."], - suggestions: ["Add the benchmark transcript for the claimed speedup."], - blind_spots: "Did not inspect prod config", - accepted: false, - evidence_complete: false, - evidence_convincing: false, - missing_evidence: ["Benchmark output for the claimed speedup"], - submitted_at: "2026-04-17T00:00:00.000Z", - mode: "auto", - }); - const task1 = makeTask({ metadata: metadata1 }); - const metadata2 = appendRobotReviewMetadata(task1, { - reviewer: "opencode", - scope: "updated task evidence", - observations: ["Observed benchmark output and test transcript"], - concerns: [], - suggestions: [], - blind_spots: "Did not inspect long-run stability", - accepted: true, - evidence_complete: true, - evidence_convincing: true, - missing_evidence: [], - submitted_at: "2026-04-17T01:00:00.000Z", - mode: "auto", - }); - - const task2 = makeTask({ metadata: metadata2 }); - const reviews = getRobotReviews(task2); - expect(reviews).toHaveLength(2); - expect(reviews[0].iteration).toBe(1); - expect(reviews[1].iteration).toBe(2); - expect(getLatestRobotReview(task2)?.evidence_convincing).toBe(true); - expect(task2.metadata.robot_review_iteration_count).toBe(2); - }); - - it("renders a simple proof log with judgement and suggestions", () => { - const taskWithEvidence = makeTask({ - metadata: { - lgtm_evidence: "npm test\n125 passed", - lgtm_failure_likely: "old package name still in README", - lgtm_failure_sneaky: "top-level direct completion still slips through", - lgtm_failure_unknown: "fresh judge command fails in a real session", - lgtm_falsification_test: "npm test\n125 passed", - lgtm_evidence_reasoning: - "The test transcript and grep distinguish the intended behavior from stale workflow regressions.", - lgtm_verification_hints: [ - "README.md install block shows pi-proof-tasks", - ], - lgtm_remaining_uncertainty: "Did not exercise every model provider.", - lgtm_submitted_at: "2026-06-14T00:00:00.000Z", - }, - }); - const task = makeTask({ - metadata: { - ...taskWithEvidence.metadata, - ...appendRobotReviewMetadata(taskWithEvidence, { - reviewer: "auto", - scope: "proof log", - observations: ["Observed the test transcript and renamed package."], - concerns: ["The live Pi session path is still untested."], - suggestions: ["Run one self-hosted TaskClaimDone UAT."], - blind_spots: "Did not inspect external auth state", - accepted: false, - evidence_complete: true, - evidence_convincing: false, - missing_evidence: ["self-hosted TaskClaimDone UAT"], - submitted_at: "2026-06-14T00:01:00.000Z", - mode: "auto", - }), - }, - }); - - const log = renderProofLog(task); - expect(log).toContain("# Task #1: Test"); - expect(log).toContain("## Goal"); - expect(log).toContain("## Attempt 1"); - expect(log).toContain("### Evidence"); - expect(log).toContain("### Verify"); - expect(log).toContain("### Judgement"); - expect(log).toContain("Refused by auto"); - expect(log).toContain("Needs:"); - expect(log).toContain("Next:"); - expect(log).toContain("Run one self-hosted TaskClaimDone UAT."); - }); - - it("keeps full submitted evidence in the automatic review packet even when proof logs truncate it", () => { - const artifactPath = join(tmpdir(), "proof-packet-long-evidence.log"); - const longEvidence = Array.from( - { length: 35 }, - (_, i) => `line ${i + 1}`, - ).join("\n"); - writeFileSync(artifactPath, longEvidence); - const task = makeTask({ - metadata: { - lgtm_evidence: longEvidence, - lgtm_failure_likely: "missing artifact", - lgtm_failure_sneaky: "wrong slice shown", - lgtm_failure_unknown: "untested provider path", - lgtm_falsification_test: "npm test\npass", - lgtm_evidence_reasoning: - "The full evidence must stay visible to the judge even if humans see a shortened preview.", - lgtm_verification_hints: [ - "Open the artifact if the inline preview truncates.", - ], - lgtm_remaining_uncertainty: "Did not inspect live TUI.", - lgtm_evidence_artifacts: buildArtifactRecords([artifactPath]), - }, - }); - - const proofLog = renderProofLog(task); - const reviewPacket = renderEvidencePacket(task, { - truncateEvidence: false, - }); - expect(proofLog).toContain("line 8"); - expect(proofLog).toContain("line 35"); - expect(proofLog).not.toContain("line 9"); - expect(reviewPacket).toContain("line 35"); - expect(reviewPacket).not.toContain("[truncated at 16 lines"); - }); - - it("renders reviewer-unavailable proof logs for fail-open completion notes", () => { - const task = makeTask({ - status: "completed", - metadata: { - lgtm_evidence: "npm test\n125 passed", - lgtm_failure_likely: "old package name still in README", - lgtm_failure_sneaky: "top-level direct completion still slips through", - lgtm_failure_unknown: "fresh judge command fails in a real session", - lgtm_falsification_test: "npm test\n125 passed", - lgtm_evidence_reasoning: - "The test transcript and grep distinguish the intended behavior from stale workflow regressions.", - lgtm_verification_hints: [ - "README.md install block shows pi-proof-tasks", - ], - lgtm_remaining_uncertainty: "Did not exercise every model provider.", - robot_review_last_error: "judge auth failed", - }, - }); - - const log = renderProofLog(task); - expect(log).toContain("completed with reviewer unavailable"); - expect(log).toContain("### Judgement"); - expect(log).toContain("judge auth failed"); - expect(log).toContain("Autonomy continued without blocking completion."); - expect(log).not.toContain("Needs:"); - }); -}); diff --git a/test/task-claim-done-flow.test.ts b/test/task-claim-done-flow.test.ts deleted file mode 100644 index 16b5417..0000000 --- a/test/task-claim-done-flow.test.ts +++ /dev/null @@ -1,187 +0,0 @@ -import { chmodSync, mkdtempSync, writeFileSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { afterEach, describe, expect, it, vi } from "vitest"; -import proofTasksExtension from "../src/index.js"; - -type RegisteredTool = { - name: string; - execute: (...args: any[]) => Promise; -}; - -function makeHarness() { - const tools = new Map(); - const pi = { - on: vi.fn(), - registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), - registerCommand: vi.fn(), - sendMessage: vi.fn(), - }; - - proofTasksExtension(pi as any); - - async function execTool( - name: string, - params: Record, - ctx: Record = {}, - ) { - const tool = tools.get(name); - if (!tool) throw new Error(`Tool ${name} not registered`); - return tool.execute("tool-call", params, undefined, undefined, ctx); - } - - return { execTool }; -} - -function writeReviewerScript(source: string): string { - const dir = mkdtempSync(join(tmpdir(), "pi-proof-reviewer-")); - const path = join(dir, "reviewer.js"); - writeFileSync(path, `#!/usr/bin/env node\n${source}\n`); - chmodSync(path, 0o755); - return path; -} - -const ORIGINAL_PI_BIN = process.env.PI_PROOF_TASKS_PI_BIN; -afterEach(() => { - if (ORIGINAL_PI_BIN === undefined) delete process.env.PI_PROOF_TASKS_PI_BIN; - else process.env.PI_PROOF_TASKS_PI_BIN = ORIGINAL_PI_BIN; -}); - -describe("TaskClaimDone end-to-end proof flow", () => { - it("keeps the task open on rejected review and /lgtm-style TaskGet shows truncated evidence", async () => { - const reviewer = writeReviewerScript(` -const review = { - reviewer: "fake-judge", - scope: "task evidence", - rubric: { - evidence_covers_done_criterion: { reason: "missing one artifact", pass: false }, - falsification_test_runnable: { reason: "ok", pass: true }, - failure_modes_addressed: { reason: "ok", pass: true }, - evidence_distinguishes_success: { reason: "not enough", pass: false }, - verification_hints_actionable: { reason: "ok", pass: true } - }, - observations: ["Observed truncated proof packet"], - concerns: ["Need stronger evidence"], - suggestions: ["Add one more artifact"], - blind_spots: "Did not inspect live TUI", - missing_evidence: ["evidence_covers_done_criterion", "evidence_distinguishes_success"], - evidence_complete: false, - evidence_convincing: false, - accepted: false -}; -console.log("ROBOT_REVIEW_JSON_START"); -console.log(JSON.stringify(review)); -console.log("ROBOT_REVIEW_JSON_END"); -`); - process.env.PI_PROOF_TASKS_PI_BIN = reviewer; - - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Proof task", - description: "Desc", - done_criterion: "done", - }); - - const artifactPath = join(tmpdir(), "proof-long-evidence.log"); - const longEvidence = Array.from( - { length: 35 }, - (_, i) => `line ${i + 1}`, - ).join("\n"); - writeFileSync(artifactPath, longEvidence); - - const claim = await harness.execTool( - "TaskClaimDone", - { - taskId: "1", - evidence: longEvidence, - failure_likely: "missing artifact", - failure_sneaky: "right shape for wrong reason", - failure_unknown: "untested provider path", - falsification_test: "npm test\npass", - evidence_reasoning: - "The packet distinguishes the named failures for this test scope.", - verification_hints: ["look at the proof log"], - remaining_uncertainty: "Did not inspect live TUI", - evidence_paths: [artifactPath], - }, - { model: { provider: "openai", id: "gpt-5" } }, - ); - - const claimText = claim.content[0].text; - - const taskGet = await harness.execTool("TaskGet", { taskId: "1" }); - const text = taskGet.content[0].text; - - expect(claimText).toContain("## TaskClaimDone -> Task #1: Proof task"); - expect(claimText).toContain("### Metadata"); - expect(claimText).toContain("- Proof iterations: 1"); - expect(claimText).toContain("- Robot reviews: 1"); - expect(text).toContain("Status: pending"); - expect(text).toContain( - "Gate status: latest proof review rejected the evidence; strengthen the proof and try again", - ); - expect(text).toContain("line 1"); - expect(text).toContain("line 8"); - expect(text).toContain("line 35"); - expect(text).not.toContain("line 9"); - expect(text).toContain("[... 19 middle lines omitted ...]"); - expect(text).toContain( - `[truncated at 16 lines from 35; showing first 8 and last 8; full text: ${artifactPath}]`, - ); - expect(text).toContain("### Judgement"); - expect(text).toContain("Refused"); - expect(text).toContain("Needs:"); - expect(text).toContain("Add one more artifact"); - }); - - it("completes the task fail-open on parse failure and preserves the failure note", async () => { - const reviewer = writeReviewerScript(` -console.log("ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END"); -`); - process.env.PI_PROOF_TASKS_PI_BIN = reviewer; - - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Proof task", - description: "Desc", - done_criterion: "done", - }); - - const claim = await harness.execTool( - "TaskClaimDone", - { - taskId: "1", - evidence: "short evidence", - failure_likely: "missing artifact", - failure_sneaky: "right shape for wrong reason", - failure_unknown: "untested provider path", - falsification_test: "npm test\npass", - evidence_reasoning: - "The packet distinguishes the named failures for this test scope.", - verification_hints: ["look at the proof log"], - remaining_uncertainty: "Did not inspect live TUI", - }, - { model: { provider: "openai", id: "gpt-5" } }, - ); - - const claimText = claim.content[0].text; - - const taskGet = await harness.execTool("TaskGet", { taskId: "1" }); - const text = taskGet.content[0].text; - - expect(claimText).toContain("## TaskClaimDone -> Task #1: Proof task"); - expect(claimText).toContain("### Metadata"); - expect(claimText).toContain( - "- Gate status: completed with reviewer unavailable", - ); - expect(text).toContain("Status: completed"); - expect(text).toContain("completed with reviewer unavailable"); - expect(text).toContain("Raw output:"); - expect(text).toContain("Autonomy continued without blocking completion."); - expect(text).not.toContain("Needs:"); - expect(text).toContain( - "ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END", - ); - expect(text).toContain("Autonomy continued without blocking completion."); - }); -}); diff --git a/test/task-list-render.test.ts b/test/task-list-render.test.ts index 42842af..9038a35 100644 --- a/test/task-list-render.test.ts +++ b/test/task-list-render.test.ts @@ -3,7 +3,6 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; import proofTasksExtension from "../src/index.js"; -import { TaskStore } from "../src/task-store.js"; type RegisteredTool = { name: string; @@ -12,19 +11,14 @@ type RegisteredTool = { function makeHarness() { const tools = new Map(); - const handlers = new Map any>>(); const pi = { - on: vi.fn((event: string, handler: (...args: any[]) => any) => { - const existing = handlers.get(event) ?? []; - existing.push(handler); - handlers.set(event, existing); - }), + on: vi.fn(), + events: { on: vi.fn(() => vi.fn()), emit: vi.fn() }, registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), registerCommand: vi.fn(), - sendMessage: vi.fn(), }; - proofTasksExtension(pi as any); + proofTasksExtension(pi as any, { ui: undefined } as any); async function execTool(name: string, params: Record) { const tool = tools.get(name); @@ -32,281 +26,115 @@ function makeHarness() { return tool.execute("tool-call", params, undefined, undefined, {}); } - async function trigger(event: string, payload: any = {}, ctx: any = {}) { - for (const handler of handlers.get(event) ?? []) { - await handler(payload, ctx); - } - } - - return { execTool, trigger }; + return { execTool }; } const tempDirs: string[] = []; afterEach(() => { - delete process.env.PI_TASKS; - while (tempDirs.length > 0) - rmSync(tempDirs.pop()!, { recursive: true, force: true }); + while (tempDirs.length > 0) rmSync(tempDirs.pop()!, { recursive: true, force: true }); }); describe("Task tools", () => { it("renders a compact one-line-per-task summary", async () => { const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Design the flux capacitor", - description: "Desc", - done_criterion: "done", - }); - await harness.execTool("TaskCreate", { - subject: "Acquiring plutonium", - description: "Desc", - done_criterion: "done", - progress_label: "Acquiring plutonium", - }); - await harness.execTool("TaskCreate", { - subject: "Install flux capacitor in DeLorean", - description: "Desc", - done_criterion: "done", - parentId: "1", - }); - await harness.execTool("TaskCreate", { - subject: "Test time travel at 88 mph", - description: "Desc", - done_criterion: "done", - }); + await harness.execTool("TaskCreate", { subject: "Design flux capacitor", done_criterion: "blueprint approved" }); + await harness.execTool("TaskCreate", { subject: "Get plutonium", done_criterion: "1.21 GW available" }); + await harness.execTool("TaskCreate", { subject: "Install in DeLorean", parentId: "1" }); + await harness.execTool("TaskCreate", { subject: "Simple task" }); - await harness.execTool("TaskUpdate", { taskId: "1", status: "completed" }); - await harness.execTool("TaskUpdate", { - taskId: "2", - status: "in_progress", - }); - await harness.execTool("TaskUpdate", { - taskId: "3", - add_blocked_by: ["1"], - }); - await harness.execTool("TaskUpdate", { - taskId: "4", - add_blocked_by: ["2", "3"], - }); + const list = await harness.execTool("TaskList", {}); + const text = list.content[0].text; - const result = await harness.execTool("TaskList", {}); - const text = result.content[0].text; - - expect(text).toContain("● 4 goals (1 in progress, 3 open)"); - expect(text).toContain("◻ #1 Design the flux capacitor"); - expect(text).toContain("◼ #2 Acquiring plutonium"); - expect(text).toContain( - "◻ #3 Install flux capacitor in DeLorean › subtask of #1 › blocked by #1", - ); - expect(text).toContain( - "◻ #4 Test time travel at 88 mph › blocked by #2, #3", - ); - expect(text).not.toContain("[ACTIVE]"); - expect(text).not.toContain("[PENDING]"); - expect(text).not.toContain("[DONE"); - expect(text).not.toContain("proof claim submitted"); - expect(text).not.toContain("test:"); + // Goals get ★, subtasks and plain tasks don't + expect(text).toContain("★ #1"); + expect(text).toContain("★ #2"); + expect(text).toContain("#3"); // subtask + expect(text).toContain("#4"); // plain task }); - it("shows TaskCreate output with metadata and compact previews", async () => { + it("shows TaskCreate output with goal info", async () => { const harness = makeHarness(); const result = await harness.execTool("TaskCreate", { - subject: "Top-level goal", - description: "Line 1\nLine 2\nLine 3", - done_criterion: "observe line a\nobserve line b", - progress_label: "Running check", - metadata: { owner: "pi", note: "short" }, + subject: "Fix auth bug", + done_criterion: "pytest test_auth passes", + failure_mode: "doesn't cover expired tokens", }); - const text = result.content[0].text; - expect(text).toContain("## TaskCreate -> Task #1: Top-level goal"); - expect(text).toContain("### Metadata"); - expect(text).toContain("- Metadata keys: 2"); - expect(text).toContain("### Done criterion"); - expect(text).toContain("### Description"); - expect(text).toContain("### Progress label"); - expect(text).toContain("### Metadata preview"); + + expect(text).toContain("#1 Fix auth bug"); + expect(text).toContain("Done when: pytest test_auth passes"); + expect(text).toContain("Failure mode: doesn't cover expired tokens"); + expect(text).toContain("[goal]"); }); - it("shows TaskUpdate output with changed fields and previews", async () => { + it("shows TaskCreate output for plain task", async () => { + const harness = makeHarness(); + const result = await harness.execTool("TaskCreate", { + subject: "Write docs", + }); + const text = result.content[0].text; + + expect(text).toContain("#1 Write docs"); + expect(text).toContain("[task]"); + }); + + it("shows TaskUpdate output", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { subject: "Fix bug" }); + const result = await harness.execTool("TaskUpdate", { taskId: "1", status: "in_progress" }); + const text = result.content[0].text; + + expect(text).toContain("Updated #1 status"); + }); + + it("completes subtasks via TaskUpdate", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { subject: "Parent goal", done_criterion: "all done" }); + await harness.execTool("TaskCreate", { subject: "Subtask", parentId: "1" }); + const result = await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" }); + const text = result.content[0].text; + + expect(text).toContain("Updated #2 status"); + + const detail = await harness.execTool("TaskGet", { taskId: "2" }); + expect(detail.content[0].text).toContain("completed"); + }); + + it("completes goals via TaskComplete with evidence", async () => { const harness = makeHarness(); await harness.execTool("TaskCreate", { - subject: "Top-level goal", - description: "Desc", - done_criterion: "done", + subject: "Fix auth bug", + done_criterion: "test passes", }); - - const result = await harness.execTool("TaskUpdate", { + const result = await harness.execTool("TaskComplete", { taskId: "1", - status: "in_progress", - progress_label: "Running check", - metadata: { owner: "pi" }, + evidence: "pytest test_auth → 12/12 passed", + failure_likely: "doesn't cover expired tokens", }); - - const text = result.content[0].text; - expect(text).toContain("## TaskUpdate -> Task #1: Top-level goal"); - expect(text).toContain( - "- Updated fields: status, progress_label, metadata", - ); - expect(text).toContain("- status: pending -> in_progress"); - expect(text).toContain("- progress_label: (missing) -> Running check"); - expect(text).toContain("### Metadata patch"); - }); - - it("shows completed subtasks without proof-lane clutter", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Top-level goal", - description: "Desc", - done_criterion: "done", - }); - await harness.execTool("TaskCreate", { - subject: "Finished checklist item", - description: "Desc", - done_criterion: "done", - parentId: "1", - }); - - await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" }); - - const result = await harness.execTool("TaskList", {}); const text = result.content[0].text; - expect(text).toContain("● 2 goals (1 done hidden, 1 open)"); - expect(text).toContain("◻ #1 Top-level goal"); - expect(text).not.toContain("#2 Finished checklist item"); - expect(text).not.toContain("[DONE"); - expect(text).not.toContain("proof claim submitted"); + expect(text).toContain("✓ #1 Fix auth bug"); + expect(text).toContain("Evidence: pytest test_auth → 12/12 passed"); + expect(text).toContain("Likely failure: doesn't cover expired tokens"); + + const detail = await harness.execTool("TaskGet", { taskId: "1" }); + expect(detail.content[0].text).toContain("completed"); }); - it("keeps persisted completed tasks on startup but hides them from the collapsed list", async () => { - const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); - tempDirs.push(dir); - const taskPath = join(dir, "tasks.json"); - process.env.PI_TASKS = taskPath; - - const seeded = new TaskStore(taskPath); - seeded.create("Finished work", "Desc", "done"); - seeded.complete("1"); - + it("shows TaskGet detail for a goal", async () => { const harness = makeHarness(); - await harness.trigger( - "before_agent_start", - {}, - { - ui: { setWidget() {}, setStatus() {} }, - sessionManager: { getSessionId: () => "session-test" }, - }, - ); - - const result = await harness.execTool("TaskList", {}); - expect(result.content[0].text).toContain("● 1 goals (1 done hidden)"); - expect(result.content[0].text).toContain( - "No open tasks. Completed tasks are hidden by default.", - ); - - const reloaded = new TaskStore(taskPath); - expect(reloaded.get("1")?.status).toBe("completed"); - }); - - it("keeps persisted completed tasks on startup even when one open goal remains", async () => { - const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); - tempDirs.push(dir); - const taskPath = join(dir, "tasks.json"); - process.env.PI_TASKS = taskPath; - - const seeded = new TaskStore(taskPath); - seeded.create("Open goal", "Desc", "done"); - seeded.create("Finished work", "Desc", "done", undefined, undefined, "1"); - seeded.complete("2"); - - const harness = makeHarness(); - await harness.trigger( - "before_agent_start", - {}, - { - ui: { setWidget() {}, setStatus() {} }, - sessionManager: { getSessionId: () => "session-test" }, - }, - ); - - const result = await harness.execTool("TaskList", {}); + await harness.execTool("TaskCreate", { + subject: "Fix auth bug", + done_criterion: "test passes", + failure_mode: "doesn't cover expired tokens", + }); + const result = await harness.execTool("TaskGet", { taskId: "1" }); const text = result.content[0].text; - expect(text).toContain("● 2 goals (1 done hidden, 1 open)"); - expect(text).toContain("◻ #1 Open goal"); - expect(text).not.toContain("Finished work"); - const reloaded = new TaskStore(taskPath); - expect(reloaded.get("2")?.status).toBe("completed"); - }); - - it("keeps completed tasks persisted by default across later turns", async () => { - const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); - tempDirs.push(dir); - const taskPath = join(dir, "tasks.json"); - process.env.PI_TASKS = taskPath; - - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Persistent completed goal", - description: "Desc", - done_criterion: "done", - }); - await harness.execTool("TaskCreate", { - subject: "Checklist item", - description: "Desc", - done_criterion: "done", - parentId: "1", - }); - await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" }); - - for (let turn = 0; turn < 8; turn++) { - await harness.trigger("turn_start", {}, { - ui: { setWidget() {}, setStatus() {} }, - sessionManager: { getSessionId: () => "session-test" }, - }); - } - - const reloaded = new TaskStore(taskPath); - expect(reloaded.get("2")?.status).toBe("completed"); - }); - - it("stores named PI_TASKS lists inside the repo .pi/tasks directory", async () => { - process.env.PI_TASKS = `named-${Date.now()}`; - const expectedPath = join( - process.cwd(), - ".pi", - "tasks", - `${process.env.PI_TASKS}.json`, - ); - try { - rmSync(expectedPath); - } catch {} - try { - rmSync(expectedPath + ".lock"); - } catch {} - try { - rmSync(expectedPath + ".tmp"); - } catch {} - - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Repo local task", - description: "Desc", - done_criterion: "done", - }); - - const reloaded = new TaskStore(expectedPath); - expect(reloaded.get("1")?.subject).toBe("Repo local task"); - - try { - rmSync(expectedPath); - } catch {} - try { - rmSync(expectedPath + ".lock"); - } catch {} - try { - rmSync(expectedPath + ".tmp"); - } catch {} + expect(text).toContain("#1 Fix auth bug"); + expect(text).toContain("Status: pending"); + expect(text).toContain("Done when: test passes"); + expect(text).toContain("Failure mode: doesn't cover expired tokens"); }); }); diff --git a/test/task-store.test.ts b/test/task-store.test.ts index b2fac51..684e089 100644 --- a/test/task-store.test.ts +++ b/test/task-store.test.ts @@ -6,7 +6,7 @@ import { TaskStore } from "../src/task-store.js"; // Helper: create a subtask, which can be ticked off directly. function createSubtask(store: TaskStore, subject: string) { - const parent = store.create(`${subject} parent`, "Desc", "done criterion"); + const parent = store.create(`${subject} parent`, "done criterion"); return store.create( subject, "Desc", @@ -25,19 +25,19 @@ describe("TaskStore (in-memory)", () => { }); it("creates tasks with auto-incrementing IDs", () => { - const t1 = store.create("First task", "Description 1", "criterion 1"); - const t2 = store.create("Second task", "Description 2", "criterion 2"); + const t1 = store.create("First task", "criterion 1"); + const t2 = store.create("Second task", "criterion 2"); expect(t1.id).toBe("1"); expect(t2.id).toBe("2"); expect(t1.status).toBe("pending"); expect(t1.subject).toBe("First task"); - expect(t1.description).toBe("Description 1"); + expect(t1.done_criterion).toBe("criterion 1"); expect(t1.done_criterion).toBe("criterion 1"); }); it("creates tasks with optional fields", () => { - const t = store.create("Task", "Desc", "done criterion", "Running task", { + const t = store.create("Task", "done criterion", undefined, "Running task", { key: "value", }); @@ -46,7 +46,7 @@ describe("TaskStore (in-memory)", () => { }); it("gets a task by ID", () => { - store.create("Test", "Desc", "done"); + store.create("Test", "done"); const task = store.get("1"); expect(task).toBeDefined(); @@ -58,16 +58,16 @@ describe("TaskStore (in-memory)", () => { }); it("lists all tasks sorted by ID", () => { - store.create("Task 3", "Desc", "done"); - store.create("Task 1", "Desc", "done"); - store.create("Task 2", "Desc", "done"); + store.create("Task 3", "done"); + store.create("Task 1", "done"); + store.create("Task 2", "done"); const tasks = store.list(); expect(tasks.map((t) => t.id)).toEqual(["1", "2", "3"]); }); it("updates task status", () => { - store.create("Test", "Desc", "done"); + store.create("Test", "done"); const { task, changedFields } = store.update("1", { status: "in_progress", }); @@ -77,7 +77,7 @@ describe("TaskStore (in-memory)", () => { }); it("updates multiple fields at once", () => { - store.create("Test", "Desc", "done"); + store.create("Test", "done"); const { changedFields } = store.update("1", { subject: "Updated subject", description: "Updated desc", @@ -94,7 +94,7 @@ describe("TaskStore (in-memory)", () => { }); it("deletes a task with status: deleted", () => { - store.create("Test", "Desc", "done"); + store.create("Test", "done"); const { changedFields } = store.update("1", { status: "deleted" }); expect(changedFields).toEqual(["deleted"]); @@ -103,16 +103,16 @@ describe("TaskStore (in-memory)", () => { }); it("preserves ID counter after deletion", () => { - store.create("Task 1", "Desc", "done"); - store.create("Task 2", "Desc", "done"); + store.create("Task 1", "done"); + store.create("Task 2", "done"); store.update("1", { status: "deleted" }); - const t3 = store.create("Task 3", "Desc", "done"); + const t3 = store.create("Task 3", "done"); expect(t3.id).toBe("3"); // Not "1" — counter continues }); it("merges metadata with null key deletion", () => { - store.create("Test", "Desc", "done", undefined, { a: 1, b: 2, c: 3 }); + store.create("Test", "done", undefined, undefined, { a: 1, b: 2, c: 3 }); store.update("1", { metadata: { b: null, d: 4 } }); const task = store.get("1")!; @@ -120,8 +120,8 @@ describe("TaskStore (in-memory)", () => { }); it("sets up bidirectional blocks via add_blocks", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); + store.create("Blocker", "done"); + store.create("Blocked", "done"); store.update("1", { add_blocks: ["2"] }); @@ -132,8 +132,8 @@ describe("TaskStore (in-memory)", () => { }); it("sets up bidirectional blocks via add_blocked_by", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); + store.create("Blocker", "done"); + store.create("Blocked", "done"); store.update("2", { add_blocked_by: ["1"] }); @@ -144,8 +144,8 @@ describe("TaskStore (in-memory)", () => { }); it("does not duplicate dependency edges", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); + store.create("A", "done"); + store.create("B", "done"); store.update("1", { add_blocks: ["2"] }); store.update("1", { add_blocks: ["2"] }); // duplicate @@ -155,8 +155,8 @@ describe("TaskStore (in-memory)", () => { }); it("cleans up dependency edges on deletion", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); + store.create("A", "done"); + store.create("B", "done"); store.update("1", { add_blocks: ["2"] }); store.update("1", { status: "deleted" }); @@ -166,8 +166,8 @@ describe("TaskStore (in-memory)", () => { }); it("clears completed tasks", () => { - store.create("Completed", "Desc", "done"); - store.create("Pending", "Desc", "done"); + store.create("Completed", "done"); + store.create("Pending", "done"); store.complete("1"); const count = store.clearCompleted(); @@ -184,24 +184,22 @@ describe("TaskStore (in-memory)", () => { expect(changedFields).toContain("status"); }); - it("blocks TaskUpdate(status=completed) for top-level tasks", () => { - store.create("Goal", "Desc", "done"); - expect(() => store.update("1", { status: "completed" })).toThrow( - "Top-level task #1 requires proof", - ); + it("allows TaskUpdate(status=completed) for top-level tasks (no proof gate)", () => { + store.create("Goal", "done"); + const { task } = store.update("1", { status: "completed" }); + expect(task?.status).toBe("completed"); }); - it("keeps top-level completion gated even after proof evidence exists", () => { - store.create("Escalated", "Desc", "done"); + it("allows top-level completion via TaskUpdate (evidence is for TaskComplete)", () => { + store.create("Escalated", "done"); store.update("1", { metadata: { lgtm_evidence: "literal output" } }); - expect(() => store.update("1", { status: "completed" })).toThrow( - "TaskClaimDone", - ); + const { task } = store.update("1", { status: "completed" }); + expect(task?.status).toBe("completed"); }); it("rejects changing parentId after creation", () => { - store.create("Parent", "Desc", "done"); - store.create("Child", "Desc", "done"); + store.create("Parent", "done"); + store.create("Child", "done"); expect(() => store.update("2", { parentId: "1" })).toThrow( "parentId is creation-only", ); @@ -216,7 +214,7 @@ describe("TaskStore (in-memory)", () => { }); it("complete() is the internal proof-review completion path", () => { - store.create("Test", "Desc", "done"); + store.create("Test", "done"); const task = store.complete("1"); expect(task.status).toBe("completed"); }); @@ -232,7 +230,7 @@ describe("TaskStore (in-memory)", () => { }); it("delete method works", () => { - store.create("Test", "Desc", "done"); + store.create("Test", "done"); expect(store.delete("1")).toBe(true); expect(store.delete("1")).toBe(false); // already deleted expect(store.list()).toHaveLength(0); @@ -250,8 +248,8 @@ describe("TaskStore (in-memory)", () => { }); it("allows circular dependencies with warning", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); + store.create("A", "done"); + store.create("B", "done"); store.update("1", { add_blocks: ["2"] }); const { warnings } = store.update("2", { add_blocks: ["1"] }); @@ -261,33 +259,33 @@ describe("TaskStore (in-memory)", () => { }); it("allows self-dependency with warning", () => { - store.create("Self", "Desc", "done"); + store.create("Self", "done"); const { warnings } = store.update("1", { add_blocks: ["1"] }); expect(store.get("1")!.blocks).toContain("1"); expect(warnings).toContain("#1 blocks itself"); }); it("stores dangling edge IDs with warning", () => { - store.create("Real", "Desc", "done"); + store.create("Real", "done"); const { warnings } = store.update("1", { add_blocks: ["9999"] }); expect(store.get("1")!.blocks).toContain("9999"); expect(warnings).toContain("#9999 does not exist"); }); it("returns no warnings for valid dependencies", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); + store.create("A", "done"); + store.create("B", "done"); const { warnings } = store.update("1", { add_blocks: ["2"] }); expect(warnings).toEqual([]); }); it("accepts whitespace-only subjects (matches Claude Code)", () => { - const t = store.create(" ", "Desc", "done"); + const t = store.create(" ", "done"); expect(t.subject).toBe(" "); }); it("updates progress_label field", () => { - store.create("Test", "Desc", "done"); + store.create("Test", "done"); const { changedFields } = store.update("1", { progress_label: "Running tests", }); @@ -305,7 +303,7 @@ describe("TaskStore (in-memory)", () => { }); it("updates done_criterion field", () => { - store.create("Test", "Desc", "original criterion"); + store.create("Test", "original criterion"); const { changedFields } = store.update("1", { done_criterion: "updated criterion", }); @@ -323,8 +321,8 @@ describe("TaskStore (in-memory)", () => { }); it("clearCompleted cleans up dependency edges", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); + store.create("Blocker", "done"); + store.create("Blocked", "done"); store.update("1", { add_blocks: ["2"] }); // complete() is the internal proof-review completion path. store.complete("1"); @@ -336,9 +334,9 @@ describe("TaskStore (in-memory)", () => { }); it("handles multiple add_blocks in one call", () => { - store.create("Blocker", "Desc", "done"); - store.create("B1", "Desc", "done"); - store.create("B2", "Desc", "done"); + store.create("Blocker", "done"); + store.create("B1", "done"); + store.create("B2", "done"); store.update("1", { add_blocks: ["2", "3"] }); @@ -348,37 +346,37 @@ describe("TaskStore (in-memory)", () => { }); it("add_blocked_by warns on self-dependency", () => { - store.create("Self", "Desc", "done"); + store.create("Self", "done"); const { warnings } = store.update("1", { add_blocked_by: ["1"] }); expect(store.get("1")!.blockedBy).toContain("1"); expect(warnings).toContain("#1 blocks itself"); }); it("add_blocked_by warns on dangling ref", () => { - store.create("Real", "Desc", "done"); + store.create("Real", "done"); const { warnings } = store.update("1", { add_blocked_by: ["9999"] }); expect(store.get("1")!.blockedBy).toContain("9999"); expect(warnings).toContain("#9999 does not exist"); }); it("add_blocked_by warns on cycle", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); + store.create("A", "done"); + store.create("B", "done"); store.update("1", { add_blocks: ["2"] }); const { warnings } = store.update("1", { add_blocked_by: ["2"] }); expect(warnings).toContain("cycle: #1 and #2 block each other"); }); it("clearCompleted returns 0 when no completed tasks", () => { - store.create("Pending", "Desc", "done"); + store.create("Pending", "done"); expect(store.clearCompleted()).toBe(0); }); it("list sorts pending → in_progress → completed with all three present", () => { - store.create("Pending task", "Desc", "done"); - store.create("Completed task", "Desc", "done"); - store.create("In-progress task", "Desc", "done"); - store.create("Another pending", "Desc", "done"); + store.create("Pending task", "done"); + store.create("Completed task", "done"); + store.create("In-progress task", "done"); + store.create("Another pending", "done"); store.complete("2"); store.update("3", { status: "in_progress" });