diff --git a/README.md b/README.md index 71af3e7..f6da920 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ A [pi](https://pi.dev) extension that adds proof-gated top-level tasks to task t The core idea: subtasks are normal checklist items, but top-level tasks are goals. Agents cannot mark top-level tasks complete directly. They must call `TaskClaimDone` with auditable evidence, UAT hints, and explicit failure-mode analysis. A fresh judge then accepts or rejects the claim. Accepted review completes the task; rejected review leaves it open with suggestions. -Humans can use `/lgtm` to view the proof log and sanity-check the reviewer notes later. `/lgtm` is intentionally thin: proof viewing lives there, task management stays in `/tasks`. +Humans can use `/lgtm` to view the proof log and sanity-check the reviewer notes later. `/lgtm` is intentionally thin: proof viewing lives there, task management stays in `/tasks`. Long submitted-evidence blocks are previewed inline and truncated after about 16 lines, with the full artifact path shown in the proof log. 
## Install @@ -44,9 +44,8 @@ Stripped: `TaskExecute`, `TaskOutput`, `TaskStop`, `process-tracker.ts`, subagen ## Widget ``` -● 3 tasks (1 done, 1 in progress, 1 open) - ✔ #1 Design schema - ✳ #2 Implementing cache layer… (2m 49s · ↑ 4.1k ↓ 1.2k) +● 3 goals (1 done hidden, 1 in progress, 1 open) + ✳ #2 Implementing cache layer… (2m 49s, ↑ 4.1k ↓ 1.2k) ◻ #3 Load test ``` @@ -142,9 +141,9 @@ Interactive task-management menu: view tasks, create task, delete a selected tas ```text Top-level task: pending -> in_progress -> TaskClaimDone - -> current evidence iteration N 🛠 - -> robot review iteration(s) 🤖 - -> completed ✓ if latest robot review accepts + -> current evidence iteration N + -> robot review iteration(s) + -> completed if latest robot review accepts -> remains open if reviewer rejects -> completed if reviewer infrastructure fails (fail-open, note logged) -> lgtm_supersede or newer TaskClaimDone -> superseded history + fresh current evidence @@ -168,7 +167,7 @@ Override via env: ```bash PI_TASKS=off # in-memory (CI) -PI_TASKS=sprint-1 # named shared list at ~/.pi/tasks/sprint-1.json +PI_TASKS=sprint-1 # named project-local list at .pi/tasks/sprint-1.json PI_TASKS=/abs/path # explicit path PI_TASKS_DEBUG=1 # trace to stderr ``` @@ -178,7 +177,7 @@ PI_TASKS_DEBUG=1 # trace to stderr ```text src/ ├── index.ts # tools + /tasks + /lgtm evidence viewer + widget + event handlers -├── review-badges.ts # Review badge helpers for evidence/review/completion lanes +├── review-badges.ts # Review state helpers for proof/completion lanes ├── robot-review.ts # Robot review iteration storage + compatibility helpers ├── types.ts # Task, TaskStatus types ├── task-store.ts # File-backed store with CRUD, locking, complete() method diff --git a/src/auto-clear.ts b/src/auto-clear.ts index 08464ed..e442775 100644 --- a/src/auto-clear.ts +++ b/src/auto-clear.ts @@ -13,79 +13,83 @@ import type { TaskStore } from "./task-store.js"; export type AutoClearMode = "never" | "on_list_complete" 
| "on_task_complete"; export class AutoClearManager { - /** Per-task: turn when task was marked completed ("on_task_complete" mode). */ - private completedAtTurn = new Map(); - /** Turn when ALL tasks became completed ("on_list_complete" mode). */ - private allCompletedAtTurn: number | null = null; + /** Per-task: turn when task was marked completed ("on_task_complete" mode). */ + private completedAtTurn = new Map(); + /** Turn when ALL tasks became completed ("on_list_complete" mode). */ + private allCompletedAtTurn: number | null = null; - constructor( - private getStore: () => TaskStore, - private getMode: () => AutoClearMode, - /** How many turns completed tasks linger before auto-clearing. */ - private clearDelayTurns = 4, - ) {} + constructor( + private getStore: () => TaskStore, + private getMode: () => AutoClearMode, + /** How many turns completed tasks linger before auto-clearing. */ + private clearDelayTurns = 4, + ) {} - /** Record a task completion. Call AFTER cascade logic. */ - trackCompletion(taskId: string, currentTurn: number): void { - const mode = this.getMode(); - if (mode === "never") return; + /** Record a task completion. Call AFTER cascade logic. */ + trackCompletion(taskId: string, currentTurn: number): void { + const mode = this.getMode(); + if (mode === "never") return; - if (mode === "on_task_complete") { - this.completedAtTurn.set(taskId, currentTurn); - } else if (mode === "on_list_complete") { - this.checkAllCompleted(currentTurn); - } - } + if (mode === "on_task_complete") { + this.completedAtTurn.set(taskId, currentTurn); + } else if (mode === "on_list_complete") { + this.checkAllCompleted(currentTurn); + } + } - /** Check if all tasks are completed and start/reset the batch countdown. 
*/ - private checkAllCompleted(currentTurn: number): void { - const tasks = this.getStore().list(); - if (tasks.length > 0 && tasks.every(t => t.status === "completed")) { - if (this.allCompletedAtTurn === null) this.allCompletedAtTurn = currentTurn; - } else { - this.allCompletedAtTurn = null; - } - } + /** Check if all tasks are completed and start/reset the batch countdown. */ + private checkAllCompleted(currentTurn: number): void { + const tasks = this.getStore().list(); + if (tasks.length > 0 && tasks.every((t) => t.status === "completed")) { + if (this.allCompletedAtTurn === null) + this.allCompletedAtTurn = currentTurn; + } else { + this.allCompletedAtTurn = null; + } + } - /** Reset batch countdown (e.g., when a new task is created or task goes non-completed). */ - resetBatchCountdown(): void { - this.allCompletedAtTurn = null; - } + /** Reset batch countdown (e.g., when a new task is created or task goes non-completed). */ + resetBatchCountdown(): void { + this.allCompletedAtTurn = null; + } - /** Reset all tracking state (e.g., on new session). */ - reset(): void { - this.completedAtTurn.clear(); - this.allCompletedAtTurn = null; - } + /** Reset all tracking state (e.g., on new session). */ + reset(): void { + this.completedAtTurn.clear(); + this.allCompletedAtTurn = null; + } - /** - * Called on each turn start. Deletes tasks whose linger period has expired. - * Returns true if any tasks were cleared. - */ - onTurnStart(currentTurn: number): boolean { - const mode = this.getMode(); - let cleared = false; + /** + * Called on each turn start. Deletes tasks whose linger period has expired. + * Returns true if any tasks were cleared. 
+ */ + onTurnStart(currentTurn: number): boolean { + const mode = this.getMode(); + let cleared = false; - if (mode === "on_task_complete") { - for (const [taskId, turn] of this.completedAtTurn) { - const task = this.getStore().get(taskId); - if (!task || task.status !== "completed") { - // Task was deleted or reverted — drop stale tracking entry - this.completedAtTurn.delete(taskId); - } else if (currentTurn - turn >= this.clearDelayTurns) { - this.getStore().delete(taskId); - this.completedAtTurn.delete(taskId); - cleared = true; - } - } - } else if (mode === "on_list_complete" && this.allCompletedAtTurn !== null) { - if (currentTurn - this.allCompletedAtTurn >= this.clearDelayTurns) { - this.getStore().clearCompleted(); - this.allCompletedAtTurn = null; - cleared = true; - } - } + if (mode === "on_task_complete") { + for (const [taskId, turn] of this.completedAtTurn) { + const task = this.getStore().get(taskId); + if (!task || task.status !== "completed") { + // Task was deleted or reverted — drop stale tracking entry + this.completedAtTurn.delete(taskId); + } else if (currentTurn - turn >= this.clearDelayTurns) { + this.getStore().delete(taskId); + this.completedAtTurn.delete(taskId); + cleared = true; + } + } + } else if ( + mode === "on_list_complete" && + this.allCompletedAtTurn !== null + ) { + if (currentTurn - this.allCompletedAtTurn >= this.clearDelayTurns) { + this.getStore().clearCompleted(); + this.allCompletedAtTurn = null; + cleared = true; + } + } - return cleared; - } + return cleared; + } } diff --git a/src/index.ts b/src/index.ts index 3dae1c8..44693fa 100644 --- a/src/index.ts +++ b/src/index.ts @@ -27,25 +27,28 @@ import { spawn } from "node:child_process"; import { createHash } from "node:crypto"; import { readFileSync } from "node:fs"; import { join, resolve } from "node:path"; -import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; +import type { + ExtensionAPI, + 
ExtensionCommandContext, + ExtensionContext, +} from "@mariozechner/pi-coding-agent"; import { Type } from "@sinclair/typebox"; import { AutoClearManager } from "./auto-clear.js"; import { - type CompletionMode, - getCompletionMode, - getDisplayStatus, - getGateStatus, - getReviewBadges, - getReviewState, - type ReviewState, + type CompletionMode, + getCompletionMode, + getDisplayStatus, + getGateStatus, + getReviewState, + type ReviewState, } from "./review-badges.js"; import { - appendRobotReviewMetadata, - getLatestRobotReview, - getRobotReviews, - type RobotReviewRecord, - relaxAdvisoryVerificationHints, - shouldCompleteAfterAcceptedReview, + appendRobotReviewMetadata, + getLatestRobotReview, + getRobotReviews, + type RobotReviewRecord, + relaxAdvisoryVerificationHints, + shouldCompleteAfterAcceptedReview, } from "./robot-review.js"; import { TaskStore } from "./task-store.js"; import { loadTasksConfig } from "./tasks-config.js"; @@ -53,858 +56,1390 @@ import type { Task } from "./types.js"; import { TaskWidget, type UICtx } from "./ui/task-widget.js"; function textResult(msg: string) { - return { content: [{ type: "text" as const, text: msg }], details: undefined as any }; + return { + content: [{ type: "text" as const, text: msg }], + details: undefined as any, + }; } export type LgtmCommandSpec = - | { kind: "menu" } - | { kind: "view_all" } - | { kind: "view"; ids: string[] } - | { kind: "error"; message: string }; + | { kind: "menu" } + | { kind: "view_all" } + | { kind: "view"; ids: string[] } + | { kind: "error"; message: string }; export function parseLgtmArgs(args: string): LgtmCommandSpec { - const trimmed = args.trim(); - if (!trimmed) return { kind: "menu" }; - if (trimmed === "*") return { kind: "view_all" }; + const trimmed = args.trim(); + if (!trimmed) return { kind: "menu" }; + if (trimmed === "*") return { kind: "view_all" }; - const tokens = trimmed.split(/[\s,]+/).map(token => token.trim()).filter(Boolean); - if (["clear", 
"delete"].includes(tokens[0])) { - return { kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }; - } + const tokens = trimmed + .split(/[\s,]+/) + .map((token) => token.trim()) + .filter(Boolean); + if (["clear", "delete"].includes(tokens[0])) { + return { + kind: "error", + message: "Task management lives in /tasks now. /lgtm is viewer-only.", + }; + } - return { kind: "view", ids: tokens.map(token => token.replace(/^#/, "")).filter(Boolean) }; + return { + kind: "view", + ids: tokens.map((token) => token.replace(/^#/, "")).filter(Boolean), + }; } -const TASK_TOOL_NAMES = new Set(["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "TaskClaimDone", "lgtm_supersede", "robot_review_ask", "robot_review_run"]); +const TASK_TOOL_NAMES = new Set([ + "TaskCreate", + "TaskList", + "TaskGet", + "TaskUpdate", + "TaskClaimDone", + "lgtm_supersede", + "robot_review_ask", + "robot_review_run", +]); const REMINDER_INTERVAL = 4; const AUTO_CLEAR_DELAY = 4; export const DEFAULT_ROBOT_REVIEW_TIMEOUT_MS = 120_000; -type CommandResult = { stdout: string; stderr: string; exitCode: number | null }; +type CommandResult = { + stdout: string; + stderr: string; + exitCode: number | null; +}; export function getPiInvocation( - args: string[], - env: NodeJS.ProcessEnv = process.env, + args: string[], + env: NodeJS.ProcessEnv = process.env, ): { command: string; args: string[] } { - const configured = env.PI_PROOF_TASKS_PI_BIN?.trim(); - return { command: configured || "pi", args }; + const configured = env.PI_PROOF_TASKS_PI_BIN?.trim(); + return { command: configured || "pi", args }; } -export function getRobotReviewTimeoutMs(env: NodeJS.ProcessEnv = process.env): number { - const configured = Number.parseInt(env.PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS ?? "", 10); - return Number.isFinite(configured) && configured > 0 ? 
configured : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS; +export function getRobotReviewTimeoutMs( + env: NodeJS.ProcessEnv = process.env, +): number { + const configured = Number.parseInt( + env.PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS ?? "", + 10, + ); + return Number.isFinite(configured) && configured > 0 + ? configured + : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS; } /** Format pi's current model object as the CLI's provider/model reference. */ export function getCurrentModelRef(model: unknown): string | undefined { - if (!model || typeof model !== "object") return undefined; - const provider = typeof (model as any).provider === "string" - ? (model as any).provider - : typeof (model as any).providerId === "string" - ? (model as any).providerId - : undefined; - const id = typeof (model as any).id === "string" - ? (model as any).id - : typeof (model as any).modelId === "string" - ? (model as any).modelId - : undefined; - return provider && id ? `${provider}/${id}` : undefined; + if (!model || typeof model !== "object") return undefined; + const provider = + typeof (model as any).provider === "string" + ? (model as any).provider + : typeof (model as any).providerId === "string" + ? (model as any).providerId + : undefined; + const id = + typeof (model as any).id === "string" + ? (model as any).id + : typeof (model as any).modelId === "string" + ? (model as any).modelId + : undefined; + return provider && id ? `${provider}/${id}` : undefined; } function getAssistantTextFromPiEvent(event: any): string | undefined { - if (event?.type !== "message_end" || event.message?.role !== "assistant" || !Array.isArray(event.message.content)) { - return undefined; - } - const text = event.message.content.find((part: any) => part?.type === "text")?.text; - return typeof text === "string" ? 
text : undefined; + if ( + event?.type !== "message_end" || + event.message?.role !== "assistant" || + !Array.isArray(event.message.content) + ) { + return undefined; + } + const text = event.message.content.find( + (part: any) => part?.type === "text", + )?.text; + return typeof text === "string" ? text : undefined; } export function extractFinalAssistantTextFromPiJsonl(output: string): string { - let buffer = ""; - let finalAssistantText = ""; - const lines = output.split("\n"); - for (const line of lines) { - if (!line.trim()) continue; - buffer = line; - try { - const text = getAssistantTextFromPiEvent(JSON.parse(line)); - if (text) finalAssistantText = text; - buffer = ""; - } catch { - // ignore malformed line noise from the child process - } - } - if (buffer.trim()) { - try { - const text = getAssistantTextFromPiEvent(JSON.parse(buffer)); - if (text) finalAssistantText = text; - } catch { - // ignore malformed trailing line - } - } - return finalAssistantText; + let buffer = ""; + let finalAssistantText = ""; + const lines = output.split("\n"); + for (const line of lines) { + if (!line.trim()) continue; + buffer = line; + try { + const text = getAssistantTextFromPiEvent(JSON.parse(line)); + if (text) finalAssistantText = text; + buffer = ""; + } catch { + // ignore malformed line noise from the child process + } + } + if (buffer.trim()) { + try { + const text = getAssistantTextFromPiEvent(JSON.parse(buffer)); + if (text) finalAssistantText = text; + } catch { + // ignore malformed trailing line + } + } + return finalAssistantText; } export async function runRobotReviewCommand( - invocation: { command: string; args: string[] }, - signal?: AbortSignal, - timeoutMs = getRobotReviewTimeoutMs(), + invocation: { command: string; args: string[] }, + signal?: AbortSignal, + timeoutMs = getRobotReviewTimeoutMs(), ): Promise { - return new Promise((resolve, reject) => { - const child = spawn(invocation.command, invocation.args, { shell: false, stdio: ["ignore", 
"pipe", "pipe"] }); - const stdoutChunks: Buffer[] = []; - const stderrChunks: Buffer[] = []; - let settled = false; + return new Promise((resolve, reject) => { + const child = spawn(invocation.command, invocation.args, { + shell: false, + stdio: ["ignore", "pipe", "pipe"], + }); + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + let settled = false; - const finish = (fn: () => void) => { - if (settled) return; - settled = true; - fn(); - }; + const finish = (fn: () => void) => { + if (settled) return; + settled = true; + fn(); + }; - const killTimer = setTimeout(() => { - child.kill("SIGTERM"); - finish(() => reject(new Error(`Robot reviewer timed out after ${timeoutMs}ms.`))); - }, timeoutMs); + const killTimer = setTimeout(() => { + child.kill("SIGTERM"); + finish(() => + reject(new Error(`Robot reviewer timed out after ${timeoutMs}ms.`)), + ); + }, timeoutMs); - child.stdout.on("data", (data) => stdoutChunks.push(data)); - child.stderr.on("data", (data) => stderrChunks.push(data)); - child.on("error", (err) => { - clearTimeout(killTimer); - finish(() => reject(err)); - }); - const onAbort = () => { - clearTimeout(killTimer); - child.kill("SIGTERM"); - }; - signal?.addEventListener("abort", onAbort, { once: true }); - child.on("close", (exitCode) => { - clearTimeout(killTimer); - signal?.removeEventListener("abort", onAbort); - if (signal?.aborted) { - finish(() => reject(new Error("aborted"))); - return; - } - const stdout = Buffer.concat(stdoutChunks).toString("utf-8"); - finish(() => resolve({ - stdout: extractFinalAssistantTextFromPiJsonl(stdout) || stdout, - stderr: Buffer.concat(stderrChunks).toString("utf-8"), - exitCode, - })); - }); - }); + child.stdout.on("data", (data) => stdoutChunks.push(data)); + child.stderr.on("data", (data) => stderrChunks.push(data)); + child.on("error", (err) => { + clearTimeout(killTimer); + finish(() => reject(err)); + }); + const onAbort = () => { + clearTimeout(killTimer); + child.kill("SIGTERM"); 
+ }; + signal?.addEventListener("abort", onAbort, { once: true }); + child.on("close", (exitCode) => { + clearTimeout(killTimer); + signal?.removeEventListener("abort", onAbort); + if (signal?.aborted) { + finish(() => reject(new Error("aborted"))); + return; + } + const stdout = Buffer.concat(stdoutChunks).toString("utf-8"); + finish(() => + resolve({ + stdout: extractFinalAssistantTextFromPiJsonl(stdout) || stdout, + stderr: Buffer.concat(stderrChunks).toString("utf-8"), + exitCode, + }), + ); + }); + }); } function summarizeRawOutput(output: string, maxChars = 400): string { - const singleLine = output.replace(/\s+/g, " ").trim(); - if (singleLine.length <= maxChars) return singleLine; - return `${singleLine.slice(0, maxChars)}...`; + const singleLine = output.replace(/\s+/g, " ").trim(); + if (singleLine.length <= maxChars) return singleLine; + return `${singleLine.slice(0, maxChars)}...`; } function stripMarkdownCodeFence(text: string): string { - const trimmed = text.trim(); - const fence = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); - return fence ? fence[1].trim() : trimmed; + const trimmed = text.trim(); + const fence = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i); + return fence ? 
fence[1].trim() : trimmed; } function extractBalancedJsonObject(text: string): string | undefined { - let start = -1; - let depth = 0; - let inString = false; - let escaped = false; + let start = -1; + let depth = 0; + let inString = false; + let escaped = false; - for (let index = 0; index < text.length; index++) { - const char = text[index]; - if (escaped) { - escaped = false; - continue; - } - if (char === "\\") { - escaped = true; - continue; - } - if (char === '"') { - inString = !inString; - continue; - } - if (inString) continue; - if (char === "{") { - if (depth === 0) start = index; - depth++; - continue; - } - if (char === "}") { - if (depth === 0) continue; - depth--; - if (depth === 0 && start >= 0) return text.slice(start, index + 1); - } - } - return undefined; + for (let index = 0; index < text.length; index++) { + const char = text[index]; + if (escaped) { + escaped = false; + continue; + } + if (char === "\\") { + escaped = true; + continue; + } + if (char === '"') { + inString = !inString; + continue; + } + if (inString) continue; + if (char === "{") { + if (depth === 0) start = index; + depth++; + continue; + } + if (char === "}") { + if (depth === 0) continue; + depth--; + if (depth === 0 && start >= 0) return text.slice(start, index + 1); + } + } + return undefined; } interface EvidenceCommandRecord { - cmd: string; - exit_code: number; - stdout_path?: string; - stderr_path?: string; + cmd: string; + exit_code: number; + stdout_path?: string; + stderr_path?: string; } interface EvidenceArtifactRecord { - path: string; - sha256: string; - bytes: number; + path: string; + sha256: string; + bytes: number; } interface EvidenceIterationRecord { - iteration: number; - submitted_at: string; - superseded_at?: string; - supersede_reason?: string; - evidence: string; - failure_likely: string; - failure_sneaky: string; - failure_unknown: string; - falsification_test: string; - evidence_reasoning: string; - verification_hints: string[]; - 
remaining_uncertainty: string; - commands: EvidenceCommandRecord[]; - evidence_artifacts: EvidenceArtifactRecord[]; - falsification_artifacts: EvidenceArtifactRecord[]; - robot_reviews: RobotReviewRecord[]; - automatic_review_failure?: { message: string; raw_output?: string }; + iteration: number; + submitted_at: string; + superseded_at?: string; + supersede_reason?: string; + evidence: string; + failure_likely: string; + failure_sneaky: string; + failure_unknown: string; + falsification_test: string; + evidence_reasoning: string; + verification_hints: string[]; + remaining_uncertainty: string; + commands: EvidenceCommandRecord[]; + evidence_artifacts: EvidenceArtifactRecord[]; + falsification_artifacts: EvidenceArtifactRecord[]; + robot_reviews: RobotReviewRecord[]; + automatic_review_failure?: { message: string; raw_output?: string }; } const AUTOMATIC_REVIEW_ERROR_KEYS = [ - "robot_review_last_error", - "robot_review_last_error_output", - "robot_review_last_error_at", + "robot_review_last_error", + "robot_review_last_error_output", + "robot_review_last_error_at", ] as const; const ROBOT_REVIEW_KEYS = [ - "robot_reviews", - "robot_review_reviewer", - "robot_review_scope", - "robot_review_observations", - "robot_review_concerns", - "robot_review_suggestions", - "robot_review_blind_spots", - "robot_review_accepted", - "robot_review_evidence_complete", - "robot_review_evidence_convincing", - "robot_review_missing_evidence", - "robot_review_submitted_at", - "robot_review_mode", - "robot_review_raw_output", - "robot_review_requires_followup", - "robot_review_iteration_count", + "robot_reviews", + "robot_review_reviewer", + "robot_review_scope", + "robot_review_observations", + "robot_review_concerns", + "robot_review_suggestions", + "robot_review_blind_spots", + "robot_review_accepted", + "robot_review_evidence_complete", + "robot_review_evidence_convincing", + "robot_review_missing_evidence", + "robot_review_submitted_at", + "robot_review_mode", + 
"robot_review_raw_output", + "robot_review_requires_followup", + "robot_review_iteration_count", ] as const; const CURRENT_EVIDENCE_KEYS = [ - "lgtm_evidence", - "lgtm_failure_likely", - "lgtm_failure_sneaky", - "lgtm_failure_unknown", - "lgtm_falsification_test", - "lgtm_evidence_reasoning", - "lgtm_verification_hints", - "lgtm_remaining_uncertainty", - "lgtm_submitted_at", - "lgtm_commands", - "lgtm_evidence_artifacts", - "lgtm_falsification_artifacts", + "lgtm_evidence", + "lgtm_failure_likely", + "lgtm_failure_sneaky", + "lgtm_failure_unknown", + "lgtm_falsification_test", + "lgtm_evidence_reasoning", + "lgtm_verification_hints", + "lgtm_remaining_uncertainty", + "lgtm_submitted_at", + "lgtm_commands", + "lgtm_evidence_artifacts", + "lgtm_falsification_artifacts", ] as const; const RESERVED_METADATA_PREFIXES = ["lgtm_", "robot_review"]; -function assertNoReservedMetadata(metadata: Record | undefined): string | null { - if (!metadata) return null; - for (const key of Object.keys(metadata)) { - if (RESERVED_METADATA_PREFIXES.some(prefix => key.startsWith(prefix))) { - return `Metadata key ${key} is reserved for proof/review internals. Use TaskClaimDone or robot_review_run instead.`; - } - } - return null; +function assertNoReservedMetadata( + metadata: Record | undefined, +): string | null { + if (!metadata) return null; + for (const key of Object.keys(metadata)) { + if (RESERVED_METADATA_PREFIXES.some((prefix) => key.startsWith(prefix))) { + return `Metadata key ${key} is reserved for proof/review internals. 
Use TaskClaimDone or robot_review_run instead.`; + } + } + return null; } -function requiredTextError(fields: Record, names: string[]): string | null { - for (const name of names) { - const value = fields[name]; - if (typeof value !== "string" || value.trim().length === 0) return `${name} is required and cannot be blank.`; - } - return null; +function requiredTextError( + fields: Record, + names: string[], +): string | null { + for (const name of names) { + const value = fields[name]; + if (typeof value !== "string" || value.trim().length === 0) + return `${name} is required and cannot be blank.`; + } + return null; } function nullRecord(keys: readonly string[]): Record { - return Object.fromEntries(keys.map((key) => [key, null])); + return Object.fromEntries(keys.map((key) => [key, null])); } -function getAutomaticReviewFailureMetadata(message: string, rawOutput?: string): Record { - return { - robot_review_last_error: message, - robot_review_last_error_output: rawOutput ?? null, - robot_review_last_error_at: new Date().toISOString(), - }; +function getAutomaticReviewFailureMetadata( + message: string, + rawOutput?: string, +): Record { + return { + robot_review_last_error: message, + robot_review_last_error_output: rawOutput ?? null, + robot_review_last_error_at: new Date().toISOString(), + }; } function clearAutomaticReviewFailureMetadata(): Record { - return nullRecord(AUTOMATIC_REVIEW_ERROR_KEYS); + return nullRecord(AUTOMATIC_REVIEW_ERROR_KEYS); } function clearRobotReviewMetadata(): Record { - return nullRecord(ROBOT_REVIEW_KEYS); + return nullRecord(ROBOT_REVIEW_KEYS); } function clearCurrentEvidenceMetadata(): Record { - return nullRecord(CURRENT_EVIDENCE_KEYS); + return nullRecord(CURRENT_EVIDENCE_KEYS); } function normalizeCommandRecords(value: unknown): EvidenceCommandRecord[] { - return Array.isArray(value) - ? 
value.flatMap((entry) => { - if (!entry || typeof entry !== "object") return []; - const command = entry as Record; - if (typeof command.cmd !== "string" || typeof command.exit_code !== "number") return []; - return [{ - cmd: command.cmd, - exit_code: command.exit_code, - stdout_path: typeof command.stdout_path === "string" ? command.stdout_path : undefined, - stderr_path: typeof command.stderr_path === "string" ? command.stderr_path : undefined, - }]; - }) - : []; + return Array.isArray(value) + ? value.flatMap((entry) => { + if (!entry || typeof entry !== "object") return []; + const command = entry as Record; + if ( + typeof command.cmd !== "string" || + typeof command.exit_code !== "number" + ) + return []; + return [ + { + cmd: command.cmd, + exit_code: command.exit_code, + stdout_path: + typeof command.stdout_path === "string" + ? command.stdout_path + : undefined, + stderr_path: + typeof command.stderr_path === "string" + ? command.stderr_path + : undefined, + }, + ]; + }) + : []; } function normalizeArtifactRecords(value: unknown): EvidenceArtifactRecord[] { - return Array.isArray(value) - ? value.flatMap((entry) => { - if (!entry || typeof entry !== "object") return []; - const artifact = entry as Record; - if (typeof artifact.path !== "string" || typeof artifact.sha256 !== "string" || typeof artifact.bytes !== "number") return []; - return [{ path: artifact.path, sha256: artifact.sha256, bytes: artifact.bytes }]; - }) - : []; + return Array.isArray(value) + ? value.flatMap((entry) => { + if (!entry || typeof entry !== "object") return []; + const artifact = entry as Record; + if ( + typeof artifact.path !== "string" || + typeof artifact.sha256 !== "string" || + typeof artifact.bytes !== "number" + ) + return []; + return [ + { + path: artifact.path, + sha256: artifact.sha256, + bytes: artifact.bytes, + }, + ]; + }) + : []; } -export function buildArtifactRecords(paths?: string[]): EvidenceArtifactRecord[] { - return (paths ?? 
[]).map((path) => { - const resolvedPath = resolve(path); - const content = readFileSync(resolvedPath); - return { - path: resolvedPath, - sha256: createHash("sha256").update(content).digest("hex"), - bytes: content.length, - }; - }); +export function buildArtifactRecords( + paths?: string[], +): EvidenceArtifactRecord[] { + return (paths ?? []).map((path) => { + const resolvedPath = resolve(path); + const content = readFileSync(resolvedPath); + return { + path: resolvedPath, + sha256: createHash("sha256").update(content).digest("hex"), + bytes: content.length, + }; + }); } export function getEvidenceHistory(task: Task): EvidenceIterationRecord[] { - return Array.isArray(task.metadata?.lgtm_history) - ? task.metadata.lgtm_history.filter((entry: unknown): entry is EvidenceIterationRecord => !!entry && typeof entry === "object") - : []; + return Array.isArray(task.metadata?.lgtm_history) + ? task.metadata.lgtm_history.filter( + (entry: unknown): entry is EvidenceIterationRecord => + !!entry && typeof entry === "object", + ) + : []; } -export function getCurrentEvidenceIteration(task: Task): EvidenceIterationRecord | undefined { - const metadata = task.metadata ?? {}; - if (typeof metadata.lgtm_evidence !== "string") return undefined; - return { - iteration: getEvidenceHistory(task).length + 1, - submitted_at: typeof metadata.lgtm_submitted_at === "string" ? metadata.lgtm_submitted_at : new Date(0).toISOString(), - evidence: metadata.lgtm_evidence, - failure_likely: typeof metadata.lgtm_failure_likely === "string" ? metadata.lgtm_failure_likely : "", - failure_sneaky: typeof metadata.lgtm_failure_sneaky === "string" ? metadata.lgtm_failure_sneaky : "", - failure_unknown: typeof metadata.lgtm_failure_unknown === "string" ? metadata.lgtm_failure_unknown : "", - falsification_test: typeof metadata.lgtm_falsification_test === "string" ? metadata.lgtm_falsification_test : "", - evidence_reasoning: typeof metadata.lgtm_evidence_reasoning === "string" ? 
metadata.lgtm_evidence_reasoning : "", - verification_hints: Array.isArray(metadata.lgtm_verification_hints) ? metadata.lgtm_verification_hints.filter((hint: unknown): hint is string => typeof hint === "string") : [], - remaining_uncertainty: typeof metadata.lgtm_remaining_uncertainty === "string" ? metadata.lgtm_remaining_uncertainty : "", - commands: normalizeCommandRecords(metadata.lgtm_commands), - evidence_artifacts: normalizeArtifactRecords(metadata.lgtm_evidence_artifacts), - falsification_artifacts: normalizeArtifactRecords(metadata.lgtm_falsification_artifacts), - robot_reviews: getRobotReviews(task), - automatic_review_failure: typeof metadata.robot_review_last_error === "string" - ? { - message: metadata.robot_review_last_error, - raw_output: typeof metadata.robot_review_last_error_output === "string" ? metadata.robot_review_last_error_output : undefined, - } - : undefined, - }; +export function getCurrentEvidenceIteration( + task: Task, +): EvidenceIterationRecord | undefined { + const metadata = task.metadata ?? {}; + if (typeof metadata.lgtm_evidence !== "string") return undefined; + return { + iteration: getEvidenceHistory(task).length + 1, + submitted_at: + typeof metadata.lgtm_submitted_at === "string" + ? metadata.lgtm_submitted_at + : new Date(0).toISOString(), + evidence: metadata.lgtm_evidence, + failure_likely: + typeof metadata.lgtm_failure_likely === "string" + ? metadata.lgtm_failure_likely + : "", + failure_sneaky: + typeof metadata.lgtm_failure_sneaky === "string" + ? metadata.lgtm_failure_sneaky + : "", + failure_unknown: + typeof metadata.lgtm_failure_unknown === "string" + ? metadata.lgtm_failure_unknown + : "", + falsification_test: + typeof metadata.lgtm_falsification_test === "string" + ? metadata.lgtm_falsification_test + : "", + evidence_reasoning: + typeof metadata.lgtm_evidence_reasoning === "string" + ? metadata.lgtm_evidence_reasoning + : "", + verification_hints: Array.isArray(metadata.lgtm_verification_hints) + ? 
metadata.lgtm_verification_hints.filter( + (hint: unknown): hint is string => typeof hint === "string", + ) + : [], + remaining_uncertainty: + typeof metadata.lgtm_remaining_uncertainty === "string" + ? metadata.lgtm_remaining_uncertainty + : "", + commands: normalizeCommandRecords(metadata.lgtm_commands), + evidence_artifacts: normalizeArtifactRecords( + metadata.lgtm_evidence_artifacts, + ), + falsification_artifacts: normalizeArtifactRecords( + metadata.lgtm_falsification_artifacts, + ), + robot_reviews: getRobotReviews(task), + automatic_review_failure: + typeof metadata.robot_review_last_error === "string" + ? { + message: metadata.robot_review_last_error, + raw_output: + typeof metadata.robot_review_last_error_output === "string" + ? metadata.robot_review_last_error_output + : undefined, + } + : undefined, + }; } export function getEvidenceIterationCount(task: Task): number { - return getEvidenceHistory(task).length + (getCurrentEvidenceIteration(task) ? 1 : 0); + return ( + getEvidenceHistory(task).length + + (getCurrentEvidenceIteration(task) ? 1 : 0) + ); } -export function archiveCurrentEvidence(task: Task, reason: string): Record { - const current = getCurrentEvidenceIteration(task); - if (!current) return {}; - return { - lgtm_history: [ - ...getEvidenceHistory(task), - { - ...current, - superseded_at: new Date().toISOString(), - supersede_reason: reason, - }, - ], - }; -} - -function formatReviewTextBlock(title: string, body: string): string { - return `### ${title}\n\n\`\`\`text\n${body}\n\`\`\``; +export function archiveCurrentEvidence( + task: Task, + reason: string, +): Record { + const current = getCurrentEvidenceIteration(task); + if (!current) return {}; + return { + lgtm_history: [ + ...getEvidenceHistory(task), + { + ...current, + superseded_at: new Date().toISOString(), + supersede_reason: reason, + }, + ], + }; } function presentOrMissing(value: string | undefined): string { - return value && value.trim().length > 0 ? 
value : "(missing)"; + return value && value.trim().length > 0 ? value : "(missing)"; } -function formatBulletList(title: string, items: string[], empty = "(none)"): string { - return `### ${title}\n${items.length > 0 ? items.map((item) => `- ${item}`).join("\n") : `- ${empty}`}`; +function formatBulletList( + title: string, + items: string[], + empty = "(none)", +): string { + return `### ${title}\n${items.length > 0 ? items.map((item) => `- ${item}`).join("\n") : `- ${empty}`}`; } -function formatCommandRecords(commands: EvidenceCommandRecord[]): string | undefined { - if (commands.length === 0) return undefined; - return `### Commands\n${commands.map((command) => `- \`${command.cmd}\` (exit ${command.exit_code})${command.stdout_path ? ` stdout: ${command.stdout_path}` : ""}${command.stderr_path ? ` stderr: ${command.stderr_path}` : ""}`).join("\n")}`; +function formatCommandRecords( + commands: EvidenceCommandRecord[], +): string | undefined { + if (commands.length === 0) return undefined; + return `### Commands\n${commands.map((command) => `- \`${command.cmd}\` (exit ${command.exit_code})${command.stdout_path ? ` stdout: ${command.stdout_path}` : ""}${command.stderr_path ? 
` stderr: ${command.stderr_path}` : ""}`).join("\n")}`; } -function formatArtifactRecords(title: string, artifacts: EvidenceArtifactRecord[]): string | undefined { - if (artifacts.length === 0) return undefined; - return `### ${title}\n${artifacts.map((artifact) => `- ${artifact.path} (${artifact.bytes} bytes, sha256 ${artifact.sha256})`).join("\n")}`; +function formatArtifactRecords( + title: string, + artifacts: EvidenceArtifactRecord[], +): string | undefined { + if (artifacts.length === 0) return undefined; + return `### ${title}\n${artifacts.map((artifact) => `- ${artifact.path} (${artifact.bytes} bytes, sha256 ${artifact.sha256})`).join("\n")}`; } -function renderPlannedEvidence(entry: EvidenceIterationRecord): string { - return [ - "## Planned evidence / UAT", - formatBulletList("Verification hints", entry.verification_hints, "(missing)"), - formatReviewTextBlock("Falsification test", presentOrMissing(entry.falsification_test)), - ].join("\n\n"); +const MAX_INLINE_PROOF_LINES = 16; +const MAX_INLINE_TOOL_LINES = 8; +const MAX_INLINE_REVIEW_ITEMS = 3; + +function truncateProofBlock( + body: string, + maxLines = MAX_INLINE_PROOF_LINES, +): { + preview: string; + truncated: boolean; + totalLines: number; + headLines: number; + tailLines: number; +} { + const lines = body.split("\n"); + if (lines.length <= maxLines) { + return { + preview: body, + truncated: false, + totalLines: lines.length, + headLines: lines.length, + tailLines: 0, + }; + } + const headLines = Math.ceil(maxLines / 2); + const tailLines = Math.floor(maxLines / 2); + const omitted = lines.length - headLines - tailLines; + return { + preview: [ + ...lines.slice(0, headLines), + `[... 
${omitted} middle lines omitted ...]`, + ...lines.slice(lines.length - tailLines), + ].join("\n"), + truncated: true, + totalLines: lines.length, + headLines, + tailLines, + }; } -function summarizeJudgement(entry: EvidenceIterationRecord): { title: string; body: string; suggestions: string[] } { - const latestReview = entry.robot_reviews[entry.robot_reviews.length - 1]; - if (latestReview) { - const judgement = latestReview.accepted ? "Accepted" : "Refused"; - const concerns = [ - ...latestReview.observations, - ...latestReview.concerns, - ...latestReview.missing_evidence.map((item) => `Missing evidence: ${item}`), - ]; - const suggestions = latestReview.suggestions.length > 0 - ? latestReview.suggestions - : latestReview.accepted - ? [] - : latestReview.missing_evidence.map((item) => `Strengthen the proof for: ${item}`); - return { - title: judgement, - body: `${judgement} by ${latestReview.reviewer} on ${latestReview.submitted_at}.`, - suggestions: [...concerns, ...suggestions], - }; - } - if (entry.automatic_review_failure) { - return { - title: "Reviewer unavailable", - body: entry.automatic_review_failure.message, - suggestions: [ - "Autonomy continued without blocking completion.", - "Inspect the reviewer failure note if you want a fresh external perspective later.", - ], - }; - } - return { - title: "Pending review", - body: "No judge result recorded yet.", - suggestions: [], - }; +function summarizeList( + items: string[], + maxItems = MAX_INLINE_REVIEW_ITEMS, +): string[] { + if (items.length <= maxItems) return items; + return [ + ...items.slice(0, maxItems), + `(${items.length - maxItems} more omitted)`, + ]; } -function renderAttempt(entry: EvidenceIterationRecord): string { - const judgement = summarizeJudgement(entry); - return [ - `## Attempt ${entry.iteration}`, - formatReviewTextBlock("Submitted evidence", presentOrMissing(entry.evidence)), - `### Judgement\n${judgement.title}\n\n${judgement.body}`, - formatBulletList("Suggestions / concerns", 
judgement.suggestions, "(none)"), - ].join("\n\n"); +function getEvidenceOverflowPath( + entry: EvidenceIterationRecord, +): string | undefined { + return ( + entry.evidence_artifacts[0]?.path ?? + entry.commands.find((command) => typeof command.stdout_path === "string") + ?.stdout_path ?? + entry.commands.find((command) => typeof command.stderr_path === "string") + ?.stderr_path + ); } -export function renderEvidencePacket(task: Task): string { - const current = getCurrentEvidenceIteration(task); - if (!current) return "(No current proof claim. The agent never called TaskClaimDone, or the prior claim was superseded.)"; +function formatReviewTextBlock( + title: string, + body: string, + options?: { maxLines?: number; overflowPath?: string }, +): string { + const truncated = options?.maxLines + ? truncateProofBlock(body, options.maxLines) + : { + preview: body, + truncated: false, + totalLines: body.split("\n").length, + headLines: body.split("\n").length, + tailLines: 0, + }; + const overflowNote = truncated.truncated + ? `\n\n[truncated at ${options?.maxLines ?? MAX_INLINE_PROOF_LINES} lines from ${truncated.totalLines}; showing first ${truncated.headLines} and last ${truncated.tailLines}; full text: ${options?.overflowPath ?? 
"(no stored artifact path)"}]` + : ""; + return `### ${title}\n\n\`\`\`text\n${truncated.preview}${overflowNote}\n\`\`\``; +} - return [ - "## Goal", - `Task #${task.id}: ${task.subject}`, - presentOrMissing(task.done_criterion), - renderPlannedEvidence(current), - renderAttempt(current), - formatBulletList("Failure modes", [ - `Likely: ${presentOrMissing(current.failure_likely)}`, - `Sneaky: ${presentOrMissing(current.failure_sneaky)}`, - `Unknown: ${presentOrMissing(current.failure_unknown)}`, - ]), - formatReviewTextBlock("Why this proves success", presentOrMissing(current.evidence_reasoning)), - formatReviewTextBlock("Remaining uncertainty", presentOrMissing(current.remaining_uncertainty)), - formatCommandRecords(current.commands), - formatArtifactRecords("Evidence artifacts", current.evidence_artifacts), - formatArtifactRecords("Falsification artifacts", current.falsification_artifacts), - ].filter((section): section is string => typeof section === "string" && section.length > 0).join("\n\n"); +function formatTaskStatusLine(task: Task): string { + return `Status: ${task.status}`; +} + +function formatTaskToolMetadata( + task: Task, + options?: { updatedFields?: string[] }, +): string { + const current = getCurrentEvidenceIteration(task); + const metadataKeys = Object.keys(getNonReviewMetadata(task)); + return [ + "### Metadata", + `- Completion mode: ${getCompletionMode(task)}`, + `- Review state: ${getReviewState(task)}`, + `- Gate status: ${getGateStatus(task)}`, + options?.updatedFields?.length + ? `- Updated fields: ${options.updatedFields.join(", ")}` + : undefined, + `- Metadata keys: ${metadataKeys.length}`, + `- Proof iterations: ${getEvidenceIterationCount(task)}`, + `- Robot reviews: ${getRobotReviews(task).length}`, + current?.submitted_at + ? 
`- Submitted at: ${current.submitted_at}` + : undefined, + `- Updated at: ${new Date(task.updatedAt).toISOString()}`, + ] + .filter(Boolean) + .join("\n"); +} + +function renderTaskToolResult( + title: string, + task: Task, + body: string, + options?: { updatedFields?: string[] }, +): string { + return [ + `## ${title} -> Task #${task.id}: ${task.subject}`, + formatTaskStatusLine(task), + formatTaskToolMetadata(task, options), + body, + ].join("\n\n"); +} + +function renderTaskSnapshot( + task: Task, + options?: { + includeDescription?: boolean; + includeDoneCriterion?: boolean; + includeProgressLabel?: boolean; + includeMetadata?: boolean; + }, +): string { + const sections: string[] = []; + if (options?.includeDoneCriterion !== false) { + sections.push( + formatReviewTextBlock( + "Done criterion", + presentOrMissing(task.done_criterion), + { maxLines: MAX_INLINE_TOOL_LINES }, + ), + ); + } + if (options?.includeDescription) { + sections.push( + formatReviewTextBlock("Description", presentOrMissing(task.description), { + maxLines: MAX_INLINE_TOOL_LINES, + }), + ); + } + if (options?.includeProgressLabel && task.progress_label) { + sections.push( + formatReviewTextBlock("Progress label", task.progress_label, { + maxLines: MAX_INLINE_TOOL_LINES, + }), + ); + } + if (options?.includeMetadata) { + const metadata = getNonReviewMetadata(task); + if (Object.keys(metadata).length > 0) { + sections.push( + formatReviewTextBlock( + "Metadata preview", + JSON.stringify(metadata, null, 2), + { maxLines: MAX_INLINE_TOOL_LINES }, + ), + ); + } + } + return sections.join("\n\n"); +} + +function renderTaskUpdateSummary( + before: Task | undefined, + task: Task, + changedFields: string[], + metadataPatch?: Record, +): string { + const lines = ["### Changes"]; + for (const field of changedFields) { + if (field === "status") { + lines.push( + `- status: ${before?.status ?? 
"(missing)"} -> ${task.status}`, + ); + continue; + } + if (field === "subject") { + lines.push( + `- subject: ${before?.subject ?? "(missing)"} -> ${task.subject}`, + ); + continue; + } + if (field === "progress_label") { + lines.push( + `- progress_label: ${before?.progress_label ?? "(missing)"} -> ${task.progress_label ?? "(missing)"}`, + ); + continue; + } + if (field === "description") { + lines.push( + formatReviewTextBlock( + "Description", + presentOrMissing(task.description), + { maxLines: MAX_INLINE_TOOL_LINES }, + ), + ); + continue; + } + if (field === "done_criterion") { + lines.push( + formatReviewTextBlock( + "Done criterion", + presentOrMissing(task.done_criterion), + { maxLines: MAX_INLINE_TOOL_LINES }, + ), + ); + continue; + } + if (field === "metadata") { + const metadata = metadataPatch ?? getNonReviewMetadata(task); + lines.push( + formatReviewTextBlock( + "Metadata patch", + JSON.stringify(metadata, null, 2), + { maxLines: MAX_INLINE_TOOL_LINES }, + ), + ); + continue; + } + if (field === "blocks") { + lines.push( + `- blocks: ${task.blocks.length > 0 ? task.blocks.map((id) => `#${id}`).join(", ") : "(none)"}`, + ); + continue; + } + if (field === "blockedBy") { + lines.push( + `- blockedBy: ${task.blockedBy.length > 0 ? task.blockedBy.map((id) => `#${id}`).join(", ") : "(none)"}`, + ); + continue; + } + lines.push(`- ${field}`); + } + return lines.join("\n"); +} + +function renderCompactRobotReview(review: RobotReviewRecord): string { + return [ + `### Judge`, + `${review.accepted ? "Accepted" : "Refused"} by ${review.reviewer} on ${review.submitted_at}.`, + `Evidence complete: ${review.evidence_complete ? "yes" : "no"}`, + `Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}`, + review.observations.length > 0 + ? formatBulletList("Observations", summarizeList(review.observations)) + : "", + review.concerns.length > 0 + ? 
formatBulletList("Concerns", summarizeList(review.concerns)) + : "", + review.missing_evidence.length > 0 + ? formatBulletList( + "Missing evidence", + summarizeList(review.missing_evidence), + ) + : "", + review.suggestions.length > 0 + ? formatBulletList("Suggestions", summarizeList(review.suggestions)) + : "", + ] + .filter(Boolean) + .join("\n\n"); +} + +function renderCurrentProofSummary(task: Task): string { + const sections = [renderEvidencePacket(task)]; + const latestReview = getLatestRobotReview(task); + if (latestReview) sections.push(renderCompactRobotReview(latestReview)); + const automaticReviewFailure = renderAutomaticReviewFailure(task); + if (automaticReviewFailure) sections.push(automaticReviewFailure); + return sections.join("\n\n"); +} + +function renderPlannedEvidence( + entry: EvidenceIterationRecord, + options?: { truncateFalsification?: boolean }, +): string { + return [ + "### Verify", + formatBulletList( + "Verification hints", + entry.verification_hints, + "(missing)", + ), + formatReviewTextBlock( + "Falsification test", + presentOrMissing(entry.falsification_test), + options?.truncateFalsification === false + ? undefined + : { + maxLines: MAX_INLINE_PROOF_LINES, + overflowPath: entry.falsification_artifacts[0]?.path, + }, + ), + ].join("\n\n"); +} + +function summarizeJudgement(entry: EvidenceIterationRecord): { + title: string; + body: string; + observations: string[]; + concerns: string[]; + suggestions: string[]; + missingEvidence: string[]; +} { + const latestReview = entry.robot_reviews[entry.robot_reviews.length - 1]; + if (latestReview) { + return { + title: latestReview.accepted ? "Accepted" : "Refused", + body: `${latestReview.accepted ? "Accepted" : "Refused"} by ${latestReview.reviewer} on ${latestReview.submitted_at}.`, + observations: latestReview.observations, + concerns: latestReview.concerns, + suggestions: + latestReview.suggestions.length > 0 + ? latestReview.suggestions + : latestReview.accepted + ? 
[] + : latestReview.missing_evidence.map( + (item) => `Strengthen the proof for: ${item}`, + ), + missingEvidence: latestReview.missing_evidence, + }; + } + if (entry.automatic_review_failure) { + return { + title: "Reviewer unavailable", + body: entry.automatic_review_failure.message, + observations: [], + concerns: [], + suggestions: [ + "Autonomy continued without blocking completion.", + "Inspect the reviewer failure note if you want a fresh external perspective later.", + ], + missingEvidence: [], + }; + } + return { + title: "Pending review", + body: "No judge result recorded yet.", + observations: [], + concerns: [], + suggestions: [], + missingEvidence: [], + }; +} + +function renderAttempt( + entry: EvidenceIterationRecord, + options?: { truncateEvidence?: boolean; truncateFalsification?: boolean }, +): string { + const judgement = summarizeJudgement(entry); + const evidenceBlock = + options?.truncateEvidence === false + ? formatReviewTextBlock("Evidence", presentOrMissing(entry.evidence)) + : formatReviewTextBlock("Evidence", presentOrMissing(entry.evidence), { + maxLines: MAX_INLINE_PROOF_LINES, + overflowPath: getEvidenceOverflowPath(entry), + }); + return [ + `## Attempt ${entry.iteration}`, + evidenceBlock, + renderPlannedEvidence(entry, options), + "### Check notes", + `- likely wrong: ${presentOrMissing(entry.failure_likely)}`, + `- sneaky wrong: ${presentOrMissing(entry.failure_sneaky)}`, + `- unknown left: ${presentOrMissing(entry.failure_unknown)}`, + `- why this counts: ${presentOrMissing(entry.evidence_reasoning)}`, + `- remaining uncertainty: ${presentOrMissing(entry.remaining_uncertainty)}`, + `### Judgement\n${judgement.title}\n\n${judgement.body}`, + judgement.observations.length > 0 + ? formatBulletList("Observations", summarizeList(judgement.observations)) + : "", + judgement.concerns.length > 0 + ? formatBulletList("Concerns", summarizeList(judgement.concerns)) + : "", + judgement.missingEvidence.length > 0 + ? 
formatBulletList( + "Missing evidence", + summarizeList(judgement.missingEvidence), + ) + : "", + judgement.suggestions.length > 0 + ? formatBulletList("Suggestions", summarizeList(judgement.suggestions)) + : "", + ] + .filter(Boolean) + .join("\n\n"); +} + +export function renderEvidencePacket( + task: Task, + options?: { truncateEvidence?: boolean; truncateFalsification?: boolean }, +): string { + const current = getCurrentEvidenceIteration(task); + if (!current) + return "(No current proof claim. The agent never called TaskClaimDone, or the prior claim was superseded.)"; + + return [ + "## Goal", + `Task #${task.id}: ${task.subject}`, + `Done criterion: ${presentOrMissing(task.done_criterion)}`, + renderAttempt(current, options), + formatCommandRecords(current.commands), + formatArtifactRecords("Evidence artifacts", current.evidence_artifacts), + formatArtifactRecords( + "Falsification artifacts", + current.falsification_artifacts, + ), + ] + .filter( + (section): section is string => + typeof section === "string" && section.length > 0, + ) + .join("\n\n"); } function renderAutomaticReviewFailure(task: Task): string | undefined { - if (typeof task.metadata?.robot_review_last_error !== "string") return undefined; - const sections = [`### Automatic robot review failure\n${task.metadata.robot_review_last_error}`]; - if (typeof task.metadata?.robot_review_last_error_output === "string" && task.metadata.robot_review_last_error_output.trim()) { - sections.push(formatReviewTextBlock("Reviewer raw output", task.metadata.robot_review_last_error_output)); - } - return sections.join("\n\n"); + if (typeof task.metadata?.robot_review_last_error !== "string") + return undefined; + const sections = [ + `### Automatic robot review failure\n${task.metadata.robot_review_last_error}`, + ]; + if ( + typeof task.metadata?.robot_review_last_error_output === "string" && + task.metadata.robot_review_last_error_output.trim() + ) { + sections.push( + formatReviewTextBlock( + "Reviewer 
raw output", + task.metadata.robot_review_last_error_output, + { maxLines: MAX_INLINE_PROOF_LINES }, + ), + ); + } + return sections.join("\n\n"); } export function renderProofLog(task: Task): string { - const history = getEvidenceHistory(task); - const attempts = history.map(renderAttempt); - const current = getCurrentEvidenceIteration(task); - const lines = [ - `# Task #${task.id}: ${task.subject}`, - `Status: ${task.status}`, - `Gate status: ${getGateStatus(task)}`, - "", - "## Goal", - presentOrMissing(task.done_criterion), - ]; - if (current) { - lines.push("", renderPlannedEvidence(current), "", ...attempts, renderAttempt(current)); - } else if (attempts.length > 0) { - lines.push("", ...attempts); - } else { - lines.push("", "(No current proof claim.)"); - } - return lines.join("\n"); + const history = getEvidenceHistory(task); + const attempts = history.map((entry) => renderAttempt(entry)); + const current = getCurrentEvidenceIteration(task); + const lines = [ + `# Task #${task.id}: ${task.subject}`, + `Status: ${task.status}`, + `Gate status: ${getGateStatus(task)}`, + "", + "## Goal", + `Done criterion: ${presentOrMissing(task.done_criterion)}`, + ]; + if (current) { + lines.push("", ...attempts, renderAttempt(current)); + } else if (attempts.length > 0) { + lines.push("", ...attempts); + } else { + lines.push("", "(No current proof claim.)"); + } + return lines.join("\n"); } function getNonReviewMetadata(task: Task): Record { - return Object.fromEntries( - Object.entries(task.metadata ?? {}).filter(([key]) => - !key.startsWith("lgtm_") && !key.startsWith("robot_review_") && key !== "lgtm_history" - ), - ); + return Object.fromEntries( + Object.entries(task.metadata ?? 
{}).filter( + ([key]) => + !key.startsWith("lgtm_") && + !key.startsWith("robot_review_") && + key !== "lgtm_history" && + key !== "robot_reviews", + ), + ); } function formatHistorySummary(task: Task): string | undefined { - const history = getEvidenceHistory(task); - if (history.length === 0) return undefined; - return `Superseded evidence:\n${history.map((entry) => `- #${entry.iteration} superseded ${entry.superseded_at ?? "?"}: ${entry.supersede_reason ?? "(no reason recorded)"}`).join("\n")}`; + const history = getEvidenceHistory(task); + if (history.length === 0) return undefined; + return `Superseded evidence:\n${history.map((entry) => `- #${entry.iteration} superseded ${entry.superseded_at ?? "?"}: ${entry.supersede_reason ?? "(no reason recorded)"}`).join("\n")}`; } -export function extractRobotReviewJson(output: string): Record { - const match = output.match(/ROBOT_REVIEW_JSON_START\s*([\s\S]*?)\s*ROBOT_REVIEW_JSON_END/); - const source = match ? match[1] : output; - const candidates = [ - source.trim(), - stripMarkdownCodeFence(source), - extractBalancedJsonObject(source) ?? "", - extractBalancedJsonObject(stripMarkdownCodeFence(source)) ?? "", - ].filter(Boolean); +export function extractRobotReviewJson( + output: string, +): Record { + const match = output.match( + /ROBOT_REVIEW_JSON_START\s*([\s\S]*?)\s*ROBOT_REVIEW_JSON_END/, + ); + const source = match ? match[1] : output; + const candidates = [ + source.trim(), + stripMarkdownCodeFence(source), + extractBalancedJsonObject(source) ?? "", + extractBalancedJsonObject(stripMarkdownCodeFence(source)) ?? 
"", + ].filter(Boolean); - let lastError: unknown; - for (const candidate of [...new Set(candidates)]) { - try { - return JSON.parse(candidate) as Record; - } catch (error) { - lastError = error; - } - } + let lastError: unknown; + for (const candidate of [...new Set(candidates)]) { + try { + return JSON.parse(candidate) as Record; + } catch (error) { + lastError = error; + } + } - const prefix = match - ? "Robot reviewer returned invalid JSON" - : "Robot reviewer did not return the expected JSON markers or a parseable JSON object"; - const detail = lastError instanceof Error ? `: ${lastError.message}` : ""; - throw new Error(`${prefix}${detail}. Raw output: ${summarizeRawOutput(output)}`); -} - -function formatRobotReview(review: RobotReviewRecord): string { - const parts = [ - `Robot review #${review.iteration} (${review.submitted_at})`, - `Reviewer: ${review.reviewer}${review.mode === "auto" ? " [auto]" : ""}`, - `Scope: ${review.scope}`, - ]; - if (review.rubric) { - const rubricLines = Object.entries(review.rubric).map(([key, val]) => - ` ${val.pass ? "PASS" : "FAIL"} ${key}: ${val.reason}` - ); - parts.push(`Rubric:\n${rubricLines.join("\n")}`); - } - parts.push( - `Accepted: ${review.accepted ? "yes" : "no"}`, - `Evidence complete: ${review.evidence_complete ? "yes" : "no"}`, - `Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}`, - `Observations:\n- ${review.observations.join("\n- ")}`, - ); - if (review.concerns.length > 0) parts.push(`Concerns:\n- ${review.concerns.join("\n- ")}`); - if (review.suggestions.length > 0) parts.push(`Suggestions:\n- ${review.suggestions.join("\n- ")}`); - if (review.missing_evidence.length > 0) parts.push(`Missing evidence:\n- ${review.missing_evidence.join("\n- ")}`); - if (review.blind_spots) parts.push(`Blind spots: ${review.blind_spots}`); - return parts.join("\n"); + const prefix = match + ? 
"Robot reviewer returned invalid JSON" + : "Robot reviewer did not return the expected JSON markers or a parseable JSON object"; + const detail = lastError instanceof Error ? `: ${lastError.message}` : ""; + throw new Error( + `${prefix}${detail}. Raw output: ${summarizeRawOutput(output)}`, + ); } export function buildRobotReviewPrompt(task: Task): string { - return [ - "You are a fresh validation judge for a Hermes-style proof log.", - "Question: in retrospect, does this evidence prove success for the stated goal?", - "If not, say no and explain what the agent should do next. Suggestions are advisory guidance, not a separate gate.", - "", - "## Critical: Evidence must be verbatim", - "", - "Evidence should contain literal output, exact log lines, markdown block quotes, table rows, and URLs, not summaries or interpretations.", - "A human must be able to inspect the evidence alone without re-running anything.", - "", - "## Rubric (rate each item pass/fail)", - "", - "1. evidence_covers_done_criterion: Does the evidence directly address the stated done criterion?", - "2. falsification_test_runnable: Is the falsification test concrete enough that someone could run it and get a yes/no result?", - "3. failure_modes_addressed: Are the likely, sneaky, and unknown failure modes plausible enough to guide evidence choice?", - "4. evidence_distinguishes_success: Does the agent explain why the evidence distinguishes success from those failure modes?", - "5. verification_hints_actionable: Can a human follow the verification hints to inspect the claim without re-running experiments?", - "", - "Set evidence_complete=true only if items 1 and 2 pass.", - "Set evidence_convincing=true only if items 1 and 2 pass. Item 4 is advisory unless it reveals that items 1 or 2 were overstated.", - "Set accepted=true only if items 1, 2, 3, and 4 pass. 
Do not reject solely because verification hints are weak if the verbatim evidence already proves the done criterion.", - "", - "observations: what you saw in the packet.", - "concerns: concise reasons the current evidence may not prove success yet.", - "suggestions: what the agent should do next if the evidence is not yet enough. Nonblocking guidance only.", - "missing_evidence: ONLY items from the rubric that failed. Do NOT add new dimensions.", - "", - "Return exactly one JSON object between the markers ROBOT_REVIEW_JSON_START and ROBOT_REVIEW_JSON_END.", - "JSON schema:", - '{"reviewer":"string","scope":"string","rubric":{"evidence_covers_done_criterion":{"reason":"...","pass":true},"falsification_test_runnable":{"reason":"...","pass":true},"failure_modes_addressed":{"reason":"...","pass":true},"evidence_distinguishes_success":{"reason":"...","pass":true},"verification_hints_actionable":{"reason":"...","pass":true}},"observations":["string"],"concerns":["string"],"suggestions":["string"],"blind_spots":"string","missing_evidence":["string"],"evidence_complete":true,"evidence_convincing":true,"accepted":true}', - "", - "You are reviewing exactly the same proof packet shown by TaskGet and /lgtm. 
Do not assume hidden context beyond this packet.", - "", - renderEvidencePacket(task), - "Output format:", - "ROBOT_REVIEW_JSON_START", - '{"reviewer":"...","scope":"...","rubric":{...},"observations":["..."],"concerns":["..."],"suggestions":["..."],"blind_spots":"...","missing_evidence":["..."],"evidence_complete":true,"evidence_convincing":true,"accepted":true}', - "ROBOT_REVIEW_JSON_END", - ].join("\n"); + return [ + "You are a fresh validation judge for a Hermes-style proof log.", + "Question: does this packet prove the exact user-visible success condition in the done criterion?", + "If the done criterion asks for a specific output or direction of change, check that the quoted output actually shows that result, not merely that a command ran.", + "If not, say no and explain what concrete output is still missing. Suggestions are advisory guidance, not a separate gate.", + "", + "## Critical: Evidence must be verbatim", + "", + "Evidence should contain literal output, exact log lines, markdown block quotes, table rows, and URLs, not summaries or interpretations.", + "A human must be able to inspect the evidence alone without re-running anything.", + "", + "## Rubric (rate each item pass/fail)", + "", + "1. evidence_covers_done_criterion: Does the packet show the concrete observable thing the done criterion asks for, in the right direction or state?", + "2. falsification_test_runnable: Is there a concrete check with literal output that would come out differently if the claim were wrong?", + "3. failure_modes_addressed: Are the likely, sneaky, and unknown failure modes plausible enough to guide what evidence matters? Advisory.", + "4. evidence_distinguishes_success: Does the packet explain, at least briefly, why the shown evidence rules out the main failure modes? Advisory.", + "5. verification_hints_actionable: Can a human inspect the claim without re-running everything? 
Advisory.", + "", + "Set evidence_complete=true only if items 1 and 2 pass.", + "Set evidence_convincing=true if items 1 and 2 pass and you do not see a concrete contradiction in the packet.", + "Set accepted=true if items 1 and 2 pass and you do not see a concrete contradiction in the packet. Do not reject solely because items 3, 4, or 5 are weak if the verbatim evidence already proves the done criterion.", + "", + "observations: what you literally saw in the packet.", + "When rejecting, prefer missing outputs like 'nll_val never decreases in the quoted log' over process complaints like 'too much text'.", + "concerns: concise reasons the current evidence may not prove success yet.", + "suggestions: what the agent should do next if the evidence is not yet enough. Keep this short, ideally 1-3 bullets.", + "missing_evidence: concrete missing artifacts, command outputs, written-file checks, or observations that block acceptance. Prefer phrases like 'literal pytest output' or 'contents of output.json', not abstract rubric labels.", + "", + "Return exactly one JSON object between the markers ROBOT_REVIEW_JSON_START and ROBOT_REVIEW_JSON_END.", + "JSON schema:", + '{"reviewer":"string","scope":"string","rubric":{"evidence_covers_done_criterion":{"reason":"...","pass":true},"falsification_test_runnable":{"reason":"...","pass":true},"failure_modes_addressed":{"reason":"...","pass":true},"evidence_distinguishes_success":{"reason":"...","pass":true},"verification_hints_actionable":{"reason":"...","pass":true}},"observations":["string"],"concerns":["string"],"suggestions":["string"],"blind_spots":"string","missing_evidence":["string"],"evidence_complete":true,"evidence_convincing":true,"accepted":true}', + "", + "You are reviewing exactly the same proof packet shown by TaskGet and /lgtm. 
Do not assume hidden context beyond this packet.", + "", + renderEvidencePacket(task, { truncateEvidence: false }), + "Output format:", + "ROBOT_REVIEW_JSON_START", + '{"reviewer":"...","scope":"...","rubric":{...},"observations":["..."],"concerns":["..."],"suggestions":["..."],"blind_spots":"...","missing_evidence":["..."],"evidence_complete":true,"evidence_convincing":true,"accepted":true}', + "ROBOT_REVIEW_JSON_END", + ].join("\n"); } async function runAutomaticRobotReview( - task: any, - signal?: AbortSignal, - currentModelRef?: string, + task: any, + signal?: AbortSignal, + currentModelRef?: string, ): Promise<{ review: Omit; command: string }> { - if (!currentModelRef) { - throw new Error("Automatic robot review requires an active current session model."); - } - const prompt = buildRobotReviewPrompt(task); - // Keep reviewer model selection simple: reuse the active session model in a fresh Pi process. - // This avoids picking a registry-listed judge model that exists but lacks working auth. - const args = ["--mode", "json", "-p", "--no-session", "--no-tools", "--no-extensions", "--model", currentModelRef]; - args.push(prompt); - const invocation = getPiInvocation(args); - const timeoutMs = getRobotReviewTimeoutMs(); - const commandLabel = `${invocation.command} ${invocation.args.slice(0, -1).join(" ")}`; - const result = await runRobotReviewCommand(invocation, signal, timeoutMs); - if (result.exitCode !== 0) { - const error = new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`) as Error & { rawOutput?: string }; - error.rawOutput = (result.stderr || result.stdout).trim(); - throw error; - } - let parsed: Record; - try { - parsed = extractRobotReviewJson(result.stdout); - } catch (error) { - const wrapped = new Error(error instanceof Error ? 
error.message : String(error)) as Error & { rawOutput?: string }; - wrapped.rawOutput = result.stdout.trim(); - throw wrapped; - } - const observations = Array.isArray(parsed.observations) ? parsed.observations.filter((item): item is string => typeof item === "string") : []; - if (observations.length === 0) { - const error = new Error("Robot reviewer returned no observations.") as Error & { rawOutput?: string }; - error.rawOutput = result.stdout.trim(); - throw error; - } - const concerns = Array.isArray(parsed.concerns) ? parsed.concerns.filter((item): item is string => typeof item === "string") : []; - const suggestions = Array.isArray(parsed.suggestions) ? parsed.suggestions.filter((item): item is string => typeof item === "string") : []; - const rawMissing: string[] = Array.isArray(parsed.missing_evidence) - ? parsed.missing_evidence.filter((item): item is string => typeof item === "string") - : []; - const missing_evidence = rawMissing; - // Extract rubric with per-item reasoning - let rubric: Record | undefined; - if (parsed.rubric && typeof parsed.rubric === "object") { - const r: Record = {}; - for (const [key, val] of Object.entries(parsed.rubric as Record)) { - if (val && typeof val === "object" && "reason" in (val as any) && "pass" in (val as any)) { - const v = val as { reason: unknown; pass: unknown }; - r[key] = { reason: typeof v.reason === "string" ? v.reason : "", pass: v.pass === true }; - } - } - if (Object.keys(r).length > 0) rubric = r; - } - const review = relaxAdvisoryVerificationHints({ - reviewer: typeof parsed.reviewer === "string" ? parsed.reviewer : commandLabel, - scope: typeof parsed.scope === "string" ? parsed.scope : "task evidence package", - observations, - concerns, - suggestions, - blind_spots: typeof parsed.blind_spots === "string" ? parsed.blind_spots : "not stated", - accepted: typeof parsed.accepted === "boolean" - ? 
parsed.accepted - : parsed.evidence_complete === true && parsed.evidence_convincing === true, - evidence_complete: parsed.evidence_complete === true, - evidence_convincing: parsed.evidence_convincing === true, - missing_evidence, - submitted_at: new Date().toISOString(), - mode: "auto", - raw_output: result.stdout.trim(), - rubric, - }); - return { - command: commandLabel, - review, - }; + if (!currentModelRef) { + throw new Error( + "Automatic robot review requires an active current session model.", + ); + } + const prompt = buildRobotReviewPrompt(task); + // Keep reviewer model selection simple: reuse the active session model in a fresh Pi process. + // This avoids picking a registry-listed judge model that exists but lacks working auth. + const args = [ + "--mode", + "json", + "-p", + "--no-session", + "--no-tools", + "--no-extensions", + "--model", + currentModelRef, + ]; + args.push(prompt); + const invocation = getPiInvocation(args); + const timeoutMs = getRobotReviewTimeoutMs(); + const commandLabel = `${invocation.command} ${invocation.args.slice(0, -1).join(" ")}`; + const result = await runRobotReviewCommand(invocation, signal, timeoutMs); + if (result.exitCode !== 0) { + const error = new Error( + `Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`, + ) as Error & { rawOutput?: string }; + error.rawOutput = (result.stderr || result.stdout).trim(); + throw error; + } + let parsed: Record; + try { + parsed = extractRobotReviewJson(result.stdout); + } catch (error) { + const wrapped = new Error( + error instanceof Error ? error.message : String(error), + ) as Error & { rawOutput?: string }; + wrapped.rawOutput = result.stdout.trim(); + throw wrapped; + } + const observations = Array.isArray(parsed.observations) + ? 
parsed.observations.filter( + (item): item is string => typeof item === "string", + ) + : []; + if (observations.length === 0) { + const error = new Error( + "Robot reviewer returned no observations.", + ) as Error & { rawOutput?: string }; + error.rawOutput = result.stdout.trim(); + throw error; + } + const concerns = Array.isArray(parsed.concerns) + ? parsed.concerns.filter((item): item is string => typeof item === "string") + : []; + const suggestions = Array.isArray(parsed.suggestions) + ? parsed.suggestions.filter( + (item): item is string => typeof item === "string", + ) + : []; + const rawMissing: string[] = Array.isArray(parsed.missing_evidence) + ? parsed.missing_evidence.filter( + (item): item is string => typeof item === "string", + ) + : []; + const missing_evidence = rawMissing; + // Extract rubric with per-item reasoning + let rubric: Record | undefined; + if (parsed.rubric && typeof parsed.rubric === "object") { + const r: Record = {}; + for (const [key, val] of Object.entries( + parsed.rubric as Record, + )) { + if ( + val && + typeof val === "object" && + "reason" in (val as any) && + "pass" in (val as any) + ) { + const v = val as { reason: unknown; pass: unknown }; + r[key] = { + reason: typeof v.reason === "string" ? v.reason : "", + pass: v.pass === true, + }; + } + } + if (Object.keys(r).length > 0) rubric = r; + } + const review = relaxAdvisoryVerificationHints({ + reviewer: + typeof parsed.reviewer === "string" ? parsed.reviewer : commandLabel, + scope: + typeof parsed.scope === "string" ? parsed.scope : "task evidence package", + observations, + concerns, + suggestions, + blind_spots: + typeof parsed.blind_spots === "string" + ? parsed.blind_spots + : "not stated", + accepted: + typeof parsed.accepted === "boolean" + ? 
parsed.accepted + : parsed.evidence_complete === true && + parsed.evidence_convincing === true, + evidence_complete: parsed.evidence_complete === true, + evidence_convincing: parsed.evidence_convincing === true, + missing_evidence, + submitted_at: new Date().toISOString(), + mode: "auto", + raw_output: result.stdout.trim(), + rubric, + }); + return { + command: commandLabel, + review, + }; } const SYSTEM_REMINDER = ` -Task tools haven't been used recently. Check the task list and keep it accurate: -- Mark tasks in_progress when you start them (TaskUpdate status=in_progress). +The user is trusting you to be autonomous and work towards achieving these goals. + +Goal tools haven't been used in a while, so check the goal list and keep it accurate: +- Progress existing open goals before drifting to unrelated work. +- Treat rejected proof-gated top-level goals as needing immediate follow-up: strengthen proof, block, supersede, or delete them explicitly. +- Mark goals in_progress when you start them (TaskUpdate status=in_progress). - Complete subtasks directly: TaskUpdate(status=completed). Drop irrelevant ones with status=deleted. -- Complete top-level tasks with TaskClaimDone: include verbatim evidence, likely/subtle/unknown failure modes, falsification test, and remaining uncertainty. Explicit rejection keeps the task open; reviewer infrastructure failures are logged but do not block autonomy. -A stale list is worse than no list. Ignore this reminder if not applicable. Never mention it to the user. +A stale goal list is worse than no goal list. Ignore this reminder if not applicable. Never mention it to the user. `; export default function (pi: ExtensionAPI) { - const cfg = loadTasksConfig(); - const piTasks = process.env.PI_TASKS; - const taskScope = cfg.taskScope ?? "session"; + const cfg = loadTasksConfig(); + const piTasks = process.env.PI_TASKS; + const taskScope = cfg.taskScope ?? 
"session"; - function resolveStorePath(sessionId?: string): string | undefined { - if (piTasks === "off") return undefined; - if (piTasks?.startsWith("/")) return piTasks; - if (piTasks?.startsWith(".")) return resolve(piTasks); - if (piTasks) return piTasks; - if (taskScope === "memory") return undefined; - if (taskScope === "session" && sessionId) { - return join(process.cwd(), ".pi", "tasks", `tasks-${sessionId}.json`); - } - if (taskScope === "session") return undefined; - return join(process.cwd(), ".pi", "tasks", "tasks.json"); - } + function resolveStorePath(sessionId?: string): string | undefined { + if (piTasks === "off") return undefined; + if (piTasks?.startsWith("/")) return piTasks; + if (piTasks?.startsWith(".")) return resolve(piTasks); + if (piTasks) return join(process.cwd(), ".pi", "tasks", `${piTasks}.json`); + if (taskScope === "memory") return undefined; + if (taskScope === "session" && sessionId) { + return join(process.cwd(), ".pi", "tasks", `tasks-${sessionId}.json`); + } + if (taskScope === "session") return undefined; + return join(process.cwd(), ".pi", "tasks", "tasks.json"); + } - let store = new TaskStore(resolveStorePath()); - const widget = new TaskWidget(store); - const autoClear = new AutoClearManager(() => store, () => cfg.autoClearCompleted ?? "on_list_complete", AUTO_CLEAR_DELAY); + let store = new TaskStore(resolveStorePath()); + const widget = new TaskWidget(store); + const autoClear = new AutoClearManager( + () => store, + () => cfg.autoClearCompleted ?? 
"never", + AUTO_CLEAR_DELAY, + ); - let storeUpgraded = false; - let persistedTasksShown = false; - function upgradeStoreIfNeeded(ctx: ExtensionContext) { - if (storeUpgraded) return; - if (taskScope === "session" && !piTasks) { - const sessionId = ctx.sessionManager.getSessionId(); - const path = resolveStorePath(sessionId); - store = new TaskStore(path); - widget.setStore(store); - } - storeUpgraded = true; - } + let storeUpgraded = false; + let persistedTasksShown = false; + function upgradeStoreIfNeeded(ctx: ExtensionContext) { + if (storeUpgraded) return; + if (taskScope === "session" && !piTasks) { + const sessionId = ctx.sessionManager.getSessionId(); + const path = resolveStorePath(sessionId); + store = new TaskStore(path); + widget.setStore(store); + } + storeUpgraded = true; + } - function showPersistedTasks(isResume = false) { - if (persistedTasksShown) return; - persistedTasksShown = true; - const tasks = store.list(); - if (tasks.length > 0) { - if (!isResume && tasks.every(t => t.status === "completed")) { - store.clearCompleted(); - if (taskScope === "session") store.deleteFileIfEmpty(); - } else { - widget.update(); - } - } - } + function showPersistedTasks(_isResume = false) { + if (persistedTasksShown) return; + persistedTasksShown = true; + const tasks = store.list(); + if (tasks.length > 0) widget.update(); + } - let currentTurn = 0; - let lastTaskToolUseTurn = 0; - let reminderInjectedThisCycle = false; + let currentTurn = 0; + let lastTaskToolUseTurn = 0; + let reminderInjectedThisCycle = false; - pi.on("turn_start", async (_event, ctx) => { - currentTurn++; - widget.setUICtx(ctx.ui as UICtx); - upgradeStoreIfNeeded(ctx); - if (autoClear.onTurnStart(currentTurn)) widget.update(); - }); + pi.on("turn_start", async (_event, ctx) => { + currentTurn++; + widget.setUICtx(ctx.ui as UICtx); + upgradeStoreIfNeeded(ctx); + if (autoClear.onTurnStart(currentTurn)) widget.update(); + }); - pi.on("turn_end", async (event) => { - const msg = event.message 
as any; - if (msg?.role === "assistant" && msg.usage) { - widget.addTokenUsage(msg.usage.input ?? 0, msg.usage.output ?? 0); - } - }); + pi.on("turn_end", async (event) => { + const msg = event.message as any; + if (msg?.role === "assistant" && msg.usage) { + widget.addTokenUsage(msg.usage.input ?? 0, msg.usage.output ?? 0); + } + }); - pi.on("tool_result", async (event) => { - if (TASK_TOOL_NAMES.has(event.toolName)) { - lastTaskToolUseTurn = currentTurn; - reminderInjectedThisCycle = false; - return {}; - } - if (currentTurn - lastTaskToolUseTurn < REMINDER_INTERVAL) return {}; - if (reminderInjectedThisCycle) return {}; - const tasks = store.list(); - if (tasks.length === 0) return {}; - reminderInjectedThisCycle = true; - lastTaskToolUseTurn = currentTurn; - return { content: [...event.content, { type: "text" as const, text: SYSTEM_REMINDER }] }; - }); + pi.on("tool_result", async (event) => { + if (TASK_TOOL_NAMES.has(event.toolName)) { + lastTaskToolUseTurn = currentTurn; + reminderInjectedThisCycle = false; + return {}; + } + if (currentTurn - lastTaskToolUseTurn < REMINDER_INTERVAL) return {}; + if (reminderInjectedThisCycle) return {}; + const tasks = store.list(); + if (tasks.length === 0) return {}; + reminderInjectedThisCycle = true; + lastTaskToolUseTurn = currentTurn; + return { + content: [ + ...event.content, + { type: "text" as const, text: SYSTEM_REMINDER }, + ], + }; + }); - pi.on("before_agent_start", async (_event, ctx) => { - widget.setUICtx(ctx.ui as UICtx); - upgradeStoreIfNeeded(ctx); - showPersistedTasks(); - }); + pi.on("before_agent_start", async (_event, ctx) => { + widget.setUICtx(ctx.ui as UICtx); + upgradeStoreIfNeeded(ctx); + showPersistedTasks(); + }); - pi.on("before_agent_start", async (event) => { - const followups = store.list().flatMap(task => { - const latest = getLatestRobotReview(task); - return latest && !latest.accepted ? 
[{ task, latest }] : []; - }); - if (followups.length === 0) return undefined; + pi.on("before_agent_start", async (event) => { + const followups = store.list().flatMap((task) => { + const latest = getLatestRobotReview(task); + return latest && !latest.accepted ? [{ task, latest }] : []; + }); + if (followups.length === 0) return undefined; - const reminder = followups.map(({ task, latest }) => { - const missing = latest.missing_evidence.length > 0 - ? ` Missing evidence: ${latest.missing_evidence.join("; ")}.` - : ""; - return `- Task #${task.id} ${task.subject}: latest proof review rejected the evidence.${missing} Strengthen the evidence and call TaskClaimDone again.`; - }).join("\n"); + const reminder = followups + .map(({ task, latest }) => { + const missing = + latest.missing_evidence.length > 0 + ? ` Missing evidence: ${latest.missing_evidence.join("; ")}.` + : ""; + return `- Task #${task.id} ${task.subject}: latest proof review rejected the evidence.${missing} Strengthen the evidence and call TaskClaimDone again.`; + }) + .join("\n"); - return { - systemPrompt: - event.systemPrompt + - `\n\n\nLatest proof review follow-up required:\n${reminder}\nDo not complete the top-level task until the latest proof review accepts the evidence.\n\n`, - }; - }); + return { + systemPrompt: + event.systemPrompt + + `\n\n\nLatest proof review follow-up required:\n${reminder}\nDo not complete the top-level task until the latest proof review accepts the evidence.\n\n`, + }; + }); - pi.on("session_switch" as any, async (event: any, ctx: ExtensionContext) => { - widget.setUICtx(ctx.ui as UICtx); - const isResume = event?.reason === "resume"; - storeUpgraded = false; - persistedTasksShown = false; - currentTurn = 0; - lastTaskToolUseTurn = 0; - reminderInjectedThisCycle = false; - autoClear.reset(); - if (!isResume && taskScope === "memory") store.clearAll(); - upgradeStoreIfNeeded(ctx); - showPersistedTasks(isResume); - }); + pi.on("session_switch" as any, async (event: any, 
ctx: ExtensionContext) => { + widget.setUICtx(ctx.ui as UICtx); + const isResume = event?.reason === "resume"; + storeUpgraded = false; + persistedTasksShown = false; + currentTurn = 0; + lastTaskToolUseTurn = 0; + reminderInjectedThisCycle = false; + autoClear.reset(); + if (!isResume && taskScope === "memory") store.clearAll(); + upgradeStoreIfNeeded(ctx); + showPersistedTasks(isResume); + }); - // ────────────────────────────────────────────────── - // Tool 1: TaskCreate - // ────────────────────────────────────────────────── + // ────────────────────────────────────────────────── + // Tool 1: TaskCreate + // ────────────────────────────────────────────────── - pi.registerTool({ - name: "TaskCreate", - label: "TaskCreate", - description: `Create a task with a clear done_criterion. + pi.registerTool({ + name: "TaskCreate", + label: "TaskCreate", + description: `Create a task with a clear done_criterion. ## Two tiers @@ -918,219 +1453,333 @@ export default function (pi: ExtensionAPI) { - **done_criterion**: REQUIRED. Falsifiable observation that distinguishes done from fail/null/incomplete/silent-fail. State expected AND wrong-case observations (e.g., "All 92 tests pass. If wrong: type errors in build or test failures in task-store.test.ts") - **progress_label** (optional): What the agent is currently doing, shown during in-progress tasks - **parentId** (optional): Set this to make a directly tickable subtask. Omit it for a proof-gated top-level goal.`, - promptGuidelines: [ - "Use TaskCreate for complex top-level goals. Include a specific done_criterion.", - "Mark tasks in_progress before starting. 
Complete subtasks via TaskUpdate; complete top-level tasks via TaskClaimDone with proof evidence.", - ], - parameters: Type.Object({ - subject: Type.String({ description: "Brief task title" }), - description: Type.String({ description: "Detailed description" }), - done_criterion: Type.String({ description: "Falsifiable observation that distinguishes DONE from fail, null result, incomplete, or silent failure. State what you expect to see AND what you'd see if it's wrong." }), - progress_label: Type.Optional(Type.String({ description: "What the agent is currently doing, shown during in-progress tasks" })), - metadata: Type.Optional(Type.Record(Type.String(), Type.Any())), - parentId: Type.Optional(Type.String({ description: "Parent task ID. If set, this task is a directly tickable subtask; if omitted, this is a proof-gated top-level goal." })), - }), + promptGuidelines: [ + "Use TaskCreate for complex top-level goals. Include a specific done_criterion.", + "Mark tasks in_progress before starting. Complete subtasks via TaskUpdate; complete top-level tasks via TaskClaimDone with proof evidence.", + ], + parameters: Type.Object({ + subject: Type.String({ description: "Brief task title" }), + description: Type.String({ description: "Detailed description" }), + done_criterion: Type.String({ + description: + "Falsifiable observation that distinguishes DONE from fail, null result, incomplete, or silent failure. State what you expect to see AND what you'd see if it's wrong.", + }), + progress_label: Type.Optional( + Type.String({ + description: + "What the agent is currently doing, shown during in-progress tasks", + }), + ), + metadata: Type.Optional(Type.Record(Type.String(), Type.Any())), + parentId: Type.Optional( + Type.String({ + description: + "Parent task ID. 
If set, this task is a directly tickable subtask; if omitted, this is a proof-gated top-level goal.", + }), + ), + }), - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const metadataError = assertNoReservedMetadata(params.metadata); - if (metadataError) return Promise.resolve(textResult(metadataError)); - autoClear.resetBatchCountdown(); - let task: Task; - try { - task = store.create(params.subject, params.description, params.done_criterion, params.progress_label, params.metadata, params.parentId); - } catch (err: any) { - return Promise.resolve(textResult(err.message)); - } - widget.update(); - return Promise.resolve(textResult(`Task #${task.id} created: ${task.subject}\nDone criterion: ${task.done_criterion}`)); - }, - }); + execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const metadataError = assertNoReservedMetadata(params.metadata); + if (metadataError) return Promise.resolve(textResult(metadataError)); + autoClear.resetBatchCountdown(); + let task: Task; + try { + task = store.create( + params.subject, + params.description, + params.done_criterion, + params.progress_label, + params.metadata, + params.parentId, + ); + } catch (err: any) { + return Promise.resolve(textResult(err.message)); + } + widget.update(); + return Promise.resolve( + textResult( + renderTaskToolResult( + "TaskCreate", + task, + renderTaskSnapshot(task, { + includeDescription: true, + includeDoneCriterion: true, + includeProgressLabel: true, + includeMetadata: true, + }), + ), + ), + ); + }, + }); - // ────────────────────────────────────────────────── - // Tool 2: TaskList - // ────────────────────────────────────────────────── + // ────────────────────────────────────────────────── + // Tool 2: TaskList + // ────────────────────────────────────────────────── - pi.registerTool({ - name: "TaskList", - label: "TaskList", - description: `List all tasks in a compact one-line format with one primary state per row. 
Proof details live in TaskGet and /lgtm.`, - parameters: Type.Object({}), + pi.registerTool({ + name: "TaskList", + label: "TaskList", + description: `List all tasks in a compact one-line format with one primary state per row. Proof details live in TaskGet and /lgtm.`, + parameters: Type.Object({}), - execute(_toolCallId, _params, _signal, _onUpdate, _ctx) { - const tasks = store.list(); - if (tasks.length === 0) return Promise.resolve(textResult("No tasks found")); + execute(_toolCallId, _params, _signal, _onUpdate, _ctx) { + const tasks = store.list(); + if (tasks.length === 0) + return Promise.resolve(textResult("No tasks found")); - const counts = { completed: 0, in_progress: 0, pending: 0 }; - for (const task of tasks) counts[getDisplayStatus(task)]++; + const counts = { completed: 0, in_progress: 0, pending: 0 }; + for (const task of tasks) counts[getDisplayStatus(task)]++; - const parts: string[] = []; - if (counts.completed > 0) parts.push(`${counts.completed} done`); - if (counts.in_progress > 0) parts.push(`${counts.in_progress} in progress`); - if (counts.pending > 0) parts.push(`${counts.pending} open`); + const visibleTasks = tasks.filter((task) => task.status !== "completed"); - const statusIcon = (task: typeof tasks[number]) => { - if (task.status === "completed") return "✔"; - if (task.status === "in_progress") return "◼"; - return "◻"; - }; + const parts: string[] = []; + if (counts.completed > 0) parts.push(`${counts.completed} done hidden`); + if (counts.in_progress > 0) + parts.push(`${counts.in_progress} in progress`); + if (counts.pending > 0) parts.push(`${counts.pending} open`); - const renderTask = (task: typeof tasks[number]) => { - const parent = task.parentId ? 
` › subtask of #${task.parentId}` : ""; - let blocked = ""; - if (task.blockedBy.length > 0) { - const openBlockers = task.blockedBy.filter(bid => { - const blocker = store.get(bid); - return blocker && blocker.status !== "completed"; - }); - if (openBlockers.length > 0) blocked = ` › blocked by ${openBlockers.map(id => "#" + id).join(", ")}`; - } - const subject = task.status === "completed" ? `${task.subject}` : task.subject; - return ` ${statusIcon(task)} #${task.id} ${subject}${parent}${blocked}`; - }; + const statusIcon = (task: (typeof tasks)[number]) => { + if (task.status === "in_progress") return "◼"; + return "◻"; + }; - const lines = [ - `● ${tasks.length} tasks (${parts.join(", ")})`, - ...tasks.sort((a, b) => Number(a.id) - Number(b.id)).map(renderTask), - ]; + const renderTask = (task: (typeof tasks)[number]) => { + const parent = task.parentId ? ` › subtask of #${task.parentId}` : ""; + let blocked = ""; + if (task.blockedBy.length > 0) { + const openBlockers = task.blockedBy.filter((bid) => { + const blocker = store.get(bid); + return blocker && blocker.status !== "completed"; + }); + if (openBlockers.length > 0) + blocked = ` › blocked by ${openBlockers.map((id) => "#" + id).join(", ")}`; + } + return ` ${statusIcon(task)} #${task.id} ${task.subject}${parent}${blocked}`; + }; - return Promise.resolve(textResult(lines.join("\n"))); - }, - }); + const lines = [`● ${tasks.length} goals (${parts.join(", ")})`]; + if (visibleTasks.length === 0) { + lines.push(" No open tasks. 
Completed tasks are hidden by default."); + } else { + lines.push( + ...visibleTasks + .sort((a, b) => Number(a.id) - Number(b.id)) + .map(renderTask), + ); + } - // ────────────────────────────────────────────────── - // Tool 3: TaskGet - // ────────────────────────────────────────────────── + return Promise.resolve(textResult(lines.join("\n"))); + }, + }); - pi.registerTool({ - name: "TaskGet", - label: "TaskGet", - description: `Get full proof-gated task details including done_criterion, evidence packet, and reviewer state.`, - parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to retrieve" }), - }), + // ────────────────────────────────────────────────── + // Tool 3: TaskGet + // ────────────────────────────────────────────────── - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const task = store.get(params.taskId); - if (!task) return Promise.resolve(textResult("Task not found")); + pi.registerTool({ + name: "TaskGet", + label: "TaskGet", + description: `Get full proof-gated task details including done_criterion, evidence packet, and reviewer state.`, + parameters: Type.Object({ + taskId: Type.String({ description: "Task ID to retrieve" }), + }), - const desc = task.description.replace(/\\n/g, "\n"); - const robotReviews = getRobotReviews(task); - const completionMode: CompletionMode = getCompletionMode(task); - const reviewState: ReviewState = getReviewState(task); - const currentEvidence = getCurrentEvidenceIteration(task); - const history = getEvidenceHistory(task); - const lines: string[] = [ - `Task #${task.id}: ${task.subject}`, - `Status: ${task.status} ${getReviewBadges(task)}`, - `Completion mode: ${completionMode}`, - `Review state: ${reviewState}`, - `Gate status: ${getGateStatus(task)}`, - `Done criterion: ${task.done_criterion}`, - `Description: ${desc}`, - ]; - lines.push(`Evidence iterations: total=${getEvidenceIterationCount(task)}, current=${currentEvidence ? 
currentEvidence.iteration : 0}, superseded=${history.length}`); - lines.push(`Task kind: ${task.parentId ? `subtask of #${task.parentId}` : "top-level proof goal"}`); - if (robotReviews.length > 0) { - const latest = robotReviews[robotReviews.length - 1]; - lines.push(`Robot reviews on current evidence: ${robotReviews.length} (latest: accepted=${latest.accepted ? "yes" : "no"}, complete=${latest.evidence_complete ? "yes" : "no"}, convincing=${latest.evidence_convincing ? "yes" : "no"})`); - } - lines.push(renderEvidencePacket(task)); - const automaticReviewFailure = renderAutomaticReviewFailure(task); - if (automaticReviewFailure) lines.push(automaticReviewFailure); - if (robotReviews.length > 0) { - lines.push(`### Robot reviews\n${robotReviews.map(formatRobotReview).join("\n\n")}`); - } - const historySummary = formatHistorySummary(task); - if (historySummary) lines.push(historySummary); - if (task.blockedBy.length > 0) { - const openBlockers = task.blockedBy.filter(bid => { - const blocker = store.get(bid); - return blocker && blocker.status !== "completed"; - }); - if (openBlockers.length > 0) lines.push(`Blocked by: ${openBlockers.map(id => "#" + id).join(", ")}`); - } - if (task.blocks.length > 0) lines.push(`Blocks: ${task.blocks.map(id => "#" + id).join(", ")}`); - const metadata = getNonReviewMetadata(task); - if (Object.keys(metadata).length > 0) lines.push(`Metadata: ${JSON.stringify(metadata)}`); + execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const task = store.get(params.taskId); + if (!task) return Promise.resolve(textResult("Task not found")); - return Promise.resolve(textResult(lines.join("\n\n"))); - }, - }); + const desc = task.description.replace(/\\n/g, "\n"); + const robotReviews = getRobotReviews(task); + const completionMode: CompletionMode = getCompletionMode(task); + const reviewState: ReviewState = getReviewState(task); + const currentEvidence = getCurrentEvidenceIteration(task); + const history = getEvidenceHistory(task); + 
const lines: string[] = [ + `Task #${task.id}: ${task.subject}`, + `Status: ${task.status}`, + `Completion mode: ${completionMode}`, + `Review state: ${reviewState}`, + `Gate status: ${getGateStatus(task)}`, + `Done criterion: ${task.done_criterion}`, + `Description: ${desc}`, + ]; + lines.push( + `Evidence iterations: total=${getEvidenceIterationCount(task)}, current=${currentEvidence ? currentEvidence.iteration : 0}, superseded=${history.length}`, + ); + lines.push( + `Task kind: ${task.parentId ? `subtask of #${task.parentId}` : "top-level proof goal"}`, + ); + if (robotReviews.length > 0) { + const latest = robotReviews[robotReviews.length - 1]; + lines.push( + `Robot reviews on current evidence: ${robotReviews.length} (latest: accepted=${latest.accepted ? "yes" : "no"}, complete=${latest.evidence_complete ? "yes" : "no"}, convincing=${latest.evidence_convincing ? "yes" : "no"})`, + ); + } + lines.push(renderEvidencePacket(task)); + const automaticReviewFailure = renderAutomaticReviewFailure(task); + if (automaticReviewFailure) lines.push(automaticReviewFailure); + if (robotReviews.length > 0) { + lines.push( + `### Robot reviews\n${robotReviews.map(renderCompactRobotReview).join("\n\n")}`, + ); + } + const historySummary = formatHistorySummary(task); + if (historySummary) lines.push(historySummary); + if (task.blockedBy.length > 0) { + const openBlockers = task.blockedBy.filter((bid) => { + const blocker = store.get(bid); + return blocker && blocker.status !== "completed"; + }); + if (openBlockers.length > 0) + lines.push( + `Blocked by: ${openBlockers.map((id) => "#" + id).join(", ")}`, + ); + } + if (task.blocks.length > 0) + lines.push(`Blocks: ${task.blocks.map((id) => "#" + id).join(", ")}`); + const metadata = getNonReviewMetadata(task); + if (Object.keys(metadata).length > 0) + lines.push(`Metadata: ${JSON.stringify(metadata)}`); - // ────────────────────────────────────────────────── - // Tool 4: TaskUpdate - // 
────────────────────────────────────────────────── + return Promise.resolve(textResult(lines.join("\n\n"))); + }, + }); - pi.registerTool({ - name: "TaskUpdate", - label: "TaskUpdate", - description: `Update task fields or status. + // ────────────────────────────────────────────────── + // Tool 4: TaskUpdate + // ────────────────────────────────────────────────── + + pi.registerTool({ + name: "TaskUpdate", + label: "TaskUpdate", + description: `Update task fields or status. Two-tier model: - Subtasks can be marked completed directly here. - Top-level tasks are proof goals: TaskUpdate(status=completed) is rejected. Use TaskClaimDone so the failure-mode/evidence form and automatic reviewer run.`, - parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to update" }), - status: Type.Optional(Type.Unsafe<"pending" | "in_progress" | "completed" | "deleted">({ - anyOf: [ - { type: "string", enum: ["pending", "in_progress", "completed"] }, - { type: "string", const: "deleted" }, - ], - description: "New status. 
Setting completed is allowed for subtasks only; top-level tasks must complete via TaskClaimDone.", - })), - subject: Type.Optional(Type.String({ description: "Brief task title" })), - description: Type.Optional(Type.String({ description: "Detailed description" })), - done_criterion: Type.Optional(Type.String({ description: "Falsifiable observation distinguishing done from fail" })), - progress_label: Type.Optional(Type.String({ description: "What the agent is currently doing" })), - metadata: Type.Optional(Type.Record(Type.String(), Type.Any())), - add_blocks: Type.Optional(Type.Array(Type.String(), { description: "Task IDs this task blocks" })), - add_blocked_by: Type.Optional(Type.Array(Type.String(), { description: "Task IDs that block this task" })), - }), + parameters: Type.Object({ + taskId: Type.String({ description: "Task ID to update" }), + status: Type.Optional( + Type.Unsafe<"pending" | "in_progress" | "completed" | "deleted">({ + anyOf: [ + { type: "string", enum: ["pending", "in_progress", "completed"] }, + { type: "string", const: "deleted" }, + ], + description: + "New status. 
Setting completed is allowed for subtasks only; top-level tasks must complete via TaskClaimDone.", + }), + ), + subject: Type.Optional(Type.String({ description: "Brief task title" })), + description: Type.Optional( + Type.String({ description: "Detailed description" }), + ), + done_criterion: Type.Optional( + Type.String({ + description: "Falsifiable observation distinguishing done from fail", + }), + ), + progress_label: Type.Optional( + Type.String({ description: "What the agent is currently doing" }), + ), + metadata: Type.Optional(Type.Record(Type.String(), Type.Any())), + add_blocks: Type.Optional( + Type.Array(Type.String(), { description: "Task IDs this task blocks" }), + ), + add_blocked_by: Type.Optional( + Type.Array(Type.String(), { + description: "Task IDs that block this task", + }), + ), + }), - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const metadataError = assertNoReservedMetadata(params.metadata); - if (metadataError) return Promise.resolve(textResult(metadataError)); + execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const metadataError = assertNoReservedMetadata(params.metadata); + if (metadataError) return Promise.resolve(textResult(metadataError)); - const { taskId, ...fields } = params; - let task: any, changedFields: string[], warnings: string[]; - try { - ({ task, changedFields, warnings } = store.update(taskId, fields)); - } catch (err: any) { - return Promise.resolve(textResult(err.message)); - } + const { taskId, ...fields } = params; + const currentTask = store.get(taskId); + const before = currentTask + ? 
(JSON.parse(JSON.stringify(currentTask)) as Task) + : undefined; + let task: any, changedFields: string[], warnings: string[]; + try { + ({ task, changedFields, warnings } = store.update(taskId, fields)); + } catch (err: any) { + return Promise.resolve(textResult(err.message)); + } - if (changedFields.length === 0 && !task) { - return Promise.resolve(textResult(`Task #${taskId} not found`)); - } + if (changedFields.length === 0 && !task) { + return Promise.resolve(textResult(`Task #${taskId} not found`)); + } - if (fields.status === "in_progress") { - widget.setActiveTask(taskId); - autoClear.resetBatchCountdown(); - } else if (fields.status === "pending") { - autoClear.resetBatchCountdown(); - } else if (fields.status === "completed") { - widget.setActiveTask(taskId, false); - autoClear.trackCompletion(taskId, currentTurn); - } else if (fields.status === "deleted") { - widget.setActiveTask(taskId, false); - warnings.push("Task deleted via agent tool. Use /tasks to confirm or undo. Deleting tasks should be reserved for dismissed or irrelevant work."); - } + if (fields.status === "in_progress") { + widget.setActiveTask(taskId); + autoClear.resetBatchCountdown(); + } else if (fields.status === "pending") { + autoClear.resetBatchCountdown(); + } else if (fields.status === "completed") { + widget.setActiveTask(taskId, false); + autoClear.trackCompletion(taskId, currentTurn); + } else if (fields.status === "deleted") { + widget.setActiveTask(taskId, false); + warnings.push( + "Task deleted via agent tool. Use /tasks to confirm or undo. Deleting tasks should be reserved for dismissed or irrelevant work.", + ); + } - widget.update(); - let msg = `Updated task #${taskId}: ${changedFields.join(", ")}`; - if (warnings.length > 0) msg += ` (warning: ${warnings.join("; ")})`; - return Promise.resolve(textResult(msg)); - }, - }); + widget.update(); + const warningBlock = + warnings.length > 0 + ? 
`\n\n### Warnings\n- ${warnings.join("\n- ")}` + : ""; + if (!task && fields.status === "deleted" && before) { + return Promise.resolve( + textResult( + [ + `## TaskUpdate -> Task #${before.id}: ${before.subject}`, + "Status: deleted", + "### Metadata", + `- Completion mode: ${getCompletionMode(before)}`, + "- Review state: deleted", + "- Updated fields: deleted", + "### Changes", + "- task removed from the store", + warningBlock.trim(), + ] + .filter(Boolean) + .join("\n\n"), + ), + ); + } + const summary = renderTaskUpdateSummary( + before, + task, + changedFields, + fields.metadata, + ); + return Promise.resolve( + textResult( + renderTaskToolResult( + "TaskUpdate", + task, + `${summary}${warningBlock}`, + { updatedFields: changedFields }, + ), + ), + ); + }, + }); - // ────────────────────────────────────────────────── - // Tool 5: TaskClaimDone - // ────────────────────────────────────────────────── + // ────────────────────────────────────────────────── + // Tool 5: TaskClaimDone + // ────────────────────────────────────────────────── - pi.registerTool({ - name: "TaskClaimDone", - label: "TaskClaimDone", - description: `Claim that a top-level task meets its done_criterion. + pi.registerTool({ + name: "TaskClaimDone", + label: "TaskClaimDone", + description: `Claim that a top-level task meets its done_criterion. Forces structured thinking about failure modes and cheap evidence. All text fields required. Accepted automatic review completes the task. Rejected review leaves it open with guidance. Reviewer infrastructure failure is logged but does not block autonomy. @@ -1152,496 +1801,697 @@ Do NOT summarize or interpret. Paste literal command output, exact log lines, ma - **commands**: Optional first-class command records for the evidence package - **evidence_paths / falsification_paths**: Optional local artifact paths. The tool stores absolute path, sha256, and byte size for auditability. 
- **supersede_reason**: Optional reason when this submission replaces an older one on the same task`, - parameters: Type.Object({ - taskId: Type.String({ description: "Top-level task ID to claim done" }), - evidence: Type.String({ description: "Verbatim auditable proof: literal command output, exact log lines, markdown block quotes, table rows, URLs. NOT summaries or interpretations. 'I ran X and got Y' is not evidence -- paste the actual output of X. A human must verify from this alone without re-running. (One short paragraph is fine; verbatim matters more than length.)" }), - failure_likely: Type.String({ description: "Most likely way this could be wrong despite evidence. One short sentence preferred — pick the top one, not a list." }), - failure_sneaky: Type.String({ description: "Subtle/sneaky failure: looks like success superficially, corrupts silently, or only breaks at scale/time/edge case. One short sentence preferred." }), - failure_unknown: Type.String({ description: "What unknown or untested failure class could remain even if this evidence is true. One short sentence preferred." }), - falsification_test: Type.String({ description: "What you ran and the literal output you got. Include verbatim command + output, not 'it worked'. State why that output could not occur if a failure mode were real. Brevity is fine; the verbatim output is what counts." }), - evidence_reasoning: Type.String({ description: "Why this evidence cheaply distinguishes done-criterion success from the likely/subtle/unknown failures." }), - verification_hints: Type.Array(Type.String(), { description: "Where to look, with specific content quoted (not bare paths or counts). E.g. 'src/loss.py:45-60 shows grad_norm=0.001'. One or two short hints is enough." }), - remaining_uncertainty: Type.String({ description: "What's NOT tested, known limitations, deferred edges. One short sentence preferred. If you can't articulate uncertainty, you haven't thought hard enough." 
}), - commands: Type.Optional(Type.Array(Type.Object({ - cmd: Type.String({ description: "Exact command that was run" }), - exit_code: Type.Number({ description: "Process exit code" }), - stdout_path: Type.Optional(Type.String({ description: "Optional path to captured stdout" })), - stderr_path: Type.Optional(Type.String({ description: "Optional path to captured stderr" })), - }))), - evidence_paths: Type.Optional(Type.Array(Type.String(), { description: "Optional local artifact paths backing the evidence. Stored as absolute path + sha256 + byte size." })), - falsification_paths: Type.Optional(Type.Array(Type.String(), { description: "Optional local artifact paths backing the falsification test. Stored as absolute path + sha256 + byte size." })), - supersede_reason: Type.Optional(Type.String({ description: "Why this evidence replaces an older submission on the same task." })), - }), + parameters: Type.Object({ + taskId: Type.String({ description: "Top-level task ID to claim done" }), + evidence: Type.String({ + description: + "Verbatim auditable proof: literal command output, exact log lines, markdown block quotes, table rows, URLs. NOT summaries or interpretations. 'I ran X and got Y' is not evidence -- paste the actual output of X. A human must verify from this alone without re-running. (One short paragraph is fine; verbatim matters more than length.)", + }), + failure_likely: Type.String({ + description: + "Most likely way this could be wrong despite evidence. One short sentence preferred — pick the top one, not a list.", + }), + failure_sneaky: Type.String({ + description: + "Subtle/sneaky failure: looks like success superficially, corrupts silently, or only breaks at scale/time/edge case. One short sentence preferred.", + }), + failure_unknown: Type.String({ + description: + "What unknown or untested failure class could remain even if this evidence is true. 
One short sentence preferred.", + }), + falsification_test: Type.String({ + description: + "What you ran and the literal output you got. Include verbatim command + output, not 'it worked'. State why that output could not occur if a failure mode were real. Brevity is fine; the verbatim output is what counts.", + }), + evidence_reasoning: Type.String({ + description: + "Why this evidence cheaply distinguishes done-criterion success from the likely/subtle/unknown failures.", + }), + verification_hints: Type.Array(Type.String(), { + description: + "Where to look, with specific content quoted (not bare paths or counts). E.g. 'src/loss.py:45-60 shows grad_norm=0.001'. One or two short hints is enough.", + }), + remaining_uncertainty: Type.String({ + description: + "What's NOT tested, known limitations, deferred edges. One short sentence preferred. If you can't articulate uncertainty, you haven't thought hard enough.", + }), + commands: Type.Optional( + Type.Array( + Type.Object({ + cmd: Type.String({ description: "Exact command that was run" }), + exit_code: Type.Number({ description: "Process exit code" }), + stdout_path: Type.Optional( + Type.String({ description: "Optional path to captured stdout" }), + ), + stderr_path: Type.Optional( + Type.String({ description: "Optional path to captured stderr" }), + ), + }), + ), + ), + evidence_paths: Type.Optional( + Type.Array(Type.String(), { + description: + "Optional local artifact paths backing the evidence. Stored as absolute path + sha256 + byte size.", + }), + ), + falsification_paths: Type.Optional( + Type.Array(Type.String(), { + description: + "Optional local artifact paths backing the falsification test. 
Stored as absolute path + sha256 + byte size.", + }), + ), + supersede_reason: Type.Optional( + Type.String({ + description: + "Why this evidence replaces an older submission on the same task.", + }), + ), + }), - async execute(_toolCallId, params, signal, _onUpdate, ctx) { - const task = store.get(params.taskId); - if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`)); - if (task.status === "completed") return Promise.resolve(textResult(`Task #${params.taskId} already completed`)); + async execute(_toolCallId, params, signal, _onUpdate, ctx) { + const task = store.get(params.taskId); + if (!task) + return Promise.resolve(textResult(`Task #${params.taskId} not found`)); + if (task.status === "completed") + return Promise.resolve( + textResult(`Task #${params.taskId} already completed`), + ); - // verification_hints are descriptions, not validated file paths + // verification_hints are descriptions, not validated file paths - if (task.parentId) return Promise.resolve(textResult(`Task #${params.taskId} is a subtask. Use TaskUpdate(status=completed) for subtasks; TaskClaimDone is for top-level proof goals.`)); - const blankField = requiredTextError(params, ["evidence", "failure_likely", "failure_sneaky", "failure_unknown", "falsification_test", "evidence_reasoning", "remaining_uncertainty"]); - if (blankField) return Promise.resolve(textResult(blankField)); - if (!params.verification_hints.some((hint: string) => hint.trim().length > 0)) { - return Promise.resolve(textResult("verification_hints must include at least one non-blank hint.")); - } + if (task.parentId) + return Promise.resolve( + textResult( + `Task #${params.taskId} is a subtask. 
Use TaskUpdate(status=completed) for subtasks; TaskClaimDone is for top-level proof goals.`, + ), + ); + const blankField = requiredTextError(params, [ + "evidence", + "failure_likely", + "failure_sneaky", + "failure_unknown", + "falsification_test", + "evidence_reasoning", + "remaining_uncertainty", + ]); + if (blankField) return Promise.resolve(textResult(blankField)); + if ( + !params.verification_hints.some( + (hint: string) => hint.trim().length > 0, + ) + ) { + return Promise.resolve( + textResult( + "verification_hints must include at least one non-blank hint.", + ), + ); + } - store.update(params.taskId, { - metadata: { - ...archiveCurrentEvidence(task, params.supersede_reason ?? "replaced by newer proof claim"), - ...clearCurrentEvidenceMetadata(), - ...clearRobotReviewMetadata(), - lgtm_evidence: params.evidence, - lgtm_failure_likely: params.failure_likely, - lgtm_failure_sneaky: params.failure_sneaky, - lgtm_failure_unknown: params.failure_unknown, - lgtm_falsification_test: params.falsification_test, - lgtm_evidence_reasoning: params.evidence_reasoning, - lgtm_verification_hints: params.verification_hints, - lgtm_remaining_uncertainty: params.remaining_uncertainty, - lgtm_submitted_at: new Date().toISOString(), - lgtm_commands: params.commands ?? 
[], - lgtm_evidence_artifacts: buildArtifactRecords(params.evidence_paths), - lgtm_falsification_artifacts: buildArtifactRecords(params.falsification_paths), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - let robotReviewNote = ""; - const refreshedTask = store.get(params.taskId); - if (!refreshedTask) return textResult(`Task #${params.taskId} not found after evidence update`); - try { - const { review, command } = await runAutomaticRobotReview(refreshedTask, signal, getCurrentModelRef(ctx.model)); - store.update(params.taskId, { - metadata: { - ...appendRobotReviewMetadata(refreshedTask, review), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - if (shouldCompleteAfterAcceptedReview(store.get(params.taskId) ?? refreshedTask, review.accepted)) { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - const storedReview = getLatestRobotReview(store.get(params.taskId) ?? refreshedTask); - robotReviewNote = - `\n\n### Automatic robot review\n` + - `Reviewer command: ${command}\n\n` + - `${storedReview ? formatRobotReview(storedReview) : formatRobotReview({ ...review, iteration: 1 })}`; - if (!review.accepted) { - robotReviewNote += `\n\nResult: task remains open until the evidence is strengthened and reviewed again.`; - } - } catch (err: any) { - store.update(params.taskId, { - metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput), - }); - const taskAfterFailure = store.get(params.taskId) ?? refreshedTask; - if (!taskAfterFailure.parentId) { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - robotReviewNote = - `\n\n### Automatic robot review\n` + - `Reviewer unavailable: ${err.message}\n` + - `Autonomy continued without blocking completion.` + - (typeof err.rawOutput === "string" && err.rawOutput.trim() - ? 
`\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}` - : ""); - } - widget.update(); + store.update(params.taskId, { + metadata: { + ...archiveCurrentEvidence( + task, + params.supersede_reason ?? "replaced by newer proof claim", + ), + ...clearCurrentEvidenceMetadata(), + ...clearRobotReviewMetadata(), + lgtm_evidence: params.evidence, + lgtm_failure_likely: params.failure_likely, + lgtm_failure_sneaky: params.failure_sneaky, + lgtm_failure_unknown: params.failure_unknown, + lgtm_falsification_test: params.falsification_test, + lgtm_evidence_reasoning: params.evidence_reasoning, + lgtm_verification_hints: params.verification_hints, + lgtm_remaining_uncertainty: params.remaining_uncertainty, + lgtm_submitted_at: new Date().toISOString(), + lgtm_commands: params.commands ?? [], + lgtm_evidence_artifacts: buildArtifactRecords(params.evidence_paths), + lgtm_falsification_artifacts: buildArtifactRecords( + params.falsification_paths, + ), + ...clearAutomaticReviewFailureMetadata(), + }, + }); + let robotReviewNote = ""; + const refreshedTask = store.get(params.taskId); + if (!refreshedTask) + return textResult( + `Task #${params.taskId} not found after evidence update`, + ); + try { + const { review, command } = await runAutomaticRobotReview( + refreshedTask, + signal, + getCurrentModelRef(ctx.model), + ); + store.update(params.taskId, { + metadata: { + ...appendRobotReviewMetadata(refreshedTask, review), + ...clearAutomaticReviewFailureMetadata(), + }, + }); + if ( + shouldCompleteAfterAcceptedReview( + store.get(params.taskId) ?? refreshedTask, + review.accepted, + ) + ) { + store.complete(params.taskId); + autoClear.trackCompletion(params.taskId, currentTurn); + widget.setActiveTask(params.taskId, false); + } + const storedReview = getLatestRobotReview( + store.get(params.taskId) ?? refreshedTask, + ); + robotReviewNote = + `\n\n### Automatic robot review\n` + + `Reviewer command: ${command}\n\n` + + `${storedReview ? 
renderCompactRobotReview(storedReview) : renderCompactRobotReview({ ...review, iteration: 1 })}`; + if (!review.accepted) { + robotReviewNote += `\n\nResult: task remains open until the evidence is strengthened and reviewed again.`; + } + } catch (err: any) { + store.update(params.taskId, { + metadata: getAutomaticReviewFailureMetadata( + err.message, + err.rawOutput, + ), + }); + const taskAfterFailure = store.get(params.taskId) ?? refreshedTask; + if (!taskAfterFailure.parentId) { + store.complete(params.taskId); + autoClear.trackCompletion(params.taskId, currentTurn); + widget.setActiveTask(params.taskId, false); + } + robotReviewNote = + `\n\n### Automatic robot review\n` + + `Reviewer unavailable: ${err.message}\n` + + `Autonomy continued without blocking completion.` + + (typeof err.rawOutput === "string" && err.rawOutput.trim() + ? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim(), { maxLines: MAX_INLINE_PROOF_LINES })}` + : ""); + } + widget.update(); - const updatedTask = store.get(task.id) ?? task; - const result = - `${renderProofLog(updatedTask)}` + - robotReviewNote + - `\n\n---\n` + - `Gate status: ${getGateStatus(updatedTask)}\n\n` + - `Self-check: if a skeptical reviewer would still ask "but what about...", call TaskClaimDone again with stronger proof.`; + const updatedTask = store.get(task.id) ?? task; + const result = renderTaskToolResult( + "TaskClaimDone", + updatedTask, + `${renderCurrentProofSummary(updatedTask)}` + + robotReviewNote + + `\n\nSelf-check: if a skeptical reviewer would still ask "but what about...", call TaskClaimDone again with stronger proof.`, + ); - return textResult(result); - }, - }); + return textResult(result); + }, + }); - pi.registerTool({ - name: "lgtm_supersede", - label: "lgtm_supersede", - description: `Mark the current proof package as superseded without completing the task. 
+ pi.registerTool({ + name: "lgtm_supersede", + label: "lgtm_supersede", + description: `Mark the current proof package as superseded without completing the task. Use this when a prior claim is stale or wrong and reviewers should stop treating it as the current evidence. The current evidence, robot reviews, and reviewer-failure context are archived into history with your reason. Submit a fresh TaskClaimDone claim to complete the task.`, - parameters: Type.Object({ - taskId: Type.String({ description: "Task ID whose current evidence should be superseded" }), - reason: Type.String({ description: "Why the current evidence is stale or replaced" }), - }), + parameters: Type.Object({ + taskId: Type.String({ + description: "Task ID whose current evidence should be superseded", + }), + reason: Type.String({ + description: "Why the current evidence is stale or replaced", + }), + }), - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const task = store.get(params.taskId); - if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`)); - if (!getCurrentEvidenceIteration(task)) { - return Promise.resolve(textResult(`Task #${params.taskId} has no current evidence to supersede.`)); - } + execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const task = store.get(params.taskId); + if (!task) + return Promise.resolve(textResult(`Task #${params.taskId} not found`)); + if (!getCurrentEvidenceIteration(task)) { + return Promise.resolve( + textResult( + `Task #${params.taskId} has no current evidence to supersede.`, + ), + ); + } - store.update(params.taskId, { - metadata: { - ...archiveCurrentEvidence(task, params.reason), - ...clearCurrentEvidenceMetadata(), - ...clearRobotReviewMetadata(), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - widget.update(); + store.update(params.taskId, { + metadata: { + ...archiveCurrentEvidence(task, params.reason), + ...clearCurrentEvidenceMetadata(), + ...clearRobotReviewMetadata(), + 
...clearAutomaticReviewFailureMetadata(), + }, + }); + widget.update(); - const updatedTask = store.get(params.taskId) ?? task; - return Promise.resolve(textResult( - `## Evidence superseded for task #${task.id}: ${task.subject}\n` + - `Reason: ${params.reason}\n\n` + - `Review state: ${getReviewState(updatedTask)}\n` + - `Gate status: ${getGateStatus(updatedTask)}\n\n` + - `${formatHistorySummary(updatedTask) ?? "No evidence history found."}`, - )); - }, - }); + const updatedTask = store.get(params.taskId) ?? task; + return Promise.resolve( + textResult( + renderTaskToolResult( + "lgtm_supersede", + updatedTask, + `Reason: ${params.reason}\n\n` + + `${formatHistorySummary(updatedTask) ?? "No evidence history found."}`, + ), + ), + ); + }, + }); - pi.registerTool({ - name: "robot_review_ask", - label: "robot_review_ask", - description: `Attach fresh-perspective robot review observations to a task. + pi.registerTool({ + name: "robot_review_ask", + label: "robot_review_ask", + description: `Attach fresh-perspective robot review observations to a task. Use this from a separate subagent or model when possible, ideally from a different model family/class than the implementation agent. Your role is VALIDATION, not flaw-finding. Sanity-check that the evidence addresses the done criterion. Observations, concerns, and suggestions are welcome, but the gate is only the rubric items. This records an independent review but does not itself complete the task. 
Use TaskClaimDone or robot_review_run for the automatic completion gate.`, - parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to attach robot review to" }), - reviewer: Type.String({ description: "Reviewer identity, model family, or class" }), - scope: Type.String({ description: "What the reviewer examined" }), - observations: Type.Array(Type.String(), { - minItems: 1, - description: "Concrete things noticed in the artifacts.", - }), - concerns: Type.Optional(Type.Array(Type.String(), { description: "Why the current evidence may not yet prove success." })), - suggestions: Type.Optional(Type.Array(Type.String(), { description: "What the agent should do next if the evidence is not yet enough." })), - blind_spots: Type.String({ description: "What the reviewer did not inspect or could not verify" }), - evidence_complete: Type.Boolean({ description: "Whether the supplied evidence covers the claimed done criterion." }), - evidence_convincing: Type.Boolean({ description: "Whether the supplied evidence would convince a skeptical reviewer." }), - accepted: Type.Optional(Type.Boolean({ description: "Overall review decision. Defaults to evidence_complete && evidence_convincing." })), - missing_evidence: Type.Optional(Type.Array(Type.String(), { description: "Concrete missing checks, artifacts, or observations needed before completion." 
})), - }), + parameters: Type.Object({ + taskId: Type.String({ description: "Task ID to attach robot review to" }), + reviewer: Type.String({ + description: "Reviewer identity, model family, or class", + }), + scope: Type.String({ description: "What the reviewer examined" }), + observations: Type.Array(Type.String(), { + minItems: 1, + description: "Concrete things noticed in the artifacts.", + }), + concerns: Type.Optional( + Type.Array(Type.String(), { + description: "Why the current evidence may not yet prove success.", + }), + ), + suggestions: Type.Optional( + Type.Array(Type.String(), { + description: + "What the agent should do next if the evidence is not yet enough.", + }), + ), + blind_spots: Type.String({ + description: "What the reviewer did not inspect or could not verify", + }), + evidence_complete: Type.Boolean({ + description: + "Whether the supplied evidence covers the claimed done criterion.", + }), + evidence_convincing: Type.Boolean({ + description: + "Whether the supplied evidence would convince a skeptical reviewer.", + }), + accepted: Type.Optional( + Type.Boolean({ + description: + "Overall review decision. 
Defaults to evidence_complete && evidence_convincing.", + }), + ), + missing_evidence: Type.Optional( + Type.Array(Type.String(), { + description: + "Concrete missing checks, artifacts, or observations needed before completion.", + }), + ), + }), - execute(_toolCallId, params, _signal, _onUpdate, _ctx) { - const task = store.get(params.taskId); - if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`)); - if (task.status === "completed") return Promise.resolve(textResult(`Task #${params.taskId} already completed`)); + execute(_toolCallId, params, _signal, _onUpdate, _ctx) { + const task = store.get(params.taskId); + if (!task) + return Promise.resolve(textResult(`Task #${params.taskId} not found`)); + if (task.status === "completed") + return Promise.resolve( + textResult(`Task #${params.taskId} already completed`), + ); - const accepted = params.accepted ?? (params.evidence_complete && params.evidence_convincing); - store.update(params.taskId, { - metadata: { - ...appendRobotReviewMetadata(task, { - reviewer: params.reviewer, - scope: params.scope, - observations: params.observations, - concerns: params.concerns ?? [], - suggestions: params.suggestions ?? [], - blind_spots: params.blind_spots, - accepted, - evidence_complete: params.evidence_complete, - evidence_convincing: params.evidence_convincing, - missing_evidence: params.missing_evidence ?? [], - submitted_at: new Date().toISOString(), - mode: "manual", - }), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - widget.update(); + const accepted = + params.accepted ?? + (params.evidence_complete && params.evidence_convincing); + store.update(params.taskId, { + metadata: { + ...appendRobotReviewMetadata(task, { + reviewer: params.reviewer, + scope: params.scope, + observations: params.observations, + concerns: params.concerns ?? [], + suggestions: params.suggestions ?? 
[], + blind_spots: params.blind_spots, + accepted, + evidence_complete: params.evidence_complete, + evidence_convincing: params.evidence_convincing, + missing_evidence: params.missing_evidence ?? [], + submitted_at: new Date().toISOString(), + mode: "manual", + }), + ...clearAutomaticReviewFailureMetadata(), + }, + }); + widget.update(); - const result = - `## Robot review attached to task #${task.id}: ${task.subject}\n` + - `Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` + - `Reviewer: ${params.reviewer}\n` + - `Scope: ${params.scope}\n\n` + - `Accepted: ${accepted ? "yes" : "no"}\n` + - `Evidence complete: ${params.evidence_complete ? "yes" : "no"}\n` + - `Evidence convincing: ${params.evidence_convincing ? "yes" : "no"}\n\n` + - `### Observations\n${params.observations.map(o => `- ${o}`).join("\n")}\n\n` + - `${(params.concerns?.length ?? 0) > 0 ? `### Concerns\n${(params.concerns ?? []).map(item => `- ${item}`).join("\n")}\n\n` : ""}` + - `${(params.suggestions?.length ?? 0) > 0 ? `### Suggestions\n${(params.suggestions ?? []).map(item => `- ${item}`).join("\n")}\n\n` : ""}` + - `${(params.missing_evidence?.length ?? 0) > 0 ? `### Missing evidence\n${(params.missing_evidence ?? []).map(item => `- ${item}`).join("\n")}\n\n` : ""}` + - `### Blind spots\n${params.blind_spots}\n\n` + - `Gate status: ${getGateStatus(store.get(params.taskId) ?? task)}\n\n` + - `🤖 Robot review stored. Manual reviews are advisory; the automatic proof gate runs through TaskClaimDone or robot_review_run.`; + const updatedTask = store.get(params.taskId) ?? task; + const result = renderTaskToolResult( + "robot_review_ask", + updatedTask, + [ + `Iteration: ${getRobotReviews(updatedTask).length}`, + `Reviewer: ${params.reviewer}`, + `Scope: ${params.scope}`, + `Accepted: ${accepted ? "yes" : "no"}`, + `Evidence complete: ${params.evidence_complete ? "yes" : "no"}`, + `Evidence convincing: ${params.evidence_convincing ? 
"yes" : "no"}`, + formatBulletList("Observations", summarizeList(params.observations)), + (params.concerns?.length ?? 0) > 0 + ? formatBulletList("Concerns", summarizeList(params.concerns ?? [])) + : "", + (params.suggestions?.length ?? 0) > 0 + ? formatBulletList( + "Suggestions", + summarizeList(params.suggestions ?? []), + ) + : "", + (params.missing_evidence?.length ?? 0) > 0 + ? formatBulletList( + "Missing evidence", + summarizeList(params.missing_evidence ?? []), + ) + : "", + `### Blind spots\n${params.blind_spots}`, + `Robot review stored. Manual reviews are advisory; the automatic proof gate runs through TaskClaimDone or robot_review_run.`, + ] + .filter(Boolean) + .join("\n\n"), + ); - return Promise.resolve(textResult(result)); - }, - }); + return Promise.resolve(textResult(result)); + }, + }); - pi.registerTool({ - name: "robot_review_run", - label: "robot_review_run", - description: `Run the automatic robot reviewer against the current task evidence using the current session model. + pi.registerTool({ + name: "robot_review_run", + label: "robot_review_run", + description: `Run the automatic robot reviewer against the current task evidence using the current session model. Runs the same Pi-native reviewer stage used automatically by \`TaskClaimDone\`. This appends a new robot-review iteration. If accepted for a top-level proof task, the task completes. If rejected, the task stays open. Reviewer infrastructure failure is logged but does not block autonomy.`, - parameters: Type.Object({ - taskId: Type.String({ description: "Task ID to review" }), - }), + parameters: Type.Object({ + taskId: Type.String({ description: "Task ID to review" }), + }), - async execute(_toolCallId, params, signal, _onUpdate, _ctx) { - const task = store.get(params.taskId); - if (!task) return textResult(`Task #${params.taskId} not found`); - if (!task.metadata?.lgtm_evidence) { - return textResult(`Task #${params.taskId} has no stored evidence yet. 
Call TaskClaimDone first.`); - } + async execute(_toolCallId, params, signal, _onUpdate, _ctx) { + const task = store.get(params.taskId); + if (!task) return textResult(`Task #${params.taskId} not found`); + if (!task.metadata?.lgtm_evidence) { + return textResult( + `Task #${params.taskId} has no stored evidence yet. Call TaskClaimDone first.`, + ); + } - try { - const { review, command } = await runAutomaticRobotReview(task, signal, getCurrentModelRef(_ctx.model)); - store.update(params.taskId, { - metadata: { - ...appendRobotReviewMetadata(task, review), - ...clearAutomaticReviewFailureMetadata(), - }, - }); - const reviewedTask = store.get(params.taskId) ?? task; - if (!reviewedTask.parentId && shouldCompleteAfterAcceptedReview(reviewedTask, review.accepted)) { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - widget.update(); + try { + const { review, command } = await runAutomaticRobotReview( + task, + signal, + getCurrentModelRef(_ctx.model), + ); + store.update(params.taskId, { + metadata: { + ...appendRobotReviewMetadata(task, review), + ...clearAutomaticReviewFailureMetadata(), + }, + }); + const reviewedTask = store.get(params.taskId) ?? task; + if ( + !reviewedTask.parentId && + shouldCompleteAfterAcceptedReview(reviewedTask, review.accepted) + ) { + store.complete(params.taskId); + autoClear.trackCompletion(params.taskId, currentTurn); + widget.setActiveTask(params.taskId, false); + } + widget.update(); - const updatedTask = store.get(params.taskId) ?? task; - const storedReview = getLatestRobotReview(updatedTask); - return textResult( - `${renderProofLog(updatedTask)}\n\n` + - `### Automatic robot review\n` + - `Reviewer command: ${command}\n\n` + - `${storedReview ? 
formatRobotReview(storedReview) : formatRobotReview({ ...review, iteration: 1 })}\n\n` + - `Gate status: ${getGateStatus(updatedTask)}`, - ); - } catch (err: any) { - store.update(params.taskId, { - metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput), - }); - const failedTask = store.get(params.taskId) ?? task; - if (!failedTask.parentId && failedTask.status !== "completed") { - store.complete(params.taskId); - autoClear.trackCompletion(params.taskId, currentTurn); - widget.setActiveTask(params.taskId, false); - } - widget.update(); - const updatedTask = store.get(params.taskId) ?? task; - return textResult( - `${renderProofLog(updatedTask)}\n\n` + - `### Automatic robot review\n` + - `Reviewer unavailable: ${err.message}\n\n` + - `Autonomy continued without blocking completion.\n\n` + - `Gate status: ${getGateStatus(updatedTask)}` + - (typeof err.rawOutput === "string" && err.rawOutput.trim() - ? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}` - : ""), - ); - } - }, - }); + const updatedTask = store.get(params.taskId) ?? task; + const storedReview = getLatestRobotReview(updatedTask); + return textResult( + renderTaskToolResult( + "robot_review_run", + updatedTask, + `${renderCurrentProofSummary(updatedTask)}\n\n` + + `### Automatic robot review\nReviewer command: ${command}` + + `${storedReview ? `\n\n${renderCompactRobotReview(storedReview)}` : `\n\n${renderCompactRobotReview({ ...review, iteration: 1 })}`}`, + ), + ); + } catch (err: any) { + store.update(params.taskId, { + metadata: getAutomaticReviewFailureMetadata( + err.message, + err.rawOutput, + ), + }); + const failedTask = store.get(params.taskId) ?? task; + if (!failedTask.parentId && failedTask.status !== "completed") { + store.complete(params.taskId); + autoClear.trackCompletion(params.taskId, currentTurn); + widget.setActiveTask(params.taskId, false); + } + widget.update(); + const updatedTask = store.get(params.taskId) ?? 
task; + return textResult( + renderTaskToolResult( + "robot_review_run", + updatedTask, + `${renderCurrentProofSummary(updatedTask)}\n\n` + + `### Automatic robot review\nReviewer unavailable: ${err.message}\n\nAutonomy continued without blocking completion.` + + (typeof err.rawOutput === "string" && err.rawOutput.trim() + ? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim(), { maxLines: MAX_INLINE_PROOF_LINES })}` + : ""), + ), + ); + } + }, + }); - // ────────────────────────────────────────────────── - // /tasks command - // ────────────────────────────────────────────────── + // ────────────────────────────────────────────────── + // /tasks command + // ────────────────────────────────────────────────── - pi.registerCommand("tasks", { - description: "Manage tasks — view, create, clear completed", - handler: async (_args: string, ctx: ExtensionCommandContext) => { - const ui = ctx.ui; + pi.registerCommand("tasks", { + description: "Manage goals — view, create, clear completed", + handler: async (_args: string, ctx: ExtensionCommandContext) => { + const ui = ctx.ui; - const mainMenu = async (): Promise => { - const tasks = store.list(); - const taskCount = tasks.length; - const completedCount = tasks.filter(t => t.status === "completed").length; + const mainMenu = async (): Promise => { + const tasks = store.list(); + const taskCount = tasks.length; + const completedCount = tasks.filter( + (t) => t.status === "completed", + ).length; - const choices: string[] = [`View all tasks (${taskCount})`, "Create task"]; - if (completedCount > 0) choices.push(`Clear completed (${completedCount})`); - if (taskCount > 0) choices.push(`Clear all (${taskCount})`); + const choices: string[] = [ + `View all goals (${taskCount})`, + "Create goal", + ]; + if (completedCount > 0) + choices.push(`Clear completed (${completedCount})`); + if (taskCount > 0) choices.push(`Clear all (${taskCount})`); - const choice = await ui.select("Tasks", choices); - if (!choice) 
return; + const choice = await ui.select("Goals", choices); + if (!choice) return; - if (choice.startsWith("View")) await viewTasks(); - else if (choice === "Create task") await createTask(); - else if (choice.startsWith("Clear completed")) { - store.clearCompleted(); - if (taskScope === "session") store.deleteFileIfEmpty(); - widget.update(); - await mainMenu(); - } else if (choice.startsWith("Clear all")) { - store.clearAll(); - if (taskScope === "session") store.deleteFileIfEmpty(); - widget.update(); - await mainMenu(); - } - }; + if (choice.startsWith("View")) await viewTasks(); + else if (choice === "Create goal") await createTask(); + else if (choice.startsWith("Clear completed")) { + store.clearCompleted(); + if (taskScope === "session") store.deleteFileIfEmpty(); + widget.update(); + await mainMenu(); + } else if (choice.startsWith("Clear all")) { + store.clearAll(); + if (taskScope === "session") store.deleteFileIfEmpty(); + widget.update(); + await mainMenu(); + } + }; - const viewTasks = async (): Promise => { - const tasks = store.list(); - if (tasks.length === 0) { - await ui.select("No tasks", ["← Back"]); - return mainMenu(); - } + const viewTasks = async (): Promise => { + const tasks = store.list(); + if (tasks.length === 0) { + await ui.select("No goals", ["← Back"]); + return mainMenu(); + } - const statusIcon = (t: (typeof tasks)[0]) => { - if (t.status === "completed") return "✔"; - if (t.status === "in_progress") return "◼"; - return "◻"; - }; + const statusIcon = (t: (typeof tasks)[0]) => { + if (t.status === "completed") return "done"; + if (t.status === "in_progress") return "◼"; + return "◻"; + }; - const choices = tasks.map(t => `${statusIcon(t)} #${t.id} ${t.subject}`); - choices.push("← Back"); + const choices = tasks.map( + (t) => `${statusIcon(t)} #${t.id} ${t.subject}`, + ); + choices.push("← Back"); - const selected = await ui.select("Tasks", choices); - if (!selected || selected === "← Back") return mainMenu(); + const selected = 
await ui.select("Goals", choices); + if (!selected || selected === "← Back") return mainMenu(); - const match = selected.match(/#(\d+)/); - if (match) await viewTaskDetail(match[1]); - else return viewTasks(); - }; + const match = selected.match(/#(\d+)/); + if (match) await viewTaskDetail(match[1]); + else return viewTasks(); + }; - const viewTaskDetail = async (taskId: string): Promise => { - const task = store.get(taskId); - if (!task) return viewTasks(); + const viewTaskDetail = async (taskId: string): Promise => { + const task = store.get(taskId); + if (!task) return viewTasks(); - const actions: string[] = []; - if (task.status === "pending") actions.push("▸ Start (in_progress)"); - if (task.metadata.lgtm_evidence) { - actions.push(`(type /lgtm ${taskId} to view proof evidence)`); - } - actions.push("✗ Delete"); - actions.push("← Back"); + const actions: string[] = []; + if (task.status === "pending") actions.push("▸ Start (in_progress)"); + if (task.metadata.lgtm_evidence) { + actions.push(`(type /lgtm ${taskId} to view proof evidence)`); + } + actions.push("✗ Delete"); + actions.push("← Back"); - const pendingNote = task.metadata.lgtm_evidence && task.status !== "completed" ? 
`\nProof review: ${getGateStatus(task)}` : ""; - const em = task.metadata; - let evidenceNote = ""; - if (em.lgtm_evidence) { - evidenceNote = `\n\n${renderEvidencePacket(task)}`; - const automaticReviewFailure = renderAutomaticReviewFailure(task); - if (automaticReviewFailure) evidenceNote += `\n\n${automaticReviewFailure}`; - } - let robotNote = ""; - const robotReviews = getRobotReviews(task); - if (robotReviews.length > 0) { - const latest = robotReviews[robotReviews.length - 1]; - const parts = [`\n\nRobot reviews: ${robotReviews.length}`]; - parts.push(formatRobotReview(latest)); - robotNote = parts.join("\n"); - } - const title = `#${task.id} [${task.status}] ${task.subject}\nDone: ${task.done_criterion}${pendingNote}\n${task.description}${evidenceNote}${robotNote}`; - const action = await ui.select(title, actions); + const pendingNote = + task.metadata.lgtm_evidence && task.status !== "completed" + ? `\nProof review: ${getGateStatus(task)}` + : ""; + const em = task.metadata; + let evidenceNote = ""; + if (em.lgtm_evidence) { + evidenceNote = `\n\n${renderEvidencePacket(task)}`; + const automaticReviewFailure = renderAutomaticReviewFailure(task); + if (automaticReviewFailure) + evidenceNote += `\n\n${automaticReviewFailure}`; + } + let robotNote = ""; + const robotReviews = getRobotReviews(task); + if (robotReviews.length > 0) { + const latest = robotReviews[robotReviews.length - 1]; + const parts = [`\n\nRobot reviews: ${robotReviews.length}`]; + parts.push(renderCompactRobotReview(latest)); + robotNote = parts.join("\n"); + } + const title = `#${task.id} [${task.status}] ${task.subject}\nDone: ${task.done_criterion}${pendingNote}\n${task.description}${evidenceNote}${robotNote}`; + const action = await ui.select(title, actions); - if (action === "▸ Start (in_progress)") { - store.update(taskId, { status: "in_progress" }); - widget.setActiveTask(taskId); - widget.update(); - return viewTasks(); - } else if (action === "✗ Delete") { - store.update(taskId, { 
status: "deleted" }); - widget.setActiveTask(taskId, false); - widget.update(); - return viewTasks(); - } - return viewTasks(); - }; + if (action === "▸ Start (in_progress)") { + store.update(taskId, { status: "in_progress" }); + widget.setActiveTask(taskId); + widget.update(); + return viewTasks(); + } else if (action === "✗ Delete") { + store.update(taskId, { status: "deleted" }); + widget.setActiveTask(taskId, false); + widget.update(); + return viewTasks(); + } + return viewTasks(); + }; - const createTask = async (): Promise => { - const subject = await ui.input("Task subject"); - if (!subject) return mainMenu(); - const description = await ui.input("Task description"); - if (!description) return mainMenu(); - const done_criterion = await ui.input("Done criterion (what does done look like?)"); - if (!done_criterion) return mainMenu(); + const createTask = async (): Promise => { + const subject = await ui.input("Goal subject"); + if (!subject) return mainMenu(); + const description = await ui.input("Goal description"); + if (!description) return mainMenu(); + const done_criterion = await ui.input( + "Done criterion (what does done look like?)", + ); + if (!done_criterion) return mainMenu(); - store.create(subject, description, done_criterion); - widget.update(); - return mainMenu(); - }; + store.create(subject, description, done_criterion); + widget.update(); + return mainMenu(); + }; - await mainMenu(); - }, - }); + await mainMenu(); + }, + }); - // ────────────────────────────────────────────────── - // /lgtm command — proof log viewer - // ────────────────────────────────────────────────── + // ────────────────────────────────────────────────── + // /lgtm command — proof log viewer + // ────────────────────────────────────────────────── - function renderTaskEvidenceForHuman(task: Task): string { - return renderProofLog(task); - } + function renderTaskEvidenceForHuman(task: Task): string { + return renderProofLog(task); + } - function showProofLog(task: Task) 
{ - pi.sendMessage({ - customType: "proof-log", - content: renderTaskEvidenceForHuman(task), - display: true, - details: { taskId: task.id }, - }); - } + function showProofLog(task: Task) { + pi.sendMessage({ + customType: "proof-log", + content: renderTaskEvidenceForHuman(task), + display: true, + details: { taskId: task.id }, + }); + } - function getLgtmTaskLabel(task: Task): string { - const tag = task.status === "completed" - ? "[DONE] " - : task.status === "in_progress" - ? "[ACTIVE] " - : "[PENDING] "; - return `${tag}#${task.id} ${task.subject}`; - } + function getLgtmTaskLabel(task: Task): string { + const tag = + task.status === "completed" + ? "[DONE] " + : task.status === "in_progress" + ? "[ACTIVE] " + : "[PENDING] "; + return `${tag}#${task.id} ${task.subject}`; + } - async function viewEvidence(taskId: string, ctx: ExtensionCommandContext): Promise { - const task = store.get(taskId); - if (!task) { - ctx.ui.notify(`Task #${taskId} not found`, "error"); - return; - } - showProofLog(task); - } + async function viewEvidence( + taskId: string, + ctx: ExtensionCommandContext, + ): Promise { + const task = store.get(taskId); + if (!task) { + ctx.ui.notify(`Task #${taskId} not found`, "error"); + return; + } + showProofLog(task); + } - async function viewAllOpenProofLogs(ctx: ExtensionCommandContext): Promise { - const open = store.list().filter(t => t.status !== "completed"); - if (open.length === 0) { - ctx.ui.notify("No open tasks to inspect.", "info"); - return; - } - for (const task of open) showProofLog(task); - } + async function viewAllOpenProofLogs( + ctx: ExtensionCommandContext, + ): Promise { + const open = store.list().filter((t) => t.status !== "completed"); + if (open.length === 0) { + ctx.ui.notify("No open tasks to inspect.", "info"); + return; + } + for (const task of open) showProofLog(task); + } - pi.registerCommand("lgtm", { - description: - "View the proof log and judge notes. /lgtm [...] 
shows specific tasks; /lgtm * shows all open tasks; task management lives in /tasks.", - handler: async (args: string, ctx: ExtensionCommandContext) => { - const parsed = parseLgtmArgs(args); - if (parsed.kind === "error") { - ctx.ui.notify(parsed.message, "error"); - return; - } - if (parsed.kind === "menu") { - const tasks = store.list(); - const choice = await ctx.ui.select( - "LGTM", - ["View all open proof logs", ...tasks.map(getLgtmTaskLabel), "← Cancel"], - ); - if (!choice || choice === "← Cancel") return; - if (choice === "View all open proof logs") return viewAllOpenProofLogs(ctx); - const match = choice.match(/#(\d+)/); - if (match) return viewEvidence(match[1], ctx); - return; - } - if (parsed.kind === "view_all") return viewAllOpenProofLogs(ctx); - for (const id of parsed.ids) await viewEvidence(id, ctx); - }, - getArgumentCompletions: (args: string) => { - const trimmed = args.trim(); - const tasks = store.list(); - if (!trimmed) return [{ value: "*", label: "*" }]; - const prefix = trimmed.replace(/^#/, ""); - return ["*", ...tasks.filter(task => task.id.startsWith(prefix)).map(task => task.id)] - .map(value => ({ value, label: value })); - }, - }); + pi.registerCommand("lgtm", { + description: + "View the proof log and judge notes. /lgtm [...] 
shows specific tasks; /lgtm * shows all open tasks; task management lives in /tasks.", + handler: async (args: string, ctx: ExtensionCommandContext) => { + const parsed = parseLgtmArgs(args); + if (parsed.kind === "error") { + ctx.ui.notify(parsed.message, "error"); + return; + } + if (parsed.kind === "menu") { + const tasks = store.list(); + const choice = await ctx.ui.select("LGTM", [ + "View all open proof logs", + ...tasks.map(getLgtmTaskLabel), + "← Cancel", + ]); + if (!choice || choice === "← Cancel") return; + if (choice === "View all open proof logs") + return viewAllOpenProofLogs(ctx); + const match = choice.match(/#(\d+)/); + if (match) return viewEvidence(match[1], ctx); + return; + } + if (parsed.kind === "view_all") return viewAllOpenProofLogs(ctx); + for (const id of parsed.ids) await viewEvidence(id, ctx); + }, + getArgumentCompletions: (args: string) => { + const trimmed = args.trim(); + const tasks = store.list(); + if (!trimmed) return [{ value: "*", label: "*" }]; + const prefix = trimmed.replace(/^#/, ""); + return [ + "*", + ...tasks + .filter((task) => task.id.startsWith(prefix)) + .map((task) => task.id), + ].map((value) => ({ value, label: value })); + }, + }); } diff --git a/src/review-badges.ts b/src/review-badges.ts index 871df8e..da34a51 100644 --- a/src/review-badges.ts +++ b/src/review-badges.ts @@ -1,99 +1,86 @@ -import { getLatestRobotReview, getRobotReviews } from "./robot-review.js"; +import { getLatestRobotReview } from "./robot-review.js"; import type { Task } from "./types.js"; -const STAGES = ["🛠", "🤖", "✓"] as const; - function hasCurrentEvidence(task: Task): boolean { - return typeof task.metadata?.lgtm_evidence === "string" && task.metadata.lgtm_evidence.length > 0; + return ( + typeof task.metadata?.lgtm_evidence === "string" && + task.metadata.lgtm_evidence.length > 0 + ); } function hasEvidenceHistory(task: Task): boolean { - return Array.isArray(task.metadata?.lgtm_history) && task.metadata.lgtm_history.length > 0; + 
return ( + Array.isArray(task.metadata?.lgtm_history) && + task.metadata.lgtm_history.length > 0 + ); } -/** Pipeline stages: `[🛠·🤖·✓]` fills left-to-right as evidence→review→completed progresses. */ -export function getReviewBadges(task: Task): string { - const filled = [ - !!task.metadata?.lgtm_evidence, - getRobotReviews(task).length > 0, - task.status === "completed", - ]; - const slots = STAGES.map((emoji, i) => filled[i] ? emoji : "·"); - return `[${slots.join("")}]`; -} - -export const REVIEW_BADGES = { - evidence: STAGES[0], - robot: STAGES[1], - complete: STAGES[2], - pipeline: STAGES, -}; - export type DisplayStatus = "in_progress" | "pending" | "completed"; export function getDisplayStatus(task: Task): DisplayStatus { - return task.status; + return task.status; } export type CompletionMode = "direct" | "proof"; export type ReviewState = - | "no_claim" - | "claim_submitted" - | "reviewer_failed_to_run" - | "reviewer_rejected" - | "reviewer_accepted" - | "superseded" - | "completed"; -export type StateTag = "ACTIVE" | "PENDING" | "DONE"; - + | "no_claim" + | "claim_submitted" + | "reviewer_failed_to_run" + | "reviewer_rejected" + | "reviewer_accepted" + | "superseded" + | "completed"; export function getCompletionMode(task: Task): CompletionMode { - return task.parentId ? "direct" : "proof"; + return task.parentId ? 
"direct" : "proof"; } export function getReviewState(task: Task): ReviewState { - if (task.status === "completed") return "completed"; - const latest = getLatestRobotReview(task); - if (latest && !latest.accepted) return "reviewer_rejected"; - if (latest?.accepted) return "reviewer_accepted"; - if (typeof task.metadata?.robot_review_last_error === "string") return "reviewer_failed_to_run"; - if (hasCurrentEvidence(task)) return "claim_submitted"; - if (hasEvidenceHistory(task)) return "superseded"; - return "no_claim"; + if (task.status === "completed") return "completed"; + const latest = getLatestRobotReview(task); + if (latest && !latest.accepted) return "reviewer_rejected"; + if (latest?.accepted) return "reviewer_accepted"; + if (typeof task.metadata?.robot_review_last_error === "string") + return "reviewer_failed_to_run"; + if (hasCurrentEvidence(task)) return "claim_submitted"; + if (hasEvidenceHistory(task)) return "superseded"; + return "no_claim"; +} + +export function needsProofAttention(task: Task): boolean { + if (task.parentId || task.status === "completed") return false; + const state = getReviewState(task); + return ( + state === "reviewer_rejected" || + state === "reviewer_accepted" || + state === "reviewer_failed_to_run" + ); } export function getGateStatus(task: Task): string { - const state = getReviewState(task); - if (task.parentId) { - return task.status === "completed" ? 
"completed directly as subtask" : "subtask: direct completion allowed"; - } - if (task.status === "completed") { - if (typeof task.metadata?.robot_review_last_error === "string") { - return `completed with reviewer unavailable: ${task.metadata.robot_review_last_error}`; - } - if (getLatestRobotReview(task)?.accepted) return "completed after accepted proof review"; - return "completed"; - } - if (state === "no_claim") return "top-level task requires TaskClaimDone evidence before completion"; - if (state === "reviewer_accepted") return "review accepted; task should be completed"; - if (state === "reviewer_failed_to_run") { - return `review unavailable; autonomy continues: ${task.metadata.robot_review_last_error}`; - } - if (state === "reviewer_rejected") return "latest proof review rejected the evidence; strengthen the proof and try again"; - if (state === "superseded") return "current evidence superseded, waiting for a new proof claim"; - return "proof claim submitted, automatic review still required"; -} - -/** Short uppercase tag for compact task-list display. */ -export function getStateTag(task: Task): StateTag { - const s = getDisplayStatus(task); - if (s === "completed") return "DONE"; - if (s === "in_progress") return "ACTIVE"; - return "PENDING"; -} - -/** Theme colour key for each state tag (only theme colours present in pi-tui are used). */ -export function getStateTagColor(tag: StateTag): "accent" | "dim" | undefined { - if (tag === "ACTIVE") return "accent"; - if (tag === "DONE") return "dim"; - return undefined; // PENDING — default fg + const state = getReviewState(task); + if (task.parentId) { + return task.status === "completed" + ? 
"completed directly as subtask" + : "subtask: direct completion allowed"; + } + if (task.status === "completed") { + if (typeof task.metadata?.robot_review_last_error === "string") { + return `completed with reviewer unavailable: ${task.metadata.robot_review_last_error}`; + } + if (getLatestRobotReview(task)?.accepted) + return "completed after accepted proof review"; + return "completed"; + } + if (state === "no_claim") + return "top-level task requires TaskClaimDone evidence before completion"; + if (state === "reviewer_accepted") + return "review accepted; task should be completed"; + if (state === "reviewer_failed_to_run") { + return `review unavailable; autonomy continues: ${task.metadata.robot_review_last_error}`; + } + if (state === "reviewer_rejected") + return "latest proof review rejected the evidence; strengthen the proof and try again"; + if (state === "superseded") + return "current evidence superseded, waiting for a new proof claim"; + return "proof claim submitted, automatic review still required"; } diff --git a/src/robot-review.ts b/src/robot-review.ts index 9846873..118c492 100644 --- a/src/robot-review.ts +++ b/src/robot-review.ts @@ -3,185 +3,301 @@ import type { Task } from "./types.js"; export type RobotReviewMode = "manual" | "auto"; export interface RobotReviewRecord { - iteration: number; - reviewer: string; - scope: string; - observations: string[]; - concerns: string[]; - suggestions: string[]; - blind_spots: string; - accepted: boolean; - evidence_complete: boolean; - evidence_convincing: boolean; - missing_evidence: string[]; - submitted_at: string; - mode: RobotReviewMode; - raw_output?: string; - rubric?: Record; + iteration: number; + reviewer: string; + scope: string; + observations: string[]; + concerns: string[]; + suggestions: string[]; + blind_spots: string; + accepted: boolean; + evidence_complete: boolean; + evidence_convincing: boolean; + missing_evidence: string[]; + submitted_at: string; + mode: RobotReviewMode; + 
raw_output?: string; + rubric?: Record; } function toStringArray(value: unknown): string[] { - return Array.isArray(value) ? value.filter((item): item is string => typeof item === "string") : []; + return Array.isArray(value) + ? value.filter((item): item is string => typeof item === "string") + : []; } -function extractRubric(value: unknown): Record | undefined { - if (!value || typeof value !== "object") return undefined; - const r: Record = {}; - for (const [key, val] of Object.entries(value as Record)) { - if (val && typeof val === "object" && "reason" in (val as any) && "pass" in (val as any)) { - const v = val as { reason: unknown; pass: unknown }; - r[key] = { reason: typeof v.reason === "string" ? v.reason : "", pass: v.pass === true }; - } - } - return Object.keys(r).length > 0 ? r : undefined; +function extractRubric( + value: unknown, +): Record | undefined { + if (!value || typeof value !== "object") return undefined; + const r: Record = {}; + for (const [key, val] of Object.entries(value as Record)) { + if ( + val && + typeof val === "object" && + "reason" in (val as any) && + "pass" in (val as any) + ) { + const v = val as { reason: unknown; pass: unknown }; + r[key] = { + reason: typeof v.reason === "string" ? v.reason : "", + pass: v.pass === true, + }; + } + } + return Object.keys(r).length > 0 ? r : undefined; } -function normalizeReview(value: unknown, index: number): RobotReviewRecord | undefined { - if (!value || typeof value !== "object") return undefined; - const review = value as Record; - const reviewer = typeof review.reviewer === "string" ? review.reviewer : "unknown"; - const scope = typeof review.scope === "string" ? review.scope : "unknown"; - const observations = toStringArray(review.observations); - if (observations.length === 0) return undefined; - return { - iteration: typeof review.iteration === "number" ? 
review.iteration : index + 1, - reviewer, - scope, - observations, - concerns: toStringArray(review.concerns), - suggestions: toStringArray(review.suggestions), - blind_spots: typeof review.blind_spots === "string" ? review.blind_spots : "not recorded", - accepted: typeof review.accepted === "boolean" - ? review.accepted - : (typeof review.evidence_complete === "boolean" ? review.evidence_complete : true) - && (typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true), - evidence_complete: typeof review.evidence_complete === "boolean" ? review.evidence_complete : true, - evidence_convincing: typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true, - missing_evidence: toStringArray(review.missing_evidence), - submitted_at: typeof review.submitted_at === "string" ? review.submitted_at : new Date(0).toISOString(), - mode: review.mode === "auto" ? "auto" : "manual", - raw_output: typeof review.raw_output === "string" ? review.raw_output : undefined, - rubric: extractRubric(review.rubric), - }; +function normalizeReview( + value: unknown, + index: number, +): RobotReviewRecord | undefined { + if (!value || typeof value !== "object") return undefined; + const review = value as Record; + const reviewer = + typeof review.reviewer === "string" ? review.reviewer : "unknown"; + const scope = typeof review.scope === "string" ? review.scope : "unknown"; + const observations = toStringArray(review.observations); + if (observations.length === 0) return undefined; + return { + iteration: + typeof review.iteration === "number" ? review.iteration : index + 1, + reviewer, + scope, + observations, + concerns: toStringArray(review.concerns), + suggestions: toStringArray(review.suggestions), + blind_spots: + typeof review.blind_spots === "string" + ? review.blind_spots + : "not recorded", + accepted: + typeof review.accepted === "boolean" + ? review.accepted + : (typeof review.evidence_complete === "boolean" + ? 
review.evidence_complete + : true) && + (typeof review.evidence_convincing === "boolean" + ? review.evidence_convincing + : true), + evidence_complete: + typeof review.evidence_complete === "boolean" + ? review.evidence_complete + : true, + evidence_convincing: + typeof review.evidence_convincing === "boolean" + ? review.evidence_convincing + : true, + missing_evidence: toStringArray(review.missing_evidence), + submitted_at: + typeof review.submitted_at === "string" + ? review.submitted_at + : new Date(0).toISOString(), + mode: review.mode === "auto" ? "auto" : "manual", + raw_output: + typeof review.raw_output === "string" ? review.raw_output : undefined, + rubric: extractRubric(review.rubric), + }; } function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined { - const observations = toStringArray(task.metadata?.robot_review_observations); - if (observations.length === 0) return undefined; - return { - iteration: 1, - reviewer: typeof task.metadata?.robot_review_reviewer === "string" ? task.metadata.robot_review_reviewer : "unknown", - scope: typeof task.metadata?.robot_review_scope === "string" ? task.metadata.robot_review_scope : "unknown", - observations, - concerns: toStringArray(task.metadata?.robot_review_concerns), - suggestions: toStringArray(task.metadata?.robot_review_suggestions), - blind_spots: typeof task.metadata?.robot_review_blind_spots === "string" ? task.metadata.robot_review_blind_spots : "not recorded", - accepted: typeof task.metadata?.robot_review_accepted === "boolean" - ? task.metadata.robot_review_accepted - : (typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true) - && (typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true), - evidence_complete: typeof task.metadata?.robot_review_evidence_complete === "boolean" ? 
task.metadata.robot_review_evidence_complete : true, - evidence_convincing: typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true, - missing_evidence: toStringArray(task.metadata?.robot_review_missing_evidence), - submitted_at: typeof task.metadata?.robot_review_submitted_at === "string" ? task.metadata.robot_review_submitted_at : new Date(0).toISOString(), - mode: task.metadata?.robot_review_mode === "auto" ? "auto" : "manual", - raw_output: typeof task.metadata?.robot_review_raw_output === "string" ? task.metadata.robot_review_raw_output : undefined, - }; + const observations = toStringArray(task.metadata?.robot_review_observations); + if (observations.length === 0) return undefined; + return { + iteration: 1, + reviewer: + typeof task.metadata?.robot_review_reviewer === "string" + ? task.metadata.robot_review_reviewer + : "unknown", + scope: + typeof task.metadata?.robot_review_scope === "string" + ? task.metadata.robot_review_scope + : "unknown", + observations, + concerns: toStringArray(task.metadata?.robot_review_concerns), + suggestions: toStringArray(task.metadata?.robot_review_suggestions), + blind_spots: + typeof task.metadata?.robot_review_blind_spots === "string" + ? task.metadata.robot_review_blind_spots + : "not recorded", + accepted: + typeof task.metadata?.robot_review_accepted === "boolean" + ? task.metadata.robot_review_accepted + : (typeof task.metadata?.robot_review_evidence_complete === "boolean" + ? task.metadata.robot_review_evidence_complete + : true) && + (typeof task.metadata?.robot_review_evidence_convincing === "boolean" + ? task.metadata.robot_review_evidence_convincing + : true), + evidence_complete: + typeof task.metadata?.robot_review_evidence_complete === "boolean" + ? task.metadata.robot_review_evidence_complete + : true, + evidence_convincing: + typeof task.metadata?.robot_review_evidence_convincing === "boolean" + ? 
task.metadata.robot_review_evidence_convincing + : true, + missing_evidence: toStringArray( + task.metadata?.robot_review_missing_evidence, + ), + submitted_at: + typeof task.metadata?.robot_review_submitted_at === "string" + ? task.metadata.robot_review_submitted_at + : new Date(0).toISOString(), + mode: task.metadata?.robot_review_mode === "auto" ? "auto" : "manual", + raw_output: + typeof task.metadata?.robot_review_raw_output === "string" + ? task.metadata.robot_review_raw_output + : undefined, + }; } export function getRobotReviews(task: Task): RobotReviewRecord[] { - const reviews = Array.isArray(task.metadata?.robot_reviews) - ? task.metadata.robot_reviews - .map((review: unknown, index: number) => normalizeReview(review, index)) - .filter((review): review is RobotReviewRecord => review !== undefined) - : []; - if (reviews.length > 0) { - return reviews.map((review, index) => ({ ...review, iteration: index + 1 })); - } - const legacy = getLegacyRobotReview(task); - return legacy ? [legacy] : []; + const reviews = Array.isArray(task.metadata?.robot_reviews) + ? task.metadata.robot_reviews + .map((review: unknown, index: number) => normalizeReview(review, index)) + .filter((review): review is RobotReviewRecord => review !== undefined) + : []; + if (reviews.length > 0) { + return reviews.map((review, index) => ({ + ...review, + iteration: index + 1, + })); + } + const legacy = getLegacyRobotReview(task); + return legacy ? [legacy] : []; } -export function getLatestRobotReview(task: Task): RobotReviewRecord | undefined { - const reviews = getRobotReviews(task); - return reviews.length > 0 ? reviews[reviews.length - 1] : undefined; +export function getLatestRobotReview( + task: Task, +): RobotReviewRecord | undefined { + const reviews = getRobotReviews(task); + return reviews.length > 0 ? 
reviews[reviews.length - 1] : undefined; } function hasNonEmptyString(value: unknown): boolean { - return typeof value === "string" && value.trim().length > 0; + return typeof value === "string" && value.trim().length > 0; } export function hasCompleteProofClaim(task: Task): boolean { - const metadata = task.metadata ?? {}; - return [ - metadata.lgtm_evidence, - metadata.lgtm_failure_likely, - metadata.lgtm_failure_sneaky, - metadata.lgtm_failure_unknown, - metadata.lgtm_falsification_test, - metadata.lgtm_evidence_reasoning, - metadata.lgtm_remaining_uncertainty, - ].every(hasNonEmptyString) - && Array.isArray(metadata.lgtm_verification_hints) - && metadata.lgtm_verification_hints.some(hasNonEmptyString); + const metadata = task.metadata ?? {}; + return ( + [ + metadata.lgtm_evidence, + metadata.lgtm_failure_likely, + metadata.lgtm_failure_sneaky, + metadata.lgtm_failure_unknown, + metadata.lgtm_falsification_test, + metadata.lgtm_evidence_reasoning, + metadata.lgtm_remaining_uncertainty, + ].every(hasNonEmptyString) && + Array.isArray(metadata.lgtm_verification_hints) && + metadata.lgtm_verification_hints.some(hasNonEmptyString) + ); } -export function shouldCompleteAfterAcceptedReview(task: Task, reviewAccepted: boolean): boolean { - return reviewAccepted && hasCompleteProofClaim(task); +export function shouldCompleteAfterAcceptedReview( + task: Task, + reviewAccepted: boolean, +): boolean { + return reviewAccepted && hasCompleteProofClaim(task); } -export function relaxAdvisoryVerificationHints(review: Omit): Omit { - const rubric = review.rubric; - if (!rubric || review.evidence_complete !== true) return review; - const requiredCoreKeys = ["evidence_covers_done_criterion", "falsification_test_runnable", "failure_modes_addressed", "evidence_distinguishes_success"]; - if (!requiredCoreKeys.every((key) => rubric[key]?.pass === true)) return review; - const failedKeys = Object.entries(rubric) - .filter(([, item]) => item.pass !== true) - .map(([key]) => key); - if 
(failedKeys.length !== 1 || failedKeys[0] !== "verification_hints_actionable") return review; - return { - ...review, - accepted: true, - evidence_convincing: true, - observations: [ - ...review.observations, - "Verification hints were weak, but treated as advisory because the verbatim evidence already covered the done criterion.", - ], - concerns: review.concerns, - suggestions: review.suggestions, - missing_evidence: review.missing_evidence.filter((item) => item !== "verification_hints_actionable" && !/verification hint/i.test(item)), - }; +export function relaxAdvisoryVerificationHints( + review: Omit, +): Omit { + const rubric = review.rubric; + if (!rubric || review.evidence_complete !== true) return review; + const requiredCoreKeys = [ + "evidence_covers_done_criterion", + "falsification_test_runnable", + ]; + if (!requiredCoreKeys.every((key) => rubric[key]?.pass === true)) + return review; + const failedKeys = Object.entries(rubric) + .filter(([, item]) => item.pass !== true) + .map(([key]) => key); + const advisoryKeys = [ + "failure_modes_addressed", + "evidence_distinguishes_success", + "verification_hints_actionable", + ]; + if ( + failedKeys.length === 0 || + !failedKeys.every((key) => advisoryKeys.includes(key)) + ) + return review; + + const advisoryNotes: string[] = []; + if (failedKeys.includes("failure_modes_addressed")) { + advisoryNotes.push( + "Failure-mode writeup was weak, but treated as advisory because the verbatim evidence already covered the done criterion.", + ); + } + if (failedKeys.includes("evidence_distinguishes_success")) { + advisoryNotes.push( + "Why-this-proves-it reasoning was weak, but treated as advisory because the packet already contained direct success evidence.", + ); + } + if (failedKeys.includes("verification_hints_actionable")) { + advisoryNotes.push( + "Verification hints were weak, but treated as advisory because the verbatim evidence already covered the done criterion.", + ); + } + + return { + ...review, + accepted: 
true, + evidence_convincing: true, + observations: [...review.observations, ...advisoryNotes], + concerns: review.concerns, + suggestions: review.suggestions, + missing_evidence: review.missing_evidence.filter( + (item) => + !advisoryKeys.includes(item) && + !/verification hint/i.test(item) && + !/failure[- ]?mode/i.test(item) && + !/distinguish/i.test(item), + ), + }; } -export function appendRobotReviewMetadata(task: Task, review: Omit): Record { - const robot_reviews = [...getRobotReviews(task), { ...review, iteration: 0 }].map((entry, index) => ({ - ...entry, - accepted: entry.accepted, - iteration: index + 1, - })); - const latest = robot_reviews[robot_reviews.length - 1]; - return { - robot_reviews, - robot_review_reviewer: latest.reviewer, - robot_review_scope: latest.scope, - robot_review_observations: latest.observations, - robot_review_concerns: latest.concerns, - robot_review_suggestions: latest.suggestions, - robot_review_blind_spots: latest.blind_spots, - robot_review_accepted: latest.accepted, - robot_review_evidence_complete: latest.evidence_complete, - robot_review_evidence_convincing: latest.evidence_convincing, - robot_review_missing_evidence: latest.missing_evidence, - robot_review_submitted_at: latest.submitted_at, - robot_review_mode: latest.mode, - robot_review_raw_output: latest.raw_output ?? 
null, - robot_review_requires_followup: !(latest.evidence_complete && latest.evidence_convincing), - robot_review_iteration_count: robot_reviews.length, - }; +export function appendRobotReviewMetadata( + task: Task, + review: Omit, +): Record { + const robot_reviews = [ + ...getRobotReviews(task), + { ...review, iteration: 0 }, + ].map((entry, index) => ({ + ...entry, + accepted: entry.accepted, + iteration: index + 1, + })); + const latest = robot_reviews[robot_reviews.length - 1]; + return { + robot_reviews, + robot_review_reviewer: latest.reviewer, + robot_review_scope: latest.scope, + robot_review_observations: latest.observations, + robot_review_concerns: latest.concerns, + robot_review_suggestions: latest.suggestions, + robot_review_blind_spots: latest.blind_spots, + robot_review_accepted: latest.accepted, + robot_review_evidence_complete: latest.evidence_complete, + robot_review_evidence_convincing: latest.evidence_convincing, + robot_review_missing_evidence: latest.missing_evidence, + robot_review_submitted_at: latest.submitted_at, + robot_review_mode: latest.mode, + robot_review_raw_output: latest.raw_output ?? null, + robot_review_requires_followup: !( + latest.evidence_complete && latest.evidence_convincing + ), + robot_review_iteration_count: robot_reviews.length, + }; } export function latestRobotReviewPasses(task: Task): boolean { - const latest = getLatestRobotReview(task); - return latest ? latest.accepted : false; + const latest = getLatestRobotReview(task); + return latest ? latest.accepted : false; } - diff --git a/src/task-store.ts b/src/task-store.ts index 36c5699..dd4bde6 100644 --- a/src/task-store.ts +++ b/src/task-store.ts @@ -2,248 +2,341 @@ * task-store.ts — File-backed task store with CRUD, dependency management, and file locking. * * Session-scoped (default): in-memory Map — no disk I/O. - * Shared (PI_TASK_LIST_ID set): ~/.pi/tasks/.json with file locking. 
+ * Named or project stores live under /.pi/tasks/ unless an absolute path is given. */ -import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from "node:fs"; -import { homedir } from "node:os"; +import { + existsSync, + mkdirSync, + readFileSync, + renameSync, + unlinkSync, + writeFileSync, +} from "node:fs"; import { dirname, isAbsolute, join } from "node:path"; import type { Task, TaskStatus, TaskStoreData } from "./types.js"; -const TASKS_DIR = join(homedir(), ".pi", "tasks"); +const TASKS_DIR = join(process.cwd(), ".pi", "tasks"); const LOCK_RETRY_MS = 50; const LOCK_MAX_RETRIES = 100; // 5s max function acquireLock(lockPath: string): void { - for (let i = 0; i < LOCK_MAX_RETRIES; i++) { - try { - writeFileSync(lockPath, `${process.pid}`, { flag: "wx" }); - return; - } catch (e: any) { - if (e.code === "EEXIST") { - try { - const pid = parseInt(readFileSync(lockPath, "utf-8"), 10); - if (pid && !isProcessRunning(pid)) { unlinkSync(lockPath); continue; } - } catch { /* ignore */ } - const start = Date.now(); - while (Date.now() - start < LOCK_RETRY_MS) { /* busy wait */ } - continue; - } - throw e; - } - } - throw new Error(`Failed to acquire lock: ${lockPath}`); + for (let i = 0; i < LOCK_MAX_RETRIES; i++) { + try { + writeFileSync(lockPath, `${process.pid}`, { flag: "wx" }); + return; + } catch (e: any) { + if (e.code === "EEXIST") { + try { + const pid = parseInt(readFileSync(lockPath, "utf-8"), 10); + if (pid && !isProcessRunning(pid)) { + unlinkSync(lockPath); + continue; + } + } catch { + /* ignore */ + } + const start = Date.now(); + while (Date.now() - start < LOCK_RETRY_MS) { + /* busy wait */ + } + continue; + } + throw e; + } + } + throw new Error(`Failed to acquire lock: ${lockPath}`); } function releaseLock(lockPath: string): void { - try { unlinkSync(lockPath); } catch { /* ignore */ } + try { + unlinkSync(lockPath); + } catch { + /* ignore */ + } } function isProcessRunning(pid: number): boolean { - try { 
process.kill(pid, 0); return true; } catch { return false; } + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } } export class TaskStore { - private filePath: string | undefined; - private lockPath: string | undefined; - private nextId = 1; - private tasks = new Map(); + private filePath: string | undefined; + private lockPath: string | undefined; + private nextId = 1; + private tasks = new Map(); - constructor(listIdOrPath?: string) { - if (!listIdOrPath) return; - const isAbsPath = isAbsolute(listIdOrPath); - const filePath = isAbsPath ? listIdOrPath : join(TASKS_DIR, `${listIdOrPath}.json`); - mkdirSync(dirname(filePath), { recursive: true }); - this.filePath = filePath; - this.lockPath = filePath + ".lock"; - this.load(); - } + constructor(listIdOrPath?: string) { + if (!listIdOrPath) return; + const isAbsPath = isAbsolute(listIdOrPath); + const filePath = isAbsPath + ? listIdOrPath + : join(TASKS_DIR, `${listIdOrPath}.json`); + mkdirSync(dirname(filePath), { recursive: true }); + this.filePath = filePath; + this.lockPath = filePath + ".lock"; + this.load(); + } - private load(): void { - if (!this.filePath || !existsSync(this.filePath)) return; - try { - const data: TaskStoreData = JSON.parse(readFileSync(this.filePath, "utf-8")); - this.nextId = data.nextId; - this.tasks.clear(); - for (const t of data.tasks) this.tasks.set(t.id, t); - } catch { /* corrupt file — start fresh */ } - } + private load(): void { + if (!this.filePath || !existsSync(this.filePath)) return; + try { + const data: TaskStoreData = JSON.parse( + readFileSync(this.filePath, "utf-8"), + ); + this.nextId = data.nextId; + this.tasks.clear(); + for (const t of data.tasks) this.tasks.set(t.id, t); + } catch { + /* corrupt file — start fresh */ + } + } - private save(): void { - if (!this.filePath) return; - const tmpPath = this.filePath + ".tmp"; - writeFileSync(tmpPath, JSON.stringify({ nextId: this.nextId, tasks: Array.from(this.tasks.values()) }, null, 2)); - 
renameSync(tmpPath, this.filePath); - } + private save(): void { + if (!this.filePath) return; + const tmpPath = this.filePath + ".tmp"; + writeFileSync( + tmpPath, + JSON.stringify( + { nextId: this.nextId, tasks: Array.from(this.tasks.values()) }, + null, + 2, + ), + ); + renameSync(tmpPath, this.filePath); + } - private withLock(fn: () => T): T { - if (!this.lockPath) return fn(); - acquireLock(this.lockPath); - try { this.load(); const result = fn(); this.save(); return result; } - finally { releaseLock(this.lockPath); } - } + private withLock(fn: () => T): T { + if (!this.lockPath) return fn(); + acquireLock(this.lockPath); + try { + this.load(); + const result = fn(); + this.save(); + return result; + } finally { + releaseLock(this.lockPath); + } + } - create(subject: string, description: string, done_criterion: string, progress_label?: string, metadata?: Record, parentId?: string): Task { - return this.withLock(() => { - if (parentId && !this.tasks.has(parentId)) throw new Error(`Parent task #${parentId} not found`); - const now = Date.now(); - const task: Task = { - id: String(this.nextId++), - subject, description, done_criterion, - parentId, - status: "pending", - progress_label, - metadata: metadata ?? {}, - blocks: [], blockedBy: [], - createdAt: now, updatedAt: now, - }; - this.tasks.set(task.id, task); - return task; - }); - } + create( + subject: string, + description: string, + done_criterion: string, + progress_label?: string, + metadata?: Record, + parentId?: string, + ): Task { + return this.withLock(() => { + if (parentId && !this.tasks.has(parentId)) + throw new Error(`Parent task #${parentId} not found`); + const now = Date.now(); + const task: Task = { + id: String(this.nextId++), + subject, + description, + done_criterion, + parentId, + status: "pending", + progress_label, + metadata: metadata ?? 
{}, + blocks: [], + blockedBy: [], + createdAt: now, + updatedAt: now, + }; + this.tasks.set(task.id, task); + return task; + }); + } - get(id: string): Task | undefined { - if (this.filePath) this.load(); - return this.tasks.get(id); - } + get(id: string): Task | undefined { + if (this.filePath) this.load(); + return this.tasks.get(id); + } - list(): Task[] { - if (this.filePath) this.load(); - return Array.from(this.tasks.values()).sort((a, b) => Number(a.id) - Number(b.id)); - } + list(): Task[] { + if (this.filePath) this.load(); + return Array.from(this.tasks.values()).sort( + (a, b) => Number(a.id) - Number(b.id), + ); + } - update(id: string, fields: { - status?: TaskStatus | "deleted"; - subject?: string; - description?: string; - done_criterion?: string; - progress_label?: string; - metadata?: Record; - parentId?: string | null; - add_blocks?: string[]; - add_blocked_by?: string[]; - }): { task: Task | undefined; changedFields: string[]; warnings: string[] } { - return this.withLock(() => { - const task = this.tasks.get(id); - if (!task) return { task: undefined, changedFields: [], warnings: [] }; + update( + id: string, + fields: { + status?: TaskStatus | "deleted"; + subject?: string; + description?: string; + done_criterion?: string; + progress_label?: string; + metadata?: Record; + parentId?: string | null; + add_blocks?: string[]; + add_blocked_by?: string[]; + }, + ): { task: Task | undefined; changedFields: string[]; warnings: string[] } { + return this.withLock(() => { + const task = this.tasks.get(id); + if (!task) return { task: undefined, changedFields: [], warnings: [] }; - const changedFields: string[] = []; - const warnings: string[] = []; + const changedFields: string[] = []; + const warnings: string[] = []; - // Subtasks are normal checklist items. Top-level tasks are goals and need a proof - // claim plus automatic review; TaskClaimDone is the only agent path that completes them. 
- if (fields.status === "completed" && !task.parentId) { - throw new Error(`Top-level task #${id} requires proof. Use TaskClaimDone with evidence and failure modes; subtasks can be completed directly.`); - } + // Subtasks are normal checklist items. Top-level tasks are goals and need a proof + // claim plus automatic review; TaskClaimDone is the only agent path that completes them. + if (fields.status === "completed" && !task.parentId) { + throw new Error( + `Top-level task #${id} requires proof. Use TaskClaimDone with evidence and failure modes; subtasks can be completed directly.`, + ); + } - if (fields.status === "deleted") { - this.tasks.delete(id); - for (const t of this.tasks.values()) { - t.blocks = t.blocks.filter(bid => bid !== id); - t.blockedBy = t.blockedBy.filter(bid => bid !== id); - } - return { task: undefined, changedFields: ["deleted"], warnings: [] }; - } + if (fields.status === "deleted") { + this.tasks.delete(id); + for (const t of this.tasks.values()) { + t.blocks = t.blocks.filter((bid) => bid !== id); + t.blockedBy = t.blockedBy.filter((bid) => bid !== id); + } + return { task: undefined, changedFields: ["deleted"], warnings: [] }; + } - if (fields.status !== undefined) { task.status = fields.status as TaskStatus; changedFields.push("status"); } - if (fields.subject !== undefined) { task.subject = fields.subject; changedFields.push("subject"); } - if (fields.description !== undefined) { task.description = fields.description; changedFields.push("description"); } - if (fields.done_criterion !== undefined) { task.done_criterion = fields.done_criterion; changedFields.push("done_criterion"); } - if (fields.progress_label !== undefined) { task.progress_label = fields.progress_label; changedFields.push("progress_label"); } + if (fields.status !== undefined) { + task.status = fields.status as TaskStatus; + changedFields.push("status"); + } + if (fields.subject !== undefined) { + task.subject = fields.subject; + changedFields.push("subject"); + } + 
if (fields.description !== undefined) { + task.description = fields.description; + changedFields.push("description"); + } + if (fields.done_criterion !== undefined) { + task.done_criterion = fields.done_criterion; + changedFields.push("done_criterion"); + } + if (fields.progress_label !== undefined) { + task.progress_label = fields.progress_label; + changedFields.push("progress_label"); + } - if (fields.metadata !== undefined) { - for (const [key, value] of Object.entries(fields.metadata)) { - if (value === null) delete task.metadata[key]; - else task.metadata[key] = value; - } - changedFields.push("metadata"); - } + if (fields.metadata !== undefined) { + for (const [key, value] of Object.entries(fields.metadata)) { + if (value === null) delete task.metadata[key]; + else task.metadata[key] = value; + } + changedFields.push("metadata"); + } - if (fields.parentId !== undefined) { - throw new Error("parentId is creation-only. Create subtasks with TaskCreate(parentId); do not downgrade top-level proof goals."); - } + if (fields.parentId !== undefined) { + throw new Error( + "parentId is creation-only. 
Create subtasks with TaskCreate(parentId); do not downgrade top-level proof goals.", + ); + } - if (fields.add_blocks?.length) { - for (const targetId of fields.add_blocks) { - if (!task.blocks.includes(targetId)) task.blocks.push(targetId); - const target = this.tasks.get(targetId); - if (target && !target.blockedBy.includes(id)) { target.blockedBy.push(id); target.updatedAt = Date.now(); } - if (targetId === id) warnings.push(`#${id} blocks itself`); - else if (!target) warnings.push(`#${targetId} does not exist`); - else if (target.blocks.includes(id)) warnings.push(`cycle: #${id} and #${targetId} block each other`); - } - changedFields.push("blocks"); - } + if (fields.add_blocks?.length) { + for (const targetId of fields.add_blocks) { + if (!task.blocks.includes(targetId)) task.blocks.push(targetId); + const target = this.tasks.get(targetId); + if (target && !target.blockedBy.includes(id)) { + target.blockedBy.push(id); + target.updatedAt = Date.now(); + } + if (targetId === id) warnings.push(`#${id} blocks itself`); + else if (!target) warnings.push(`#${targetId} does not exist`); + else if (target.blocks.includes(id)) + warnings.push(`cycle: #${id} and #${targetId} block each other`); + } + changedFields.push("blocks"); + } - if (fields.add_blocked_by?.length) { - for (const targetId of fields.add_blocked_by) { - if (!task.blockedBy.includes(targetId)) task.blockedBy.push(targetId); - const target = this.tasks.get(targetId); - if (target && !target.blocks.includes(id)) { target.blocks.push(id); target.updatedAt = Date.now(); } - if (targetId === id) warnings.push(`#${id} blocks itself`); - else if (!target) warnings.push(`#${targetId} does not exist`); - else if (task.blocks.includes(targetId)) warnings.push(`cycle: #${id} and #${targetId} block each other`); - } - changedFields.push("blockedBy"); - } + if (fields.add_blocked_by?.length) { + for (const targetId of fields.add_blocked_by) { + if (!task.blockedBy.includes(targetId)) 
task.blockedBy.push(targetId); + const target = this.tasks.get(targetId); + if (target && !target.blocks.includes(id)) { + target.blocks.push(id); + target.updatedAt = Date.now(); + } + if (targetId === id) warnings.push(`#${id} blocks itself`); + else if (!target) warnings.push(`#${targetId} does not exist`); + else if (task.blocks.includes(targetId)) + warnings.push(`cycle: #${id} and #${targetId} block each other`); + } + changedFields.push("blockedBy"); + } - task.updatedAt = Date.now(); - return { task, changedFields, warnings }; - }); - } + task.updatedAt = Date.now(); + return { task, changedFields, warnings }; + }); + } - /** Complete a task. Called by accepted proof review or direct subtask completion paths. */ - complete(id: string): Task { - return this.withLock(() => { - const task = this.tasks.get(id); - if (!task) throw new Error(`Task #${id} not found`); - if (task.status === "completed") throw new Error(`Task #${id} already completed`); - task.status = "completed"; - task.updatedAt = Date.now(); - return task; - }); - } + /** Complete a task. Called by accepted proof review or direct subtask completion paths. 
*/ + complete(id: string): Task { + return this.withLock(() => { + const task = this.tasks.get(id); + if (!task) throw new Error(`Task #${id} not found`); + if (task.status === "completed") + throw new Error(`Task #${id} already completed`); + task.status = "completed"; + task.updatedAt = Date.now(); + return task; + }); + } - delete(id: string): boolean { - return this.withLock(() => { - if (!this.tasks.has(id)) return false; - this.tasks.delete(id); - for (const t of this.tasks.values()) { - t.blocks = t.blocks.filter(bid => bid !== id); - t.blockedBy = t.blockedBy.filter(bid => bid !== id); - } - return true; - }); - } + delete(id: string): boolean { + return this.withLock(() => { + if (!this.tasks.has(id)) return false; + this.tasks.delete(id); + for (const t of this.tasks.values()) { + t.blocks = t.blocks.filter((bid) => bid !== id); + t.blockedBy = t.blockedBy.filter((bid) => bid !== id); + } + return true; + }); + } - clearAll(): number { - return this.withLock(() => { - const count = this.tasks.size; - this.tasks.clear(); - return count; - }); - } + clearAll(): number { + return this.withLock(() => { + const count = this.tasks.size; + this.tasks.clear(); + return count; + }); + } - deleteFileIfEmpty(): boolean { - if (!this.filePath || this.tasks.size > 0) return false; - try { unlinkSync(this.filePath); } catch { /* ignore */ } - return true; - } + deleteFileIfEmpty(): boolean { + if (!this.filePath || this.tasks.size > 0) return false; + try { + unlinkSync(this.filePath); + } catch { + /* ignore */ + } + return true; + } - clearCompleted(): number { - return this.withLock(() => { - let count = 0; - for (const [id, task] of this.tasks) { - if (task.status === "completed") { this.tasks.delete(id); count++; } - } - if (count > 0) { - const validIds = new Set(this.tasks.keys()); - for (const t of this.tasks.values()) { - t.blocks = t.blocks.filter(bid => validIds.has(bid)); - t.blockedBy = t.blockedBy.filter(bid => validIds.has(bid)); - } - } - return count; 
- }); - } + clearCompleted(): number { + return this.withLock(() => { + let count = 0; + for (const [id, task] of this.tasks) { + if (task.status === "completed") { + this.tasks.delete(id); + count++; + } + } + if (count > 0) { + const validIds = new Set(this.tasks.keys()); + for (const t of this.tasks.values()) { + t.blocks = t.blocks.filter((bid) => validIds.has(bid)); + t.blockedBy = t.blockedBy.filter((bid) => validIds.has(bid)); + } + } + return count; + }); + } } diff --git a/src/tasks-config.ts b/src/tasks-config.ts index 016b1a4..3c9baee 100644 --- a/src/tasks-config.ts +++ b/src/tasks-config.ts @@ -4,20 +4,22 @@ import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; import { dirname, join } from "node:path"; export interface TasksConfig { - taskScope?: "memory" | "session" | "project"; // default: "session" - autoCascade?: boolean; // default: false - autoClearCompleted?: "never" | "on_list_complete" | "on_task_complete"; // default: "on_list_complete" + taskScope?: "memory" | "session" | "project"; // default: "session" + autoCascade?: boolean; // default: false + autoClearCompleted?: "never" | "on_list_complete" | "on_task_complete"; // default: "never" } const CONFIG_PATH = join(process.cwd(), ".pi", "tasks-config.json"); export function loadTasksConfig(): TasksConfig { - try { - return JSON.parse(readFileSync(CONFIG_PATH, "utf-8")); - } catch { return {}; } + try { + return JSON.parse(readFileSync(CONFIG_PATH, "utf-8")); + } catch { + return {}; + } } export function saveTasksConfig(config: TasksConfig): void { - mkdirSync(dirname(CONFIG_PATH), { recursive: true }); - writeFileSync(CONFIG_PATH, JSON.stringify(config, null, 2)); + mkdirSync(dirname(CONFIG_PATH), { recursive: true }); + writeFileSync(CONFIG_PATH, JSON.stringify(config, null, 2)); } diff --git a/src/types.ts b/src/types.ts index 28326be..220ec9d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -5,22 +5,22 @@ export type TaskStatus = "pending" | "in_progress" | "completed"; 
export interface Task { - id: string; - subject: string; - description: string; - done_criterion: string; // required: what "done" looks like - parentId?: string; // no parent = top-level goal, requires proof claim to complete - status: TaskStatus; - progress_label?: string; - metadata: Record; - blocks: string[]; - blockedBy: string[]; - createdAt: number; - updatedAt: number; + id: string; + subject: string; + description: string; + done_criterion: string; // required: what "done" looks like + parentId?: string; // no parent = top-level goal, requires proof claim to complete + status: TaskStatus; + progress_label?: string; + metadata: Record; + blocks: string[]; + blockedBy: string[]; + createdAt: number; + updatedAt: number; } /** Serialized store format on disk. */ export interface TaskStoreData { - nextId: number; - tasks: Task[]; + nextId: number; + tasks: Task[]; } diff --git a/src/ui/task-widget.ts b/src/ui/task-widget.ts index cc0bba6..ff8aa2f 100644 --- a/src/ui/task-widget.ts +++ b/src/ui/task-widget.ts @@ -1,11 +1,11 @@ /** - * task-widget.ts — Persistent widget showing task list with status icons and progress. + * task-widget.ts — Persistent widget showing open goals with simple status icons and progress. * - * Display style matches Claude Code's task list: - * ✔ completed tasks (strikethrough + dim) + * Display style: * ◼ in_progress tasks * ◻ pending tasks * ✳/✽ actively executing task (star spinner with progress_label text) + * Completed tasks stay in storage but are hidden from the collapsed widget. 
*/ import { truncateToWidth } from "@mariozechner/pi-tui"; @@ -15,18 +15,23 @@ import type { TaskStore } from "../task-store.js"; // ---- Types ---- export type Theme = { - fg(color: string, text: string): string; - bold(text: string): string; - strikethrough(text: string): string; + fg(color: string, text: string): string; + bold(text: string): string; + strikethrough(text: string): string; }; export type UICtx = { - setStatus(key: string, text: string | undefined): void; - setWidget( - key: string, - content: undefined | ((tui: any, theme: Theme) => { render(): string[]; invalidate(): void }), - options?: { placement?: "aboveEditor" | "belowEditor" }, - ): void; + setStatus(key: string, text: string | undefined): void; + setWidget( + key: string, + content: + | undefined + | (( + tui: any, + theme: Theme, + ) => { render(): string[]; invalidate(): void }), + options?: { placement?: "aboveEditor" | "belowEditor" }, + ): void; }; /** Star spinner frames for animated active task indicator (matches Claude Code). */ @@ -36,225 +41,254 @@ const MAX_VISIBLE_TASKS = 5; /** Per-task runtime metrics (elapsed time, token usage). */ export interface TaskMetrics { - startedAt: number; - inputTokens: number; - outputTokens: number; + startedAt: number; + inputTokens: number; + outputTokens: number; } /** Format milliseconds as a human-readable duration (e.g., "2m 49s", "1h 3m"). */ function formatDuration(ms: number): string { - const totalSec = Math.floor(ms / 1000); - if (totalSec < 60) return `${totalSec}s`; - const min = Math.floor(totalSec / 60); - const sec = totalSec % 60; - if (min < 60) return sec > 0 ? `${min}m ${sec}s` : `${min}m`; - const hr = Math.floor(min / 60); - const remMin = min % 60; - return remMin > 0 ? `${hr}h ${remMin}m` : `${hr}h`; + const totalSec = Math.floor(ms / 1000); + if (totalSec < 60) return `${totalSec}s`; + const min = Math.floor(totalSec / 60); + const sec = totalSec % 60; + if (min < 60) return sec > 0 ? 
`${min}m ${sec}s` : `${min}m`; + const hr = Math.floor(min / 60); + const remMin = min % 60; + return remMin > 0 ? `${hr}h ${remMin}m` : `${hr}h`; } /** Format token count with k suffix (e.g., "4.1k", "850"). */ function formatTokens(n: number): string { - if (n < 1000) return String(n); - return (n / 1000).toFixed(1).replace(/\.0$/, "") + "k"; + if (n < 1000) return String(n); + return (n / 1000).toFixed(1).replace(/\.0$/, "") + "k"; } // ---- Widget ---- export class TaskWidget { - private uiCtx: UICtx | undefined; - private widgetFrame = 0; - private widgetInterval: ReturnType | undefined; - /** IDs of tasks currently being actively executed (show spinner). */ - private activeTaskIds = new Set(); - /** Per-task runtime metrics keyed by task ID. */ - private metrics = new Map(); - /** Cached TUI instance for requestRender() calls. */ - private tui: any | undefined; - /** Whether the widget callback is currently registered. */ - private widgetRegistered = false; + private uiCtx: UICtx | undefined; + private widgetFrame = 0; + private widgetInterval: ReturnType | undefined; + /** IDs of tasks currently being actively executed (show spinner). */ + private activeTaskIds = new Set(); + /** Per-task runtime metrics keyed by task ID. */ + private metrics = new Map(); + /** Cached TUI instance for requestRender() calls. */ + private tui: any | undefined; + /** Whether the widget callback is currently registered. */ + private widgetRegistered = false; - constructor(private store: TaskStore) {} + constructor(private store: TaskStore) {} - setStore(store: TaskStore) { - this.store = store; - } + setStore(store: TaskStore) { + this.store = store; + } - setUICtx(ctx: UICtx) { - this.uiCtx = ctx; - } + setUICtx(ctx: UICtx) { + this.uiCtx = ctx; + } - /** Add or remove a task from the active spinner set. 
*/ - setActiveTask(taskId: string | undefined, active = true) { - if (taskId && active) { - this.activeTaskIds.add(taskId); - if (!this.metrics.has(taskId)) { - this.metrics.set(taskId, { startedAt: Date.now(), inputTokens: 0, outputTokens: 0 }); - } - this.ensureTimer(); - } else if (taskId) { - this.activeTaskIds.delete(taskId); - } - this.update(); - } + /** Add or remove a task from the active spinner set. */ + setActiveTask(taskId: string | undefined, active = true) { + if (taskId && active) { + this.activeTaskIds.add(taskId); + if (!this.metrics.has(taskId)) { + this.metrics.set(taskId, { + startedAt: Date.now(), + inputTokens: 0, + outputTokens: 0, + }); + } + this.ensureTimer(); + } else if (taskId) { + this.activeTaskIds.delete(taskId); + } + this.update(); + } - /** Record token usage for the currently active task(s). */ - addTokenUsage(inputTokens: number, outputTokens: number) { - // Distribute to all currently active tasks - for (const id of this.activeTaskIds) { - const m = this.metrics.get(id); - if (m) { - m.inputTokens += inputTokens; - m.outputTokens += outputTokens; - } - } - } + /** Record token usage for the currently active task(s). */ + addTokenUsage(inputTokens: number, outputTokens: number) { + // Distribute to all currently active tasks + for (const id of this.activeTaskIds) { + const m = this.metrics.get(id); + if (m) { + m.inputTokens += inputTokens; + m.outputTokens += outputTokens; + } + } + } - /** Ensure the widget update timer is running. */ - ensureTimer() { - if (!this.widgetInterval) { - this.widgetInterval = setInterval(() => this.update(), 80); - } - } + /** Ensure the widget update timer is running. */ + ensureTimer() { + if (!this.widgetInterval) { + this.widgetInterval = setInterval(() => this.update(), 80); + } + } - /** Build widget lines from current live state. Called from the render callback. 
*/ - private renderWidget(tui: any, theme: Theme): string[] { - const tasks = this.store.list(); - const w = tui.terminal.columns; - const truncate = (line: string) => truncateToWidth(line, w); + /** Build widget lines from current live state. Called from the render callback. */ + private renderWidget(tui: any, theme: Theme): string[] { + const tasks = this.store.list(); + const w = tui.terminal.columns; + const truncate = (line: string) => truncateToWidth(line, w); - if (tasks.length === 0) return []; + if (tasks.length === 0) return []; - const counts = { completed: 0, in_progress: 0, pending: 0 }; - for (const t of tasks) counts[getDisplayStatus(t)]++; + const counts = { completed: 0, in_progress: 0, pending: 0 }; + for (const t of tasks) counts[getDisplayStatus(t)]++; - const parts: string[] = []; - if (counts.completed > 0) parts.push(`${counts.completed} done`); - if (counts.in_progress > 0) parts.push(`${counts.in_progress} in progress`); - if (counts.pending > 0) parts.push(`${counts.pending} open`); - const statusText = `${tasks.length} tasks (${parts.join(", ")})`; + const visibleTasks = tasks.filter((task) => task.status !== "completed"); + if (visibleTasks.length === 0) return []; - const spinnerChar = SPINNER[this.widgetFrame % SPINNER.length]; - const lines: string[] = [truncate(theme.fg("accent", "●") + " " + theme.fg("accent", statusText))]; + const parts: string[] = []; + if (counts.completed > 0) parts.push(`${counts.completed} done hidden`); + if (counts.in_progress > 0) parts.push(`${counts.in_progress} in progress`); + if (counts.pending > 0) parts.push(`${counts.pending} open`); + const statusText = `${tasks.length} goals (${parts.join(", ")})`; - const visible = tasks.slice(0, MAX_VISIBLE_TASKS); - for (let i = 0; i < visible.length; i++) { - const task = visible[i]; - const isActive = this.activeTaskIds.has(task.id) && task.status === "in_progress"; + const spinnerChar = SPINNER[this.widgetFrame % SPINNER.length]; + const lines: string[] = [ 
+ truncate(theme.fg("accent", "●") + " " + theme.fg("accent", statusText)), + ]; - let icon: string; - if (isActive) { - icon = theme.fg("accent", spinnerChar); - } else if (task.status === "completed") { - icon = theme.fg("success", "✔"); - } else if (task.status === "in_progress") { - icon = theme.fg("accent", "◼"); - } else { - icon = "◻"; - } + const visible = visibleTasks.slice(0, MAX_VISIBLE_TASKS); + for (let i = 0; i < visible.length; i++) { + const task = visible[i]; + const isActive = + this.activeTaskIds.has(task.id) && task.status === "in_progress"; - let suffix = ""; - if (task.status === "pending" && task.blockedBy.length > 0) { - const openBlockers = task.blockedBy.filter(bid => { - const blocker = this.store.get(bid); - return blocker && blocker.status !== "completed"; - }); - if (openBlockers.length > 0) { - suffix = theme.fg("dim", ` › blocked by ${openBlockers.map(id => "#" + id).join(", ")}`); - } - } + let icon: string; + if (isActive) { + icon = theme.fg("accent", spinnerChar); + } else if (task.status === "in_progress") { + icon = theme.fg("accent", "◼"); + } else { + icon = "◻"; + } - let text: string; - if (isActive) { - const form = task.progress_label || task.subject; - const m = this.metrics.get(task.id); - let stats = ""; - if (m) { - const elapsed = formatDuration(Date.now() - m.startedAt); - const tokenParts: string[] = []; - if (m.inputTokens > 0) tokenParts.push(`↑ ${formatTokens(m.inputTokens)}`); - if (m.outputTokens > 0) tokenParts.push(`↓ ${formatTokens(m.outputTokens)}`); - stats = tokenParts.length > 0 - ? 
` ${theme.fg("dim", `(${elapsed} · ${tokenParts.join(" ")})`)}` - : ` ${theme.fg("dim", `(${elapsed})`)}`; - } - text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${theme.fg("accent", form + "…")}${stats}`; - } else if (task.status === "completed") { - text = ` ${icon} ${theme.fg("dim", theme.strikethrough("#" + task.id + " " + task.subject))}`; - } else { - text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${task.subject}`; - } + let suffix = ""; + if (task.status === "pending" && task.blockedBy.length > 0) { + const openBlockers = task.blockedBy.filter((bid) => { + const blocker = this.store.get(bid); + return blocker && blocker.status !== "completed"; + }); + if (openBlockers.length > 0) { + suffix = theme.fg( + "dim", + ` › blocked by ${openBlockers.map((id) => "#" + id).join(", ")}`, + ); + } + } - lines.push(truncate(text + suffix)); - } + let text: string; + if (isActive) { + const form = task.progress_label || task.subject; + const m = this.metrics.get(task.id); + let stats = ""; + if (m) { + const elapsed = formatDuration(Date.now() - m.startedAt); + const tokenParts: string[] = []; + if (m.inputTokens > 0) + tokenParts.push(`↑ ${formatTokens(m.inputTokens)}`); + if (m.outputTokens > 0) + tokenParts.push(`↓ ${formatTokens(m.outputTokens)}`); + stats = + tokenParts.length > 0 + ? 
` ${theme.fg("dim", `(${elapsed}, ${tokenParts.join(" ")})`)}` + : ` ${theme.fg("dim", `(${elapsed})`)}`; + } + text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${theme.fg("accent", form + "…")}${stats}`; + } else { + text = ` ${icon} ${theme.fg("dim", "#" + task.id)} ${task.subject}`; + } - if (tasks.length > MAX_VISIBLE_TASKS) { - lines.push(truncate(theme.fg("dim", ` … and ${tasks.length - MAX_VISIBLE_TASKS} more`))); - } + lines.push(truncate(text + suffix)); + } - return lines; - } + if (visibleTasks.length > MAX_VISIBLE_TASKS) { + lines.push( + truncate( + theme.fg( + "dim", + ` … and ${visibleTasks.length - MAX_VISIBLE_TASKS} more open`, + ), + ), + ); + } - /** Force an immediate widget update. */ - update() { - if (!this.uiCtx) return; - const tasks = this.store.list(); + return lines; + } - // Transition: visible → hidden - if (tasks.length === 0) { - if (this.widgetRegistered) { - this.uiCtx.setWidget("tasks", undefined); - this.widgetRegistered = false; - } - if (this.widgetInterval) { - clearInterval(this.widgetInterval); - this.widgetInterval = undefined; - } - return; - } + /** Force an immediate widget update. 
*/ + update() { + if (!this.uiCtx) return; + const tasks = this.store.list(); + const visibleTasks = tasks.filter((task) => task.status !== "completed"); - // Prune stale active IDs (deleted or no longer in_progress) - for (const id of this.activeTaskIds) { - const t = this.store.get(id); - if (!t || t.status !== "in_progress") { - this.activeTaskIds.delete(id); - this.metrics.delete(id); - } - } + // Transition: visible → hidden + if (visibleTasks.length === 0) { + if (this.widgetRegistered) { + this.uiCtx.setWidget("tasks", undefined); + this.widgetRegistered = false; + } + if (this.widgetInterval) { + clearInterval(this.widgetInterval); + this.widgetInterval = undefined; + } + return; + } - // Check if any task needs animation - const hasActiveSpinner = tasks.some(t => this.activeTaskIds.has(t.id) && t.status === "in_progress"); - if (hasActiveSpinner) { - this.ensureTimer(); - } else if (!hasActiveSpinner && this.widgetInterval) { - clearInterval(this.widgetInterval); - this.widgetInterval = undefined; - } + // Prune stale active IDs (deleted or no longer in_progress) + for (const id of this.activeTaskIds) { + const t = this.store.get(id); + if (!t || t.status !== "in_progress") { + this.activeTaskIds.delete(id); + this.metrics.delete(id); + } + } - this.widgetFrame++; + // Check if any task needs animation + const hasActiveSpinner = tasks.some( + (t) => this.activeTaskIds.has(t.id) && t.status === "in_progress", + ); + if (hasActiveSpinner) { + this.ensureTimer(); + } else if (!hasActiveSpinner && this.widgetInterval) { + clearInterval(this.widgetInterval); + this.widgetInterval = undefined; + } - // Transition: hidden → visible — register widget callback once - if (!this.widgetRegistered) { - this.uiCtx.setWidget("tasks", (tui, theme) => { - this.tui = tui; - return { render: () => this.renderWidget(tui, theme), invalidate: () => {} }; - }, { placement: "aboveEditor" }); - this.widgetRegistered = true; - } else if (this.tui) { - // Widget already registered — 
just request a re-render - this.tui.requestRender(); - } - } + this.widgetFrame++; - dispose() { - if (this.widgetInterval) { - clearInterval(this.widgetInterval); - this.widgetInterval = undefined; - } - if (this.uiCtx) { - this.uiCtx.setWidget("tasks", undefined); - } - this.widgetRegistered = false; - this.tui = undefined; - } + // Transition: hidden → visible — register widget callback once + if (!this.widgetRegistered) { + this.uiCtx.setWidget( + "tasks", + (tui, theme) => { + this.tui = tui; + return { + render: () => this.renderWidget(tui, theme), + invalidate: () => {}, + }; + }, + { placement: "aboveEditor" }, + ); + this.widgetRegistered = true; + } else if (this.tui) { + // Widget already registered — just request a re-render + this.tui.requestRender(); + } + } + + dispose() { + if (this.widgetInterval) { + clearInterval(this.widgetInterval); + this.widgetInterval = undefined; + } + if (this.uiCtx) { + this.uiCtx.setWidget("tasks", undefined); + } + this.widgetRegistered = false; + this.tui = undefined; + } } diff --git a/test/auto-clear.test.ts b/test/auto-clear.test.ts index 5d165b7..e448e19 100644 --- a/test/auto-clear.test.ts +++ b/test/auto-clear.test.ts @@ -4,318 +4,345 @@ import { AutoClearManager } from "../src/auto-clear.js"; import { TaskStore } from "../src/task-store.js"; describe("auto-clear: on_task_complete mode", () => { - let store: TaskStore; - let manager: AutoClearManager; + let store: TaskStore; + let manager: AutoClearManager; - beforeEach(() => { - store = new TaskStore(); - manager = new AutoClearManager(() => store, () => "on_task_complete"); - }); + beforeEach(() => { + store = new TaskStore(); + manager = new AutoClearManager( + () => store, + () => "on_task_complete", + ); + }); - it("does not clear completed task before REMINDER_INTERVAL turns", () => { - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + it("does not clear completed task before REMINDER_INTERVAL turns", () => { 
+ store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - // Turns 2, 3, 4 — not enough - for (let turn = 2; turn <= 4; turn++) { - manager.onTurnStart(turn); - } - expect(store.get("1")).toBeDefined(); - expect(store.get("1")!.status).toBe("completed"); - }); + // Turns 2, 3, 4 — not enough + for (let turn = 2; turn <= 4; turn++) { + manager.onTurnStart(turn); + } + expect(store.get("1")).toBeDefined(); + expect(store.get("1")!.status).toBe("completed"); + }); - it("clears completed task after REMINDER_INTERVAL turns", () => { - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + it("clears completed task after REMINDER_INTERVAL turns", () => { + store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - // Turn 5 = turn 1 + 4 (REMINDER_INTERVAL) - manager.onTurnStart(5); - expect(store.get("1")).toBeUndefined(); - expect(store.list()).toHaveLength(0); - }); + // Turn 5 = turn 1 + 4 (REMINDER_INTERVAL) + manager.onTurnStart(5); + expect(store.get("1")).toBeUndefined(); + expect(store.list()).toHaveLength(0); + }); - it("clears each task independently based on its own completion turn", () => { - store.create("Task A", "Desc", "done"); - store.create("Task B", "Desc", "done"); + it("clears each task independently based on its own completion turn", () => { + store.create("Task A", "Desc", "done"); + store.create("Task B", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + store.complete("1"); + manager.trackCompletion("1", 1); - store.complete("2"); - manager.trackCompletion("2", 3); + store.complete("2"); + manager.trackCompletion("2", 3); - // Turn 5: Task A expires (1+4), Task B still lingers (3+4=7) - manager.onTurnStart(5); - expect(store.get("1")).toBeUndefined(); - expect(store.get("2")).toBeDefined(); + // Turn 5: Task A expires (1+4), Task B still lingers (3+4=7) + manager.onTurnStart(5); + 
expect(store.get("1")).toBeUndefined(); + expect(store.get("2")).toBeDefined(); - // Turn 7: Task B expires - manager.onTurnStart(7); - expect(store.get("2")).toBeUndefined(); - }); + // Turn 7: Task B expires + manager.onTurnStart(7); + expect(store.get("2")).toBeUndefined(); + }); - it("does not clear pending or in_progress tasks", () => { - store.create("Pending", "Desc", "done"); - store.create("In Progress", "Desc", "done"); - store.create("Completed", "Desc", "done"); - store.update("2", { status: "in_progress" }); - store.complete("3"); - manager.trackCompletion("3", 1); + it("does not clear pending or in_progress tasks", () => { + store.create("Pending", "Desc", "done"); + store.create("In Progress", "Desc", "done"); + store.create("Completed", "Desc", "done"); + store.update("2", { status: "in_progress" }); + store.complete("3"); + manager.trackCompletion("3", 1); - manager.onTurnStart(5); - expect(store.get("1")).toBeDefined(); // pending — untouched - expect(store.get("2")).toBeDefined(); // in_progress — untouched - expect(store.get("3")).toBeUndefined(); // completed — cleared - }); + manager.onTurnStart(5); + expect(store.get("1")).toBeDefined(); // pending — untouched + expect(store.get("2")).toBeDefined(); // in_progress — untouched + expect(store.get("3")).toBeUndefined(); // completed — cleared + }); - it("cleans up dependency edges when auto-clearing", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); - store.update("1", { add_blocks: ["2"] }); - store.complete("1"); - manager.trackCompletion("1", 1); + it("cleans up dependency edges when auto-clearing", () => { + store.create("Blocker", "Desc", "done"); + store.create("Blocked", "Desc", "done"); + store.update("1", { add_blocks: ["2"] }); + store.complete("1"); + manager.trackCompletion("1", 1); - manager.onTurnStart(5); - expect(store.get("1")).toBeUndefined(); - expect(store.get("2")!.blockedBy).toEqual([]); - }); + manager.onTurnStart(5); + 
expect(store.get("1")).toBeUndefined(); + expect(store.get("2")!.blockedBy).toEqual([]); + }); - it("returns true when tasks are cleared", () => { - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + it("returns true when tasks are cleared", () => { + store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - expect(manager.onTurnStart(4)).toBe(false); - expect(manager.onTurnStart(5)).toBe(true); - }); + expect(manager.onTurnStart(4)).toBe(false); + expect(manager.onTurnStart(5)).toBe(true); + }); }); describe("auto-clear: on_list_complete mode", () => { - let store: TaskStore; - let manager: AutoClearManager; + let store: TaskStore; + let manager: AutoClearManager; - beforeEach(() => { - store = new TaskStore(); - manager = new AutoClearManager(() => store, () => "on_list_complete"); - }); + beforeEach(() => { + store = new TaskStore(); + manager = new AutoClearManager( + () => store, + () => "on_list_complete", + ); + }); - it("does not clear when some tasks are still pending", () => { - store.create("Done", "Desc", "done"); - store.create("Pending", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + it("does not clear when some tasks are still pending", () => { + store.create("Done", "Desc", "done"); + store.create("Pending", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - for (let turn = 2; turn <= 10; turn++) { - manager.onTurnStart(turn); - } - expect(store.get("1")).toBeDefined(); - expect(store.list()).toHaveLength(2); - }); + for (let turn = 2; turn <= 10; turn++) { + manager.onTurnStart(turn); + } + expect(store.get("1")).toBeDefined(); + expect(store.list()).toHaveLength(2); + }); - it("does not clear immediately when all tasks complete", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - store.complete("1"); - store.complete("2"); - manager.trackCompletion("2", 1); + it("does not 
clear immediately when all tasks complete", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + store.complete("1"); + store.complete("2"); + manager.trackCompletion("2", 1); - // Turns 2-4: not enough - for (let turn = 2; turn <= 4; turn++) { - manager.onTurnStart(turn); - } - expect(store.list()).toHaveLength(2); - }); + // Turns 2-4: not enough + for (let turn = 2; turn <= 4; turn++) { + manager.onTurnStart(turn); + } + expect(store.list()).toHaveLength(2); + }); - it("clears all completed tasks after REMINDER_INTERVAL turns when all are completed", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - store.complete("1"); - store.complete("2"); - manager.trackCompletion("2", 1); + it("clears all completed tasks after REMINDER_INTERVAL turns when all are completed", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + store.complete("1"); + store.complete("2"); + manager.trackCompletion("2", 1); - manager.onTurnStart(5); - expect(store.list()).toHaveLength(0); - }); + manager.onTurnStart(5); + expect(store.list()).toHaveLength(0); + }); - it("resets countdown when a new task is created before REMINDER_INTERVAL", () => { - store.create("A", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + it("resets countdown when a new task is created before REMINDER_INTERVAL", () => { + store.create("A", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - // Turn 3: new task created — reset countdown - manager.onTurnStart(3); - manager.resetBatchCountdown(); - store.create("B", "Desc", "done"); + // Turn 3: new task created — reset countdown + manager.onTurnStart(3); + manager.resetBatchCountdown(); + store.create("B", "Desc", "done"); - // Turn 5 would have cleared, but countdown was reset at turn 3 - manager.onTurnStart(5); - expect(store.get("1")).toBeDefined(); // still around — list isn't all completed - }); + // Turn 5 would 
have cleared, but countdown was reset at turn 3 + manager.onTurnStart(5); + expect(store.get("1")).toBeDefined(); // still around — list isn't all completed + }); - it("resets countdown when a task goes back to in_progress", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - store.complete("1"); - store.complete("2"); - manager.trackCompletion("2", 1); + it("resets countdown when a task goes back to in_progress", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + store.complete("1"); + store.complete("2"); + manager.trackCompletion("2", 1); - // Turn 3: task 2 goes back to in_progress - manager.onTurnStart(3); - store.update("2", { status: "in_progress" }); - manager.resetBatchCountdown(); + // Turn 3: task 2 goes back to in_progress + manager.onTurnStart(3); + store.update("2", { status: "in_progress" }); + manager.resetBatchCountdown(); - // Turn 5: would have cleared, but countdown was reset - manager.onTurnStart(5); - expect(store.list()).toHaveLength(2); // both still here - }); + // Turn 5: would have cleared, but countdown was reset + manager.onTurnStart(5); + expect(store.list()).toHaveLength(2); // both still here + }); - it("returns true when tasks are cleared", () => { - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + it("returns true when tasks are cleared", () => { + store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - expect(manager.onTurnStart(4)).toBe(false); - expect(manager.onTurnStart(5)).toBe(true); - }); + expect(manager.onTurnStart(4)).toBe(false); + expect(manager.onTurnStart(5)).toBe(true); + }); }); describe("auto-clear: never mode", () => { - let store: TaskStore; - let manager: AutoClearManager; + let store: TaskStore; + let manager: AutoClearManager; - beforeEach(() => { - store = new TaskStore(); - manager = new AutoClearManager(() => store, () => "never"); - }); + beforeEach(() 
=> { + store = new TaskStore(); + manager = new AutoClearManager( + () => store, + () => "never", + ); + }); - it("never clears completed tasks regardless of turns", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - store.complete("1"); - store.complete("2"); - manager.trackCompletion("1", 1); - manager.trackCompletion("2", 1); + it("never clears completed tasks regardless of turns", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + store.complete("1"); + store.complete("2"); + manager.trackCompletion("1", 1); + manager.trackCompletion("2", 1); - for (let turn = 2; turn <= 20; turn++) { - manager.onTurnStart(turn); - } - expect(store.list()).toHaveLength(2); - }); + for (let turn = 2; turn <= 20; turn++) { + manager.onTurnStart(turn); + } + expect(store.list()).toHaveLength(2); + }); - it("trackCompletion is a no-op", () => { - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + it("trackCompletion is a no-op", () => { + store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - manager.onTurnStart(100); - expect(store.get("1")).toBeDefined(); - }); + manager.onTurnStart(100); + expect(store.get("1")).toBeDefined(); + }); }); describe("auto-clear: dynamic mode switching", () => { - it("respects mode changes via getMode callback", () => { - const store = new TaskStore(); - let mode: AutoClearMode = "never"; - const manager = new AutoClearManager(() => store, () => mode); + it("respects mode changes via getMode callback", () => { + const store = new TaskStore(); + let mode: AutoClearMode = "never"; + const manager = new AutoClearManager( + () => store, + () => mode, + ); - store.create("Task", "Desc", "done"); - store.complete("1"); + store.create("Task", "Desc", "done"); + store.complete("1"); - // Track in never mode — no-op - manager.trackCompletion("1", 1); - manager.onTurnStart(5); - 
expect(store.get("1")).toBeDefined(); + // Track in never mode — no-op + manager.trackCompletion("1", 1); + manager.onTurnStart(5); + expect(store.get("1")).toBeDefined(); - // Switch to on_task_complete and re-track - mode = "on_task_complete"; - manager.trackCompletion("1", 5); - manager.onTurnStart(9); - expect(store.get("1")).toBeUndefined(); - }); + // Switch to on_task_complete and re-track + mode = "on_task_complete"; + manager.trackCompletion("1", 5); + manager.onTurnStart(9); + expect(store.get("1")).toBeUndefined(); + }); }); describe("auto-clear: store getter (session switch)", () => { - it("operates on the current store after swap", () => { - let store = new TaskStore(); - const manager = new AutoClearManager(() => store, () => "on_task_complete"); + it("operates on the current store after swap", () => { + let store = new TaskStore(); + const manager = new AutoClearManager( + () => store, + () => "on_task_complete", + ); - store.create("Old task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + store.create("Old task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - // Simulate session switch — swap store - store = new TaskStore(); - store.create("New task", "Desc", "done"); - manager.reset(); + // Simulate session switch — swap store + store = new TaskStore(); + store.create("New task", "Desc", "done"); + manager.reset(); - // Old task tracking was reset, new store has no completed tasks - manager.onTurnStart(5); - expect(store.list()).toHaveLength(1); - expect(store.get("1")!.subject).toBe("New task"); - }); + // Old task tracking was reset, new store has no completed tasks + manager.onTurnStart(5); + expect(store.list()).toHaveLength(1); + expect(store.get("1")!.subject).toBe("New task"); + }); - it("clears from new store, not old store", () => { - let store = new TaskStore(); - const manager = new AutoClearManager(() => store, () => "on_task_complete"); + it("clears from new store, not old 
store", () => { + let store = new TaskStore(); + const manager = new AutoClearManager( + () => store, + () => "on_task_complete", + ); - // Swap to new store with a completed task - store = new TaskStore(); - store.create("Task in new store", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + // Swap to new store with a completed task + store = new TaskStore(); + store.create("Task in new store", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - manager.onTurnStart(5); - expect(store.get("1")).toBeUndefined(); // cleared from new store - }); + manager.onTurnStart(5); + expect(store.get("1")).toBeUndefined(); // cleared from new store + }); }); describe("auto-clear: reset (new session)", () => { - it("reset clears per-task tracking so old completions don't fire", () => { - const store = new TaskStore(); - const manager = new AutoClearManager(() => store, () => "on_task_complete"); + it("reset clears per-task tracking so old completions don't fire", () => { + const store = new TaskStore(); + const manager = new AutoClearManager( + () => store, + () => "on_task_complete", + ); - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - // Simulate /new — reset before the delay expires - manager.reset(); + // Simulate /new — reset before the delay expires + manager.reset(); - // Old completion should NOT trigger after reset - manager.onTurnStart(5); - expect(store.get("1")).toBeDefined(); - }); + // Old completion should NOT trigger after reset + manager.onTurnStart(5); + expect(store.get("1")).toBeDefined(); + }); - it("reset clears batch countdown so old all-completed state doesn't fire", () => { - const store = new TaskStore(); - const manager = new AutoClearManager(() => store, () => "on_list_complete"); + it("reset clears batch countdown so old all-completed state doesn't 
fire", () => { + const store = new TaskStore(); + const manager = new AutoClearManager( + () => store, + () => "on_list_complete", + ); - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); + store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); - // Simulate /new — reset before the delay expires - manager.reset(); + // Simulate /new — reset before the delay expires + manager.reset(); - // Old batch countdown should NOT trigger after reset - manager.onTurnStart(5); - expect(store.get("1")).toBeDefined(); - }); + // Old batch countdown should NOT trigger after reset + manager.onTurnStart(5); + expect(store.get("1")).toBeDefined(); + }); - it("tracking works normally after reset", () => { - const store = new TaskStore(); - const manager = new AutoClearManager(() => store, () => "on_task_complete"); + it("tracking works normally after reset", () => { + const store = new TaskStore(); + const manager = new AutoClearManager( + () => store, + () => "on_task_complete", + ); - store.create("Task", "Desc", "done"); - store.complete("1"); - manager.trackCompletion("1", 1); - manager.reset(); + store.create("Task", "Desc", "done"); + store.complete("1"); + manager.trackCompletion("1", 1); + manager.reset(); - // Re-track after reset with new turn baseline - manager.trackCompletion("1", 10); - manager.onTurnStart(14); - expect(store.get("1")).toBeUndefined(); - }); + // Re-track after reset with new turn baseline + manager.trackCompletion("1", 10); + manager.onTurnStart(14); + expect(store.get("1")).toBeUndefined(); + }); }); diff --git a/test/lgtm-command.test.ts b/test/lgtm-command.test.ts index 3f9d186..ab84234 100644 --- a/test/lgtm-command.test.ts +++ b/test/lgtm-command.test.ts @@ -2,123 +2,165 @@ import { describe, expect, it, vi } from "vitest"; import proofTasksExtension, { parseLgtmArgs } from "../src/index.js"; type RegisteredTool = { - name: string; - execute: (...args: any[]) => 
Promise; + name: string; + execute: (...args: any[]) => Promise; }; type RegisteredCommand = { - handler: (args: string, ctx: any) => Promise; - getArgumentCompletions?: (args: string) => Promise; + handler: (args: string, ctx: any) => Promise; + getArgumentCompletions?: (args: string) => Promise; }; function makeHarness() { - const tools = new Map(); - const commands = new Map(); - const sentMessages: any[] = []; + const tools = new Map(); + const commands = new Map(); + const sentMessages: any[] = []; - const pi = { - on: vi.fn(), - registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), - registerCommand: vi.fn((name: string, command: RegisteredCommand) => commands.set(name, command)), - sendMessage: vi.fn((message: any) => sentMessages.push(message)), - }; + const pi = { + on: vi.fn(), + registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), + registerCommand: vi.fn((name: string, command: RegisteredCommand) => + commands.set(name, command), + ), + sendMessage: vi.fn((message: any) => sentMessages.push(message)), + }; - proofTasksExtension(pi as any); + proofTasksExtension(pi as any); - async function execTool(name: string, params: Record) { - const tool = tools.get(name); - if (!tool) throw new Error(`Tool ${name} not registered`); - return tool.execute("tool-call", params, undefined, undefined, {}); - } + async function execTool(name: string, params: Record) { + const tool = tools.get(name); + if (!tool) throw new Error(`Tool ${name} not registered`); + return tool.execute("tool-call", params, undefined, undefined, {}); + } - function makeUi(overrides: { - select?: Array; - confirm?: Array; - } = {}) { - const selectQueue = [...(overrides.select ?? [])]; - const confirmQueue = [...(overrides.confirm ?? [])]; - return { - notify: vi.fn(), - select: vi.fn(async () => selectQueue.shift()), - confirm: vi.fn(async () => confirmQueue.shift() ?? 
false), - }; - } + function makeUi( + overrides: { + select?: Array; + confirm?: Array; + } = {}, + ) { + const selectQueue = [...(overrides.select ?? [])]; + const confirmQueue = [...(overrides.confirm ?? [])]; + return { + notify: vi.fn(), + select: vi.fn(async () => selectQueue.shift()), + confirm: vi.fn(async () => confirmQueue.shift() ?? false), + }; + } - return { tools, commands, sentMessages, execTool, makeUi }; + return { tools, commands, sentMessages, execTool, makeUi }; } describe("parseLgtmArgs", () => { - it("parses menu and view forms", () => { - expect(parseLgtmArgs("")).toEqual({ kind: "menu" }); - expect(parseLgtmArgs("*")).toEqual({ kind: "view_all" }); - expect(parseLgtmArgs("1 #2")).toEqual({ kind: "view", ids: ["1", "2"] }); - }); + it("parses menu and view forms", () => { + expect(parseLgtmArgs("")).toEqual({ kind: "menu" }); + expect(parseLgtmArgs("*")).toEqual({ kind: "view_all" }); + expect(parseLgtmArgs("1 #2")).toEqual({ kind: "view", ids: ["1", "2"] }); + }); - it("rejects task-management forms", () => { - expect(parseLgtmArgs("clear")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); - expect(parseLgtmArgs("clear *")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); - expect(parseLgtmArgs("clear #7")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); - expect(parseLgtmArgs("delete #7")).toEqual({ kind: "error", message: "Task management lives in /tasks now. /lgtm is viewer-only." }); - }); + it("rejects task-management forms", () => { + expect(parseLgtmArgs("clear")).toEqual({ + kind: "error", + message: "Task management lives in /tasks now. /lgtm is viewer-only.", + }); + expect(parseLgtmArgs("clear *")).toEqual({ + kind: "error", + message: "Task management lives in /tasks now. 
/lgtm is viewer-only.", + }); + expect(parseLgtmArgs("clear #7")).toEqual({ + kind: "error", + message: "Task management lives in /tasks now. /lgtm is viewer-only.", + }); + expect(parseLgtmArgs("delete #7")).toEqual({ + kind: "error", + message: "Task management lives in /tasks now. /lgtm is viewer-only.", + }); + }); }); describe("/lgtm command", () => { - it("shows all open proof logs from the picker", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); - await harness.execTool("TaskCreate", { subject: "Task B", description: "Desc", done_criterion: "done" }); + it("shows all open proof logs from the picker", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Task A", + description: "Desc", + done_criterion: "done", + }); + await harness.execTool("TaskCreate", { + subject: "Task B", + description: "Desc", + done_criterion: "done", + }); - const ui = harness.makeUi({ select: ["View all open proof logs"] }); - const command = harness.commands.get("lgtm"); - if (!command) throw new Error("/lgtm not registered"); + const ui = harness.makeUi({ select: ["View all open proof logs"] }); + const command = harness.commands.get("lgtm"); + if (!command) throw new Error("/lgtm not registered"); - await command.handler("", { ui }); + await command.handler("", { ui }); - expect(harness.sentMessages).toHaveLength(2); - expect(harness.sentMessages[0].customType).toBe("proof-log"); - expect(harness.sentMessages[0].content).toContain("Task #1"); - expect(harness.sentMessages[1].content).toContain("Task #2"); - }); + expect(harness.sentMessages).toHaveLength(2); + expect(harness.sentMessages[0].customType).toBe("proof-log"); + expect(harness.sentMessages[0].content).toContain("Task #1"); + expect(harness.sentMessages[1].content).toContain("Task #2"); + }); - it("shows one proof log from the picker", async () => { - const harness = 
makeHarness(); - await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); + it("shows one proof log from the picker", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Task A", + description: "Desc", + done_criterion: "done", + }); - const ui = harness.makeUi({ select: ["[PENDING] #1 Task A"] }); - const command = harness.commands.get("lgtm"); - if (!command) throw new Error("/lgtm not registered"); + const ui = harness.makeUi({ select: ["[PENDING] #1 Task A"] }); + const command = harness.commands.get("lgtm"); + if (!command) throw new Error("/lgtm not registered"); - await command.handler("", { ui }); + await command.handler("", { ui }); - expect(harness.sentMessages).toHaveLength(1); - expect(harness.sentMessages[0].content).toContain("Task #1"); - }); + expect(harness.sentMessages).toHaveLength(1); + expect(harness.sentMessages[0].content).toContain("Task #1"); + }); - it("rejects /lgtm clear and points task management back to /tasks", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); + it("rejects /lgtm clear and points task management back to /tasks", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Task A", + description: "Desc", + done_criterion: "done", + }); - const ui = harness.makeUi(); - const command = harness.commands.get("lgtm"); - if (!command) throw new Error("/lgtm not registered"); + const ui = harness.makeUi(); + const command = harness.commands.get("lgtm"); + if (!command) throw new Error("/lgtm not registered"); - await command.handler("clear 1", { ui }); + await command.handler("clear 1", { ui }); - expect(harness.sentMessages).toHaveLength(0); - expect(ui.notify).toHaveBeenCalledWith("Task management lives in /tasks now. 
/lgtm is viewer-only.", "error"); - }); + expect(harness.sentMessages).toHaveLength(0); + expect(ui.notify).toHaveBeenCalledWith( + "Task management lives in /tasks now. /lgtm is viewer-only.", + "error", + ); + }); - it("rejects /lgtm delete and points task management back to /tasks", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { subject: "Task A", description: "Desc", done_criterion: "done" }); + it("rejects /lgtm delete and points task management back to /tasks", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Task A", + description: "Desc", + done_criterion: "done", + }); - const ui = harness.makeUi(); - const command = harness.commands.get("lgtm"); - if (!command) throw new Error("/lgtm not registered"); + const ui = harness.makeUi(); + const command = harness.commands.get("lgtm"); + if (!command) throw new Error("/lgtm not registered"); - await command.handler("delete 1", { ui }); + await command.handler("delete 1", { ui }); - expect(harness.sentMessages).toHaveLength(0); - expect(ui.notify).toHaveBeenCalledWith("Task management lives in /tasks now. /lgtm is viewer-only.", "error"); - }); + expect(harness.sentMessages).toHaveLength(0); + expect(ui.notify).toHaveBeenCalledWith( + "Task management lives in /tasks now. 
/lgtm is viewer-only.", + "error", + ); + }); }); diff --git a/test/review-badges.test.ts b/test/review-badges.test.ts index d6397d1..6713087 100644 --- a/test/review-badges.test.ts +++ b/test/review-badges.test.ts @@ -1,150 +1,145 @@ import { describe, expect, it } from "vitest"; -import { getCompletionMode, getDisplayStatus, getGateStatus, getReviewBadges, getReviewState } from "../src/review-badges.js"; +import { + getCompletionMode, + getDisplayStatus, + getGateStatus, + getReviewState, +} from "../src/review-badges.js"; import type { Task } from "../src/types.js"; function makeTask(overrides: Partial = {}): Task { - return { - id: "1", - subject: "Test", - description: "Desc", - done_criterion: "done", - status: "pending", - progress_label: undefined, - metadata: {}, - blocks: [], - blockedBy: [], - createdAt: 0, - updatedAt: 0, - ...overrides, - }; + return { + id: "1", + subject: "Test", + description: "Desc", + done_criterion: "done", + status: "pending", + progress_label: undefined, + metadata: {}, + blocks: [], + blockedBy: [], + createdAt: 0, + updatedAt: 0, + ...overrides, + }; } -describe("getReviewBadges", () => { - it("renders all dots when no artifacts exist", () => { - expect(getReviewBadges(makeTask())).toBe("[···]"); - }); - - it("fills evidence/review/completed slots independently", () => { - const task = makeTask({ - metadata: { - lgtm_evidence: "npm test", - robot_reviews: [{ - iteration: 1, - reviewer: "opencode", - scope: "task evidence", - observations: ["Observed one unchecked edge case"], - concerns: ["Evidence does not cover prod traffic."], - suggestions: ["Inspect one prod traffic sample."], - blind_spots: "Did not inspect prod traffic", - accepted: false, - evidence_complete: false, - evidence_convincing: false, - missing_evidence: ["Prod traffic sample"], - submitted_at: "2026-04-17T00:00:00.000Z", - mode: "manual", - }], - }, - }); - - expect(getReviewBadges(task)).toBe("[🛠🤖·]"); - }); - - it("fills the completed badge once the task 
is completed", () => { - const task = makeTask({ - status: "completed", - metadata: { lgtm_evidence: "ok" }, - }); - - expect(getReviewBadges(task)).toBe("[🛠·✓]"); - }); -}); - describe("review state helpers", () => { - it("reports completion mode as proof for top-level tasks", () => { - expect(getCompletionMode(makeTask())).toBe("proof"); - }); + it("reports completion mode as proof for top-level tasks", () => { + expect(getCompletionMode(makeTask())).toBe("proof"); + }); - it("reports completion mode as direct for subtasks", () => { - expect(getCompletionMode(makeTask({ parentId: "1" }))).toBe("direct"); - }); + it("reports completion mode as direct for subtasks", () => { + expect(getCompletionMode(makeTask({ parentId: "1" }))).toBe("direct"); + }); - it("reports superseded when only history remains", () => { - expect(getReviewState(makeTask({ metadata: { lgtm_history: [{ iteration: 1 }] } }))).toBe("superseded"); - }); + it("reports superseded when only history remains", () => { + expect( + getReviewState( + makeTask({ metadata: { lgtm_history: [{ iteration: 1 }] } }), + ), + ).toBe("superseded"); + }); }); describe("getGateStatus", () => { - it("reports top-level proof requirement before evidence", () => { - expect(getGateStatus(makeTask())).toBe("top-level task requires TaskClaimDone evidence before completion"); - }); + it("reports top-level proof requirement before evidence", () => { + expect(getGateStatus(makeTask())).toBe( + "top-level task requires TaskClaimDone evidence before completion", + ); + }); - it("reports non-blocking reviewer failure", () => { - expect(getGateStatus(makeTask({ - metadata: { - lgtm_evidence: "ok", - robot_review_last_error: "Unexpected token 'a'", - }, - }))).toContain("review unavailable; autonomy continues"); - }); + it("reports non-blocking reviewer failure", () => { + expect( + getGateStatus( + makeTask({ + metadata: { + lgtm_evidence: "ok", + robot_review_last_error: "Unexpected token 'a'", + }, + }), + ), + 
).toContain("review unavailable; autonomy continues"); + }); - it("reports rejected robot review when latest review does not accept", () => { - expect(getGateStatus(makeTask({ - metadata: { - lgtm_evidence: "ok", - robot_reviews: [{ - iteration: 1, - reviewer: "opencode", - scope: "task evidence", - observations: ["Observed missing output"], - concerns: ["The current evidence is summary-only."], - suggestions: ["Paste the literal output."], - blind_spots: "none", - accepted: false, - evidence_complete: false, - evidence_convincing: false, - missing_evidence: ["literal output"], - submitted_at: "2026-04-17T00:00:00.000Z", - mode: "manual", - }], - }, - }))).toBe("latest proof review rejected the evidence; strengthen the proof and try again"); - }); + it("reports rejected robot review when latest review does not accept", () => { + expect( + getGateStatus( + makeTask({ + metadata: { + lgtm_evidence: "ok", + robot_reviews: [ + { + iteration: 1, + reviewer: "opencode", + scope: "task evidence", + observations: ["Observed missing output"], + concerns: ["The current evidence is summary-only."], + suggestions: ["Paste the literal output."], + blind_spots: "none", + accepted: false, + evidence_complete: false, + evidence_convincing: false, + missing_evidence: ["literal output"], + submitted_at: "2026-04-17T00:00:00.000Z", + mode: "manual", + }, + ], + }, + }), + ), + ).toBe( + "latest proof review rejected the evidence; strengthen the proof and try again", + ); + }); - it("keeps rejection higher priority than a later reviewer warning", () => { - expect(getGateStatus(makeTask({ - metadata: { - lgtm_evidence: "ok", - robot_review_last_error: "timeout", - robot_reviews: [{ - iteration: 1, - reviewer: "opencode", - scope: "task evidence", - observations: ["Observed missing output"], - concerns: ["The current evidence is summary-only."], - suggestions: ["Paste the literal output."], - blind_spots: "none", - accepted: false, - evidence_complete: false, - evidence_convincing: 
false, - missing_evidence: ["literal output"], - submitted_at: "2026-04-17T00:00:00.000Z", - mode: "manual", - }], - }, - }))).toBe("latest proof review rejected the evidence; strengthen the proof and try again"); - }); + it("keeps rejection higher priority than a later reviewer warning", () => { + expect( + getGateStatus( + makeTask({ + metadata: { + lgtm_evidence: "ok", + robot_review_last_error: "timeout", + robot_reviews: [ + { + iteration: 1, + reviewer: "opencode", + scope: "task evidence", + observations: ["Observed missing output"], + concerns: ["The current evidence is summary-only."], + suggestions: ["Paste the literal output."], + blind_spots: "none", + accepted: false, + evidence_complete: false, + evidence_convincing: false, + missing_evidence: ["literal output"], + submitted_at: "2026-04-17T00:00:00.000Z", + mode: "manual", + }, + ], + }, + }), + ), + ).toBe( + "latest proof review rejected the evidence; strengthen the proof and try again", + ); + }); }); describe("getDisplayStatus", () => { - it("returns pending for fresh tasks", () => { - expect(getDisplayStatus(makeTask())).toBe("pending"); - }); + it("returns pending for fresh tasks", () => { + expect(getDisplayStatus(makeTask())).toBe("pending"); + }); - it("returns in_progress for active tasks not yet escalated", () => { - expect(getDisplayStatus(makeTask({ status: "in_progress" }))).toBe("in_progress"); - }); + it("returns in_progress for active tasks not yet escalated", () => { + expect(getDisplayStatus(makeTask({ status: "in_progress" }))).toBe( + "in_progress", + ); + }); - it("returns completed for completed tasks", () => { - expect(getDisplayStatus(makeTask({ status: "completed" }))).toBe("completed"); - }); + it("returns completed for completed tasks", () => { + expect(getDisplayStatus(makeTask({ status: "completed" }))).toBe( + "completed", + ); + }); }); diff --git a/test/robot-review-runner.test.ts b/test/robot-review-runner.test.ts index dcb59fe..6b30957 100644 --- 
a/test/robot-review-runner.test.ts +++ b/test/robot-review-runner.test.ts @@ -1,78 +1,115 @@ import { describe, expect, it } from "vitest"; import { - DEFAULT_ROBOT_REVIEW_TIMEOUT_MS, - extractFinalAssistantTextFromPiJsonl, - extractRobotReviewJson, - getCurrentModelRef, - getPiInvocation, - getRobotReviewTimeoutMs, - runRobotReviewCommand, + DEFAULT_ROBOT_REVIEW_TIMEOUT_MS, + extractFinalAssistantTextFromPiJsonl, + extractRobotReviewJson, + getCurrentModelRef, + getPiInvocation, + getRobotReviewTimeoutMs, + runRobotReviewCommand, } from "../src/index.js"; describe("robot review runner helpers", () => { - it("uses plain pi by default and allows override", () => { - expect(getPiInvocation(["--mode", "json"], {} as NodeJS.ProcessEnv)).toEqual({ - command: "pi", - args: ["--mode", "json"], - }); - expect(getPiInvocation(["-p"], { PI_PROOF_TASKS_PI_BIN: "/custom/pi" } as NodeJS.ProcessEnv)).toEqual({ - command: "/custom/pi", - args: ["-p"], - }); - }); + it("uses plain pi by default and allows override", () => { + expect( + getPiInvocation(["--mode", "json"], {} as NodeJS.ProcessEnv), + ).toEqual({ + command: "pi", + args: ["--mode", "json"], + }); + expect( + getPiInvocation(["-p"], { + PI_PROOF_TASKS_PI_BIN: "/custom/pi", + } as NodeJS.ProcessEnv), + ).toEqual({ + command: "/custom/pi", + args: ["-p"], + }); + }); - it("parses the final assistant text from pi jsonl", () => { - const output = [ - "{\"type\":\"message_update\"}", - "{\"type\":\"message_end\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true} ROBOT_REVIEW_JSON_END\"}]}}", - ].join("\n"); - expect(extractFinalAssistantTextFromPiJsonl(output)).toContain("ROBOT_REVIEW_JSON_START"); - }); + it("parses the final assistant text from pi jsonl", () => { + const output = [ + '{"type":"message_update"}', + '{"type":"message_end","message":{"role":"assistant","content":[{"type":"text","text":"ROBOT_REVIEW_JSON_START {\\"accepted\\":true} 
ROBOT_REVIEW_JSON_END"}]}}', + ].join("\n"); + expect(extractFinalAssistantTextFromPiJsonl(output)).toContain( + "ROBOT_REVIEW_JSON_START", + ); + }); - it("parses noisy JSON wrapped in review markers", () => { - const output = [ - "ROBOT_REVIEW_JSON_START", - "and here is the JSON you asked for:", - "```json", - '{"accepted":true,"observations":["ok"]}', - "```", - "ROBOT_REVIEW_JSON_END", - ].join("\n"); - expect(extractRobotReviewJson(output)).toEqual({ accepted: true, observations: ["ok"] }); - }); + it("parses noisy JSON wrapped in review markers", () => { + const output = [ + "ROBOT_REVIEW_JSON_START", + "and here is the JSON you asked for:", + "```json", + '{"accepted":true,"observations":["ok"]}', + "```", + "ROBOT_REVIEW_JSON_END", + ].join("\n"); + expect(extractRobotReviewJson(output)).toEqual({ + accepted: true, + observations: ["ok"], + }); + }); - it("includes raw output context on parse failure", () => { - expect(() => extractRobotReviewJson("ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END")).toThrow(/Raw output:/); - }); + it("includes raw output context on parse failure", () => { + expect(() => + extractRobotReviewJson( + "ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END", + ), + ).toThrow(/Raw output:/); + }); - it("uses configured timeout or falls back to default", () => { - expect(getRobotReviewTimeoutMs({ PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "2500" } as NodeJS.ProcessEnv)).toBe(2500); - expect(getRobotReviewTimeoutMs({ PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "bad" } as NodeJS.ProcessEnv)).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS); - }); + it("uses configured timeout or falls back to default", () => { + expect( + getRobotReviewTimeoutMs({ + PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "2500", + } as NodeJS.ProcessEnv), + ).toBe(2500); + expect( + getRobotReviewTimeoutMs({ + PI_PROOF_TASKS_ROBOT_REVIEW_TIMEOUT_MS: "bad", + } as NodeJS.ProcessEnv), + ).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS); + }); - it("formats the current model as the 
reviewer model ref", () => { - expect(getCurrentModelRef({ provider: "openai", id: "gpt-5" })).toBe("openai/gpt-5"); - expect(getCurrentModelRef({ providerId: "anthropic", modelId: "claude-haiku" })).toBe("anthropic/claude-haiku"); - expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined(); - }); + it("formats the current model as the reviewer model ref", () => { + expect(getCurrentModelRef({ provider: "openai", id: "gpt-5" })).toBe( + "openai/gpt-5", + ); + expect( + getCurrentModelRef({ providerId: "anthropic", modelId: "claude-haiku" }), + ).toBe("anthropic/claude-haiku"); + expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined(); + }); - it("times out bounded child commands", async () => { - await expect(runRobotReviewCommand({ - command: process.execPath, - args: ["-e", "setTimeout(() => {}, 1000)"], - }, undefined, 25)).rejects.toThrow(/timed out/i); - }); + it("times out bounded child commands", async () => { + await expect( + runRobotReviewCommand( + { + command: process.execPath, + args: ["-e", "setTimeout(() => {}, 1000)"], + }, + undefined, + 25, + ), + ).rejects.toThrow(/timed out/i); + }); - it("extracts assistant text from a child jsonl process", async () => { - const script = [ - "process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');", - "process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');", - ].join(""); - const result = await runRobotReviewCommand({ - command: process.execPath, - args: ["-e", script], - }, undefined, 500); - expect(result.exitCode).toBe(0); - expect(result.stdout).toContain("ROBOT_REVIEW_JSON_END"); - }); + it("extracts assistant text from a child jsonl process", async () => { + const script = [ + "process.stdout.write(JSON.stringify({type:'message_update'}) + '\\n');", + 
"process.stdout.write(JSON.stringify({type:'message_end',message:{role:'assistant',content:[{type:'text',text:'ROBOT_REVIEW_JSON_START {\\\"accepted\\\":true,\\\"observations\\\":[\\\"ok\\\"]} ROBOT_REVIEW_JSON_END'}]}}) + '\\n');", + ].join(""); + const result = await runRobotReviewCommand( + { + command: process.execPath, + args: ["-e", script], + }, + undefined, + 500, + ); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("ROBOT_REVIEW_JSON_END"); + }); }); diff --git a/test/robot-review.test.ts b/test/robot-review.test.ts index 741f766..72672d7 100644 --- a/test/robot-review.test.ts +++ b/test/robot-review.test.ts @@ -2,288 +2,446 @@ import { mkdtempSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { describe, expect, it } from "vitest"; -import { archiveCurrentEvidence, buildArtifactRecords, buildRobotReviewPrompt, getCurrentEvidenceIteration, getEvidenceHistory, renderEvidencePacket, renderProofLog } from "../src/index.js"; -import { appendRobotReviewMetadata, getLatestRobotReview, getRobotReviews, hasCompleteProofClaim, relaxAdvisoryVerificationHints, shouldCompleteAfterAcceptedReview } from "../src/robot-review.js"; +import { + archiveCurrentEvidence, + buildArtifactRecords, + buildRobotReviewPrompt, + getCurrentEvidenceIteration, + getEvidenceHistory, + renderEvidencePacket, + renderProofLog, +} from "../src/index.js"; +import { + appendRobotReviewMetadata, + getLatestRobotReview, + getRobotReviews, + hasCompleteProofClaim, + relaxAdvisoryVerificationHints, + shouldCompleteAfterAcceptedReview, +} from "../src/robot-review.js"; import type { Task } from "../src/types.js"; function makeTask(overrides: Partial = {}): Task { - return { - id: "1", - subject: "Test", - description: "Desc", - done_criterion: "done", - status: "pending", - progress_label: undefined, - metadata: {}, - blocks: [], - blockedBy: [], - createdAt: 0, - updatedAt: 0, - ...overrides, - }; + return { + id: 
"1", + subject: "Test", + description: "Desc", + done_criterion: "done", + status: "pending", + progress_label: undefined, + metadata: {}, + blocks: [], + blockedBy: [], + createdAt: 0, + updatedAt: 0, + ...overrides, + }; } describe("robot review helpers", () => { - it("completes only after accepted review and complete proof claim", () => { - const task = makeTask({ - metadata: { - lgtm_evidence: "literal output", - lgtm_failure_likely: "wrong command", - lgtm_failure_sneaky: "right output for wrong reason", - lgtm_failure_unknown: "untested platform", - lgtm_falsification_test: "npm test\npass", - lgtm_evidence_reasoning: "the test output rules out the named failures for this scope", - lgtm_verification_hints: ["test/robot-review.test.ts shows the expectation"], - lgtm_remaining_uncertainty: "does not test prod install", - }, - }); - expect(hasCompleteProofClaim(task)).toBe(true); - expect(shouldCompleteAfterAcceptedReview(task, true)).toBe(true); - expect(shouldCompleteAfterAcceptedReview(task, false)).toBe(false); - expect(shouldCompleteAfterAcceptedReview(makeTask({ metadata: { lgtm_evidence: "literal output" } }), true)).toBe(false); - }); + it("completes only after accepted review and complete proof claim", () => { + const task = makeTask({ + metadata: { + lgtm_evidence: "literal output", + lgtm_failure_likely: "wrong command", + lgtm_failure_sneaky: "right output for wrong reason", + lgtm_failure_unknown: "untested platform", + lgtm_falsification_test: "npm test\npass", + lgtm_evidence_reasoning: + "the test output rules out the named failures for this scope", + lgtm_verification_hints: [ + "test/robot-review.test.ts shows the expectation", + ], + lgtm_remaining_uncertainty: "does not test prod install", + }, + }); + expect(hasCompleteProofClaim(task)).toBe(true); + expect(shouldCompleteAfterAcceptedReview(task, true)).toBe(true); + expect(shouldCompleteAfterAcceptedReview(task, false)).toBe(false); + expect( + shouldCompleteAfterAcceptedReview( + 
makeTask({ metadata: { lgtm_evidence: "literal output" } }), + true, + ), + ).toBe(false); + }); - it("reads legacy single-review metadata", () => { - const task = makeTask({ - metadata: { - robot_review_reviewer: "opencode", - robot_review_scope: "task evidence", - robot_review_observations: ["Observed no command output for the core claim"], - robot_review_blind_spots: "Did not rerun tests", - robot_review_submitted_at: "2026-04-17T00:00:00.000Z", - }, - }); + it("reads legacy single-review metadata", () => { + const task = makeTask({ + metadata: { + robot_review_reviewer: "opencode", + robot_review_scope: "task evidence", + robot_review_observations: [ + "Observed no command output for the core claim", + ], + robot_review_blind_spots: "Did not rerun tests", + robot_review_submitted_at: "2026-04-17T00:00:00.000Z", + }, + }); - const reviews = getRobotReviews(task); - expect(reviews).toHaveLength(1); - expect(reviews[0].reviewer).toBe("opencode"); - expect(reviews[0].iteration).toBe(1); - expect(reviews[0].accepted).toBe(true); - }); + const reviews = getRobotReviews(task); + expect(reviews).toHaveLength(1); + expect(reviews[0].reviewer).toBe("opencode"); + expect(reviews[0].iteration).toBe(1); + expect(reviews[0].accepted).toBe(true); + }); - it("builds artifact records with absolute path and sha256", () => { - const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); - const path = join(dir, "evidence.log"); - writeFileSync(path, "hello\n"); + it("builds artifact records with absolute path and sha256", () => { + const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); + const path = join(dir, "evidence.log"); + writeFileSync(path, "hello\n"); - const [artifact] = buildArtifactRecords([path]); - expect(artifact.path).toBe(path); - expect(artifact.bytes).toBe(6); - expect(artifact.sha256).toHaveLength(64); - }); + const [artifact] = buildArtifactRecords([path]); + expect(artifact.path).toBe(path); + expect(artifact.bytes).toBe(6); + 
expect(artifact.sha256).toHaveLength(64); + }); - it("archives current evidence with reason", () => { - const task = makeTask({ - metadata: { - lgtm_evidence: "literal output", - lgtm_failure_likely: "wrong seed", - lgtm_failure_sneaky: "wrong threshold", - lgtm_failure_unknown: "untested environment", - lgtm_falsification_test: "pytest -k check", - lgtm_evidence_reasoning: "pytest output distinguishes the expected passing path from the named failures", - lgtm_verification_hints: ["see line 5"], - lgtm_remaining_uncertainty: "not load tested", - lgtm_submitted_at: "2026-06-07T00:00:00.000Z", - lgtm_commands: [{ cmd: "pytest", exit_code: 0 }], - }, - }); + it("archives current evidence with reason", () => { + const task = makeTask({ + metadata: { + lgtm_evidence: "literal output", + lgtm_failure_likely: "wrong seed", + lgtm_failure_sneaky: "wrong threshold", + lgtm_failure_unknown: "untested environment", + lgtm_falsification_test: "pytest -k check", + lgtm_evidence_reasoning: + "pytest output distinguishes the expected passing path from the named failures", + lgtm_verification_hints: ["see line 5"], + lgtm_remaining_uncertainty: "not load tested", + lgtm_submitted_at: "2026-06-07T00:00:00.000Z", + lgtm_commands: [{ cmd: "pytest", exit_code: 0 }], + }, + }); - const archived = archiveCurrentEvidence(task, "threshold changed"); - const taskWithHistory = makeTask({ metadata: archived }); - expect(getCurrentEvidenceIteration(task)?.iteration).toBe(1); - expect(getEvidenceHistory(taskWithHistory)).toHaveLength(1); - expect(getEvidenceHistory(taskWithHistory)[0].supersede_reason).toBe("threshold changed"); - }); + const archived = archiveCurrentEvidence(task, "threshold changed"); + const taskWithHistory = makeTask({ metadata: archived }); + expect(getCurrentEvidenceIteration(task)?.iteration).toBe(1); + expect(getEvidenceHistory(taskWithHistory)).toHaveLength(1); + expect(getEvidenceHistory(taskWithHistory)[0].supersede_reason).toBe( + "threshold changed", + ); + }); - 
it("treats verification hints as advisory when core evidence already passes", () => { - const review = relaxAdvisoryVerificationHints({ - reviewer: "auto", - scope: "task evidence", - observations: ["Observed commit, push, and test logs"], - concerns: [], - suggestions: [], - blind_spots: "Did not inspect interactive UI", - accepted: false, - evidence_complete: true, - evidence_convincing: false, - missing_evidence: ["verification_hints_actionable"], - submitted_at: "2026-06-13T00:00:00.000Z", - mode: "auto", - rubric: { - evidence_covers_done_criterion: { reason: "verbatim logs match", pass: true }, - falsification_test_runnable: { reason: "command and output shown", pass: true }, - failure_modes_addressed: { reason: "plausible top risks named", pass: true }, - evidence_distinguishes_success: { reason: "evidence rules out named failures", pass: true }, - verification_hints_actionable: { reason: "paths are vague", pass: false }, - }, - }); + it("treats advisory rubric failures as non-blocking when core evidence already passes", () => { + const review = relaxAdvisoryVerificationHints({ + reviewer: "auto", + scope: "task evidence", + observations: ["Observed commit, push, and test logs"], + concerns: [], + suggestions: [], + blind_spots: "Did not inspect interactive UI", + accepted: false, + evidence_complete: true, + evidence_convincing: false, + missing_evidence: [ + "verification_hints_actionable", + "evidence_distinguishes_success", + ], + submitted_at: "2026-06-13T00:00:00.000Z", + mode: "auto", + rubric: { + evidence_covers_done_criterion: { + reason: "verbatim logs match", + pass: true, + }, + falsification_test_runnable: { + reason: "command and output shown", + pass: true, + }, + failure_modes_addressed: { + reason: "plausible top risks named", + pass: true, + }, + evidence_distinguishes_success: { + reason: "reasoning writeup is thin", + pass: false, + }, + verification_hints_actionable: { + reason: "paths are vague", + pass: false, + }, + }, + }); - 
expect(review.accepted).toBe(true); - expect(review.evidence_convincing).toBe(true); - expect(review.observations.at(-1)).toContain("treated as advisory"); - expect(review.missing_evidence).toEqual([]); - }); + expect(review.accepted).toBe(true); + expect(review.evidence_convincing).toBe(true); + expect( + review.observations.some((item) => item.includes("treated as advisory")), + ).toBe(true); + expect(review.missing_evidence).toEqual([]); + }); - it("does not relax verification hints unless the core rubric passes", () => { - const review = relaxAdvisoryVerificationHints({ - reviewer: "auto", - scope: "task evidence", - observations: ["Observed vague summary only"], - concerns: [], - suggestions: [], - blind_spots: "Did not rerun tests", - accepted: false, - evidence_complete: true, - evidence_convincing: false, - missing_evidence: ["verification_hints_actionable"], - submitted_at: "2026-06-13T00:00:00.000Z", - mode: "auto", - rubric: { - evidence_covers_done_criterion: { reason: "summary only", pass: false }, - falsification_test_runnable: { reason: "command and output shown", pass: true }, - failure_modes_addressed: { reason: "plausible top risks named", pass: true }, - evidence_distinguishes_success: { reason: "evidence does not rule out summary-only failure", pass: false }, - verification_hints_actionable: { reason: "paths are vague", pass: false }, - }, - }); + it("does not relax verification hints unless the core rubric passes", () => { + const review = relaxAdvisoryVerificationHints({ + reviewer: "auto", + scope: "task evidence", + observations: ["Observed vague summary only"], + concerns: [], + suggestions: [], + blind_spots: "Did not rerun tests", + accepted: false, + evidence_complete: true, + evidence_convincing: false, + missing_evidence: ["verification_hints_actionable"], + submitted_at: "2026-06-13T00:00:00.000Z", + mode: "auto", + rubric: { + evidence_covers_done_criterion: { reason: "summary only", pass: false }, + falsification_test_runnable: { + 
reason: "command and output shown", + pass: true, + }, + failure_modes_addressed: { + reason: "plausible top risks named", + pass: true, + }, + evidence_distinguishes_success: { + reason: "evidence does not rule out summary-only failure", + pass: false, + }, + verification_hints_actionable: { + reason: "paths are vague", + pass: false, + }, + }, + }); - expect(review.accepted).toBe(false); - expect(review.evidence_convincing).toBe(false); - }); + expect(review.accepted).toBe(false); + expect(review.evidence_convincing).toBe(false); + }); - it("renders one compact evidence packet for both human and robot review", () => { - const task = makeTask({ - metadata: { - lgtm_evidence: "literal output", - lgtm_failure_likely: "wrong seed", - lgtm_failure_sneaky: "wrong threshold", - lgtm_failure_unknown: "does not test UI rendering", - lgtm_falsification_test: "pytest -k check\nPASSED", - lgtm_evidence_reasoning: "The passing pytest transcript distinguishes success from wrong-threshold and wrong-seed failures for this test scope.", - lgtm_verification_hints: ["test/robot-review.test.ts contains the new guard test"], - lgtm_remaining_uncertainty: "not load tested", - lgtm_submitted_at: "2026-06-14T00:00:00.000Z", - lgtm_commands: [{ cmd: "npm test", exit_code: 0, stdout_path: "/tmp/test.log" }], - lgtm_evidence_artifacts: [{ path: "/tmp/test.log", sha256: "abc", bytes: 123 }], - }, - }); + it("renders one compact evidence packet for both human and robot review", () => { + const task = makeTask({ + metadata: { + lgtm_evidence: "literal output", + lgtm_failure_likely: "wrong seed", + lgtm_failure_sneaky: "wrong threshold", + lgtm_failure_unknown: "does not test UI rendering", + lgtm_falsification_test: "pytest -k check\nPASSED", + lgtm_evidence_reasoning: + "The passing pytest transcript distinguishes success from wrong-threshold and wrong-seed failures for this test scope.", + lgtm_verification_hints: [ + "test/robot-review.test.ts contains the new guard test", + ], + 
lgtm_remaining_uncertainty: "not load tested", + lgtm_submitted_at: "2026-06-14T00:00:00.000Z", + lgtm_commands: [ + { cmd: "npm test", exit_code: 0, stdout_path: "/tmp/test.log" }, + ], + lgtm_evidence_artifacts: [ + { path: "/tmp/test.log", sha256: "abc", bytes: 123 }, + ], + }, + }); - const packet = renderEvidencePacket(task); - const prompt = buildRobotReviewPrompt(task); - expect(packet).toContain("## Goal"); - expect(packet).toContain("## Planned evidence / UAT"); - expect(packet).toContain("## Attempt 1"); - expect(prompt).toContain(packet); - expect(prompt).toContain("does this evidence prove success for the stated goal"); - }); + const packet = renderEvidencePacket(task); + const prompt = buildRobotReviewPrompt(task); + expect(packet).toContain("## Goal"); + expect(packet).toContain("## Attempt 1"); + expect(packet).toContain("### Evidence"); + expect(packet).toContain("### Verify"); + expect(prompt).toContain(packet); + expect(prompt).toContain( + "does this packet prove the exact user-visible success condition", + ); + expect(prompt).toContain( + "Do not reject solely because items 3, 4, or 5 are weak", + ); + expect(prompt).toContain( + "concrete missing artifacts, command outputs, written-file checks", + ); + }); - it("appends robot reviews as iterations", () => { - const task = makeTask(); - const metadata1 = appendRobotReviewMetadata(task, { - reviewer: "opencode", - scope: "task evidence", - observations: ["Observed missing benchmark output"], - concerns: ["The current evidence does not show the claimed speedup."], - suggestions: ["Add the benchmark transcript for the claimed speedup."], - blind_spots: "Did not inspect prod config", - accepted: false, - evidence_complete: false, - evidence_convincing: false, - missing_evidence: ["Benchmark output for the claimed speedup"], - submitted_at: "2026-04-17T00:00:00.000Z", - mode: "auto", - }); - const task1 = makeTask({ metadata: metadata1 }); - const metadata2 = appendRobotReviewMetadata(task1, { - 
reviewer: "opencode", - scope: "updated task evidence", - observations: ["Observed benchmark output and test transcript"], - concerns: [], - suggestions: [], - blind_spots: "Did not inspect long-run stability", - accepted: true, - evidence_complete: true, - evidence_convincing: true, - missing_evidence: [], - submitted_at: "2026-04-17T01:00:00.000Z", - mode: "auto", - }); + it("truncates long submitted evidence in the rendered proof log and points to the full artifact", () => { + const longEvidence = Array.from( + { length: 35 }, + (_, i) => `line ${i + 1}`, + ).join("\n"); + const task = makeTask({ + metadata: { + lgtm_evidence: longEvidence, + lgtm_failure_likely: "wrong seed", + lgtm_failure_sneaky: "wrong threshold", + lgtm_failure_unknown: "untested environment", + lgtm_falsification_test: "pytest -k check\nPASSED", + lgtm_evidence_reasoning: + "The transcript rules out the named failures for this scope.", + lgtm_verification_hints: ["see /tmp/test.log"], + lgtm_remaining_uncertainty: "not load tested", + lgtm_submitted_at: "2026-06-14T00:00:00.000Z", + lgtm_evidence_artifacts: [ + { path: "/tmp/test.log", sha256: "abc", bytes: 123 }, + ], + }, + }); - const task2 = makeTask({ metadata: metadata2 }); - const reviews = getRobotReviews(task2); - expect(reviews).toHaveLength(2); - expect(reviews[0].iteration).toBe(1); - expect(reviews[1].iteration).toBe(2); - expect(getLatestRobotReview(task2)?.evidence_convincing).toBe(true); - expect(task2.metadata.robot_review_iteration_count).toBe(2); - }); + const log = renderProofLog(task); + expect(log).toContain("line 1"); + expect(log).toContain("line 8"); + expect(log).toContain("line 35"); + expect(log).not.toContain("line 9"); + expect(log).toContain("[... 
19 middle lines omitted ...]"); + expect(log).toContain( + "[truncated at 16 lines from 35; showing first 8 and last 8; full text: /tmp/test.log]", + ); + }); - it("renders a simple proof log with judgement and suggestions", () => { - const taskWithEvidence = makeTask({ - metadata: { - lgtm_evidence: "npm test\n125 passed", - lgtm_failure_likely: "old package name still in README", - lgtm_failure_sneaky: "top-level direct completion still slips through", - lgtm_failure_unknown: "fresh judge command fails in a real session", - lgtm_falsification_test: "npm test\n125 passed", - lgtm_evidence_reasoning: "The test transcript and grep distinguish the intended behavior from stale workflow regressions.", - lgtm_verification_hints: ["README.md install block shows pi-proof-tasks"], - lgtm_remaining_uncertainty: "Did not exercise every model provider.", - lgtm_submitted_at: "2026-06-14T00:00:00.000Z", - }, - }); - const task = makeTask({ - metadata: { - ...taskWithEvidence.metadata, - ...appendRobotReviewMetadata(taskWithEvidence, { - reviewer: "auto", - scope: "proof log", - observations: ["Observed the test transcript and renamed package."], - concerns: ["The live Pi session path is still untested."], - suggestions: ["Run one self-hosted TaskClaimDone UAT."], - blind_spots: "Did not inspect external auth state", - accepted: false, - evidence_complete: true, - evidence_convincing: false, - missing_evidence: ["self-hosted TaskClaimDone UAT"], - submitted_at: "2026-06-14T00:01:00.000Z", - mode: "auto", - }), - }, - }); + it("appends robot reviews as iterations", () => { + const task = makeTask(); + const metadata1 = appendRobotReviewMetadata(task, { + reviewer: "opencode", + scope: "task evidence", + observations: ["Observed missing benchmark output"], + concerns: ["The current evidence does not show the claimed speedup."], + suggestions: ["Add the benchmark transcript for the claimed speedup."], + blind_spots: "Did not inspect prod config", + accepted: false, + 
evidence_complete: false, + evidence_convincing: false, + missing_evidence: ["Benchmark output for the claimed speedup"], + submitted_at: "2026-04-17T00:00:00.000Z", + mode: "auto", + }); + const task1 = makeTask({ metadata: metadata1 }); + const metadata2 = appendRobotReviewMetadata(task1, { + reviewer: "opencode", + scope: "updated task evidence", + observations: ["Observed benchmark output and test transcript"], + concerns: [], + suggestions: [], + blind_spots: "Did not inspect long-run stability", + accepted: true, + evidence_complete: true, + evidence_convincing: true, + missing_evidence: [], + submitted_at: "2026-04-17T01:00:00.000Z", + mode: "auto", + }); - const log = renderProofLog(task); - expect(log).toContain("# Task #1: Test"); - expect(log).toContain("## Goal"); - expect(log).toContain("## Planned evidence / UAT"); - expect(log).toContain("## Attempt 1"); - expect(log).toContain("### Submitted evidence"); - expect(log).toContain("### Judgement"); - expect(log).toContain("Refused by auto"); - expect(log).toContain("Run one self-hosted TaskClaimDone UAT."); - }); + const task2 = makeTask({ metadata: metadata2 }); + const reviews = getRobotReviews(task2); + expect(reviews).toHaveLength(2); + expect(reviews[0].iteration).toBe(1); + expect(reviews[1].iteration).toBe(2); + expect(getLatestRobotReview(task2)?.evidence_convincing).toBe(true); + expect(task2.metadata.robot_review_iteration_count).toBe(2); + }); - it("renders reviewer-unavailable proof logs for fail-open completion notes", () => { - const task = makeTask({ - status: "completed", - metadata: { - lgtm_evidence: "npm test\n125 passed", - lgtm_failure_likely: "old package name still in README", - lgtm_failure_sneaky: "top-level direct completion still slips through", - lgtm_failure_unknown: "fresh judge command fails in a real session", - lgtm_falsification_test: "npm test\n125 passed", - lgtm_evidence_reasoning: "The test transcript and grep distinguish the intended behavior from stale workflow 
regressions.", - lgtm_verification_hints: ["README.md install block shows pi-proof-tasks"], - lgtm_remaining_uncertainty: "Did not exercise every model provider.", - robot_review_last_error: "judge auth failed", - }, - }); + it("renders a simple proof log with judgement and suggestions", () => { + const taskWithEvidence = makeTask({ + metadata: { + lgtm_evidence: "npm test\n125 passed", + lgtm_failure_likely: "old package name still in README", + lgtm_failure_sneaky: "top-level direct completion still slips through", + lgtm_failure_unknown: "fresh judge command fails in a real session", + lgtm_falsification_test: "npm test\n125 passed", + lgtm_evidence_reasoning: + "The test transcript and grep distinguish the intended behavior from stale workflow regressions.", + lgtm_verification_hints: [ + "README.md install block shows pi-proof-tasks", + ], + lgtm_remaining_uncertainty: "Did not exercise every model provider.", + lgtm_submitted_at: "2026-06-14T00:00:00.000Z", + }, + }); + const task = makeTask({ + metadata: { + ...taskWithEvidence.metadata, + ...appendRobotReviewMetadata(taskWithEvidence, { + reviewer: "auto", + scope: "proof log", + observations: ["Observed the test transcript and renamed package."], + concerns: ["The live Pi session path is still untested."], + suggestions: ["Run one self-hosted TaskClaimDone UAT."], + blind_spots: "Did not inspect external auth state", + accepted: false, + evidence_complete: true, + evidence_convincing: false, + missing_evidence: ["self-hosted TaskClaimDone UAT"], + submitted_at: "2026-06-14T00:01:00.000Z", + mode: "auto", + }), + }, + }); - const log = renderProofLog(task); - expect(log).toContain("completed with reviewer unavailable"); - expect(log).toContain("### Judgement"); - expect(log).toContain("judge auth failed"); - expect(log).toContain("Autonomy continued without blocking completion."); - }); + const log = renderProofLog(task); + expect(log).toContain("# Task #1: Test"); + expect(log).toContain("## Goal"); + 
expect(log).toContain("## Attempt 1"); + expect(log).toContain("### Evidence"); + expect(log).toContain("### Verify"); + expect(log).toContain("### Judgement"); + expect(log).toContain("Refused by auto"); + expect(log).toContain("### Observations"); + expect(log).toContain("### Concerns"); + expect(log).toContain("### Missing evidence"); + expect(log).toContain("### Suggestions"); + expect(log).toContain("Run one self-hosted TaskClaimDone UAT."); + }); + + it("keeps full submitted evidence in the automatic review packet even when proof logs truncate it", () => { + const artifactPath = join(tmpdir(), "proof-packet-long-evidence.log"); + const longEvidence = Array.from( + { length: 35 }, + (_, i) => `line ${i + 1}`, + ).join("\n"); + writeFileSync(artifactPath, longEvidence); + const task = makeTask({ + metadata: { + lgtm_evidence: longEvidence, + lgtm_failure_likely: "missing artifact", + lgtm_failure_sneaky: "wrong slice shown", + lgtm_failure_unknown: "untested provider path", + lgtm_falsification_test: "npm test\npass", + lgtm_evidence_reasoning: + "The full evidence must stay visible to the judge even if humans see a shortened preview.", + lgtm_verification_hints: [ + "Open the artifact if the inline preview truncates.", + ], + lgtm_remaining_uncertainty: "Did not inspect live TUI.", + lgtm_evidence_artifacts: buildArtifactRecords([artifactPath]), + }, + }); + + const proofLog = renderProofLog(task); + const reviewPacket = renderEvidencePacket(task, { + truncateEvidence: false, + }); + expect(proofLog).toContain("line 8"); + expect(proofLog).toContain("line 35"); + expect(proofLog).not.toContain("line 9"); + expect(reviewPacket).toContain("line 35"); + expect(reviewPacket).not.toContain("[truncated at 16 lines"); + }); + + it("renders reviewer-unavailable proof logs for fail-open completion notes", () => { + const task = makeTask({ + status: "completed", + metadata: { + lgtm_evidence: "npm test\n125 passed", + lgtm_failure_likely: "old package name still in 
README", + lgtm_failure_sneaky: "top-level direct completion still slips through", + lgtm_failure_unknown: "fresh judge command fails in a real session", + lgtm_falsification_test: "npm test\n125 passed", + lgtm_evidence_reasoning: + "The test transcript and grep distinguish the intended behavior from stale workflow regressions.", + lgtm_verification_hints: [ + "README.md install block shows pi-proof-tasks", + ], + lgtm_remaining_uncertainty: "Did not exercise every model provider.", + robot_review_last_error: "judge auth failed", + }, + }); + + const log = renderProofLog(task); + expect(log).toContain("completed with reviewer unavailable"); + expect(log).toContain("### Judgement"); + expect(log).toContain("judge auth failed"); + expect(log).toContain("### Suggestions"); + expect(log).not.toContain("### Missing evidence"); + expect(log).not.toContain("### Observations"); + expect(log).not.toContain("### Concerns"); + expect(log).toContain("Autonomy continued without blocking completion."); + }); }); - diff --git a/test/task-claim-done-flow.test.ts b/test/task-claim-done-flow.test.ts new file mode 100644 index 0000000..2665bb8 --- /dev/null +++ b/test/task-claim-done-flow.test.ts @@ -0,0 +1,190 @@ +import { chmodSync, mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import proofTasksExtension from "../src/index.js"; + +type RegisteredTool = { + name: string; + execute: (...args: any[]) => Promise; +}; + +function makeHarness() { + const tools = new Map(); + const pi = { + on: vi.fn(), + registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), + registerCommand: vi.fn(), + sendMessage: vi.fn(), + }; + + proofTasksExtension(pi as any); + + async function execTool( + name: string, + params: Record, + ctx: Record = {}, + ) { + const tool = tools.get(name); + if (!tool) throw new Error(`Tool ${name} not registered`); + return 
tool.execute("tool-call", params, undefined, undefined, ctx); + } + + return { execTool }; +} + +function writeReviewerScript(source: string): string { + const dir = mkdtempSync(join(tmpdir(), "pi-proof-reviewer-")); + const path = join(dir, "reviewer.js"); + writeFileSync(path, `#!/usr/bin/env node\n${source}\n`); + chmodSync(path, 0o755); + return path; +} + +const ORIGINAL_PI_BIN = process.env.PI_PROOF_TASKS_PI_BIN; +afterEach(() => { + if (ORIGINAL_PI_BIN === undefined) delete process.env.PI_PROOF_TASKS_PI_BIN; + else process.env.PI_PROOF_TASKS_PI_BIN = ORIGINAL_PI_BIN; +}); + +describe("TaskClaimDone end-to-end proof flow", () => { + it("keeps the task open on rejected review and /lgtm-style TaskGet shows truncated evidence", async () => { + const reviewer = writeReviewerScript(` +const review = { + reviewer: "fake-judge", + scope: "task evidence", + rubric: { + evidence_covers_done_criterion: { reason: "missing one artifact", pass: false }, + falsification_test_runnable: { reason: "ok", pass: true }, + failure_modes_addressed: { reason: "ok", pass: true }, + evidence_distinguishes_success: { reason: "not enough", pass: false }, + verification_hints_actionable: { reason: "ok", pass: true } + }, + observations: ["Observed truncated proof packet"], + concerns: ["Need stronger evidence"], + suggestions: ["Add one more artifact"], + blind_spots: "Did not inspect live TUI", + missing_evidence: ["evidence_covers_done_criterion", "evidence_distinguishes_success"], + evidence_complete: false, + evidence_convincing: false, + accepted: false +}; +console.log("ROBOT_REVIEW_JSON_START"); +console.log(JSON.stringify(review)); +console.log("ROBOT_REVIEW_JSON_END"); +`); + process.env.PI_PROOF_TASKS_PI_BIN = reviewer; + + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Proof task", + description: "Desc", + done_criterion: "done", + }); + + const artifactPath = join(tmpdir(), "proof-long-evidence.log"); + const longEvidence = Array.from( + 
{ length: 35 }, + (_, i) => `line ${i + 1}`, + ).join("\n"); + writeFileSync(artifactPath, longEvidence); + + const claim = await harness.execTool( + "TaskClaimDone", + { + taskId: "1", + evidence: longEvidence, + failure_likely: "missing artifact", + failure_sneaky: "right shape for wrong reason", + failure_unknown: "untested provider path", + falsification_test: "npm test\npass", + evidence_reasoning: + "The packet distinguishes the named failures for this test scope.", + verification_hints: ["look at the proof log"], + remaining_uncertainty: "Did not inspect live TUI", + evidence_paths: [artifactPath], + }, + { model: { provider: "openai", id: "gpt-5" } }, + ); + + const claimText = claim.content[0].text; + + const taskGet = await harness.execTool("TaskGet", { taskId: "1" }); + const text = taskGet.content[0].text; + + expect(claimText).toContain("## TaskClaimDone -> Task #1: Proof task"); + expect(claimText).toContain("### Metadata"); + expect(claimText).toContain("- Proof iterations: 1"); + expect(claimText).toContain("- Robot reviews: 1"); + expect(text).toContain("Status: pending"); + expect(text).toContain( + "Gate status: latest proof review rejected the evidence; strengthen the proof and try again", + ); + expect(text).toContain("line 1"); + expect(text).toContain("line 8"); + expect(text).toContain("line 35"); + expect(text).not.toContain("line 9"); + expect(text).toContain("[... 
19 middle lines omitted ...]"); + expect(text).toContain( + `[truncated at 16 lines from 35; showing first 8 and last 8; full text: ${artifactPath}]`, + ); + expect(text).toContain("### Judgement"); + expect(text).toContain("Refused"); + expect(text).toContain("### Missing evidence"); + expect(text).toContain("### Suggestions"); + expect(text).toContain("Add one more artifact"); + }); + + it("completes the task fail-open on parse failure and preserves the failure note", async () => { + const reviewer = writeReviewerScript(` +console.log("ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END"); +`); + process.env.PI_PROOF_TASKS_PI_BIN = reviewer; + + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Proof task", + description: "Desc", + done_criterion: "done", + }); + + const claim = await harness.execTool( + "TaskClaimDone", + { + taskId: "1", + evidence: "short evidence", + failure_likely: "missing artifact", + failure_sneaky: "right shape for wrong reason", + failure_unknown: "untested provider path", + falsification_test: "npm test\npass", + evidence_reasoning: + "The packet distinguishes the named failures for this test scope.", + verification_hints: ["look at the proof log"], + remaining_uncertainty: "Did not inspect live TUI", + }, + { model: { provider: "openai", id: "gpt-5" } }, + ); + + const claimText = claim.content[0].text; + + const taskGet = await harness.execTool("TaskGet", { taskId: "1" }); + const text = taskGet.content[0].text; + + expect(claimText).toContain("## TaskClaimDone -> Task #1: Proof task"); + expect(claimText).toContain("### Metadata"); + expect(claimText).toContain( + "- Gate status: completed with reviewer unavailable", + ); + expect(text).toContain("Status: completed"); + expect(text).toContain("completed with reviewer unavailable"); + expect(text).toContain("Raw output:"); + expect(text).toContain("### Suggestions"); + expect(text).not.toContain("### Missing evidence\n- (none)"); + 
expect(text).not.toContain("### Observations\n- (none)"); + expect(text).not.toContain("### Concerns\n- (none)"); + expect(text).toContain( + "ROBOT_REVIEW_JSON_START and nope ROBOT_REVIEW_JSON_END", + ); + expect(text).toContain("Autonomy continued without blocking completion."); + }); +}); diff --git a/test/task-list-render.test.ts b/test/task-list-render.test.ts index 821b21b..42842af 100644 --- a/test/task-list-render.test.ts +++ b/test/task-list-render.test.ts @@ -1,99 +1,312 @@ -import { describe, expect, it, vi } from "vitest"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, expect, it, vi } from "vitest"; import proofTasksExtension from "../src/index.js"; +import { TaskStore } from "../src/task-store.js"; type RegisteredTool = { - name: string; - execute: (...args: any[]) => Promise; + name: string; + execute: (...args: any[]) => Promise; }; function makeHarness() { - const tools = new Map(); - const pi = { - on: vi.fn(), - registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), - registerCommand: vi.fn(), - sendMessage: vi.fn(), - }; + const tools = new Map(); + const handlers = new Map any>>(); + const pi = { + on: vi.fn((event: string, handler: (...args: any[]) => any) => { + const existing = handlers.get(event) ?? 
[]; + existing.push(handler); + handlers.set(event, existing); + }), + registerTool: vi.fn((tool: RegisteredTool) => tools.set(tool.name, tool)), + registerCommand: vi.fn(), + sendMessage: vi.fn(), + }; - proofTasksExtension(pi as any); + proofTasksExtension(pi as any); - async function execTool(name: string, params: Record) { - const tool = tools.get(name); - if (!tool) throw new Error(`Tool ${name} not registered`); - return tool.execute("tool-call", params, undefined, undefined, {}); - } + async function execTool(name: string, params: Record) { + const tool = tools.get(name); + if (!tool) throw new Error(`Tool ${name} not registered`); + return tool.execute("tool-call", params, undefined, undefined, {}); + } - return { execTool }; + async function trigger(event: string, payload: any = {}, ctx: any = {}) { + for (const handler of handlers.get(event) ?? []) { + await handler(payload, ctx); + } + } + + return { execTool, trigger }; } -describe("TaskList", () => { - it("renders a compact one-line-per-task summary", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Design the flux capacitor", - description: "Desc", - done_criterion: "done", - }); - await harness.execTool("TaskCreate", { - subject: "Acquiring plutonium", - description: "Desc", - done_criterion: "done", - progress_label: "Acquiring plutonium", - }); - await harness.execTool("TaskCreate", { - subject: "Install flux capacitor in DeLorean", - description: "Desc", - done_criterion: "done", - parentId: "1", - }); - await harness.execTool("TaskCreate", { - subject: "Test time travel at 88 mph", - description: "Desc", - done_criterion: "done", - }); +const tempDirs: string[] = []; - await harness.execTool("TaskUpdate", { taskId: "1", status: "completed" }); - await harness.execTool("TaskUpdate", { taskId: "2", status: "in_progress" }); - await harness.execTool("TaskUpdate", { taskId: "3", add_blocked_by: ["1"] }); - await harness.execTool("TaskUpdate", { 
taskId: "4", add_blocked_by: ["2", "3"] }); - - const result = await harness.execTool("TaskList", {}); - const text = result.content[0].text; - - expect(text).toContain("● 4 tasks (1 in progress, 3 open)"); - expect(text).toContain("◻ #1 Design the flux capacitor"); - expect(text).toContain("◼ #2 Acquiring plutonium"); - expect(text).toContain("◻ #3 Install flux capacitor in DeLorean › subtask of #1 › blocked by #1"); - expect(text).toContain("◻ #4 Test time travel at 88 mph › blocked by #2, #3"); - expect(text).not.toContain("[ACTIVE]"); - expect(text).not.toContain("[PENDING]"); - expect(text).not.toContain("[DONE"); - expect(text).not.toContain("🛠"); - expect(text).not.toContain("test:"); - }); - - it("shows completed subtasks without proof-lane clutter", async () => { - const harness = makeHarness(); - await harness.execTool("TaskCreate", { - subject: "Top-level goal", - description: "Desc", - done_criterion: "done", - }); - await harness.execTool("TaskCreate", { - subject: "Finished checklist item", - description: "Desc", - done_criterion: "done", - parentId: "1", - }); - - await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" }); - - const result = await harness.execTool("TaskList", {}); - const text = result.content[0].text; - - expect(text).toContain("● 2 tasks (1 done, 1 open)"); - expect(text).toContain("✔ #2 Finished checklist item › subtask of #1"); - expect(text).not.toContain("[DONE"); - expect(text).not.toContain("🛠"); - }); +afterEach(() => { + delete process.env.PI_TASKS; + while (tempDirs.length > 0) + rmSync(tempDirs.pop()!, { recursive: true, force: true }); +}); + +describe("Task tools", () => { + it("renders a compact one-line-per-task summary", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Design the flux capacitor", + description: "Desc", + done_criterion: "done", + }); + await harness.execTool("TaskCreate", { + subject: "Acquiring plutonium", + description: "Desc", + 
done_criterion: "done", + progress_label: "Acquiring plutonium", + }); + await harness.execTool("TaskCreate", { + subject: "Install flux capacitor in DeLorean", + description: "Desc", + done_criterion: "done", + parentId: "1", + }); + await harness.execTool("TaskCreate", { + subject: "Test time travel at 88 mph", + description: "Desc", + done_criterion: "done", + }); + + await harness.execTool("TaskUpdate", { taskId: "1", status: "completed" }); + await harness.execTool("TaskUpdate", { + taskId: "2", + status: "in_progress", + }); + await harness.execTool("TaskUpdate", { + taskId: "3", + add_blocked_by: ["1"], + }); + await harness.execTool("TaskUpdate", { + taskId: "4", + add_blocked_by: ["2", "3"], + }); + + const result = await harness.execTool("TaskList", {}); + const text = result.content[0].text; + + expect(text).toContain("● 4 goals (1 in progress, 3 open)"); + expect(text).toContain("◻ #1 Design the flux capacitor"); + expect(text).toContain("◼ #2 Acquiring plutonium"); + expect(text).toContain( + "◻ #3 Install flux capacitor in DeLorean › subtask of #1 › blocked by #1", + ); + expect(text).toContain( + "◻ #4 Test time travel at 88 mph › blocked by #2, #3", + ); + expect(text).not.toContain("[ACTIVE]"); + expect(text).not.toContain("[PENDING]"); + expect(text).not.toContain("[DONE"); + expect(text).not.toContain("proof claim submitted"); + expect(text).not.toContain("test:"); + }); + + it("shows TaskCreate output with metadata and compact previews", async () => { + const harness = makeHarness(); + const result = await harness.execTool("TaskCreate", { + subject: "Top-level goal", + description: "Line 1\nLine 2\nLine 3", + done_criterion: "observe line a\nobserve line b", + progress_label: "Running check", + metadata: { owner: "pi", note: "short" }, + }); + + const text = result.content[0].text; + expect(text).toContain("## TaskCreate -> Task #1: Top-level goal"); + expect(text).toContain("### Metadata"); + expect(text).toContain("- Metadata keys: 2"); + 
expect(text).toContain("### Done criterion"); + expect(text).toContain("### Description"); + expect(text).toContain("### Progress label"); + expect(text).toContain("### Metadata preview"); + }); + + it("shows TaskUpdate output with changed fields and previews", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Top-level goal", + description: "Desc", + done_criterion: "done", + }); + + const result = await harness.execTool("TaskUpdate", { + taskId: "1", + status: "in_progress", + progress_label: "Running check", + metadata: { owner: "pi" }, + }); + + const text = result.content[0].text; + expect(text).toContain("## TaskUpdate -> Task #1: Top-level goal"); + expect(text).toContain( + "- Updated fields: status, progress_label, metadata", + ); + expect(text).toContain("- status: pending -> in_progress"); + expect(text).toContain("- progress_label: (missing) -> Running check"); + expect(text).toContain("### Metadata patch"); + }); + + it("shows completed subtasks without proof-lane clutter", async () => { + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Top-level goal", + description: "Desc", + done_criterion: "done", + }); + await harness.execTool("TaskCreate", { + subject: "Finished checklist item", + description: "Desc", + done_criterion: "done", + parentId: "1", + }); + + await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" }); + + const result = await harness.execTool("TaskList", {}); + const text = result.content[0].text; + + expect(text).toContain("● 2 goals (1 done hidden, 1 open)"); + expect(text).toContain("◻ #1 Top-level goal"); + expect(text).not.toContain("#2 Finished checklist item"); + expect(text).not.toContain("[DONE"); + expect(text).not.toContain("proof claim submitted"); + }); + + it("keeps persisted completed tasks on startup but hides them from the collapsed list", async () => { + const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); + 
tempDirs.push(dir); + const taskPath = join(dir, "tasks.json"); + process.env.PI_TASKS = taskPath; + + const seeded = new TaskStore(taskPath); + seeded.create("Finished work", "Desc", "done"); + seeded.complete("1"); + + const harness = makeHarness(); + await harness.trigger( + "before_agent_start", + {}, + { + ui: { setWidget() {}, setStatus() {} }, + sessionManager: { getSessionId: () => "session-test" }, + }, + ); + + const result = await harness.execTool("TaskList", {}); + expect(result.content[0].text).toContain("● 1 goals (1 done hidden)"); + expect(result.content[0].text).toContain( + "No open tasks. Completed tasks are hidden by default.", + ); + + const reloaded = new TaskStore(taskPath); + expect(reloaded.get("1")?.status).toBe("completed"); + }); + + it("keeps persisted completed tasks on startup even when one open goal remains", async () => { + const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); + tempDirs.push(dir); + const taskPath = join(dir, "tasks.json"); + process.env.PI_TASKS = taskPath; + + const seeded = new TaskStore(taskPath); + seeded.create("Open goal", "Desc", "done"); + seeded.create("Finished work", "Desc", "done", undefined, undefined, "1"); + seeded.complete("2"); + + const harness = makeHarness(); + await harness.trigger( + "before_agent_start", + {}, + { + ui: { setWidget() {}, setStatus() {} }, + sessionManager: { getSessionId: () => "session-test" }, + }, + ); + + const result = await harness.execTool("TaskList", {}); + const text = result.content[0].text; + expect(text).toContain("● 2 goals (1 done hidden, 1 open)"); + expect(text).toContain("◻ #1 Open goal"); + expect(text).not.toContain("Finished work"); + + const reloaded = new TaskStore(taskPath); + expect(reloaded.get("2")?.status).toBe("completed"); + }); + + it("keeps completed tasks persisted by default across later turns", async () => { + const dir = mkdtempSync(join(tmpdir(), "pi-proof-tasks-")); + tempDirs.push(dir); + const taskPath = join(dir, "tasks.json"); + 
process.env.PI_TASKS = taskPath; + + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Persistent completed goal", + description: "Desc", + done_criterion: "done", + }); + await harness.execTool("TaskCreate", { + subject: "Checklist item", + description: "Desc", + done_criterion: "done", + parentId: "1", + }); + await harness.execTool("TaskUpdate", { taskId: "2", status: "completed" }); + + for (let turn = 0; turn < 8; turn++) { + await harness.trigger("turn_start", {}, { + ui: { setWidget() {}, setStatus() {} }, + sessionManager: { getSessionId: () => "session-test" }, + }); + } + + const reloaded = new TaskStore(taskPath); + expect(reloaded.get("2")?.status).toBe("completed"); + }); + + it("stores named PI_TASKS lists inside the repo .pi/tasks directory", async () => { + process.env.PI_TASKS = `named-${Date.now()}`; + const expectedPath = join( + process.cwd(), + ".pi", + "tasks", + `${process.env.PI_TASKS}.json`, + ); + try { + rmSync(expectedPath); + } catch {} + try { + rmSync(expectedPath + ".lock"); + } catch {} + try { + rmSync(expectedPath + ".tmp"); + } catch {} + + const harness = makeHarness(); + await harness.execTool("TaskCreate", { + subject: "Repo local task", + description: "Desc", + done_criterion: "done", + }); + + const reloaded = new TaskStore(expectedPath); + expect(reloaded.get("1")?.subject).toBe("Repo local task"); + + try { + rmSync(expectedPath); + } catch {} + try { + rmSync(expectedPath + ".lock"); + } catch {} + try { + rmSync(expectedPath + ".tmp"); + } catch {} + }); }); diff --git a/test/task-store.test.ts b/test/task-store.test.ts index dfac154..b2fac51 100644 --- a/test/task-store.test.ts +++ b/test/task-store.test.ts @@ -1,467 +1,530 @@ import { readFileSync, rmSync } from "node:fs"; -import { homedir, tmpdir } from "node:os"; +import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, beforeEach, describe, expect, it } from "vitest"; import { TaskStore } from 
"../src/task-store.js"; // Helper: create a subtask, which can be ticked off directly. function createSubtask(store: TaskStore, subject: string) { - const parent = store.create(`${subject} parent`, "Desc", "done criterion"); - return store.create(subject, "Desc", "done criterion", undefined, undefined, parent.id); + const parent = store.create(`${subject} parent`, "Desc", "done criterion"); + return store.create( + subject, + "Desc", + "done criterion", + undefined, + undefined, + parent.id, + ); } describe("TaskStore (in-memory)", () => { - let store: TaskStore; - - beforeEach(() => { - store = new TaskStore(); // no listId = in-memory - }); - - it("creates tasks with auto-incrementing IDs", () => { - const t1 = store.create("First task", "Description 1", "criterion 1"); - const t2 = store.create("Second task", "Description 2", "criterion 2"); - - expect(t1.id).toBe("1"); - expect(t2.id).toBe("2"); - expect(t1.status).toBe("pending"); - expect(t1.subject).toBe("First task"); - expect(t1.description).toBe("Description 1"); - expect(t1.done_criterion).toBe("criterion 1"); - }); - - it("creates tasks with optional fields", () => { - const t = store.create("Task", "Desc", "done criterion", "Running task", { key: "value" }); - - expect(t.progress_label).toBe("Running task"); - expect(t.metadata).toEqual({ key: "value" }); - }); - - it("gets a task by ID", () => { - store.create("Test", "Desc", "done"); - const task = store.get("1"); - - expect(task).toBeDefined(); - expect(task!.subject).toBe("Test"); - }); - - it("returns undefined for non-existent task", () => { - expect(store.get("999")).toBeUndefined(); - }); - - it("lists all tasks sorted by ID", () => { - store.create("Task 3", "Desc", "done"); - store.create("Task 1", "Desc", "done"); - store.create("Task 2", "Desc", "done"); - - const tasks = store.list(); - expect(tasks.map(t => t.id)).toEqual(["1", "2", "3"]); - }); - - it("updates task status", () => { - store.create("Test", "Desc", "done"); - const { task, 
changedFields } = store.update("1", { status: "in_progress" }); - - expect(task!.status).toBe("in_progress"); - expect(changedFields).toEqual(["status"]); - }); - - it("updates multiple fields at once", () => { - store.create("Test", "Desc", "done"); - const { changedFields } = store.update("1", { - subject: "Updated subject", - description: "Updated desc", - metadata: { owner: "agent-1" }, - }); - - expect(changedFields).toContain("subject"); - expect(changedFields).toContain("description"); - expect(changedFields).toContain("metadata"); - - const task = store.get("1")!; - expect(task.subject).toBe("Updated subject"); - expect(task.metadata.owner).toBe("agent-1"); - }); - - it("deletes a task with status: deleted", () => { - store.create("Test", "Desc", "done"); - const { changedFields } = store.update("1", { status: "deleted" }); - - expect(changedFields).toEqual(["deleted"]); - expect(store.get("1")).toBeUndefined(); - expect(store.list()).toHaveLength(0); - }); - - it("preserves ID counter after deletion", () => { - store.create("Task 1", "Desc", "done"); - store.create("Task 2", "Desc", "done"); - store.update("1", { status: "deleted" }); - - const t3 = store.create("Task 3", "Desc", "done"); - expect(t3.id).toBe("3"); // Not "1" — counter continues - }); - - it("merges metadata with null key deletion", () => { - store.create("Test", "Desc", "done", undefined, { a: 1, b: 2, c: 3 }); - store.update("1", { metadata: { b: null, d: 4 } }); - - const task = store.get("1")!; - expect(task.metadata).toEqual({ a: 1, c: 3, d: 4 }); - }); - - it("sets up bidirectional blocks via add_blocks", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); - - store.update("1", { add_blocks: ["2"] }); - - const t1 = store.get("1")!; - const t2 = store.get("2")!; - expect(t1.blocks).toContain("2"); - expect(t2.blockedBy).toContain("1"); - }); - - it("sets up bidirectional blocks via add_blocked_by", () => { - store.create("Blocker", "Desc", 
"done"); - store.create("Blocked", "Desc", "done"); - - store.update("2", { add_blocked_by: ["1"] }); - - const t1 = store.get("1")!; - const t2 = store.get("2")!; - expect(t1.blocks).toContain("2"); - expect(t2.blockedBy).toContain("1"); - }); - - it("does not duplicate dependency edges", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - - store.update("1", { add_blocks: ["2"] }); - store.update("1", { add_blocks: ["2"] }); // duplicate - - const t1 = store.get("1")!; - expect(t1.blocks.filter(id => id === "2")).toHaveLength(1); - }); - - it("cleans up dependency edges on deletion", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - store.update("1", { add_blocks: ["2"] }); - - store.update("1", { status: "deleted" }); - - const t2 = store.get("2")!; - expect(t2.blockedBy).toEqual([]); - }); - - it("clears completed tasks", () => { - store.create("Completed", "Desc", "done"); - store.create("Pending", "Desc", "done"); - store.complete("1"); - - const count = store.clearCompleted(); - - expect(count).toBe(1); - expect(store.list()).toHaveLength(1); - expect(store.list()[0].id).toBe("2"); - }); - - it("allows TaskUpdate(status=completed) for subtasks", () => { - createSubtask(store, "Checklist item"); - const { task, changedFields } = store.update("2", { status: "completed" }); - expect(task!.status).toBe("completed"); - expect(changedFields).toContain("status"); - }); - - it("blocks TaskUpdate(status=completed) for top-level tasks", () => { - store.create("Goal", "Desc", "done"); - expect(() => store.update("1", { status: "completed" })).toThrow("Top-level task #1 requires proof"); - }); - - it("keeps top-level completion gated even after proof evidence exists", () => { - store.create("Escalated", "Desc", "done"); - store.update("1", { metadata: { lgtm_evidence: "literal output" } }); - expect(() => store.update("1", { status: "completed" })).toThrow("TaskClaimDone"); - }); - - it("rejects changing 
parentId after creation", () => { - store.create("Parent", "Desc", "done"); - store.create("Child", "Desc", "done"); - expect(() => store.update("2", { parentId: "1" })).toThrow("parentId is creation-only"); - }); - - it("returns not found for update on non-existent task", () => { - const { task, changedFields } = store.update("999", { status: "in_progress" }); - expect(task).toBeUndefined(); - expect(changedFields).toEqual([]); - }); - - it("complete() is the internal proof-review completion path", () => { - store.create("Test", "Desc", "done"); - const task = store.complete("1"); - expect(task.status).toBe("completed"); - }); - - it("complete() also works for subtasks", () => { - createSubtask(store, "Test"); - const task = store.complete("2"); - expect(task.status).toBe("completed"); - }); - - it("complete() throws on non-existent task", () => { - expect(() => store.complete("999")).toThrow("not found"); - }); - - it("delete method works", () => { - store.create("Test", "Desc", "done"); - expect(store.delete("1")).toBe(true); - expect(store.delete("1")).toBe(false); // already deleted - expect(store.list()).toHaveLength(0); - }); - - it("creates tasks with metadata via TaskCreate", () => { - const t = store.create("With meta", "Desc", "done", undefined, { pr: "123", reviewer: "alice" }); - expect(t.metadata).toEqual({ pr: "123", reviewer: "alice" }); - - const retrieved = store.get("1")!; - expect(retrieved.metadata).toEqual({ pr: "123", reviewer: "alice" }); - }); - - it("allows circular dependencies with warning", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - store.update("1", { add_blocks: ["2"] }); - const { warnings } = store.update("2", { add_blocks: ["1"] }); - - expect(store.get("1")!.blocks).toContain("2"); - expect(store.get("2")!.blocks).toContain("1"); - expect(warnings).toContain("cycle: #2 and #1 block each other"); - }); - - it("allows self-dependency with warning", () => { - store.create("Self", "Desc", 
"done"); - const { warnings } = store.update("1", { add_blocks: ["1"] }); - expect(store.get("1")!.blocks).toContain("1"); - expect(warnings).toContain("#1 blocks itself"); - }); - - it("stores dangling edge IDs with warning", () => { - store.create("Real", "Desc", "done"); - const { warnings } = store.update("1", { add_blocks: ["9999"] }); - expect(store.get("1")!.blocks).toContain("9999"); - expect(warnings).toContain("#9999 does not exist"); - }); - - it("returns no warnings for valid dependencies", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - const { warnings } = store.update("1", { add_blocks: ["2"] }); - expect(warnings).toEqual([]); - }); - - it("accepts whitespace-only subjects (matches Claude Code)", () => { - const t = store.create(" ", "Desc", "done"); - expect(t.subject).toBe(" "); - }); - - it("updates progress_label field", () => { - store.create("Test", "Desc", "done"); - const { changedFields } = store.update("1", { progress_label: "Running tests" }); - expect(changedFields).toContain("progress_label"); - expect(store.get("1")!.progress_label).toBe("Running tests"); - }); - - it("updates description field", () => { - store.create("Test", "Original desc", "done"); - const { changedFields } = store.update("1", { description: "Updated desc" }); - expect(changedFields).toContain("description"); - expect(store.get("1")!.description).toBe("Updated desc"); - }); - - it("updates done_criterion field", () => { - store.create("Test", "Desc", "original criterion"); - const { changedFields } = store.update("1", { done_criterion: "updated criterion" }); - expect(changedFields).toContain("done_criterion"); - expect(store.get("1")!.done_criterion).toBe("updated criterion"); - }); - - it("returns empty changedFields when updating non-existent task", () => { - const { task, changedFields, warnings } = store.update("999", { status: "in_progress" }); - expect(task).toBeUndefined(); - expect(changedFields).toEqual([]); - 
expect(warnings).toEqual([]); - }); - - it("clearCompleted cleans up dependency edges", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); - store.update("1", { add_blocks: ["2"] }); - // complete() is the internal proof-review completion path. - store.complete("1"); - - store.clearCompleted(); - - const t2 = store.get("2")!; - expect(t2.blockedBy).toEqual([]); - }); - - it("handles multiple add_blocks in one call", () => { - store.create("Blocker", "Desc", "done"); - store.create("B1", "Desc", "done"); - store.create("B2", "Desc", "done"); - - store.update("1", { add_blocks: ["2", "3"] }); - - expect(store.get("1")!.blocks).toEqual(["2", "3"]); - expect(store.get("2")!.blockedBy).toContain("1"); - expect(store.get("3")!.blockedBy).toContain("1"); - }); - - it("add_blocked_by warns on self-dependency", () => { - store.create("Self", "Desc", "done"); - const { warnings } = store.update("1", { add_blocked_by: ["1"] }); - expect(store.get("1")!.blockedBy).toContain("1"); - expect(warnings).toContain("#1 blocks itself"); - }); - - it("add_blocked_by warns on dangling ref", () => { - store.create("Real", "Desc", "done"); - const { warnings } = store.update("1", { add_blocked_by: ["9999"] }); - expect(store.get("1")!.blockedBy).toContain("9999"); - expect(warnings).toContain("#9999 does not exist"); - }); - - it("add_blocked_by warns on cycle", () => { - store.create("A", "Desc", "done"); - store.create("B", "Desc", "done"); - store.update("1", { add_blocks: ["2"] }); - const { warnings } = store.update("1", { add_blocked_by: ["2"] }); - expect(warnings).toContain("cycle: #1 and #2 block each other"); - }); - - it("clearCompleted returns 0 when no completed tasks", () => { - store.create("Pending", "Desc", "done"); - expect(store.clearCompleted()).toBe(0); - }); - - it("list sorts pending → in_progress → completed with all three present", () => { - store.create("Pending task", "Desc", "done"); - store.create("Completed task", 
"Desc", "done"); - store.create("In-progress task", "Desc", "done"); - store.create("Another pending", "Desc", "done"); - - store.complete("2"); - store.update("3", { status: "in_progress" }); - - const tasks = store.list(); - const statusOrder: Record = { pending: 0, in_progress: 1, completed: 2 }; - const sorted = [...tasks].sort((a, b) => { - const so = (statusOrder[a.status] ?? 0) - (statusOrder[b.status] ?? 0); - if (so !== 0) return so; - return Number(a.id) - Number(b.id); - }); - - expect(sorted.map(t => t.id)).toEqual(["1", "4", "3", "2"]); - expect(sorted.map(t => t.status)).toEqual(["pending", "pending", "in_progress", "completed"]); - }); + let store: TaskStore; + + beforeEach(() => { + store = new TaskStore(); // no listId = in-memory + }); + + it("creates tasks with auto-incrementing IDs", () => { + const t1 = store.create("First task", "Description 1", "criterion 1"); + const t2 = store.create("Second task", "Description 2", "criterion 2"); + + expect(t1.id).toBe("1"); + expect(t2.id).toBe("2"); + expect(t1.status).toBe("pending"); + expect(t1.subject).toBe("First task"); + expect(t1.description).toBe("Description 1"); + expect(t1.done_criterion).toBe("criterion 1"); + }); + + it("creates tasks with optional fields", () => { + const t = store.create("Task", "Desc", "done criterion", "Running task", { + key: "value", + }); + + expect(t.progress_label).toBe("Running task"); + expect(t.metadata).toEqual({ key: "value" }); + }); + + it("gets a task by ID", () => { + store.create("Test", "Desc", "done"); + const task = store.get("1"); + + expect(task).toBeDefined(); + expect(task!.subject).toBe("Test"); + }); + + it("returns undefined for non-existent task", () => { + expect(store.get("999")).toBeUndefined(); + }); + + it("lists all tasks sorted by ID", () => { + store.create("Task 3", "Desc", "done"); + store.create("Task 1", "Desc", "done"); + store.create("Task 2", "Desc", "done"); + + const tasks = store.list(); + expect(tasks.map((t) => 
t.id)).toEqual(["1", "2", "3"]); + }); + + it("updates task status", () => { + store.create("Test", "Desc", "done"); + const { task, changedFields } = store.update("1", { + status: "in_progress", + }); + + expect(task!.status).toBe("in_progress"); + expect(changedFields).toEqual(["status"]); + }); + + it("updates multiple fields at once", () => { + store.create("Test", "Desc", "done"); + const { changedFields } = store.update("1", { + subject: "Updated subject", + description: "Updated desc", + metadata: { owner: "agent-1" }, + }); + + expect(changedFields).toContain("subject"); + expect(changedFields).toContain("description"); + expect(changedFields).toContain("metadata"); + + const task = store.get("1")!; + expect(task.subject).toBe("Updated subject"); + expect(task.metadata.owner).toBe("agent-1"); + }); + + it("deletes a task with status: deleted", () => { + store.create("Test", "Desc", "done"); + const { changedFields } = store.update("1", { status: "deleted" }); + + expect(changedFields).toEqual(["deleted"]); + expect(store.get("1")).toBeUndefined(); + expect(store.list()).toHaveLength(0); + }); + + it("preserves ID counter after deletion", () => { + store.create("Task 1", "Desc", "done"); + store.create("Task 2", "Desc", "done"); + store.update("1", { status: "deleted" }); + + const t3 = store.create("Task 3", "Desc", "done"); + expect(t3.id).toBe("3"); // Not "1" — counter continues + }); + + it("merges metadata with null key deletion", () => { + store.create("Test", "Desc", "done", undefined, { a: 1, b: 2, c: 3 }); + store.update("1", { metadata: { b: null, d: 4 } }); + + const task = store.get("1")!; + expect(task.metadata).toEqual({ a: 1, c: 3, d: 4 }); + }); + + it("sets up bidirectional blocks via add_blocks", () => { + store.create("Blocker", "Desc", "done"); + store.create("Blocked", "Desc", "done"); + + store.update("1", { add_blocks: ["2"] }); + + const t1 = store.get("1")!; + const t2 = store.get("2")!; + expect(t1.blocks).toContain("2"); + 
expect(t2.blockedBy).toContain("1"); + }); + + it("sets up bidirectional blocks via add_blocked_by", () => { + store.create("Blocker", "Desc", "done"); + store.create("Blocked", "Desc", "done"); + + store.update("2", { add_blocked_by: ["1"] }); + + const t1 = store.get("1")!; + const t2 = store.get("2")!; + expect(t1.blocks).toContain("2"); + expect(t2.blockedBy).toContain("1"); + }); + + it("does not duplicate dependency edges", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + + store.update("1", { add_blocks: ["2"] }); + store.update("1", { add_blocks: ["2"] }); // duplicate + + const t1 = store.get("1")!; + expect(t1.blocks.filter((id) => id === "2")).toHaveLength(1); + }); + + it("cleans up dependency edges on deletion", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + store.update("1", { add_blocks: ["2"] }); + + store.update("1", { status: "deleted" }); + + const t2 = store.get("2")!; + expect(t2.blockedBy).toEqual([]); + }); + + it("clears completed tasks", () => { + store.create("Completed", "Desc", "done"); + store.create("Pending", "Desc", "done"); + store.complete("1"); + + const count = store.clearCompleted(); + + expect(count).toBe(1); + expect(store.list()).toHaveLength(1); + expect(store.list()[0].id).toBe("2"); + }); + + it("allows TaskUpdate(status=completed) for subtasks", () => { + createSubtask(store, "Checklist item"); + const { task, changedFields } = store.update("2", { status: "completed" }); + expect(task!.status).toBe("completed"); + expect(changedFields).toContain("status"); + }); + + it("blocks TaskUpdate(status=completed) for top-level tasks", () => { + store.create("Goal", "Desc", "done"); + expect(() => store.update("1", { status: "completed" })).toThrow( + "Top-level task #1 requires proof", + ); + }); + + it("keeps top-level completion gated even after proof evidence exists", () => { + store.create("Escalated", "Desc", "done"); + store.update("1", { metadata: { 
lgtm_evidence: "literal output" } }); + expect(() => store.update("1", { status: "completed" })).toThrow( + "TaskClaimDone", + ); + }); + + it("rejects changing parentId after creation", () => { + store.create("Parent", "Desc", "done"); + store.create("Child", "Desc", "done"); + expect(() => store.update("2", { parentId: "1" })).toThrow( + "parentId is creation-only", + ); + }); + + it("returns not found for update on non-existent task", () => { + const { task, changedFields } = store.update("999", { + status: "in_progress", + }); + expect(task).toBeUndefined(); + expect(changedFields).toEqual([]); + }); + + it("complete() is the internal proof-review completion path", () => { + store.create("Test", "Desc", "done"); + const task = store.complete("1"); + expect(task.status).toBe("completed"); + }); + + it("complete() also works for subtasks", () => { + createSubtask(store, "Test"); + const task = store.complete("2"); + expect(task.status).toBe("completed"); + }); + + it("complete() throws on non-existent task", () => { + expect(() => store.complete("999")).toThrow("not found"); + }); + + it("delete method works", () => { + store.create("Test", "Desc", "done"); + expect(store.delete("1")).toBe(true); + expect(store.delete("1")).toBe(false); // already deleted + expect(store.list()).toHaveLength(0); + }); + + it("creates tasks with metadata via TaskCreate", () => { + const t = store.create("With meta", "Desc", "done", undefined, { + pr: "123", + reviewer: "alice", + }); + expect(t.metadata).toEqual({ pr: "123", reviewer: "alice" }); + + const retrieved = store.get("1")!; + expect(retrieved.metadata).toEqual({ pr: "123", reviewer: "alice" }); + }); + + it("allows circular dependencies with warning", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + store.update("1", { add_blocks: ["2"] }); + const { warnings } = store.update("2", { add_blocks: ["1"] }); + + expect(store.get("1")!.blocks).toContain("2"); + 
expect(store.get("2")!.blocks).toContain("1"); + expect(warnings).toContain("cycle: #2 and #1 block each other"); + }); + + it("allows self-dependency with warning", () => { + store.create("Self", "Desc", "done"); + const { warnings } = store.update("1", { add_blocks: ["1"] }); + expect(store.get("1")!.blocks).toContain("1"); + expect(warnings).toContain("#1 blocks itself"); + }); + + it("stores dangling edge IDs with warning", () => { + store.create("Real", "Desc", "done"); + const { warnings } = store.update("1", { add_blocks: ["9999"] }); + expect(store.get("1")!.blocks).toContain("9999"); + expect(warnings).toContain("#9999 does not exist"); + }); + + it("returns no warnings for valid dependencies", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + const { warnings } = store.update("1", { add_blocks: ["2"] }); + expect(warnings).toEqual([]); + }); + + it("accepts whitespace-only subjects (matches Claude Code)", () => { + const t = store.create(" ", "Desc", "done"); + expect(t.subject).toBe(" "); + }); + + it("updates progress_label field", () => { + store.create("Test", "Desc", "done"); + const { changedFields } = store.update("1", { + progress_label: "Running tests", + }); + expect(changedFields).toContain("progress_label"); + expect(store.get("1")!.progress_label).toBe("Running tests"); + }); + + it("updates description field", () => { + store.create("Test", "Original desc", "done"); + const { changedFields } = store.update("1", { + description: "Updated desc", + }); + expect(changedFields).toContain("description"); + expect(store.get("1")!.description).toBe("Updated desc"); + }); + + it("updates done_criterion field", () => { + store.create("Test", "Desc", "original criterion"); + const { changedFields } = store.update("1", { + done_criterion: "updated criterion", + }); + expect(changedFields).toContain("done_criterion"); + expect(store.get("1")!.done_criterion).toBe("updated criterion"); + }); + + it("returns empty 
changedFields when updating non-existent task", () => { + const { task, changedFields, warnings } = store.update("999", { + status: "in_progress", + }); + expect(task).toBeUndefined(); + expect(changedFields).toEqual([]); + expect(warnings).toEqual([]); + }); + + it("clearCompleted cleans up dependency edges", () => { + store.create("Blocker", "Desc", "done"); + store.create("Blocked", "Desc", "done"); + store.update("1", { add_blocks: ["2"] }); + // complete() is the internal proof-review completion path. + store.complete("1"); + + store.clearCompleted(); + + const t2 = store.get("2")!; + expect(t2.blockedBy).toEqual([]); + }); + + it("handles multiple add_blocks in one call", () => { + store.create("Blocker", "Desc", "done"); + store.create("B1", "Desc", "done"); + store.create("B2", "Desc", "done"); + + store.update("1", { add_blocks: ["2", "3"] }); + + expect(store.get("1")!.blocks).toEqual(["2", "3"]); + expect(store.get("2")!.blockedBy).toContain("1"); + expect(store.get("3")!.blockedBy).toContain("1"); + }); + + it("add_blocked_by warns on self-dependency", () => { + store.create("Self", "Desc", "done"); + const { warnings } = store.update("1", { add_blocked_by: ["1"] }); + expect(store.get("1")!.blockedBy).toContain("1"); + expect(warnings).toContain("#1 blocks itself"); + }); + + it("add_blocked_by warns on dangling ref", () => { + store.create("Real", "Desc", "done"); + const { warnings } = store.update("1", { add_blocked_by: ["9999"] }); + expect(store.get("1")!.blockedBy).toContain("9999"); + expect(warnings).toContain("#9999 does not exist"); + }); + + it("add_blocked_by warns on cycle", () => { + store.create("A", "Desc", "done"); + store.create("B", "Desc", "done"); + store.update("1", { add_blocks: ["2"] }); + const { warnings } = store.update("1", { add_blocked_by: ["2"] }); + expect(warnings).toContain("cycle: #1 and #2 block each other"); + }); + + it("clearCompleted returns 0 when no completed tasks", () => { + store.create("Pending", "Desc", 
"done"); + expect(store.clearCompleted()).toBe(0); + }); + + it("list sorts pending → in_progress → completed with all three present", () => { + store.create("Pending task", "Desc", "done"); + store.create("Completed task", "Desc", "done"); + store.create("In-progress task", "Desc", "done"); + store.create("Another pending", "Desc", "done"); + + store.complete("2"); + store.update("3", { status: "in_progress" }); + + const tasks = store.list(); + const statusOrder: Record = { + pending: 0, + in_progress: 1, + completed: 2, + }; + const sorted = [...tasks].sort((a, b) => { + const so = (statusOrder[a.status] ?? 0) - (statusOrder[b.status] ?? 0); + if (so !== 0) return so; + return Number(a.id) - Number(b.id); + }); + + expect(sorted.map((t) => t.id)).toEqual(["1", "4", "3", "2"]); + expect(sorted.map((t) => t.status)).toEqual([ + "pending", + "pending", + "in_progress", + "completed", + ]); + }); }); describe("TaskStore (file-backed)", () => { - const testListId = `test-${Date.now()}-${Math.random().toString(36).slice(2)}`; - const tasksDir = join(homedir(), ".pi", "tasks"); - const filePath = join(tasksDir, `${testListId}.json`); + const testListId = `test-${Date.now()}-${Math.random().toString(36).slice(2)}`; + const tasksDir = join(process.cwd(), ".pi", "tasks"); + const filePath = join(tasksDir, `${testListId}.json`); - afterEach(() => { - try { rmSync(filePath); } catch { /* */ } - try { rmSync(filePath + ".lock"); } catch { /* */ } - try { rmSync(filePath + ".tmp"); } catch { /* */ } - }); + afterEach(() => { + try { + rmSync(filePath); + } catch { + /* */ + } + try { + rmSync(filePath + ".lock"); + } catch { + /* */ + } + try { + rmSync(filePath + ".tmp"); + } catch { + /* */ + } + }); - it("persists tasks to disk", () => { - const store1 = new TaskStore(testListId); - store1.create("Persistent task", "Should survive reload", "done"); + it("persists tasks to disk", () => { + const store1 = new TaskStore(testListId); + store1.create("Persistent task", "Should 
survive reload", "done"); - const store2 = new TaskStore(testListId); - const tasks = store2.list(); + const store2 = new TaskStore(testListId); + const tasks = store2.list(); - expect(tasks).toHaveLength(1); - expect(tasks[0].subject).toBe("Persistent task"); - }); + expect(tasks).toHaveLength(1); + expect(tasks[0].subject).toBe("Persistent task"); + }); - it("persists in_progress updates to disk", () => { - const store1 = new TaskStore(testListId); - store1.create("Task", "Desc", "done"); - store1.update("1", { status: "in_progress" }); + it("persists in_progress updates to disk", () => { + const store1 = new TaskStore(testListId); + store1.create("Task", "Desc", "done"); + store1.update("1", { status: "in_progress" }); - const store2 = new TaskStore(testListId); - expect(store2.get("1")!.status).toBe("in_progress"); - }); + const store2 = new TaskStore(testListId); + expect(store2.get("1")!.status).toBe("in_progress"); + }); - it("persists completed tasks to disk", () => { - const store1 = new TaskStore(testListId); - store1.create("Done task", "Desc", "done"); - store1.create("Pending task", "Desc", "done"); - store1.complete("1"); + it("persists completed tasks to disk", () => { + const store1 = new TaskStore(testListId); + store1.create("Done task", "Desc", "done"); + store1.create("Pending task", "Desc", "done"); + store1.complete("1"); - const store2 = new TaskStore(testListId); - expect(store2.get("1")).toBeDefined(); - expect(store2.get("1")!.status).toBe("completed"); - expect(store2.get("2")).toBeDefined(); - expect(store2.list()).toHaveLength(2); - }); + const store2 = new TaskStore(testListId); + expect(store2.get("1")).toBeDefined(); + expect(store2.get("1")!.status).toBe("completed"); + expect(store2.get("2")).toBeDefined(); + expect(store2.list()).toHaveLength(2); + }); - it("restores all tasks across instances", () => { - const store1 = new TaskStore(testListId); - store1.create("Pending", "Desc", "done"); - store1.create("In progress", "Desc", 
"done"); - store1.create("Done", "Desc", "done"); - store1.update("2", { status: "in_progress" }); - store1.complete("3"); + it("restores all tasks across instances", () => { + const store1 = new TaskStore(testListId); + store1.create("Pending", "Desc", "done"); + store1.create("In progress", "Desc", "done"); + store1.create("Done", "Desc", "done"); + store1.update("2", { status: "in_progress" }); + store1.complete("3"); - const store2 = new TaskStore(testListId); - const tasks = store2.list(); - expect(tasks).toHaveLength(3); - expect(tasks.map(t => t.id)).toContain("1"); - expect(tasks.map(t => t.id)).toContain("2"); - expect(tasks.map(t => t.id)).toContain("3"); - }); + const store2 = new TaskStore(testListId); + const tasks = store2.list(); + expect(tasks).toHaveLength(3); + expect(tasks.map((t) => t.id)).toContain("1"); + expect(tasks.map((t) => t.id)).toContain("2"); + expect(tasks.map((t) => t.id)).toContain("3"); + }); - it("persists ID counter across instances", () => { - const store1 = new TaskStore(testListId); - store1.create("Task 1", "Desc", "done"); - store1.create("Task 2", "Desc", "done"); + it("persists ID counter across instances", () => { + const store1 = new TaskStore(testListId); + store1.create("Task 1", "Desc", "done"); + store1.create("Task 2", "Desc", "done"); - const store2 = new TaskStore(testListId); - const t3 = store2.create("Task 3", "Desc", "done"); - expect(t3.id).toBe("3"); - }); + const store2 = new TaskStore(testListId); + const t3 = store2.create("Task 3", "Desc", "done"); + expect(t3.id).toBe("3"); + }); }); describe("TaskStore (absolute path)", () => { - const absFilePath = join(tmpdir(), `pi-tasks-test-${Date.now()}.json`); + const absFilePath = join(tmpdir(), `pi-tasks-test-${Date.now()}.json`); - afterEach(() => { - try { rmSync(absFilePath); } catch { /* */ } - try { rmSync(absFilePath + ".lock"); } catch { /* */ } - try { rmSync(absFilePath + ".tmp"); } catch { /* */ } - }); + afterEach(() => { + try { + 
rmSync(absFilePath); + } catch { + /* */ + } + try { + rmSync(absFilePath + ".lock"); + } catch { + /* */ + } + try { + rmSync(absFilePath + ".tmp"); + } catch { + /* */ + } + }); - it("accepts absolute path and persists tasks", () => { - const store1 = new TaskStore(absFilePath); - store1.create("Abs path task", "Desc", "done"); + it("accepts absolute path and persists tasks", () => { + const store1 = new TaskStore(absFilePath); + store1.create("Abs path task", "Desc", "done"); - const store2 = new TaskStore(absFilePath); - expect(store2.list()).toHaveLength(1); - expect(store2.list()[0].subject).toBe("Abs path task"); - }); + const store2 = new TaskStore(absFilePath); + expect(store2.list()).toHaveLength(1); + expect(store2.list()[0].subject).toBe("Abs path task"); + }); - it("persists completed tasks when using absolute path", () => { - const store1 = new TaskStore(absFilePath); - store1.create("Pending", "Desc", "done"); - store1.create("Completed", "Desc", "done"); - store1.complete("2"); + it("persists completed tasks when using absolute path", () => { + const store1 = new TaskStore(absFilePath); + store1.create("Pending", "Desc", "done"); + store1.create("Completed", "Desc", "done"); + store1.complete("2"); - const raw = JSON.parse(readFileSync(absFilePath, "utf-8")); - expect(raw.tasks).toHaveLength(2); - }); + const raw = JSON.parse(readFileSync(absFilePath, "utf-8")); + expect(raw.tasks).toHaveLength(2); + }); }); diff --git a/test/task-widget.test.ts b/test/task-widget.test.ts index a687b1d..d1156b8 100644 --- a/test/task-widget.test.ts +++ b/test/task-widget.test.ts @@ -4,425 +4,428 @@ import { TaskWidget, type Theme, type UICtx } from "../src/ui/task-widget.js"; /** Create a mock theme that returns raw text (no ANSI escapes). 
*/ function mockTheme(): Theme { - return { - fg: (_color: string, text: string) => text, - bold: (text: string) => text, - strikethrough: (text: string) => `~~${text}~~`, - }; + return { + fg: (_color: string, text: string) => text, + bold: (text: string) => text, + strikethrough: (text: string) => `~~${text}~~`, + }; } /** Create a mock UICtx that captures setWidget calls. */ function mockUICtx() { - const state: { - widgets: Map; - statuses: Map; - } = { - widgets: new Map(), - statuses: new Map(), - }; + const state: { + widgets: Map; + statuses: Map; + } = { + widgets: new Map(), + statuses: new Map(), + }; - const ctx: UICtx = { - setWidget(key, content, options) { - state.widgets.set(key, { content, options }); - }, - setStatus(key, text) { - state.statuses.set(key, text); - }, - }; + const ctx: UICtx = { + setWidget(key, content, options) { + state.widgets.set(key, { content, options }); + }, + setStatus(key, text) { + state.statuses.set(key, text); + }, + }; - return { ctx, state }; + return { ctx, state }; } /** Render the widget and return its lines. 
*/ function renderWidget(state: ReturnType["state"]): string[] { - const entry = state.widgets.get("tasks"); - if (!entry?.content) return []; - const theme = mockTheme(); - const tui = { terminal: { columns: 200 }, requestRender() {} }; - const result = entry.content(tui, theme); - return result.render(); + const entry = state.widgets.get("tasks"); + if (!entry?.content) return []; + const theme = mockTheme(); + const tui = { terminal: { columns: 200 }, requestRender() {} }; + const result = entry.content(tui, theme); + return result.render(); } describe("TaskWidget", () => { - let store: TaskStore; - let widget: TaskWidget; - let ui: ReturnType; + let store: TaskStore; + let widget: TaskWidget; + let ui: ReturnType; - beforeEach(() => { - vi.useFakeTimers(); - store = new TaskStore(); - widget = new TaskWidget(store); - ui = mockUICtx(); - widget.setUICtx(ui.ctx); - }); + beforeEach(() => { + vi.useFakeTimers(); + store = new TaskStore(); + widget = new TaskWidget(store); + ui = mockUICtx(); + widget.setUICtx(ui.ctx); + }); - afterEach(() => { - widget.dispose(); - vi.useRealTimers(); - }); + afterEach(() => { + widget.dispose(); + vi.useRealTimers(); + }); - it("shows nothing when no tasks exist", () => { - widget.update(); - const entry = ui.state.widgets.get("tasks"); - expect(entry?.content).toBeUndefined(); - }); + it("shows nothing when no tasks exist", () => { + widget.update(); + const entry = ui.state.widgets.get("tasks"); + expect(entry?.content).toBeUndefined(); + }); - it("renders pending tasks with ◻ icon", () => { - store.create("Do something", "Desc", "done"); - widget.update(); + it("renders pending tasks with ◻ icon", () => { + store.create("Do something", "Desc", "done"); + widget.update(); - const lines = renderWidget(ui.state); - expect(lines).toHaveLength(2); // header + 1 task - expect(lines[0]).toContain("1 tasks"); - expect(lines[0]).toContain("1 open"); - expect(lines[1]).toContain("◻"); - expect(lines[1]).toContain("Do something"); - 
expect(lines[1]).not.toContain("done"); - }); + const lines = renderWidget(ui.state); + expect(lines).toHaveLength(2); // header + 1 task + expect(lines[0]).toContain("1 goals"); + expect(lines[0]).toContain("1 open"); + expect(lines[1]).toContain("◻"); + expect(lines[1]).toContain("Do something"); + expect(lines[1]).not.toContain("done"); + }); - it("renders in-progress tasks with ◼ icon", () => { - store.create("Working on it", "Desc", "done"); - store.update("1", { status: "in_progress" }); - widget.update(); + it("renders in-progress tasks with ◼ icon", () => { + store.create("Working on it", "Desc", "done"); + store.update("1", { status: "in_progress" }); + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("◼"); - expect(lines[1]).toContain("Working on it"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("◼"); + expect(lines[1]).toContain("Working on it"); + }); - it("renders completed tasks with ✔ icon and strikethrough", () => { - store.create("Done task", "Desc", "done"); - store.complete("1"); - widget.update(); + it("hides the widget when only completed tasks remain", () => { + store.create("Done task", "Desc", "done"); + store.complete("1"); + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("✔"); - expect(lines[1]).toContain("~~#1 Done task~~"); - }); + const lines = renderWidget(ui.state); + expect(lines).toEqual([]); + }); - it("does not render proof badges on collapsed rows", () => { - store.create("Done task", "Desc", "done"); - store.update("1", { - metadata: { robot_review_observations: ["Observed output drift on seed 2"], lgtm_evidence: "verbatim output" }, - }); - store.complete("1"); - widget.update(); + it("does not render proof badges on collapsed rows", () => { + store.create("Open task", "Desc", "done"); + store.create("Done task", "Desc", "done"); + store.update("2", { + metadata: { + robot_review_observations: ["Observed output drift on 
seed 2"], + lgtm_evidence: "verbatim output", + }, + }); + store.complete("2"); + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).not.toContain("["); - expect(lines[1]).not.toContain("🛠"); - expect(lines[1]).not.toContain("🤖"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("Open task"); + expect(lines[1]).not.toContain("["); + expect(lines[1]).not.toContain("robot_review_observations"); + expect(lines[1]).not.toContain("lgtm_evidence"); + }); - it("renders active tasks with spinner icon", () => { - store.create("Running thing", "Desc", "done criterion", "Processing data"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("renders active tasks with spinner icon", () => { + store.create("Running thing", "Desc", "done criterion", "Processing data"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - const lines = renderWidget(ui.state); - // Should show activeForm text with "…" suffix - expect(lines[1]).toContain("Processing data…"); - // Should NOT show ◼ for active task - expect(lines[1]).not.toContain("◼"); - }); + const lines = renderWidget(ui.state); + // Should show activeForm text with "…" suffix + expect(lines[1]).toContain("Processing data…"); + // Should NOT show ◼ for active task + expect(lines[1]).not.toContain("◼"); + }); - it("shows blocked-by info for pending tasks", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); - store.update("2", { add_blocked_by: ["1"] }); - widget.update(); + it("shows blocked-by info for pending tasks", () => { + store.create("Blocker", "Desc", "done"); + store.create("Blocked", "Desc", "done"); + store.update("2", { add_blocked_by: ["1"] }); + widget.update(); - const lines = renderWidget(ui.state); - const blockedLine = lines.find(l => l.includes("Blocked")); - // blocked-by suffix is only added via dim theme helper, which in mock is identity - // So we 
should see the raw text. Check for the relevant subject line having blocked-by info - expect(blockedLine).toContain("blocked by #1"); - }); + const lines = renderWidget(ui.state); + const blockedLine = lines.find((l) => l.includes("Blocked")); + // blocked-by suffix is only added via dim theme helper, which in mock is identity + // So we should see the raw text. Check for the relevant subject line having blocked-by info + expect(blockedLine).toContain("blocked by #1"); + }); - it("hides completed blockers in blocked-by suffix", () => { - store.create("Blocker", "Desc", "done"); - store.create("Blocked", "Desc", "done"); - store.update("2", { add_blocked_by: ["1"] }); - store.complete("1"); - widget.update(); + it("hides completed blockers in blocked-by suffix", () => { + store.create("Blocker", "Desc", "done"); + store.create("Blocked", "Desc", "done"); + store.update("2", { add_blocked_by: ["1"] }); + store.complete("1"); + widget.update(); - const lines = renderWidget(ui.state); - const blockedLine = lines.find(l => l.includes("Blocked")); - expect(blockedLine).not.toContain("blocked by"); - }); + const lines = renderWidget(ui.state); + const blockedLine = lines.find((l) => l.includes("Blocked")); + expect(blockedLine).not.toContain("blocked by"); + }); - it("shows status summary in header", () => { - store.create("Task A", "Desc", "done"); - store.create("Task B", "Desc", "done"); - store.create("Task C", "Desc", "done"); - store.complete("1"); - store.update("2", { status: "in_progress" }); - widget.update(); + it("shows status summary in header", () => { + store.create("Task A", "Desc", "done"); + store.create("Task B", "Desc", "done"); + store.create("Task C", "Desc", "done"); + store.complete("1"); + store.update("2", { status: "in_progress" }); + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[0]).toContain("3 tasks"); - expect(lines[0]).toContain("1 done"); - expect(lines[0]).toContain("1 in progress"); - 
expect(lines[0]).toContain("1 open"); - }); + const lines = renderWidget(ui.state); + expect(lines[0]).toContain("3 goals"); + expect(lines[0]).toContain("1 done hidden"); + expect(lines[0]).toContain("1 in progress"); + expect(lines[0]).toContain("1 open"); + }); - it("clears widget when all tasks are deleted", () => { - store.create("Task", "Desc", "done"); - widget.update(); - expect(ui.state.widgets.get("tasks")?.content).toBeDefined(); + it("clears widget when all tasks are deleted", () => { + store.create("Task", "Desc", "done"); + widget.update(); + expect(ui.state.widgets.get("tasks")?.content).toBeDefined(); - store.update("1", { status: "deleted" }); - widget.update(); - expect(ui.state.widgets.get("tasks")?.content).toBeUndefined(); - }); + store.update("1", { status: "deleted" }); + widget.update(); + expect(ui.state.widgets.get("tasks")?.content).toBeUndefined(); + }); - it("limits visible tasks to MAX_VISIBLE_TASKS", () => { - for (let i = 0; i < 15; i++) { - store.create(`Task ${i + 1}`, "Desc", "done"); - } - widget.update(); + it("limits visible tasks to MAX_VISIBLE_TASKS", () => { + for (let i = 0; i < 15; i++) { + store.create(`Task ${i + 1}`, "Desc", "done"); + } + widget.update(); - const lines = renderWidget(ui.state); - // header + 5 visible tasks + "...and 10 more" - expect(lines).toHaveLength(7); - expect(lines[6]).toContain("10 more"); - }); + const lines = renderWidget(ui.state); + // header + 5 visible tasks + "...and 10 more open" + expect(lines).toHaveLength(7); + expect(lines[6]).toContain("10 more open"); + }); - it("tracks token usage for active tasks", () => { - store.create("Active task", "Desc", "done criterion", "Running"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("tracks token usage for active tasks", () => { + store.create("Active task", "Desc", "done criterion", "Running"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - 
widget.addTokenUsage(1000, 500); - widget.addTokenUsage(500, 300); + widget.addTokenUsage(1000, 500); + widget.addTokenUsage(500, 300); - const lines = renderWidget(ui.state); - const activeLine = lines.find(l => l.includes("Running…")); - expect(activeLine).toContain("↑ 1.5k"); - expect(activeLine).toContain("↓ 800"); - }); + const lines = renderWidget(ui.state); + const activeLine = lines.find((l) => l.includes("Running…")); + expect(activeLine).toContain("↑ 1.5k"); + expect(activeLine).toContain("↓ 800"); + }); - it("deactivates a task with setActiveTask(id, false)", () => { - store.create("Task", "Desc", "done criterion", "Doing work"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("deactivates a task with setActiveTask(id, false)", () => { + store.create("Task", "Desc", "done criterion", "Doing work"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - // Should be active (spinner) - let lines = renderWidget(ui.state); - expect(lines[1]).toContain("Doing work…"); + // Should be active (spinner) + let lines = renderWidget(ui.state); + expect(lines[1]).toContain("Doing work…"); - widget.setActiveTask("1", false); - lines = renderWidget(ui.state); - // Should now show as regular in_progress (◼) - expect(lines[1]).toContain("◼"); - expect(lines[1]).not.toContain("Doing work…"); - }); + widget.setActiveTask("1", false); + lines = renderWidget(ui.state); + // Should now show as regular in_progress (◼) + expect(lines[1]).toContain("◼"); + expect(lines[1]).not.toContain("Doing work…"); + }); - it("prunes stale active IDs on update", () => { - store.create("Task", "Desc", "done"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("prunes stale active IDs on update", () => { + store.create("Task", "Desc", "done"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - // Complete the task externally - store.complete("1"); - 
widget.update(); + // Complete the task externally + store.complete("1"); + widget.update(); - // Should render as completed, not active - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("✔"); - expect(lines[1]).toContain("~~#1 Task~~"); - }); + // Completed tasks are hidden from the default widget + const lines = renderWidget(ui.state); + expect(lines).toEqual([]); + }); - it("supports multiple active tasks simultaneously", () => { - store.create("Task A", "Desc", "done criterion", "Processing A"); - store.create("Task B", "Desc", "done criterion", "Processing B"); - store.update("1", { status: "in_progress" }); - store.update("2", { status: "in_progress" }); - widget.setActiveTask("1", true); - widget.setActiveTask("2", true); + it("supports multiple active tasks simultaneously", () => { + store.create("Task A", "Desc", "done criterion", "Processing A"); + store.create("Task B", "Desc", "done criterion", "Processing B"); + store.update("1", { status: "in_progress" }); + store.update("2", { status: "in_progress" }); + widget.setActiveTask("1", true); + widget.setActiveTask("2", true); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("Processing A…"); - expect(lines[2]).toContain("Processing B…"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("Processing A…"); + expect(lines[2]).toContain("Processing B…"); + }); - it("distributes token usage across all active tasks", () => { - store.create("Task A", "Desc", "done criterion", "A"); - store.create("Task B", "Desc", "done criterion", "B"); - store.update("1", { status: "in_progress" }); - store.update("2", { status: "in_progress" }); - widget.setActiveTask("1", true); - widget.setActiveTask("2", true); + it("distributes token usage across all active tasks", () => { + store.create("Task A", "Desc", "done criterion", "A"); + store.create("Task B", "Desc", "done criterion", "B"); + store.update("1", { status: "in_progress" }); + store.update("2", { 
status: "in_progress" }); + widget.setActiveTask("1", true); + widget.setActiveTask("2", true); - widget.addTokenUsage(100, 50); + widget.addTokenUsage(100, 50); - const lines = renderWidget(ui.state); - // Both tasks should have the same token counts - expect(lines[1]).toContain("↑ 100"); - expect(lines[2]).toContain("↑ 100"); - }); + const lines = renderWidget(ui.state); + // Both tasks should have the same token counts + expect(lines[1]).toContain("↑ 100"); + expect(lines[2]).toContain("↑ 100"); + }); - it("dispose clears widget and timer", () => { - store.create("Task", "Desc", "done"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("dispose clears widget and timer", () => { + store.create("Task", "Desc", "done"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - widget.dispose(); - expect(ui.state.widgets.get("tasks")?.content).toBeUndefined(); - }); + widget.dispose(); + expect(ui.state.widgets.get("tasks")?.content).toBeUndefined(); + }); - it("uses subject as fallback when no activeForm", () => { - store.create("My Subject", "Desc", "done"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("uses subject as fallback when no activeForm", () => { + store.create("My Subject", "Desc", "done"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("My Subject…"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("My Subject…"); + }); - it("shows elapsed time but no token arrows when tokens are zero", () => { - store.create("No tokens", "Desc", "done criterion", "Working"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("shows elapsed time but no token arrows when tokens are zero", () => { + store.create("No tokens", "Desc", "done criterion", "Working"); + store.update("1", { status: 
"in_progress" }); + widget.setActiveTask("1", true); - // No addTokenUsage calls — tokens stay at 0 - vi.advanceTimersByTime(5000); - widget.update(); + // No addTokenUsage calls — tokens stay at 0 + vi.advanceTimersByTime(5000); + widget.update(); - const lines = renderWidget(ui.state); - const activeLine = lines.find(l => l.includes("Working…")); - expect(activeLine).toContain("5s"); - expect(activeLine).not.toContain("↑"); - expect(activeLine).not.toContain("↓"); - }); + const lines = renderWidget(ui.state); + const activeLine = lines.find((l) => l.includes("Working…")); + expect(activeLine).toContain("5s"); + expect(activeLine).not.toContain("↑"); + expect(activeLine).not.toContain("↓"); + }); - it("cleans up metrics when stale active IDs are pruned", () => { - store.create("Task", "Desc", "done criterion", "Running"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); - widget.addTokenUsage(100, 50); + it("cleans up metrics when stale active IDs are pruned", () => { + store.create("Task", "Desc", "done criterion", "Running"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); + widget.addTokenUsage(100, 50); - // Delete task externally - store.update("1", { status: "deleted" }); - widget.update(); + // Delete task externally + store.update("1", { status: "deleted" }); + widget.update(); - // Reactivate with same ID (new task) — should get fresh metrics - store.create("Task 2", "Desc", "done criterion", "Running"); // ID 2 - store.update("2", { status: "in_progress" }); - widget.setActiveTask("2", true); + // Reactivate with same ID (new task) — should get fresh metrics + store.create("Task 2", "Desc", "done criterion", "Running"); // ID 2 + store.update("2", { status: "in_progress" }); + widget.setActiveTask("2", true); - const lines = renderWidget(ui.state); - // Should not carry over old tokens - expect(lines[1]).not.toContain("↑ 100"); - }); + const lines = renderWidget(ui.state); + // Should 
not carry over old tokens + expect(lines[1]).not.toContain("↑ 100"); + }); - it("indents task lines under header", () => { - store.create("Indented task", "Desc", "done"); - widget.update(); + it("indents task lines under header", () => { + store.create("Indented task", "Desc", "done"); + widget.update(); - const lines = renderWidget(ui.state); - // Task line should start with 2 spaces - expect(lines[1]).toMatch(/^\s{2}/); - }); + const lines = renderWidget(ui.state); + // Task line should start with 2 spaces + expect(lines[1]).toMatch(/^\s{2}/); + }); - it("widget is placed aboveEditor", () => { - store.create("Task", "Desc", "done"); - widget.update(); + it("widget is placed aboveEditor", () => { + store.create("Task", "Desc", "done"); + widget.update(); - const entry = ui.state.widgets.get("tasks"); - expect(entry?.options?.placement).toBe("aboveEditor"); - }); + const entry = ui.state.widgets.get("tasks"); + expect(entry?.options?.placement).toBe("aboveEditor"); + }); }); describe("formatDuration (via widget rendering)", () => { - let store: TaskStore; - let widget: TaskWidget; - let ui: ReturnType; + let store: TaskStore; + let widget: TaskWidget; + let ui: ReturnType; - beforeEach(() => { - vi.useFakeTimers(); - store = new TaskStore(); - widget = new TaskWidget(store); - ui = mockUICtx(); - widget.setUICtx(ui.ctx); - }); + beforeEach(() => { + vi.useFakeTimers(); + store = new TaskStore(); + widget = new TaskWidget(store); + ui = mockUICtx(); + widget.setUICtx(ui.ctx); + }); - afterEach(() => { - widget.dispose(); - vi.useRealTimers(); - }); + afterEach(() => { + widget.dispose(); + vi.useRealTimers(); + }); - it("shows seconds for short durations", () => { - store.create("Quick", "Desc", "done criterion", "Working"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("shows seconds for short durations", () => { + store.create("Quick", "Desc", "done criterion", "Working"); + store.update("1", { status: "in_progress" }); + 
widget.setActiveTask("1", true); - vi.advanceTimersByTime(30_000); // 30s - widget.update(); + vi.advanceTimersByTime(30_000); // 30s + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("30s"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("30s"); + }); - it("shows hours for long durations", () => { - store.create("Long", "Desc", "done criterion", "Working"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("shows hours for long durations", () => { + store.create("Long", "Desc", "done criterion", "Working"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - vi.advanceTimersByTime(3_723_000); // 1h 2m 3s → "1h 2m" - widget.update(); + vi.advanceTimersByTime(3_723_000); // 1h 2m 3s → "1h 2m" + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("1h 2m"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("1h 2m"); + }); - it("shows exact hours without minutes", () => { - store.create("Exact", "Desc", "done criterion", "Working"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("shows exact hours without minutes", () => { + store.create("Exact", "Desc", "done criterion", "Working"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - vi.advanceTimersByTime(7_200_000); // 2h exactly - widget.update(); + vi.advanceTimersByTime(7_200_000); // 2h exactly + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("2h)"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("2h)"); + }); - it("shows minutes and seconds", () => { - store.create("Medium", "Desc", "done criterion", "Working"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("shows minutes and seconds", () => { + store.create("Medium", "Desc", "done 
criterion", "Working"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - vi.advanceTimersByTime(169_000); // 2m 49s - widget.update(); + vi.advanceTimersByTime(169_000); // 2m 49s + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("2m 49s"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("2m 49s"); + }); - it("formats small token counts without k suffix", () => { - store.create("Small", "Desc", "done criterion", "Working"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("formats small token counts without k suffix", () => { + store.create("Small", "Desc", "done criterion", "Working"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - widget.addTokenUsage(500, 200); - widget.update(); + widget.addTokenUsage(500, 200); + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("↑ 500"); - expect(lines[1]).toContain("↓ 200"); - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("↑ 500"); + expect(lines[1]).toContain("↓ 200"); + }); - it("formats token counts with k suffix and removes .0", () => { - store.create("Large", "Desc", "done criterion", "Working"); - store.update("1", { status: "in_progress" }); - widget.setActiveTask("1", true); + it("formats token counts with k suffix and removes .0", () => { + store.create("Large", "Desc", "done criterion", "Working"); + store.update("1", { status: "in_progress" }); + widget.setActiveTask("1", true); - widget.addTokenUsage(2000, 4100); - widget.update(); + widget.addTokenUsage(2000, 4100); + widget.update(); - const lines = renderWidget(ui.state); - expect(lines[1]).toContain("↑ 2k"); // 2000 → "2k" (not "2.0k") - expect(lines[1]).toContain("↓ 4.1k"); // 4100 → "4.1k" - }); + const lines = renderWidget(ui.state); + expect(lines[1]).toContain("↑ 2k"); // 2000 → "2k" (not "2.0k") + 
expect(lines[1]).toContain("↓ 4.1k"); // 4100 → "4.1k" + }); });