From 3c7d2f51409f677a2d613471bdf30b0c872a1c69 Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:20:30 +0800 Subject: [PATCH] feat: iterate and auto-run robot reviews --- README.md | 39 +++++- src/index.ts | 269 +++++++++++++++++++++++++++++++++---- src/review-badges.ts | 3 +- src/robot-review.ts | 107 +++++++++++++++ test/review-badges.test.ts | 13 +- test/robot-review.test.ts | 75 +++++++++++ 6 files changed, 474 insertions(+), 32 deletions(-) create mode 100644 src/robot-review.ts create mode 100644 test/robot-review.test.ts diff --git a/README.md b/README.md index 88d2151..4f6447b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A [pi](https://pi.dev) extension that adds structured human sign-off to task tra The core idea: agents cannot mark tasks complete themselves. They must call `lgtm_ask` with auditable evidence and explicit failure-mode analysis, then a human signs off via `/lgtm <id>`. -Tasks can also carry a separate fresh-perspective robot review from a subagent or other model family. That review is observational only and never completes the task. +Tasks can also carry a separate fresh-perspective robot review from a subagent or other model family. Robot reviews can iterate: if the latest review says the evidence is incomplete or unconvincing, human sign-off is held back until the agent strengthens the evidence and reruns review. 
## Install @@ -46,7 +46,7 @@ Stripped: `TaskExecute`, `TaskOutput`, `TaskStop`, `process-tracker.ts`, subagen Badges: - `🛠` tool evidence attached via `lgtm_ask` -- `🤖` robot review attached via `robot_review_ask` +- `🤖` one or more robot review iterations attached - `👀` pending human sign-off via `/lgtm` ## Tools @@ -89,6 +89,8 @@ After calling this, the task shows `👀` and is only completable via `/lgtm in_progress -> (lgtm_ask) -> pending_approval 👀 -> (/lgtm) -> completed +pending -> in_progress -> (lgtm_ask) + -> robot review iteration(s) 🤖 + -> pending_approval 👀 if latest robot review passes or no robot review is required + -> strengthen evidence + rerun review if latest robot review fails + -> (/lgtm) -> completed -> deleted ``` @@ -145,8 +173,9 @@ PI_TASKS_DEBUG=1 # trace to stderr ``` src/ -├── index.ts # 6 tools + /tasks + /lgtm commands + widget + event handlers +├── index.ts # 7 tools + /tasks + /lgtm commands + widget + event handlers ├── review-badges.ts # Review badge helpers for tool/robot/human lanes +├── robot-review.ts # Robot review iteration storage + compatibility helpers ├── types.ts # Task, TaskStatus types ├── task-store.ts # File-backed store with CRUD, locking, complete() method ├── auto-clear.ts # Turn-based auto-clearing of completed tasks diff --git a/src/index.ts b/src/index.ts index 72477f0..15ab1da 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,11 +14,18 @@ * /lgtm — Human signs off on a task (only way to complete) */ +import { spawn } from "node:child_process"; import { join, resolve } from "node:path"; import type { ExtensionAPI, ExtensionCommandContext, ExtensionContext } from "@mariozechner/pi-coding-agent"; import { Type } from "@sinclair/typebox"; import { AutoClearManager } from "./auto-clear.js"; import { getReviewBadges, REVIEW_BADGES } from "./review-badges.js"; +import { + appendRobotReviewMetadata, + getRobotReviews, + latestRobotReviewPasses, + type RobotReviewRecord, +} from "./robot-review.js"; import { TaskStore 
} from "./task-store.js"; import { loadTasksConfig } from "./tasks-config.js"; import { TaskWidget, type UICtx } from "./ui/task-widget.js"; @@ -27,10 +34,129 @@ function textResult(msg: string) { return { content: [{ type: "text" as const, text: msg }], details: undefined as any }; } -const TASK_TOOL_NAMES = new Set(["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "lgtm_ask", "robot_review_ask"]); +const TASK_TOOL_NAMES = new Set(["TaskCreate", "TaskList", "TaskGet", "TaskUpdate", "lgtm_ask", "robot_review_ask", "robot_review_run"]); const REMINDER_INTERVAL = 4; const AUTO_CLEAR_DELAY = 4; +type CommandResult = { stdout: string; stderr: string; exitCode: number | null }; + +function shellQuote(text: string): string { + return JSON.stringify(text); +} + +function runShellCommand(command: string, signal?: AbortSignal): Promise { + return new Promise((resolve, reject) => { + const child = spawn("bash", ["-lc", command], { stdio: ["ignore", "pipe", "pipe"] }); + const stdoutChunks: Buffer[] = []; + const stderrChunks: Buffer[] = []; + child.stdout.on("data", (data) => stdoutChunks.push(data)); + child.stderr.on("data", (data) => stderrChunks.push(data)); + child.on("error", reject); + const onAbort = () => child.kill(); + signal?.addEventListener("abort", onAbort, { once: true }); + child.on("close", (exitCode) => { + signal?.removeEventListener("abort", onAbort); + if (signal?.aborted) { + reject(new Error("aborted")); + return; + } + resolve({ + stdout: Buffer.concat(stdoutChunks).toString("utf-8"), + stderr: Buffer.concat(stderrChunks).toString("utf-8"), + exitCode, + }); + }); + }); +} + +function extractRobotReviewJson(output: string): Record { + const match = output.match(/ROBOT_REVIEW_JSON_START\s*([\s\S]*?)\s*ROBOT_REVIEW_JSON_END/); + if (!match) throw new Error("Robot reviewer did not return the expected JSON markers."); + return JSON.parse(match[1]) as Record; +} + +function formatRobotReview(review: RobotReviewRecord): string { + const parts = [ + 
`Robot review #${review.iteration} (${review.submitted_at})`, + `Reviewer: ${review.reviewer}${review.mode === "auto" ? " [auto]" : ""}`, + `Scope: ${review.scope}`, + `Evidence complete: ${review.evidence_complete ? "yes" : "no"}`, + `Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}`, + `Observations:\n- ${review.observations.join("\n- ")}`, + ]; + if (review.missing_evidence.length > 0) parts.push(`Missing evidence:\n- ${review.missing_evidence.join("\n- ")}`); + if (review.blind_spots) parts.push(`Blind spots: ${review.blind_spots}`); + return parts.join("\n"); +} + +function buildRobotReviewPrompt(task: any): string { + const priorReviews = getRobotReviews(task); + const priorSection = priorReviews.length > 0 + ? `\nPrevious robot reviews:\n${priorReviews.map(formatRobotReview).join("\n\n")}\n` + : "\nPrevious robot reviews:\n(none)\n"; + return [ + "Review the task evidence with a fresh perspective.", + "Observations should stay concrete and source-grounded.", + "Set evidence_complete=false if the supplied evidence does not cover the claimed done criterion.", + "Set evidence_convincing=false if the evidence exists but would not convince a skeptical reviewer.", + "Return exactly one JSON object between the markers ROBOT_REVIEW_JSON_START and ROBOT_REVIEW_JSON_END.", + "JSON schema:", + '{"reviewer":"string","scope":"string","observations":["string"],"blind_spots":"string","evidence_complete":true,"evidence_convincing":true,"missing_evidence":["string"]}', + "", + `Task #${task.id}: ${task.subject}`, + `Done criterion: ${task.done_criterion}`, + `Description: ${task.description}`, + "", + "Evidence package:", + `Evidence: ${task.metadata?.lgtm_evidence ?? "(missing)"}`, + `Failure likely: ${task.metadata?.lgtm_failure_likely ?? "(missing)"}`, + `Failure sneaky: ${task.metadata?.lgtm_failure_sneaky ?? "(missing)"}`, + `Falsification test: ${task.metadata?.lgtm_falsification_test ?? 
"(missing)"}`, + `Verification hints: ${Array.isArray(task.metadata?.lgtm_verification_hints) ? task.metadata.lgtm_verification_hints.join(" | ") : "(missing)"}`, + `Remaining uncertainty: ${task.metadata?.lgtm_remaining_uncertainty ?? "(missing)"}`, + priorSection, + "Output format:", + "ROBOT_REVIEW_JSON_START", + '{"reviewer":"...","scope":"...","observations":["..."],"blind_spots":"...","evidence_complete":true,"evidence_convincing":true,"missing_evidence":["..."]}', + "ROBOT_REVIEW_JSON_END", + ].join("\n"); +} + +async function runAutomaticRobotReview( + task: any, + signal?: AbortSignal, +): Promise<{ review: Omit; command: string }> { + const reviewerCommand = process.env.PI_LGTM_ROBOT_REVIEW_CMD?.trim() + || "acpx --approve-reads --non-interactive-permissions deny opencode exec"; + const prompt = buildRobotReviewPrompt(task); + const command = `${reviewerCommand} ${shellQuote(prompt)}`; + const result = await runShellCommand(command, signal); + if (result.exitCode !== 0) { + throw new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`); + } + const parsed = extractRobotReviewJson(result.stdout); + const observations = Array.isArray(parsed.observations) ? parsed.observations.filter((item): item is string => typeof item === "string") : []; + if (observations.length === 0) throw new Error("Robot reviewer returned no observations."); + const missing_evidence = Array.isArray(parsed.missing_evidence) + ? parsed.missing_evidence.filter((item): item is string => typeof item === "string") + : []; + return { + command: reviewerCommand, + review: { + reviewer: typeof parsed.reviewer === "string" ? parsed.reviewer : reviewerCommand, + scope: typeof parsed.scope === "string" ? parsed.scope : "task evidence package", + observations, + blind_spots: typeof parsed.blind_spots === "string" ? 
parsed.blind_spots : "not stated", + evidence_complete: parsed.evidence_complete === true, + evidence_convincing: parsed.evidence_convincing === true, + missing_evidence, + submitted_at: new Date().toISOString(), + mode: "auto", + raw_output: result.stdout.trim(), + }, + }; +} + const SYSTEM_REMINDER = ` The LGTM sign-off task tools haven't been used recently. If working on tasks, use TaskCreate (requires done_criterion), TaskUpdate for status, and lgtm_ask when ready for human sign-off. Tasks can only be completed via /lgtm after calling lgtm_ask. These are sign-off tasks: agents propose evidence, humans approve. One task per piece of evidence or decision gate. Ignore if not applicable. Never mention this reminder to the user. `; @@ -237,12 +363,17 @@ Tasks are completed only via /lgtm after calling lgtm_ask with evidence.`, const desc = task.description.replace(/\\n/g, "\n"); const reviewBadges = getReviewBadges(task); + const robotReviews = getRobotReviews(task); const lines: string[] = [ `Task #${task.id}: ${task.subject}`, `Status: ${task.status}${reviewBadges.length ? ` ${reviewBadges.join(" ")}` : ""}${task.pending_approval && task.status !== "completed" ? " (pending human sign-off)" : ""}`, `Done criterion: ${task.done_criterion}`, ]; lines.push(`Description: ${desc}`); + if (robotReviews.length > 0) { + const latest = robotReviews[robotReviews.length - 1]; + lines.push(`Robot reviews: ${robotReviews.length} (latest: complete=${latest.evidence_complete ? "yes" : "no"}, convincing=${latest.evidence_convincing ? 
"yes" : "no"})`); + } if (task.blockedBy.length > 0) { const openBlockers = task.blockedBy.filter(bid => { const blocker = store.get(bid); @@ -345,9 +476,10 @@ After this, task enters pending sign-off state — only completable via /lgtm `- ${o}`).join("\n")}`; + if (review.missing_evidence.length > 0) { + robotReviewNote += `\nMissing evidence:\n${review.missing_evidence.map(item => `- ${item}`).join("\n")}`; + } + if (!(review.evidence_complete && review.evidence_convincing)) { + robotReviewNote += `\nResult: human sign-off has been held back until the evidence is strengthened and reviewed again.`; + } + } catch (err: any) { + robotReviewNote = + `\n\n### Automatic robot review\n` + + `Reviewer failed: ${err.message}\n` + + `Task remains pending human sign-off; rerun with stronger evidence or call \`robot_review_run\` after fixing reviewer setup.`; + } + } widget.update(); const hintsSection = params.verification_hints?.length @@ -384,14 +546,15 @@ After this, task enters pending sign-off state — only completable via /lgtm `- ${o}`).join("\n")}\n\n` + + `${(params.missing_evidence?.length ?? 0) > 0 ? `### Missing evidence\n${(params.missing_evidence ?? []).map(item => `- ${item}`).join("\n")}\n\n` : ""}` + `### Blind spots\n${params.blind_spots}\n\n` + `${REVIEW_BADGES.robot} Robot review stored. Human sign-off still requires \`/lgtm ${task.id}\`.`; @@ -443,6 +620,46 @@ This does not complete the task. Human /lgtm remains the only completion path.`, }, }); + pi.registerTool({ + name: "robot_review_run", + label: "robot_review_run", + description: `Run the configured automatic robot reviewer against the current task evidence. + +Uses PI_LGTM_ROBOT_REVIEW_CMD if set, otherwise defaults to: +\`acpx --approve-reads --non-interactive-permissions deny opencode exec\` + +This appends a new robot-review iteration. 
If the reviewer marks evidence incomplete or unconvincing, pending human sign-off is cleared until stronger evidence is submitted and reviewed again.`, + parameters: Type.Object({ + taskId: Type.String({ description: "Task ID to review" }), + }), + + async execute(_toolCallId, params, signal, _onUpdate, _ctx) { + const task = store.get(params.taskId); + if (!task) return textResult(`Task #${params.taskId} not found`); + if (!task.metadata?.lgtm_evidence) { + return textResult(`Task #${params.taskId} has no stored evidence yet. Call lgtm_ask first.`); + } + + const { review, command } = await runAutomaticRobotReview(task, signal); + store.update(params.taskId, { + pending_approval: review.evidence_complete && review.evidence_convincing ? task.pending_approval : false, + metadata: appendRobotReviewMetadata(task, review), + }); + widget.update(); + + return textResult( + `## Automatic robot review for task #${task.id}: ${task.subject}\n` + + `Reviewer command: ${command}\n` + + `Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` + + `Evidence complete: ${review.evidence_complete ? "yes" : "no"}\n` + + `Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}\n\n` + + `### Observations\n${review.observations.map(o => `- ${o}`).join("\n")}\n\n` + + `${review.missing_evidence.length > 0 ? `### Missing evidence\n${review.missing_evidence.map(item => `- ${item}`).join("\n")}\n\n` : ""}` + + `### Blind spots\n${review.blind_spots}`, + ); + }, + }); + // ────────────────────────────────────────────────── // /tasks command // ────────────────────────────────────────────────── @@ -531,12 +748,11 @@ This does not complete the task. Human /lgtm remains the only completion path.`, evidenceNote = parts.join("\n"); } let robotNote = ""; - if (em.robot_review_observations?.length) { - const parts = [`\n\nRobot review (${em.robot_review_submitted_at ?? 
"?"})`]; - if (em.robot_review_reviewer) parts.push(`Reviewer: ${em.robot_review_reviewer}`); - if (em.robot_review_scope) parts.push(`Scope: ${em.robot_review_scope}`); - parts.push(`Observations:\n- ${em.robot_review_observations.join("\n- ")}`); - if (em.robot_review_blind_spots) parts.push(`Blind spots: ${em.robot_review_blind_spots}`); + const robotReviews = getRobotReviews(task); + if (robotReviews.length > 0) { + const latest = robotReviews[robotReviews.length - 1]; + const parts = [`\n\nRobot reviews: ${robotReviews.length}`]; + parts.push(formatRobotReview(latest)); robotNote = parts.join("\n"); } const title = `#${task.id} [${task.status}] ${task.subject}\nDone: ${task.done_criterion}${pendingNote}\n${task.description}${evidenceNote}${robotNote}`; @@ -585,6 +801,10 @@ This does not complete the task. Human /lgtm remains the only completion path.`, ctx.ui.notify(`Task #${taskId} not ready. Agent must call lgtm_ask first.`, "error"); return; } + if (getRobotReviews(task).length > 0 && !latestRobotReviewPasses(task)) { + ctx.ui.notify(`Task #${taskId} is blocked by the latest robot review. Strengthen evidence and rerun review first.`, "error"); + return; + } // Show stored evidence for review before sign-off const m = task.metadata; @@ -598,15 +818,14 @@ This does not complete the task. Human /lgtm remains the only completion path.`, if (m.lgtm_verification_hints?.length) evidenceParts.push(`Hints: ${m.lgtm_verification_hints.join(", ")}`); evidenceParts.push(`Submitted: ${m.lgtm_submitted_at}`); } - if (m.robot_review_observations?.length) { - const robotParts = [ - `Robot review:\nReviewer: ${m.robot_review_reviewer ?? "?"}`, - `Scope: ${m.robot_review_scope ?? 
"?"}`, - `Observations:\n- ${m.robot_review_observations.join("\n- ")}`, - ]; - if (m.robot_review_blind_spots) robotParts.push(`Blind spots: ${m.robot_review_blind_spots}`); - if (m.robot_review_submitted_at) robotParts.push(`Submitted: ${m.robot_review_submitted_at}`); - evidenceParts.push(robotParts.join("\n")); + const robotReviews = getRobotReviews(task); + if (robotReviews.length > 0) { + evidenceParts.push( + `Robot reviews (${robotReviews.length} total):\n${robotReviews.map(formatRobotReview).join("\n\n")}`, + ); + if (!latestRobotReviewPasses(task)) { + evidenceParts.push("Latest robot review says the evidence is not yet complete/convincing."); + } } const evidenceSummary = evidenceParts.length > 0 ? evidenceParts.join("\n\n") : "(no stored evidence)"; const confirm = await ctx.ui.select( diff --git a/src/review-badges.ts b/src/review-badges.ts index d49ff99..63c41b4 100644 --- a/src/review-badges.ts +++ b/src/review-badges.ts @@ -1,3 +1,4 @@ +import { getRobotReviews } from "./robot-review.js"; import type { Task } from "./types.js"; export const REVIEW_BADGES = { @@ -9,7 +10,7 @@ export const REVIEW_BADGES = { export function getReviewBadges(task: Task): string[] { const badges: string[] = []; if (task.metadata?.lgtm_evidence) badges.push(REVIEW_BADGES.tool); - if (task.metadata?.robot_review_observations?.length) badges.push(REVIEW_BADGES.robot); + if (getRobotReviews(task).length > 0) badges.push(REVIEW_BADGES.robot); if (task.pending_approval && task.status !== "completed") badges.push(REVIEW_BADGES.human); return badges; } diff --git a/src/robot-review.ts b/src/robot-review.ts new file mode 100644 index 0000000..6244ba8 --- /dev/null +++ b/src/robot-review.ts @@ -0,0 +1,107 @@ +import type { Task } from "./types.js"; + +export type RobotReviewMode = "manual" | "auto"; + +export interface RobotReviewRecord { + iteration: number; + reviewer: string; + scope: string; + observations: string[]; + blind_spots: string; + evidence_complete: boolean; + 
evidence_convincing: boolean; + missing_evidence: string[]; + submitted_at: string; + mode: RobotReviewMode; + raw_output?: string; +} + +function toStringArray(value: unknown): string[] { + return Array.isArray(value) ? value.filter((item): item is string => typeof item === "string") : []; +} + +function normalizeReview(value: unknown, index: number): RobotReviewRecord | undefined { + if (!value || typeof value !== "object") return undefined; + const review = value as Record; + const reviewer = typeof review.reviewer === "string" ? review.reviewer : "unknown"; + const scope = typeof review.scope === "string" ? review.scope : "unknown"; + const observations = toStringArray(review.observations); + if (observations.length === 0) return undefined; + return { + iteration: typeof review.iteration === "number" ? review.iteration : index + 1, + reviewer, + scope, + observations, + blind_spots: typeof review.blind_spots === "string" ? review.blind_spots : "not recorded", + evidence_complete: typeof review.evidence_complete === "boolean" ? review.evidence_complete : true, + evidence_convincing: typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true, + missing_evidence: toStringArray(review.missing_evidence), + submitted_at: typeof review.submitted_at === "string" ? review.submitted_at : new Date(0).toISOString(), + mode: review.mode === "auto" ? "auto" : "manual", + raw_output: typeof review.raw_output === "string" ? review.raw_output : undefined, + }; +} + +function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined { + const observations = toStringArray(task.metadata?.robot_review_observations); + if (observations.length === 0) return undefined; + return { + iteration: 1, + reviewer: typeof task.metadata?.robot_review_reviewer === "string" ? task.metadata.robot_review_reviewer : "unknown", + scope: typeof task.metadata?.robot_review_scope === "string" ? 
task.metadata.robot_review_scope : "unknown", + observations, + blind_spots: typeof task.metadata?.robot_review_blind_spots === "string" ? task.metadata.robot_review_blind_spots : "not recorded", + evidence_complete: typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true, + evidence_convincing: typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true, + missing_evidence: toStringArray(task.metadata?.robot_review_missing_evidence), + submitted_at: typeof task.metadata?.robot_review_submitted_at === "string" ? task.metadata.robot_review_submitted_at : new Date(0).toISOString(), + mode: task.metadata?.robot_review_mode === "auto" ? "auto" : "manual", + raw_output: typeof task.metadata?.robot_review_raw_output === "string" ? task.metadata.robot_review_raw_output : undefined, + }; +} + +export function getRobotReviews(task: Task): RobotReviewRecord[] { + const reviews = Array.isArray(task.metadata?.robot_reviews) + ? task.metadata.robot_reviews + .map((review: unknown, index: number) => normalizeReview(review, index)) + .filter((review): review is RobotReviewRecord => review !== undefined) + : []; + if (reviews.length > 0) { + return reviews.map((review, index) => ({ ...review, iteration: index + 1 })); + } + const legacy = getLegacyRobotReview(task); + return legacy ? [legacy] : []; +} + +export function getLatestRobotReview(task: Task): RobotReviewRecord | undefined { + const reviews = getRobotReviews(task); + return reviews.length > 0 ? 
reviews[reviews.length - 1] : undefined; +} + +export function appendRobotReviewMetadata(task: Task, review: Omit): Record { + const robot_reviews = [...getRobotReviews(task), { ...review, iteration: 0 }].map((entry, index) => ({ + ...entry, + iteration: index + 1, + })); + const latest = robot_reviews[robot_reviews.length - 1]; + return { + robot_reviews, + robot_review_reviewer: latest.reviewer, + robot_review_scope: latest.scope, + robot_review_observations: latest.observations, + robot_review_blind_spots: latest.blind_spots, + robot_review_evidence_complete: latest.evidence_complete, + robot_review_evidence_convincing: latest.evidence_convincing, + robot_review_missing_evidence: latest.missing_evidence, + robot_review_submitted_at: latest.submitted_at, + robot_review_mode: latest.mode, + robot_review_raw_output: latest.raw_output ?? null, + robot_review_requires_followup: !(latest.evidence_complete && latest.evidence_convincing), + robot_review_iteration_count: robot_reviews.length, + }; +} + +export function latestRobotReviewPasses(task: Task): boolean { + const latest = getLatestRobotReview(task); + return latest ? 
latest.evidence_complete && latest.evidence_convincing : false; +} diff --git a/test/review-badges.test.ts b/test/review-badges.test.ts index 62e03e0..05a910c 100644 --- a/test/review-badges.test.ts +++ b/test/review-badges.test.ts @@ -30,7 +30,18 @@ describe("getReviewBadges", () => { pending_approval: true, metadata: { lgtm_evidence: "npm test", - robot_review_observations: ["Observed one unchecked edge case"], + robot_reviews: [{ + iteration: 1, + reviewer: "opencode", + scope: "task evidence", + observations: ["Observed one unchecked edge case"], + blind_spots: "Did not inspect prod traffic", + evidence_complete: false, + evidence_convincing: false, + missing_evidence: ["Prod traffic sample"], + submitted_at: "2026-04-17T00:00:00.000Z", + mode: "manual", + }], }, }); diff --git a/test/robot-review.test.ts b/test/robot-review.test.ts new file mode 100644 index 0000000..0252909 --- /dev/null +++ b/test/robot-review.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, it } from "vitest"; +import { appendRobotReviewMetadata, getLatestRobotReview, getRobotReviews } from "../src/robot-review.js"; +import type { Task } from "../src/types.js"; + +function makeTask(overrides: Partial = {}): Task { + return { + id: "1", + subject: "Test", + description: "Desc", + done_criterion: "done", + pending_approval: false, + status: "pending", + progress_label: undefined, + metadata: {}, + blocks: [], + blockedBy: [], + createdAt: 0, + updatedAt: 0, + ...overrides, + }; +} + +describe("robot review helpers", () => { + it("reads legacy single-review metadata", () => { + const task = makeTask({ + metadata: { + robot_review_reviewer: "opencode", + robot_review_scope: "task evidence", + robot_review_observations: ["Observed no command output for the core claim"], + robot_review_blind_spots: "Did not rerun tests", + robot_review_submitted_at: "2026-04-17T00:00:00.000Z", + }, + }); + + const reviews = getRobotReviews(task); + expect(reviews).toHaveLength(1); + 
expect(reviews[0].reviewer).toBe("opencode"); + expect(reviews[0].iteration).toBe(1); + }); + + it("appends robot reviews as iterations", () => { + const task = makeTask(); + const metadata1 = appendRobotReviewMetadata(task, { + reviewer: "opencode", + scope: "task evidence", + observations: ["Observed missing benchmark output"], + blind_spots: "Did not inspect prod config", + evidence_complete: false, + evidence_convincing: false, + missing_evidence: ["Benchmark output for the claimed speedup"], + submitted_at: "2026-04-17T00:00:00.000Z", + mode: "auto", + }); + const task1 = makeTask({ metadata: metadata1 }); + const metadata2 = appendRobotReviewMetadata(task1, { + reviewer: "opencode", + scope: "updated task evidence", + observations: ["Observed benchmark output and test transcript"], + blind_spots: "Did not inspect long-run stability", + evidence_complete: true, + evidence_convincing: true, + missing_evidence: [], + submitted_at: "2026-04-17T01:00:00.000Z", + mode: "auto", + }); + + const task2 = makeTask({ metadata: metadata2 }); + const reviews = getRobotReviews(task2); + expect(reviews).toHaveLength(2); + expect(reviews[0].iteration).toBe(1); + expect(reviews[1].iteration).toBe(2); + expect(getLatestRobotReview(task2)?.evidence_convincing).toBe(true); + expect(task2.metadata.robot_review_iteration_count).toBe(2); + }); +});