From 0ba01736156e85a05b7d4bb54daf713028b61c2a Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:31:07 +0800 Subject: [PATCH] feat: gate lgtm on robot review acceptance --- README.md | 3 ++- src/index.ts | 34 ++++++++++++++++++++++++++++++++-- src/robot-review.ts | 13 ++++++++++++- test/review-badges.test.ts | 1 + test/robot-review.test.ts | 3 +++ 5 files changed, 50 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4f6447b..1a567ef 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ Required fields: | `scope` | What the reviewer inspected | | `observations` | Concrete observations only. No advice, verdicts, or editorial | | `blind_spots` | What the reviewer did not inspect or could not verify | +| `accepted` | Overall accept/reject decision for whether the task is ready to advance | | `evidence_complete` | Whether the supplied evidence actually covers the done criterion | | `evidence_convincing` | Whether the supplied evidence would convince a skeptical reviewer | | `missing_evidence` | Concrete missing checks or artifacts needed before human sign-off | @@ -127,7 +128,7 @@ PI_LGTM_ROBOT_REVIEW_CMD='acpx --approve-reads --non-interactive-permissions den PI_LGTM_AUTO_ROBOT_REVIEW=1 ``` -This appends a new robot-review iteration. If the latest robot review sets `evidence_complete=false` or `evidence_convincing=false`, `/lgtm` is blocked until stronger evidence is submitted and reviewed again. +This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again. ## Commands diff --git a/src/index.ts b/src/index.ts index 15ab1da..31508df 100644 --- a/src/index.ts +++ b/src/index.ts @@ -22,6 +22,7 @@ import { AutoClearManager } from "./auto-clear.js"; import { getReviewBadges, REVIEW_BADGES } from "./review-badges.js"; import { appendRobotReviewMetadata, + getLatestRobotReview, getRobotReviews, latestRobotReviewPasses, type RobotReviewRecord, @@ -80,6 +81,7 @@ function formatRobotReview(review: RobotReviewRecord): string { `Robot review #${review.iteration} (${review.submitted_at})`, `Reviewer: ${review.reviewer}${review.mode === "auto" ? " [auto]" : ""}`, `Scope: ${review.scope}`, + `Accepted: ${review.accepted ? "yes" : "no"}`, `Evidence complete: ${review.evidence_complete ? "yes" : "no"}`, `Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}`, `Observations:\n- ${review.observations.join("\n- ")}`, @@ -101,7 +103,7 @@ function buildRobotReviewPrompt(task: any): string { "Set evidence_convincing=false if the evidence exists but would not convince a skeptical reviewer.", "Return exactly one JSON object between the markers ROBOT_REVIEW_JSON_START and ROBOT_REVIEW_JSON_END.", "JSON schema:", - '{"reviewer":"string","scope":"string","observations":["string"],"blind_spots":"string","evidence_complete":true,"evidence_convincing":true,"missing_evidence":["string"]}', + '{"reviewer":"string","scope":"string","observations":["string"],"blind_spots":"string","accepted":true,"evidence_complete":true,"evidence_convincing":true,"missing_evidence":["string"]}', "", `Task #${task.id}: ${task.subject}`, `Done criterion: ${task.done_criterion}`, @@ -117,7 +119,7 @@ function buildRobotReviewPrompt(task: any): string { priorSection, "Output format:", "ROBOT_REVIEW_JSON_START", - '{"reviewer":"...","scope":"...","observations":["..."],"blind_spots":"...","evidence_complete":true,"evidence_convincing":true,"missing_evidence":["..."]}', + '{"reviewer":"...","scope":"...","observations":["..."],"blind_spots":"...","accepted":true,"evidence_complete":true,"evidence_convincing":true,"missing_evidence":["..."]}', "ROBOT_REVIEW_JSON_END", ].join("\n"); } @@ -147,6 +149,9 @@ async function runAutomaticRobotReview( scope: typeof parsed.scope === "string" ? parsed.scope : "task evidence package", observations, blind_spots: typeof parsed.blind_spots === "string" ? parsed.blind_spots : "not stated", + accepted: typeof parsed.accepted === "boolean" + ? parsed.accepted + : parsed.evidence_complete === true && parsed.evidence_convincing === true, evidence_complete: parsed.evidence_complete === true, evidence_convincing: parsed.evidence_convincing === true, missing_evidence, @@ -249,6 +254,27 @@ export default function (pi: ExtensionAPI) { showPersistedTasks(); }); + pi.on("before_agent_start", async (event) => { + const followups = store.list().flatMap(task => { + const latest = getLatestRobotReview(task); + return latest && !latest.accepted ? [{ task, latest }] : []; + }); + if (followups.length === 0) return undefined; + + const reminder = followups.map(({ task, latest }) => { + const missing = latest.missing_evidence.length > 0 + ? ` Missing evidence: ${latest.missing_evidence.join("; ")}.` + : ""; + return `- Task #${task.id} ${task.subject}: latest robot review rejected the evidence.${missing} Strengthen the evidence, call lgtm_ask again, then rerun robot_review_run before asking for human sign-off.`; + }).join("\n"); + + return { + systemPrompt: + event.systemPrompt + + `\n\n\nLatest robot review follow-up required:\n${reminder}\nDo not ask for human sign-off until the latest robot review accepts the evidence.\n\n`, + }; + }); + pi.on("session_switch" as any, async (event: any, ctx: ExtensionContext) => { widget.setUICtx(ctx.ui as UICtx); const isResume = event?.reason === "resume"; @@ -578,6 +604,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`, blind_spots: Type.String({ description: "What the reviewer did not inspect or could not verify" }), evidence_complete: Type.Boolean({ description: "Whether the supplied evidence covers the claimed done criterion." }), evidence_convincing: Type.Boolean({ description: "Whether the supplied evidence would convince a skeptical reviewer." }), + accepted: Type.Optional(Type.Boolean({ description: "Overall review decision. Defaults to evidence_complete && evidence_convincing." })), missing_evidence: Type.Optional(Type.Array(Type.String(), { description: "Concrete missing checks, artifacts, or observations needed before human sign-off." })), }), @@ -594,6 +621,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`, scope: params.scope, observations: params.observations, blind_spots: params.blind_spots, + accepted: params.accepted ?? (params.evidence_complete && params.evidence_convincing), evidence_complete: params.evidence_complete, evidence_convincing: params.evidence_convincing, missing_evidence: params.missing_evidence ?? [], @@ -609,6 +637,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`, `Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` + `Reviewer: ${params.reviewer}\n` + `Scope: ${params.scope}\n\n` + + `Accepted: ${(params.accepted ?? (params.evidence_complete && params.evidence_convincing)) ? "yes" : "no"}\n` + `Evidence complete: ${params.evidence_complete ? "yes" : "no"}\n` + `Evidence convincing: ${params.evidence_convincing ? "yes" : "no"}\n\n` + `### Observations\n${params.observations.map(o => `- ${o}`).join("\n")}\n\n` + @@ -651,6 +680,7 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp `## Automatic robot review for task #${task.id}: ${task.subject}\n` + `Reviewer command: ${command}\n` + `Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` + + `Accepted: ${review.accepted ? "yes" : "no"}\n` + `Evidence complete: ${review.evidence_complete ? "yes" : "no"}\n` + `Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}\n\n` + `### Observations\n${review.observations.map(o => `- ${o}`).join("\n")}\n\n` + diff --git a/src/robot-review.ts b/src/robot-review.ts index 6244ba8..1948bd3 100644 --- a/src/robot-review.ts +++ b/src/robot-review.ts @@ -8,6 +8,7 @@ export interface RobotReviewRecord { scope: string; observations: string[]; blind_spots: string; + accepted: boolean; evidence_complete: boolean; evidence_convincing: boolean; missing_evidence: string[]; @@ -33,6 +34,10 @@ function normalizeReview(value: unknown, index: number): RobotReviewRecord | und scope, observations, blind_spots: typeof review.blind_spots === "string" ? review.blind_spots : "not recorded", + accepted: typeof review.accepted === "boolean" + ? review.accepted + : (typeof review.evidence_complete === "boolean" ? review.evidence_complete : true) + && (typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true), evidence_complete: typeof review.evidence_complete === "boolean" ? review.evidence_complete : true, evidence_convincing: typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true, missing_evidence: toStringArray(review.missing_evidence), @@ -51,6 +56,10 @@ function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined { scope: typeof task.metadata?.robot_review_scope === "string" ? task.metadata.robot_review_scope : "unknown", observations, blind_spots: typeof task.metadata?.robot_review_blind_spots === "string" ? task.metadata.robot_review_blind_spots : "not recorded", + accepted: typeof task.metadata?.robot_review_accepted === "boolean" + ? task.metadata.robot_review_accepted + : (typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true) + && (typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true), evidence_complete: typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true, evidence_convincing: typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true, missing_evidence: toStringArray(task.metadata?.robot_review_missing_evidence), @@ -81,6 +90,7 @@ export function getLatestRobotReview(task: Task): RobotReviewRecord | undefined export function appendRobotReviewMetadata(task: Task, review: Omit): Record { const robot_reviews = [...getRobotReviews(task), { ...review, iteration: 0 }].map((entry, index) => ({ ...entry, + accepted: entry.accepted, iteration: index + 1, })); const latest = robot_reviews[robot_reviews.length - 1]; @@ -90,6 +100,7 @@ export function appendRobotReviewMetadata(task: Task, review: Omit { scope: "task evidence", observations: ["Observed one unchecked edge case"], blind_spots: "Did not inspect prod traffic", + accepted: false, evidence_complete: false, evidence_convincing: false, missing_evidence: ["Prod traffic sample"], diff --git a/test/robot-review.test.ts b/test/robot-review.test.ts index 0252909..d8f77b1 100644 --- a/test/robot-review.test.ts +++ b/test/robot-review.test.ts @@ -36,6 +36,7 @@ describe("robot review helpers", () => { expect(reviews).toHaveLength(1); expect(reviews[0].reviewer).toBe("opencode"); expect(reviews[0].iteration).toBe(1); + expect(reviews[0].accepted).toBe(true); }); it("appends robot reviews as iterations", () => { @@ -45,6 +46,7 @@ describe("robot review helpers", () => { scope: "task evidence", observations: ["Observed missing benchmark output"], blind_spots: "Did not inspect prod config", + accepted: false, evidence_complete: false, evidence_convincing: false, missing_evidence: ["Benchmark output for the claimed speedup"], @@ -57,6 +59,7 @@ describe("robot review helpers", () => { scope: "updated task evidence", observations: ["Observed benchmark output and test transcript"], blind_spots: "Did not inspect long-run stability", + accepted: true, evidence_complete: true, evidence_convincing: true, missing_evidence: [],