mirror of
https://github.com/wassname/pi-lgtm.git
synced 2026-06-27 16:46:17 +08:00
feat: gate lgtm on robot review acceptance
This commit is contained in:
@@ -104,6 +104,7 @@ Required fields:
|
|||||||
| `scope` | What the reviewer inspected |
|
| `scope` | What the reviewer inspected |
|
||||||
| `observations` | Concrete observations only. No advice, verdicts, or editorial |
|
| `observations` | Concrete observations only. No advice, verdicts, or editorial |
|
||||||
| `blind_spots` | What the reviewer did not inspect or could not verify |
|
| `blind_spots` | What the reviewer did not inspect or could not verify |
|
||||||
|
| `accepted` | Overall accept/reject decision for whether the task is ready to advance |
|
||||||
| `evidence_complete` | Whether the supplied evidence actually covers the done criterion |
|
| `evidence_complete` | Whether the supplied evidence actually covers the done criterion |
|
||||||
| `evidence_convincing` | Whether the supplied evidence would convince a skeptical reviewer |
|
| `evidence_convincing` | Whether the supplied evidence would convince a skeptical reviewer |
|
||||||
| `missing_evidence` | Concrete missing checks or artifacts needed before human sign-off |
|
| `missing_evidence` | Concrete missing checks or artifacts needed before human sign-off |
|
||||||
@@ -127,7 +128,7 @@ PI_LGTM_ROBOT_REVIEW_CMD='acpx --approve-reads --non-interactive-permissions den
|
|||||||
PI_LGTM_AUTO_ROBOT_REVIEW=1
|
PI_LGTM_AUTO_ROBOT_REVIEW=1
|
||||||
```
|
```
|
||||||
|
|
||||||
This appends a new robot-review iteration. If the latest robot review sets `evidence_complete=false` or `evidence_convincing=false`, `/lgtm` is blocked until stronger evidence is submitted and reviewed again.
|
This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again.
|
||||||
|
|
||||||
## Commands
|
## Commands
|
||||||
|
|
||||||
|
|||||||
+32
-2
@@ -22,6 +22,7 @@ import { AutoClearManager } from "./auto-clear.js";
|
|||||||
import { getReviewBadges, REVIEW_BADGES } from "./review-badges.js";
|
import { getReviewBadges, REVIEW_BADGES } from "./review-badges.js";
|
||||||
import {
|
import {
|
||||||
appendRobotReviewMetadata,
|
appendRobotReviewMetadata,
|
||||||
|
getLatestRobotReview,
|
||||||
getRobotReviews,
|
getRobotReviews,
|
||||||
latestRobotReviewPasses,
|
latestRobotReviewPasses,
|
||||||
type RobotReviewRecord,
|
type RobotReviewRecord,
|
||||||
@@ -80,6 +81,7 @@ function formatRobotReview(review: RobotReviewRecord): string {
|
|||||||
`Robot review #${review.iteration} (${review.submitted_at})`,
|
`Robot review #${review.iteration} (${review.submitted_at})`,
|
||||||
`Reviewer: ${review.reviewer}${review.mode === "auto" ? " [auto]" : ""}`,
|
`Reviewer: ${review.reviewer}${review.mode === "auto" ? " [auto]" : ""}`,
|
||||||
`Scope: ${review.scope}`,
|
`Scope: ${review.scope}`,
|
||||||
|
`Accepted: ${review.accepted ? "yes" : "no"}`,
|
||||||
`Evidence complete: ${review.evidence_complete ? "yes" : "no"}`,
|
`Evidence complete: ${review.evidence_complete ? "yes" : "no"}`,
|
||||||
`Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}`,
|
`Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}`,
|
||||||
`Observations:\n- ${review.observations.join("\n- ")}`,
|
`Observations:\n- ${review.observations.join("\n- ")}`,
|
||||||
@@ -101,7 +103,7 @@ function buildRobotReviewPrompt(task: any): string {
|
|||||||
"Set evidence_convincing=false if the evidence exists but would not convince a skeptical reviewer.",
|
"Set evidence_convincing=false if the evidence exists but would not convince a skeptical reviewer.",
|
||||||
"Return exactly one JSON object between the markers ROBOT_REVIEW_JSON_START and ROBOT_REVIEW_JSON_END.",
|
"Return exactly one JSON object between the markers ROBOT_REVIEW_JSON_START and ROBOT_REVIEW_JSON_END.",
|
||||||
"JSON schema:",
|
"JSON schema:",
|
||||||
'{"reviewer":"string","scope":"string","observations":["string"],"blind_spots":"string","evidence_complete":true,"evidence_convincing":true,"missing_evidence":["string"]}',
|
'{"reviewer":"string","scope":"string","observations":["string"],"blind_spots":"string","accepted":true,"evidence_complete":true,"evidence_convincing":true,"missing_evidence":["string"]}',
|
||||||
"",
|
"",
|
||||||
`Task #${task.id}: ${task.subject}`,
|
`Task #${task.id}: ${task.subject}`,
|
||||||
`Done criterion: ${task.done_criterion}`,
|
`Done criterion: ${task.done_criterion}`,
|
||||||
@@ -117,7 +119,7 @@ function buildRobotReviewPrompt(task: any): string {
|
|||||||
priorSection,
|
priorSection,
|
||||||
"Output format:",
|
"Output format:",
|
||||||
"ROBOT_REVIEW_JSON_START",
|
"ROBOT_REVIEW_JSON_START",
|
||||||
'{"reviewer":"...","scope":"...","observations":["..."],"blind_spots":"...","evidence_complete":true,"evidence_convincing":true,"missing_evidence":["..."]}',
|
'{"reviewer":"...","scope":"...","observations":["..."],"blind_spots":"...","accepted":true,"evidence_complete":true,"evidence_convincing":true,"missing_evidence":["..."]}',
|
||||||
"ROBOT_REVIEW_JSON_END",
|
"ROBOT_REVIEW_JSON_END",
|
||||||
].join("\n");
|
].join("\n");
|
||||||
}
|
}
|
||||||
@@ -147,6 +149,9 @@ async function runAutomaticRobotReview(
|
|||||||
scope: typeof parsed.scope === "string" ? parsed.scope : "task evidence package",
|
scope: typeof parsed.scope === "string" ? parsed.scope : "task evidence package",
|
||||||
observations,
|
observations,
|
||||||
blind_spots: typeof parsed.blind_spots === "string" ? parsed.blind_spots : "not stated",
|
blind_spots: typeof parsed.blind_spots === "string" ? parsed.blind_spots : "not stated",
|
||||||
|
accepted: typeof parsed.accepted === "boolean"
|
||||||
|
? parsed.accepted
|
||||||
|
: parsed.evidence_complete === true && parsed.evidence_convincing === true,
|
||||||
evidence_complete: parsed.evidence_complete === true,
|
evidence_complete: parsed.evidence_complete === true,
|
||||||
evidence_convincing: parsed.evidence_convincing === true,
|
evidence_convincing: parsed.evidence_convincing === true,
|
||||||
missing_evidence,
|
missing_evidence,
|
||||||
@@ -249,6 +254,27 @@ export default function (pi: ExtensionAPI) {
|
|||||||
showPersistedTasks();
|
showPersistedTasks();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
pi.on("before_agent_start", async (event) => {
|
||||||
|
const followups = store.list().flatMap(task => {
|
||||||
|
const latest = getLatestRobotReview(task);
|
||||||
|
return latest && !latest.accepted ? [{ task, latest }] : [];
|
||||||
|
});
|
||||||
|
if (followups.length === 0) return undefined;
|
||||||
|
|
||||||
|
const reminder = followups.map(({ task, latest }) => {
|
||||||
|
const missing = latest.missing_evidence.length > 0
|
||||||
|
? ` Missing evidence: ${latest.missing_evidence.join("; ")}.`
|
||||||
|
: "";
|
||||||
|
return `- Task #${task.id} ${task.subject}: latest robot review rejected the evidence.${missing} Strengthen the evidence, call lgtm_ask again, then rerun robot_review_run before asking for human sign-off.`;
|
||||||
|
}).join("\n");
|
||||||
|
|
||||||
|
return {
|
||||||
|
systemPrompt:
|
||||||
|
event.systemPrompt +
|
||||||
|
`\n\n<system-reminder>\nLatest robot review follow-up required:\n${reminder}\nDo not ask for human sign-off until the latest robot review accepts the evidence.\n</system-reminder>\n`,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
pi.on("session_switch" as any, async (event: any, ctx: ExtensionContext) => {
|
pi.on("session_switch" as any, async (event: any, ctx: ExtensionContext) => {
|
||||||
widget.setUICtx(ctx.ui as UICtx);
|
widget.setUICtx(ctx.ui as UICtx);
|
||||||
const isResume = event?.reason === "resume";
|
const isResume = event?.reason === "resume";
|
||||||
@@ -578,6 +604,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`,
|
|||||||
blind_spots: Type.String({ description: "What the reviewer did not inspect or could not verify" }),
|
blind_spots: Type.String({ description: "What the reviewer did not inspect or could not verify" }),
|
||||||
evidence_complete: Type.Boolean({ description: "Whether the supplied evidence covers the claimed done criterion." }),
|
evidence_complete: Type.Boolean({ description: "Whether the supplied evidence covers the claimed done criterion." }),
|
||||||
evidence_convincing: Type.Boolean({ description: "Whether the supplied evidence would convince a skeptical reviewer." }),
|
evidence_convincing: Type.Boolean({ description: "Whether the supplied evidence would convince a skeptical reviewer." }),
|
||||||
|
accepted: Type.Optional(Type.Boolean({ description: "Overall review decision. Defaults to evidence_complete && evidence_convincing." })),
|
||||||
missing_evidence: Type.Optional(Type.Array(Type.String(), { description: "Concrete missing checks, artifacts, or observations needed before human sign-off." })),
|
missing_evidence: Type.Optional(Type.Array(Type.String(), { description: "Concrete missing checks, artifacts, or observations needed before human sign-off." })),
|
||||||
}),
|
}),
|
||||||
|
|
||||||
@@ -594,6 +621,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`,
|
|||||||
scope: params.scope,
|
scope: params.scope,
|
||||||
observations: params.observations,
|
observations: params.observations,
|
||||||
blind_spots: params.blind_spots,
|
blind_spots: params.blind_spots,
|
||||||
|
accepted: params.accepted ?? (params.evidence_complete && params.evidence_convincing),
|
||||||
evidence_complete: params.evidence_complete,
|
evidence_complete: params.evidence_complete,
|
||||||
evidence_convincing: params.evidence_convincing,
|
evidence_convincing: params.evidence_convincing,
|
||||||
missing_evidence: params.missing_evidence ?? [],
|
missing_evidence: params.missing_evidence ?? [],
|
||||||
@@ -609,6 +637,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`,
|
|||||||
`Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` +
|
`Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` +
|
||||||
`Reviewer: ${params.reviewer}\n` +
|
`Reviewer: ${params.reviewer}\n` +
|
||||||
`Scope: ${params.scope}\n\n` +
|
`Scope: ${params.scope}\n\n` +
|
||||||
|
`Accepted: ${(params.accepted ?? (params.evidence_complete && params.evidence_convincing)) ? "yes" : "no"}\n` +
|
||||||
`Evidence complete: ${params.evidence_complete ? "yes" : "no"}\n` +
|
`Evidence complete: ${params.evidence_complete ? "yes" : "no"}\n` +
|
||||||
`Evidence convincing: ${params.evidence_convincing ? "yes" : "no"}\n\n` +
|
`Evidence convincing: ${params.evidence_convincing ? "yes" : "no"}\n\n` +
|
||||||
`### Observations\n${params.observations.map(o => `- ${o}`).join("\n")}\n\n` +
|
`### Observations\n${params.observations.map(o => `- ${o}`).join("\n")}\n\n` +
|
||||||
@@ -651,6 +680,7 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
|||||||
`## Automatic robot review for task #${task.id}: ${task.subject}\n` +
|
`## Automatic robot review for task #${task.id}: ${task.subject}\n` +
|
||||||
`Reviewer command: ${command}\n` +
|
`Reviewer command: ${command}\n` +
|
||||||
`Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` +
|
`Iteration: ${getRobotReviews(store.get(params.taskId)!).length}\n` +
|
||||||
|
`Accepted: ${review.accepted ? "yes" : "no"}\n` +
|
||||||
`Evidence complete: ${review.evidence_complete ? "yes" : "no"}\n` +
|
`Evidence complete: ${review.evidence_complete ? "yes" : "no"}\n` +
|
||||||
`Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}\n\n` +
|
`Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}\n\n` +
|
||||||
`### Observations\n${review.observations.map(o => `- ${o}`).join("\n")}\n\n` +
|
`### Observations\n${review.observations.map(o => `- ${o}`).join("\n")}\n\n` +
|
||||||
|
|||||||
+12
-1
@@ -8,6 +8,7 @@ export interface RobotReviewRecord {
|
|||||||
scope: string;
|
scope: string;
|
||||||
observations: string[];
|
observations: string[];
|
||||||
blind_spots: string;
|
blind_spots: string;
|
||||||
|
accepted: boolean;
|
||||||
evidence_complete: boolean;
|
evidence_complete: boolean;
|
||||||
evidence_convincing: boolean;
|
evidence_convincing: boolean;
|
||||||
missing_evidence: string[];
|
missing_evidence: string[];
|
||||||
@@ -33,6 +34,10 @@ function normalizeReview(value: unknown, index: number): RobotReviewRecord | und
|
|||||||
scope,
|
scope,
|
||||||
observations,
|
observations,
|
||||||
blind_spots: typeof review.blind_spots === "string" ? review.blind_spots : "not recorded",
|
blind_spots: typeof review.blind_spots === "string" ? review.blind_spots : "not recorded",
|
||||||
|
accepted: typeof review.accepted === "boolean"
|
||||||
|
? review.accepted
|
||||||
|
: (typeof review.evidence_complete === "boolean" ? review.evidence_complete : true)
|
||||||
|
&& (typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true),
|
||||||
evidence_complete: typeof review.evidence_complete === "boolean" ? review.evidence_complete : true,
|
evidence_complete: typeof review.evidence_complete === "boolean" ? review.evidence_complete : true,
|
||||||
evidence_convincing: typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true,
|
evidence_convincing: typeof review.evidence_convincing === "boolean" ? review.evidence_convincing : true,
|
||||||
missing_evidence: toStringArray(review.missing_evidence),
|
missing_evidence: toStringArray(review.missing_evidence),
|
||||||
@@ -51,6 +56,10 @@ function getLegacyRobotReview(task: Task): RobotReviewRecord | undefined {
|
|||||||
scope: typeof task.metadata?.robot_review_scope === "string" ? task.metadata.robot_review_scope : "unknown",
|
scope: typeof task.metadata?.robot_review_scope === "string" ? task.metadata.robot_review_scope : "unknown",
|
||||||
observations,
|
observations,
|
||||||
blind_spots: typeof task.metadata?.robot_review_blind_spots === "string" ? task.metadata.robot_review_blind_spots : "not recorded",
|
blind_spots: typeof task.metadata?.robot_review_blind_spots === "string" ? task.metadata.robot_review_blind_spots : "not recorded",
|
||||||
|
accepted: typeof task.metadata?.robot_review_accepted === "boolean"
|
||||||
|
? task.metadata.robot_review_accepted
|
||||||
|
: (typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true)
|
||||||
|
&& (typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true),
|
||||||
evidence_complete: typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true,
|
evidence_complete: typeof task.metadata?.robot_review_evidence_complete === "boolean" ? task.metadata.robot_review_evidence_complete : true,
|
||||||
evidence_convincing: typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true,
|
evidence_convincing: typeof task.metadata?.robot_review_evidence_convincing === "boolean" ? task.metadata.robot_review_evidence_convincing : true,
|
||||||
missing_evidence: toStringArray(task.metadata?.robot_review_missing_evidence),
|
missing_evidence: toStringArray(task.metadata?.robot_review_missing_evidence),
|
||||||
@@ -81,6 +90,7 @@ export function getLatestRobotReview(task: Task): RobotReviewRecord | undefined
|
|||||||
export function appendRobotReviewMetadata(task: Task, review: Omit<RobotReviewRecord, "iteration">): Record<string, unknown> {
|
export function appendRobotReviewMetadata(task: Task, review: Omit<RobotReviewRecord, "iteration">): Record<string, unknown> {
|
||||||
const robot_reviews = [...getRobotReviews(task), { ...review, iteration: 0 }].map((entry, index) => ({
|
const robot_reviews = [...getRobotReviews(task), { ...review, iteration: 0 }].map((entry, index) => ({
|
||||||
...entry,
|
...entry,
|
||||||
|
accepted: entry.accepted,
|
||||||
iteration: index + 1,
|
iteration: index + 1,
|
||||||
}));
|
}));
|
||||||
const latest = robot_reviews[robot_reviews.length - 1];
|
const latest = robot_reviews[robot_reviews.length - 1];
|
||||||
@@ -90,6 +100,7 @@ export function appendRobotReviewMetadata(task: Task, review: Omit<RobotReviewRe
|
|||||||
robot_review_scope: latest.scope,
|
robot_review_scope: latest.scope,
|
||||||
robot_review_observations: latest.observations,
|
robot_review_observations: latest.observations,
|
||||||
robot_review_blind_spots: latest.blind_spots,
|
robot_review_blind_spots: latest.blind_spots,
|
||||||
|
robot_review_accepted: latest.accepted,
|
||||||
robot_review_evidence_complete: latest.evidence_complete,
|
robot_review_evidence_complete: latest.evidence_complete,
|
||||||
robot_review_evidence_convincing: latest.evidence_convincing,
|
robot_review_evidence_convincing: latest.evidence_convincing,
|
||||||
robot_review_missing_evidence: latest.missing_evidence,
|
robot_review_missing_evidence: latest.missing_evidence,
|
||||||
@@ -103,5 +114,5 @@ export function appendRobotReviewMetadata(task: Task, review: Omit<RobotReviewRe
|
|||||||
|
|
||||||
export function latestRobotReviewPasses(task: Task): boolean {
|
export function latestRobotReviewPasses(task: Task): boolean {
|
||||||
const latest = getLatestRobotReview(task);
|
const latest = getLatestRobotReview(task);
|
||||||
return latest ? latest.evidence_complete && latest.evidence_convincing : false;
|
return latest ? latest.accepted : false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ describe("getReviewBadges", () => {
|
|||||||
scope: "task evidence",
|
scope: "task evidence",
|
||||||
observations: ["Observed one unchecked edge case"],
|
observations: ["Observed one unchecked edge case"],
|
||||||
blind_spots: "Did not inspect prod traffic",
|
blind_spots: "Did not inspect prod traffic",
|
||||||
|
accepted: false,
|
||||||
evidence_complete: false,
|
evidence_complete: false,
|
||||||
evidence_convincing: false,
|
evidence_convincing: false,
|
||||||
missing_evidence: ["Prod traffic sample"],
|
missing_evidence: ["Prod traffic sample"],
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ describe("robot review helpers", () => {
|
|||||||
expect(reviews).toHaveLength(1);
|
expect(reviews).toHaveLength(1);
|
||||||
expect(reviews[0].reviewer).toBe("opencode");
|
expect(reviews[0].reviewer).toBe("opencode");
|
||||||
expect(reviews[0].iteration).toBe(1);
|
expect(reviews[0].iteration).toBe(1);
|
||||||
|
expect(reviews[0].accepted).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("appends robot reviews as iterations", () => {
|
it("appends robot reviews as iterations", () => {
|
||||||
@@ -45,6 +46,7 @@ describe("robot review helpers", () => {
|
|||||||
scope: "task evidence",
|
scope: "task evidence",
|
||||||
observations: ["Observed missing benchmark output"],
|
observations: ["Observed missing benchmark output"],
|
||||||
blind_spots: "Did not inspect prod config",
|
blind_spots: "Did not inspect prod config",
|
||||||
|
accepted: false,
|
||||||
evidence_complete: false,
|
evidence_complete: false,
|
||||||
evidence_convincing: false,
|
evidence_convincing: false,
|
||||||
missing_evidence: ["Benchmark output for the claimed speedup"],
|
missing_evidence: ["Benchmark output for the claimed speedup"],
|
||||||
@@ -57,6 +59,7 @@ describe("robot review helpers", () => {
|
|||||||
scope: "updated task evidence",
|
scope: "updated task evidence",
|
||||||
observations: ["Observed benchmark output and test transcript"],
|
observations: ["Observed benchmark output and test transcript"],
|
||||||
blind_spots: "Did not inspect long-run stability",
|
blind_spots: "Did not inspect long-run stability",
|
||||||
|
accepted: true,
|
||||||
evidence_complete: true,
|
evidence_complete: true,
|
||||||
evidence_convincing: true,
|
evidence_convincing: true,
|
||||||
missing_evidence: [],
|
missing_evidence: [],
|
||||||
|
|||||||
Reference in New Issue
Block a user