mirror of
https://github.com/wassname/pi-lgtm.git
synced 2026-06-27 15:31:29 +08:00
Simplify automatic robot review
This commit is contained in:
@@ -92,7 +92,7 @@ After calling this, the task shows `👀` and is only completable via `/lgtm <id
|
||||
|
||||
The tool result includes a non-blocking self-check prompt asking whether the evidence directly addresses the `done_criterion` and whether a skeptical reviewer would find it convincing.
|
||||
|
||||
`lgtm_ask` always runs the robot-review stage immediately after storing evidence. A failing or errored robot review clears `pending_approval` until the evidence is strengthened and reviewed again.
|
||||
`lgtm_ask` always runs the robot-review stage immediately after storing evidence. A robot review that rejects the evidence clears `pending_approval` until the evidence is strengthened and reviewed again. A reviewer crash, auth failure, timeout, or malformed output is recorded as a warning and leaves human sign-off open.
|
||||
|
||||
### `lgtm_supersede`
|
||||
|
||||
@@ -122,21 +122,15 @@ Use this from a separate subagent or other model when possible. Reviews append a
|
||||
|
||||
### `robot_review_run`
|
||||
|
||||
Run the configured automatic robot reviewer against the current task evidence.
|
||||
Run the automatic robot reviewer against the current task evidence using the current session model.
|
||||
|
||||
Default reviewer stage:
|
||||
|
||||
```bash
|
||||
pi --mode json -p --no-session
|
||||
pi --mode json -p --no-session --no-tools --no-extensions --model <current-session-model>
|
||||
```
|
||||
|
||||
Override with:
|
||||
|
||||
```bash
|
||||
PI_LGTM_ROBOT_REVIEW_MODEL='openai/gpt-5'
|
||||
```
|
||||
|
||||
This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again.
|
||||
This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again. If the reviewer process fails to run or returns malformed output, the failure is recorded but human sign-off stays open.
|
||||
|
||||
## Commands
|
||||
|
||||
@@ -154,8 +148,8 @@ Interactive menu: view tasks, create task, clear completed/all.
|
||||
pending -> in_progress -> (lgtm_ask)
|
||||
-> current evidence iteration N
|
||||
-> robot review iteration(s) on current evidence 🤖
|
||||
-> pending_approval 👀 if latest robot review passes
|
||||
-> reviewer_failed_to_run | reviewer_rejected
|
||||
-> pending_approval 👀 if latest robot review passes, or reviewer infra fails
|
||||
-> reviewer_rejected
|
||||
-> lgtm_supersede or newer lgtm_ask -> superseded history + fresh current evidence
|
||||
-> (/lgtm) -> completed
|
||||
-> deleted
|
||||
|
||||
+42
-41
@@ -77,27 +77,20 @@ export function getRobotReviewTimeoutMs(env: NodeJS.ProcessEnv = process.env): n
|
||||
return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pick a reviewer model from a different provider than the current one.
|
||||
* Prefers cheap/fast models suitable for review tasks.
|
||||
* Returns undefined if no alternate provider is available (falls back to same model).
|
||||
*/
|
||||
export function pickAlternateReviewerModel(currentProviderId?: string): string | undefined {
|
||||
// Ordered by: cheap, fast, good enough for structured review
|
||||
const providerModels: Record<string, string> = {
|
||||
anthropic: "anthropic/claude-haiku-4-5",
|
||||
"github-copilot": "github-copilot/gemini-3-flash-preview",
|
||||
openrouter: "openrouter/google/gemini-2.5-flash",
|
||||
};
|
||||
const providers = Object.keys(providerModels);
|
||||
const current = currentProviderId ?? "";
|
||||
|
||||
// Try a different provider first
|
||||
for (const p of providers) {
|
||||
if (p !== current) return providerModels[p];
|
||||
}
|
||||
// All same? Just return the first non-current
|
||||
return providers.length > 0 ? providerModels[providers[0]] : undefined;
|
||||
/** Format pi's current model object as the CLI's provider/model reference. */
|
||||
export function getCurrentModelRef(model: unknown): string | undefined {
|
||||
if (!model || typeof model !== "object") return undefined;
|
||||
const provider = typeof (model as any).provider === "string"
|
||||
? (model as any).provider
|
||||
: typeof (model as any).providerId === "string"
|
||||
? (model as any).providerId
|
||||
: undefined;
|
||||
const id = typeof (model as any).id === "string"
|
||||
? (model as any).id
|
||||
: typeof (model as any).modelId === "string"
|
||||
? (model as any).modelId
|
||||
: undefined;
|
||||
return provider && id ? `${provider}/${id}` : undefined;
|
||||
}
|
||||
|
||||
function getAssistantTextFromPiEvent(event: any): string | undefined {
|
||||
@@ -583,16 +576,17 @@ function buildRobotReviewPrompt(task: any): string {
|
||||
async function runAutomaticRobotReview(
|
||||
task: any,
|
||||
signal?: AbortSignal,
|
||||
currentProviderId?: string,
|
||||
currentModelRef?: string,
|
||||
): Promise<{ review: Omit<RobotReviewRecord, "iteration">; command: string }> {
|
||||
if (!currentModelRef) {
|
||||
throw new Error("Automatic robot review requires an active current session model.");
|
||||
}
|
||||
const prompt = buildRobotReviewPrompt(task);
|
||||
const args = ["--mode", "json", "-p", "--no-session", "--no-tools", "--no-extensions"];
|
||||
const reviewerModel = process.env.PI_LGTM_ROBOT_REVIEW_MODEL?.trim() || pickAlternateReviewerModel(currentProviderId);
|
||||
if (reviewerModel) args.push("--model", reviewerModel);
|
||||
const args = ["--mode", "json", "-p", "--no-session", "--no-tools", "--no-extensions", "--model", currentModelRef];
|
||||
args.push(prompt);
|
||||
const invocation = getPiInvocation(args);
|
||||
const timeoutMs = getRobotReviewTimeoutMs();
|
||||
const commandLabel = `${invocation.command} ${invocation.args.slice(0, reviewerModel ? 6 : 4).join(" ")}`;
|
||||
const commandLabel = `${invocation.command} ${invocation.args.slice(0, -1).join(" ")}`;
|
||||
const result = await runRobotReviewCommand(invocation, signal, timeoutMs);
|
||||
if (result.exitCode !== 0) {
|
||||
const error = new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`) as Error & { rawOutput?: string };
|
||||
@@ -710,14 +704,11 @@ export default function (pi: ExtensionAPI) {
|
||||
let currentTurn = 0;
|
||||
let lastTaskToolUseTurn = 0;
|
||||
let reminderInjectedThisCycle = false;
|
||||
let currentProvider: string | undefined;
|
||||
|
||||
pi.on("turn_start", async (_event, ctx) => {
|
||||
currentTurn++;
|
||||
widget.setUICtx(ctx.ui as UICtx);
|
||||
upgradeStoreIfNeeded(ctx);
|
||||
const model = ctx.model;
|
||||
if (model) currentProvider = (model as any).providerId ?? (model as any).provider;
|
||||
if (autoClear.onTurnStart(currentTurn)) widget.update();
|
||||
});
|
||||
|
||||
@@ -1035,7 +1026,7 @@ Do NOT summarize or interpret. Paste literal command output, exact log lines, ma
|
||||
supersede_reason: Type.Optional(Type.String({ description: "Why this evidence replaces an older submission on the same task." })),
|
||||
}),
|
||||
|
||||
async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
|
||||
async execute(_toolCallId, params, signal, _onUpdate, ctx) {
|
||||
const task = store.get(params.taskId);
|
||||
if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`));
|
||||
if (task.status === "completed") return Promise.resolve(textResult(`Task #${params.taskId} already completed`));
|
||||
@@ -1065,7 +1056,7 @@ Do NOT summarize or interpret. Paste literal command output, exact log lines, ma
|
||||
const refreshedTask = store.get(params.taskId);
|
||||
if (!refreshedTask) return textResult(`Task #${params.taskId} not found after evidence update`);
|
||||
try {
|
||||
const { review, command } = await runAutomaticRobotReview(refreshedTask, signal, currentProvider);
|
||||
const { review, command } = await runAutomaticRobotReview(refreshedTask, signal, getCurrentModelRef(ctx.model));
|
||||
store.update(params.taskId, {
|
||||
pending_approval: shouldOpenHumanSignoffGate(refreshedTask, review.accepted),
|
||||
metadata: {
|
||||
@@ -1091,13 +1082,13 @@ Do NOT summarize or interpret. Paste literal command output, exact log lines, ma
|
||||
}
|
||||
} catch (err: any) {
|
||||
store.update(params.taskId, {
|
||||
pending_approval: false,
|
||||
pending_approval: refreshedTask.pending_approval,
|
||||
metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput),
|
||||
});
|
||||
robotReviewNote =
|
||||
`\n\n### Automatic robot review\n` +
|
||||
`Reviewer failed: ${err.message}\n` +
|
||||
`Human sign-off is blocked until the reviewer stage succeeds.` +
|
||||
`Human sign-off is still allowed because reviewer failures are warnings, not evidence rejections.` +
|
||||
(typeof err.rawOutput === "string" && err.rawOutput.trim()
|
||||
? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}`
|
||||
: "");
|
||||
@@ -1233,7 +1224,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`,
|
||||
pi.registerTool({
|
||||
name: "robot_review_run",
|
||||
label: "robot_review_run",
|
||||
description: `Run the configured automatic robot reviewer against the current task evidence.
|
||||
description: `Run the automatic robot reviewer against the current task evidence using the current session model.
|
||||
|
||||
Runs the same Pi-native reviewer stage used automatically by \`lgtm_ask\`.
|
||||
|
||||
@@ -1250,7 +1241,7 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
||||
}
|
||||
|
||||
try {
|
||||
const { review, command } = await runAutomaticRobotReview(task, signal, currentProvider);
|
||||
const { review, command } = await runAutomaticRobotReview(task, signal, getCurrentModelRef(_ctx.model));
|
||||
store.update(params.taskId, {
|
||||
pending_approval: shouldOpenHumanSignoffGate(task, review.accepted),
|
||||
metadata: {
|
||||
@@ -1277,13 +1268,14 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
||||
);
|
||||
} catch (err: any) {
|
||||
store.update(params.taskId, {
|
||||
pending_approval: false,
|
||||
pending_approval: task.pending_approval,
|
||||
metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput),
|
||||
});
|
||||
widget.update();
|
||||
return textResult(
|
||||
`## Automatic robot review for task #${task.id}: ${task.subject}\n` +
|
||||
`Reviewer failed: ${err.message}\n\n` +
|
||||
`Automatic review failures are warnings, not evidence rejections, so human sign-off is still allowed.\n\n` +
|
||||
`Gate status: ${getGateStatus(store.get(params.taskId) ?? task)}` +
|
||||
(typeof err.rawOutput === "string" && err.rawOutput.trim()
|
||||
? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}`
|
||||
@@ -1463,12 +1455,17 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
||||
if (noEvidence) {
|
||||
title = `⚠ Task #${taskId} has no agent-submitted evidence.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
||||
signLabel = "✓ Override — sign off without evidence";
|
||||
} else if (reviewerFailed) {
|
||||
title = `⚠ Task #${taskId} automatic robot review failed.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
||||
signLabel = "✓ Override — sign off despite reviewer failure";
|
||||
} else if (robotRejected) {
|
||||
title = `⚠ Task #${taskId} robot review rejected the evidence.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
||||
signLabel = "✓ Override — sign off despite rejected review";
|
||||
} else if (reviewerFailed) {
|
||||
if (task.pending_approval) {
|
||||
title = `⚠ Task #${taskId} automatic robot review failed, but human sign-off is still allowed.\nContinue?\nDone: ${task.done_criterion}`;
|
||||
signLabel = "✓ LGTM — sign off despite reviewer warning";
|
||||
} else {
|
||||
title = `⚠ Task #${taskId} automatic robot review failed.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
||||
signLabel = "✓ Override — sign off despite reviewer failure";
|
||||
}
|
||||
}
|
||||
const confirm = await ctx.ui.select(title, [signLabel, "✗ Cancel"]);
|
||||
if (confirm !== signLabel) return;
|
||||
@@ -1505,7 +1502,7 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
||||
};
|
||||
for (const t of open) groups[getDisplayStatus(t)].push(t);
|
||||
const groupLabel: Record<DisplayStatus, string> = {
|
||||
awaiting_signoff: "READY (robot ✓)",
|
||||
awaiting_signoff: "READY (human sign-off open)",
|
||||
in_progress: "ACTIVE (no /lgtm evidence)",
|
||||
pending: "PENDING (not started)",
|
||||
completed: "DONE",
|
||||
@@ -1516,7 +1513,11 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
||||
if (inBucket.length === 0) continue;
|
||||
lines.push(` ${groupLabel[status]}:`);
|
||||
for (const t of inBucket) {
|
||||
const warn = status === "awaiting_signoff" && !latestRobotReviewPasses(t) ? " ⚠ robot rejected" : "";
|
||||
const warn = status === "awaiting_signoff"
|
||||
? (typeof t.metadata?.robot_review_last_error === "string"
|
||||
? " ⚠ reviewer warning"
|
||||
: (!latestRobotReviewPasses(t) ? " ⚠ robot rejected" : ""))
|
||||
: "";
|
||||
lines.push(` #${t.id} ${t.subject}${warn}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,10 +57,10 @@ export function getCompletionMode(task: Task): CompletionMode {
|
||||
|
||||
export function getReviewState(task: Task): ReviewState {
|
||||
if (task.status === "completed") return "human_signed_off";
|
||||
if (task.pending_approval && hasCurrentEvidence(task)) return "ready_for_human";
|
||||
if (typeof task.metadata?.robot_review_last_error === "string") return "reviewer_failed_to_run";
|
||||
const latest = getLatestRobotReview(task);
|
||||
if (latest && !latest.accepted) return "reviewer_rejected";
|
||||
if (task.pending_approval && hasCurrentEvidence(task)) return "ready_for_human";
|
||||
if (typeof task.metadata?.robot_review_last_error === "string") return "reviewer_failed_to_run";
|
||||
if (hasCurrentEvidence(task)) return "evidence_submitted";
|
||||
if (hasEvidenceHistory(task)) return "superseded";
|
||||
return "no_evidence";
|
||||
@@ -70,7 +70,12 @@ export function getGateStatus(task: Task): string {
|
||||
const state = getReviewState(task);
|
||||
if (state === "human_signed_off") return "human signed off";
|
||||
if (state === "no_evidence") return "no lgtm evidence submitted";
|
||||
if (state === "ready_for_human") return `ready for human sign-off via /lgtm ${task.id}`;
|
||||
if (state === "ready_for_human") {
|
||||
if (typeof task.metadata?.robot_review_last_error === "string") {
|
||||
return `warning: automatic robot review failed, human sign-off still allowed via /lgtm ${task.id}: ${task.metadata.robot_review_last_error}`;
|
||||
}
|
||||
return `ready for human sign-off via /lgtm ${task.id}`;
|
||||
}
|
||||
if (state === "reviewer_failed_to_run") {
|
||||
return `blocked: automatic robot review failed: ${task.metadata.robot_review_last_error}`;
|
||||
}
|
||||
|
||||
@@ -82,13 +82,23 @@ describe("getGateStatus", () => {
|
||||
}))).toBe("ready for human sign-off via /lgtm 1");
|
||||
});
|
||||
|
||||
it("reports reviewer failure separately from rejected evidence", () => {
|
||||
it("reports blocking reviewer failure when human sign-off is closed", () => {
|
||||
expect(getGateStatus(makeTask({
|
||||
metadata: {
|
||||
lgtm_evidence: "ok",
|
||||
robot_review_last_error: "Unexpected token 'a'",
|
||||
},
|
||||
}))).toContain("automatic robot review failed");
|
||||
}))).toContain("blocked: automatic robot review failed");
|
||||
});
|
||||
|
||||
it("reports reviewer failure as a warning when human sign-off stays open", () => {
|
||||
expect(getGateStatus(makeTask({
|
||||
pending_approval: true,
|
||||
metadata: {
|
||||
lgtm_evidence: "ok",
|
||||
robot_review_last_error: "Unexpected token 'a'",
|
||||
},
|
||||
}))).toContain("warning: automatic robot review failed");
|
||||
});
|
||||
|
||||
it("reports rejected robot review when latest review does not accept", () => {
|
||||
@@ -111,6 +121,29 @@ describe("getGateStatus", () => {
|
||||
},
|
||||
}))).toBe("blocked: latest robot review rejected the evidence");
|
||||
});
|
||||
|
||||
it("keeps rejection higher priority than a later reviewer warning", () => {
|
||||
expect(getGateStatus(makeTask({
|
||||
pending_approval: true,
|
||||
metadata: {
|
||||
lgtm_evidence: "ok",
|
||||
robot_review_last_error: "timeout",
|
||||
robot_reviews: [{
|
||||
iteration: 1,
|
||||
reviewer: "opencode",
|
||||
scope: "task evidence",
|
||||
observations: ["Observed missing output"],
|
||||
blind_spots: "none",
|
||||
accepted: false,
|
||||
evidence_complete: false,
|
||||
evidence_convincing: false,
|
||||
missing_evidence: ["literal output"],
|
||||
submitted_at: "2026-04-17T00:00:00.000Z",
|
||||
mode: "manual",
|
||||
}],
|
||||
},
|
||||
}))).toBe("blocked: latest robot review rejected the evidence");
|
||||
});
|
||||
});
|
||||
|
||||
describe("getDisplayStatus", () => {
|
||||
|
||||
@@ -3,6 +3,7 @@ import {
|
||||
DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
|
||||
extractFinalAssistantTextFromPiJsonl,
|
||||
extractRobotReviewJson,
|
||||
getCurrentModelRef,
|
||||
getPiInvocation,
|
||||
getRobotReviewTimeoutMs,
|
||||
runRobotReviewCommand,
|
||||
@@ -49,6 +50,12 @@ describe("robot review runner helpers", () => {
|
||||
expect(getRobotReviewTimeoutMs({ PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS: "bad" } as NodeJS.ProcessEnv)).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
it("formats the current model as the reviewer model ref", () => {
|
||||
expect(getCurrentModelRef({ provider: "openai", id: "gpt-5" })).toBe("openai/gpt-5");
|
||||
expect(getCurrentModelRef({ providerId: "anthropic", modelId: "claude-haiku" })).toBe("anthropic/claude-haiku");
|
||||
expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined();
|
||||
});
|
||||
|
||||
it("times out bounded child commands", async () => {
|
||||
await expect(runRobotReviewCommand({
|
||||
command: process.execPath,
|
||||
|
||||
Reference in New Issue
Block a user