mirror of
https://github.com/wassname/pi-lgtm.git
synced 2026-06-27 16:46:17 +08:00
Simplify automatic robot review
This commit is contained in:
@@ -92,7 +92,7 @@ After calling this, the task shows `👀` and is only completable via `/lgtm <id
|
|||||||
|
|
||||||
The tool result includes a non-blocking self-check prompt asking whether the evidence directly addresses the `done_criterion` and whether a skeptical reviewer would find it convincing.
|
The tool result includes a non-blocking self-check prompt asking whether the evidence directly addresses the `done_criterion` and whether a skeptical reviewer would find it convincing.
|
||||||
|
|
||||||
`lgtm_ask` always runs the robot-review stage immediately after storing evidence. A failing or errored robot review clears `pending_approval` until the evidence is strengthened and reviewed again.
|
`lgtm_ask` always runs the robot-review stage immediately after storing evidence. A robot review that rejects the evidence clears `pending_approval` until the evidence is strengthened and reviewed again. A reviewer crash, auth failure, timeout, or malformed output is recorded as a warning and leaves human sign-off open.
|
||||||
|
|
||||||
### `lgtm_supersede`
|
### `lgtm_supersede`
|
||||||
|
|
||||||
@@ -122,21 +122,15 @@ Use this from a separate subagent or other model when possible. Reviews append a
|
|||||||
|
|
||||||
### `robot_review_run`
|
### `robot_review_run`
|
||||||
|
|
||||||
Run the configured automatic robot reviewer against the current task evidence.
|
Run the automatic robot reviewer against the current task evidence using the current session model.
|
||||||
|
|
||||||
Default reviewer stage:
|
Default reviewer stage:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pi --mode json -p --no-session
|
pi --mode json -p --no-session --no-tools --no-extensions --model <current-session-model>
|
||||||
```
|
```
|
||||||
|
|
||||||
Override with:
|
This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again. If the reviewer process fails to run or returns malformed output, the failure is recorded but human sign-off stays open.
|
||||||
|
|
||||||
```bash
|
|
||||||
PI_LGTM_ROBOT_REVIEW_MODEL='openai/gpt-5'
|
|
||||||
```
|
|
||||||
|
|
||||||
This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again.
|
|
||||||
|
|
||||||
## Commands
|
## Commands
|
||||||
|
|
||||||
@@ -154,8 +148,8 @@ Interactive menu: view tasks, create task, clear completed/all.
|
|||||||
pending -> in_progress -> (lgtm_ask)
|
pending -> in_progress -> (lgtm_ask)
|
||||||
-> current evidence iteration N
|
-> current evidence iteration N
|
||||||
-> robot review iteration(s) on current evidence 🤖
|
-> robot review iteration(s) on current evidence 🤖
|
||||||
-> pending_approval 👀 if latest robot review passes
|
-> pending_approval 👀 if latest robot review passes, or reviewer infra fails
|
||||||
-> reviewer_failed_to_run | reviewer_rejected
|
-> reviewer_rejected
|
||||||
-> lgtm_supersede or newer lgtm_ask -> superseded history + fresh current evidence
|
-> lgtm_supersede or newer lgtm_ask -> superseded history + fresh current evidence
|
||||||
-> (/lgtm) -> completed
|
-> (/lgtm) -> completed
|
||||||
-> deleted
|
-> deleted
|
||||||
|
|||||||
+42
-41
@@ -77,27 +77,20 @@ export function getRobotReviewTimeoutMs(env: NodeJS.ProcessEnv = process.env): n
|
|||||||
return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS;
|
return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_ROBOT_REVIEW_TIMEOUT_MS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Format pi's current model object as the CLI's provider/model reference. */
|
||||||
* Pick a reviewer model from a different provider than the current one.
|
export function getCurrentModelRef(model: unknown): string | undefined {
|
||||||
* Prefers cheap/fast models suitable for review tasks.
|
if (!model || typeof model !== "object") return undefined;
|
||||||
* Returns undefined if no alternate provider is available (falls back to same model).
|
const provider = typeof (model as any).provider === "string"
|
||||||
*/
|
? (model as any).provider
|
||||||
export function pickAlternateReviewerModel(currentProviderId?: string): string | undefined {
|
: typeof (model as any).providerId === "string"
|
||||||
// Ordered by: cheap, fast, good enough for structured review
|
? (model as any).providerId
|
||||||
const providerModels: Record<string, string> = {
|
: undefined;
|
||||||
anthropic: "anthropic/claude-haiku-4-5",
|
const id = typeof (model as any).id === "string"
|
||||||
"github-copilot": "github-copilot/gemini-3-flash-preview",
|
? (model as any).id
|
||||||
openrouter: "openrouter/google/gemini-2.5-flash",
|
: typeof (model as any).modelId === "string"
|
||||||
};
|
? (model as any).modelId
|
||||||
const providers = Object.keys(providerModels);
|
: undefined;
|
||||||
const current = currentProviderId ?? "";
|
return provider && id ? `${provider}/${id}` : undefined;
|
||||||
|
|
||||||
// Try a different provider first
|
|
||||||
for (const p of providers) {
|
|
||||||
if (p !== current) return providerModels[p];
|
|
||||||
}
|
|
||||||
// All same? Just return the first non-current
|
|
||||||
return providers.length > 0 ? providerModels[providers[0]] : undefined;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function getAssistantTextFromPiEvent(event: any): string | undefined {
|
function getAssistantTextFromPiEvent(event: any): string | undefined {
|
||||||
@@ -583,16 +576,17 @@ function buildRobotReviewPrompt(task: any): string {
|
|||||||
async function runAutomaticRobotReview(
|
async function runAutomaticRobotReview(
|
||||||
task: any,
|
task: any,
|
||||||
signal?: AbortSignal,
|
signal?: AbortSignal,
|
||||||
currentProviderId?: string,
|
currentModelRef?: string,
|
||||||
): Promise<{ review: Omit<RobotReviewRecord, "iteration">; command: string }> {
|
): Promise<{ review: Omit<RobotReviewRecord, "iteration">; command: string }> {
|
||||||
|
if (!currentModelRef) {
|
||||||
|
throw new Error("Automatic robot review requires an active current session model.");
|
||||||
|
}
|
||||||
const prompt = buildRobotReviewPrompt(task);
|
const prompt = buildRobotReviewPrompt(task);
|
||||||
const args = ["--mode", "json", "-p", "--no-session", "--no-tools", "--no-extensions"];
|
const args = ["--mode", "json", "-p", "--no-session", "--no-tools", "--no-extensions", "--model", currentModelRef];
|
||||||
const reviewerModel = process.env.PI_LGTM_ROBOT_REVIEW_MODEL?.trim() || pickAlternateReviewerModel(currentProviderId);
|
|
||||||
if (reviewerModel) args.push("--model", reviewerModel);
|
|
||||||
args.push(prompt);
|
args.push(prompt);
|
||||||
const invocation = getPiInvocation(args);
|
const invocation = getPiInvocation(args);
|
||||||
const timeoutMs = getRobotReviewTimeoutMs();
|
const timeoutMs = getRobotReviewTimeoutMs();
|
||||||
const commandLabel = `${invocation.command} ${invocation.args.slice(0, reviewerModel ? 6 : 4).join(" ")}`;
|
const commandLabel = `${invocation.command} ${invocation.args.slice(0, -1).join(" ")}`;
|
||||||
const result = await runRobotReviewCommand(invocation, signal, timeoutMs);
|
const result = await runRobotReviewCommand(invocation, signal, timeoutMs);
|
||||||
if (result.exitCode !== 0) {
|
if (result.exitCode !== 0) {
|
||||||
const error = new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`) as Error & { rawOutput?: string };
|
const error = new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`) as Error & { rawOutput?: string };
|
||||||
@@ -710,14 +704,11 @@ export default function (pi: ExtensionAPI) {
|
|||||||
let currentTurn = 0;
|
let currentTurn = 0;
|
||||||
let lastTaskToolUseTurn = 0;
|
let lastTaskToolUseTurn = 0;
|
||||||
let reminderInjectedThisCycle = false;
|
let reminderInjectedThisCycle = false;
|
||||||
let currentProvider: string | undefined;
|
|
||||||
|
|
||||||
pi.on("turn_start", async (_event, ctx) => {
|
pi.on("turn_start", async (_event, ctx) => {
|
||||||
currentTurn++;
|
currentTurn++;
|
||||||
widget.setUICtx(ctx.ui as UICtx);
|
widget.setUICtx(ctx.ui as UICtx);
|
||||||
upgradeStoreIfNeeded(ctx);
|
upgradeStoreIfNeeded(ctx);
|
||||||
const model = ctx.model;
|
|
||||||
if (model) currentProvider = (model as any).providerId ?? (model as any).provider;
|
|
||||||
if (autoClear.onTurnStart(currentTurn)) widget.update();
|
if (autoClear.onTurnStart(currentTurn)) widget.update();
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1035,7 +1026,7 @@ Do NOT summarize or interpret. Paste literal command output, exact log lines, ma
|
|||||||
supersede_reason: Type.Optional(Type.String({ description: "Why this evidence replaces an older submission on the same task." })),
|
supersede_reason: Type.Optional(Type.String({ description: "Why this evidence replaces an older submission on the same task." })),
|
||||||
}),
|
}),
|
||||||
|
|
||||||
async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
|
async execute(_toolCallId, params, signal, _onUpdate, ctx) {
|
||||||
const task = store.get(params.taskId);
|
const task = store.get(params.taskId);
|
||||||
if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`));
|
if (!task) return Promise.resolve(textResult(`Task #${params.taskId} not found`));
|
||||||
if (task.status === "completed") return Promise.resolve(textResult(`Task #${params.taskId} already completed`));
|
if (task.status === "completed") return Promise.resolve(textResult(`Task #${params.taskId} already completed`));
|
||||||
@@ -1065,7 +1056,7 @@ Do NOT summarize or interpret. Paste literal command output, exact log lines, ma
|
|||||||
const refreshedTask = store.get(params.taskId);
|
const refreshedTask = store.get(params.taskId);
|
||||||
if (!refreshedTask) return textResult(`Task #${params.taskId} not found after evidence update`);
|
if (!refreshedTask) return textResult(`Task #${params.taskId} not found after evidence update`);
|
||||||
try {
|
try {
|
||||||
const { review, command } = await runAutomaticRobotReview(refreshedTask, signal, currentProvider);
|
const { review, command } = await runAutomaticRobotReview(refreshedTask, signal, getCurrentModelRef(ctx.model));
|
||||||
store.update(params.taskId, {
|
store.update(params.taskId, {
|
||||||
pending_approval: shouldOpenHumanSignoffGate(refreshedTask, review.accepted),
|
pending_approval: shouldOpenHumanSignoffGate(refreshedTask, review.accepted),
|
||||||
metadata: {
|
metadata: {
|
||||||
@@ -1091,13 +1082,13 @@ Do NOT summarize or interpret. Paste literal command output, exact log lines, ma
|
|||||||
}
|
}
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
store.update(params.taskId, {
|
store.update(params.taskId, {
|
||||||
pending_approval: false,
|
pending_approval: refreshedTask.pending_approval,
|
||||||
metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput),
|
metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput),
|
||||||
});
|
});
|
||||||
robotReviewNote =
|
robotReviewNote =
|
||||||
`\n\n### Automatic robot review\n` +
|
`\n\n### Automatic robot review\n` +
|
||||||
`Reviewer failed: ${err.message}\n` +
|
`Reviewer failed: ${err.message}\n` +
|
||||||
`Human sign-off is blocked until the reviewer stage succeeds.` +
|
`Human sign-off is still allowed because reviewer failures are warnings, not evidence rejections.` +
|
||||||
(typeof err.rawOutput === "string" && err.rawOutput.trim()
|
(typeof err.rawOutput === "string" && err.rawOutput.trim()
|
||||||
? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}`
|
? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}`
|
||||||
: "");
|
: "");
|
||||||
@@ -1233,7 +1224,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`,
|
|||||||
pi.registerTool({
|
pi.registerTool({
|
||||||
name: "robot_review_run",
|
name: "robot_review_run",
|
||||||
label: "robot_review_run",
|
label: "robot_review_run",
|
||||||
description: `Run the configured automatic robot reviewer against the current task evidence.
|
description: `Run the automatic robot reviewer against the current task evidence using the current session model.
|
||||||
|
|
||||||
Runs the same Pi-native reviewer stage used automatically by \`lgtm_ask\`.
|
Runs the same Pi-native reviewer stage used automatically by \`lgtm_ask\`.
|
||||||
|
|
||||||
@@ -1250,7 +1241,7 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const { review, command } = await runAutomaticRobotReview(task, signal, currentProvider);
|
const { review, command } = await runAutomaticRobotReview(task, signal, getCurrentModelRef(_ctx.model));
|
||||||
store.update(params.taskId, {
|
store.update(params.taskId, {
|
||||||
pending_approval: shouldOpenHumanSignoffGate(task, review.accepted),
|
pending_approval: shouldOpenHumanSignoffGate(task, review.accepted),
|
||||||
metadata: {
|
metadata: {
|
||||||
@@ -1277,13 +1268,14 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
|||||||
);
|
);
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
store.update(params.taskId, {
|
store.update(params.taskId, {
|
||||||
pending_approval: false,
|
pending_approval: task.pending_approval,
|
||||||
metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput),
|
metadata: getAutomaticReviewFailureMetadata(err.message, err.rawOutput),
|
||||||
});
|
});
|
||||||
widget.update();
|
widget.update();
|
||||||
return textResult(
|
return textResult(
|
||||||
`## Automatic robot review for task #${task.id}: ${task.subject}\n` +
|
`## Automatic robot review for task #${task.id}: ${task.subject}\n` +
|
||||||
`Reviewer failed: ${err.message}\n\n` +
|
`Reviewer failed: ${err.message}\n\n` +
|
||||||
|
`Automatic review failures are warnings, not evidence rejections, so human sign-off is still allowed.\n\n` +
|
||||||
`Gate status: ${getGateStatus(store.get(params.taskId) ?? task)}` +
|
`Gate status: ${getGateStatus(store.get(params.taskId) ?? task)}` +
|
||||||
(typeof err.rawOutput === "string" && err.rawOutput.trim()
|
(typeof err.rawOutput === "string" && err.rawOutput.trim()
|
||||||
? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}`
|
? `\n\n${formatReviewTextBlock("Reviewer raw output", err.rawOutput.trim())}`
|
||||||
@@ -1463,12 +1455,17 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
|||||||
if (noEvidence) {
|
if (noEvidence) {
|
||||||
title = `⚠ Task #${taskId} has no agent-submitted evidence.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
title = `⚠ Task #${taskId} has no agent-submitted evidence.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
||||||
signLabel = "✓ Override — sign off without evidence";
|
signLabel = "✓ Override — sign off without evidence";
|
||||||
} else if (reviewerFailed) {
|
|
||||||
title = `⚠ Task #${taskId} automatic robot review failed.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
|
||||||
signLabel = "✓ Override — sign off despite reviewer failure";
|
|
||||||
} else if (robotRejected) {
|
} else if (robotRejected) {
|
||||||
title = `⚠ Task #${taskId} robot review rejected the evidence.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
title = `⚠ Task #${taskId} robot review rejected the evidence.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
||||||
signLabel = "✓ Override — sign off despite rejected review";
|
signLabel = "✓ Override — sign off despite rejected review";
|
||||||
|
} else if (reviewerFailed) {
|
||||||
|
if (task.pending_approval) {
|
||||||
|
title = `⚠ Task #${taskId} automatic robot review failed, but human sign-off is still allowed.\nContinue?\nDone: ${task.done_criterion}`;
|
||||||
|
signLabel = "✓ LGTM — sign off despite reviewer warning";
|
||||||
|
} else {
|
||||||
|
title = `⚠ Task #${taskId} automatic robot review failed.\nSign off anyway?\nDone: ${task.done_criterion}`;
|
||||||
|
signLabel = "✓ Override — sign off despite reviewer failure";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const confirm = await ctx.ui.select(title, [signLabel, "✗ Cancel"]);
|
const confirm = await ctx.ui.select(title, [signLabel, "✗ Cancel"]);
|
||||||
if (confirm !== signLabel) return;
|
if (confirm !== signLabel) return;
|
||||||
@@ -1505,7 +1502,7 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
|||||||
};
|
};
|
||||||
for (const t of open) groups[getDisplayStatus(t)].push(t);
|
for (const t of open) groups[getDisplayStatus(t)].push(t);
|
||||||
const groupLabel: Record<DisplayStatus, string> = {
|
const groupLabel: Record<DisplayStatus, string> = {
|
||||||
awaiting_signoff: "READY (robot ✓)",
|
awaiting_signoff: "READY (human sign-off open)",
|
||||||
in_progress: "ACTIVE (no /lgtm evidence)",
|
in_progress: "ACTIVE (no /lgtm evidence)",
|
||||||
pending: "PENDING (not started)",
|
pending: "PENDING (not started)",
|
||||||
completed: "DONE",
|
completed: "DONE",
|
||||||
@@ -1516,7 +1513,11 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
|||||||
if (inBucket.length === 0) continue;
|
if (inBucket.length === 0) continue;
|
||||||
lines.push(` ${groupLabel[status]}:`);
|
lines.push(` ${groupLabel[status]}:`);
|
||||||
for (const t of inBucket) {
|
for (const t of inBucket) {
|
||||||
const warn = status === "awaiting_signoff" && !latestRobotReviewPasses(t) ? " ⚠ robot rejected" : "";
|
const warn = status === "awaiting_signoff"
|
||||||
|
? (typeof t.metadata?.robot_review_last_error === "string"
|
||||||
|
? " ⚠ reviewer warning"
|
||||||
|
: (!latestRobotReviewPasses(t) ? " ⚠ robot rejected" : ""))
|
||||||
|
: "";
|
||||||
lines.push(` #${t.id} ${t.subject}${warn}`);
|
lines.push(` #${t.id} ${t.subject}${warn}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -57,10 +57,10 @@ export function getCompletionMode(task: Task): CompletionMode {
|
|||||||
|
|
||||||
export function getReviewState(task: Task): ReviewState {
|
export function getReviewState(task: Task): ReviewState {
|
||||||
if (task.status === "completed") return "human_signed_off";
|
if (task.status === "completed") return "human_signed_off";
|
||||||
if (task.pending_approval && hasCurrentEvidence(task)) return "ready_for_human";
|
|
||||||
if (typeof task.metadata?.robot_review_last_error === "string") return "reviewer_failed_to_run";
|
|
||||||
const latest = getLatestRobotReview(task);
|
const latest = getLatestRobotReview(task);
|
||||||
if (latest && !latest.accepted) return "reviewer_rejected";
|
if (latest && !latest.accepted) return "reviewer_rejected";
|
||||||
|
if (task.pending_approval && hasCurrentEvidence(task)) return "ready_for_human";
|
||||||
|
if (typeof task.metadata?.robot_review_last_error === "string") return "reviewer_failed_to_run";
|
||||||
if (hasCurrentEvidence(task)) return "evidence_submitted";
|
if (hasCurrentEvidence(task)) return "evidence_submitted";
|
||||||
if (hasEvidenceHistory(task)) return "superseded";
|
if (hasEvidenceHistory(task)) return "superseded";
|
||||||
return "no_evidence";
|
return "no_evidence";
|
||||||
@@ -70,7 +70,12 @@ export function getGateStatus(task: Task): string {
|
|||||||
const state = getReviewState(task);
|
const state = getReviewState(task);
|
||||||
if (state === "human_signed_off") return "human signed off";
|
if (state === "human_signed_off") return "human signed off";
|
||||||
if (state === "no_evidence") return "no lgtm evidence submitted";
|
if (state === "no_evidence") return "no lgtm evidence submitted";
|
||||||
if (state === "ready_for_human") return `ready for human sign-off via /lgtm ${task.id}`;
|
if (state === "ready_for_human") {
|
||||||
|
if (typeof task.metadata?.robot_review_last_error === "string") {
|
||||||
|
return `warning: automatic robot review failed, human sign-off still allowed via /lgtm ${task.id}: ${task.metadata.robot_review_last_error}`;
|
||||||
|
}
|
||||||
|
return `ready for human sign-off via /lgtm ${task.id}`;
|
||||||
|
}
|
||||||
if (state === "reviewer_failed_to_run") {
|
if (state === "reviewer_failed_to_run") {
|
||||||
return `blocked: automatic robot review failed: ${task.metadata.robot_review_last_error}`;
|
return `blocked: automatic robot review failed: ${task.metadata.robot_review_last_error}`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -82,13 +82,23 @@ describe("getGateStatus", () => {
|
|||||||
}))).toBe("ready for human sign-off via /lgtm 1");
|
}))).toBe("ready for human sign-off via /lgtm 1");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("reports reviewer failure separately from rejected evidence", () => {
|
it("reports blocking reviewer failure when human sign-off is closed", () => {
|
||||||
expect(getGateStatus(makeTask({
|
expect(getGateStatus(makeTask({
|
||||||
metadata: {
|
metadata: {
|
||||||
lgtm_evidence: "ok",
|
lgtm_evidence: "ok",
|
||||||
robot_review_last_error: "Unexpected token 'a'",
|
robot_review_last_error: "Unexpected token 'a'",
|
||||||
},
|
},
|
||||||
}))).toContain("automatic robot review failed");
|
}))).toContain("blocked: automatic robot review failed");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("reports reviewer failure as a warning when human sign-off stays open", () => {
|
||||||
|
expect(getGateStatus(makeTask({
|
||||||
|
pending_approval: true,
|
||||||
|
metadata: {
|
||||||
|
lgtm_evidence: "ok",
|
||||||
|
robot_review_last_error: "Unexpected token 'a'",
|
||||||
|
},
|
||||||
|
}))).toContain("warning: automatic robot review failed");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("reports rejected robot review when latest review does not accept", () => {
|
it("reports rejected robot review when latest review does not accept", () => {
|
||||||
@@ -111,6 +121,29 @@ describe("getGateStatus", () => {
|
|||||||
},
|
},
|
||||||
}))).toBe("blocked: latest robot review rejected the evidence");
|
}))).toBe("blocked: latest robot review rejected the evidence");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("keeps rejection higher priority than a later reviewer warning", () => {
|
||||||
|
expect(getGateStatus(makeTask({
|
||||||
|
pending_approval: true,
|
||||||
|
metadata: {
|
||||||
|
lgtm_evidence: "ok",
|
||||||
|
robot_review_last_error: "timeout",
|
||||||
|
robot_reviews: [{
|
||||||
|
iteration: 1,
|
||||||
|
reviewer: "opencode",
|
||||||
|
scope: "task evidence",
|
||||||
|
observations: ["Observed missing output"],
|
||||||
|
blind_spots: "none",
|
||||||
|
accepted: false,
|
||||||
|
evidence_complete: false,
|
||||||
|
evidence_convincing: false,
|
||||||
|
missing_evidence: ["literal output"],
|
||||||
|
submitted_at: "2026-04-17T00:00:00.000Z",
|
||||||
|
mode: "manual",
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
}))).toBe("blocked: latest robot review rejected the evidence");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("getDisplayStatus", () => {
|
describe("getDisplayStatus", () => {
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import {
|
|||||||
DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
|
DEFAULT_ROBOT_REVIEW_TIMEOUT_MS,
|
||||||
extractFinalAssistantTextFromPiJsonl,
|
extractFinalAssistantTextFromPiJsonl,
|
||||||
extractRobotReviewJson,
|
extractRobotReviewJson,
|
||||||
|
getCurrentModelRef,
|
||||||
getPiInvocation,
|
getPiInvocation,
|
||||||
getRobotReviewTimeoutMs,
|
getRobotReviewTimeoutMs,
|
||||||
runRobotReviewCommand,
|
runRobotReviewCommand,
|
||||||
@@ -49,6 +50,12 @@ describe("robot review runner helpers", () => {
|
|||||||
expect(getRobotReviewTimeoutMs({ PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS: "bad" } as NodeJS.ProcessEnv)).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS);
|
expect(getRobotReviewTimeoutMs({ PI_LGTM_ROBOT_REVIEW_TIMEOUT_MS: "bad" } as NodeJS.ProcessEnv)).toBe(DEFAULT_ROBOT_REVIEW_TIMEOUT_MS);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("formats the current model as the reviewer model ref", () => {
|
||||||
|
expect(getCurrentModelRef({ provider: "openai", id: "gpt-5" })).toBe("openai/gpt-5");
|
||||||
|
expect(getCurrentModelRef({ providerId: "anthropic", modelId: "claude-haiku" })).toBe("anthropic/claude-haiku");
|
||||||
|
expect(getCurrentModelRef({ provider: "openai" })).toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
it("times out bounded child commands", async () => {
|
it("times out bounded child commands", async () => {
|
||||||
await expect(runRobotReviewCommand({
|
await expect(runRobotReviewCommand({
|
||||||
command: process.execPath,
|
command: process.execPath,
|
||||||
|
|||||||
Reference in New Issue
Block a user