mirror of
https://github.com/wassname/pi-lgtm.git
synced 2026-06-27 17:01:35 +08:00
feat: run robot review via pi harness
This commit is contained in:
@@ -89,7 +89,7 @@ After calling this, the task shows `👀` and is only completable via `/lgtm <id
|
|||||||
|
|
||||||
The tool result includes a non-blocking self-check prompt asking whether the evidence directly addresses the `done_criterion` and whether a skeptical reviewer would find it convincing.
|
The tool result includes a non-blocking self-check prompt asking whether the evidence directly addresses the `done_criterion` and whether a skeptical reviewer would find it convincing.
|
||||||
|
|
||||||
`lgtm_ask` also accepts `run_robot_review` (optional). If true, or if `PI_LGTM_AUTO_ROBOT_REVIEW=1`, the extension runs the configured robot reviewer immediately after storing evidence. A failing robot review clears `pending_approval` until the evidence is strengthened.
|
`lgtm_ask` always runs the robot-review stage immediately after storing evidence. A failing or errored robot review clears `pending_approval` until the evidence is strengthened and reviewed again.
|
||||||
|
|
||||||
### `robot_review_ask`
|
### `robot_review_ask`
|
||||||
|
|
||||||
@@ -115,17 +115,16 @@ Use this from a separate subagent or other model when possible. Reviews append a
|
|||||||
|
|
||||||
Run the configured automatic robot reviewer against the current task evidence.
|
Run the configured automatic robot reviewer against the current task evidence.
|
||||||
|
|
||||||
Default reviewer command:
|
Default reviewer stage:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
acpx --approve-reads --non-interactive-permissions deny opencode exec
|
pi --mode json -p --no-session
|
||||||
```
|
```
|
||||||
|
|
||||||
Override with:
|
Override with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
PI_LGTM_ROBOT_REVIEW_CMD='acpx --approve-reads --non-interactive-permissions deny codex exec'
|
PI_LGTM_ROBOT_REVIEW_MODEL='openai/gpt-5'
|
||||||
PI_LGTM_AUTO_ROBOT_REVIEW=1
|
|
||||||
```
|
```
|
||||||
|
|
||||||
This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again.
|
This appends a new robot-review iteration. The reviewer returns an explicit `accepted` boolean as well as detailed observations, blind spots, and missing evidence. If the latest robot review rejects the evidence, `/lgtm` is blocked until stronger evidence is submitted and reviewed again.
|
||||||
|
|||||||
+99
-65
@@ -41,33 +41,12 @@ const AUTO_CLEAR_DELAY = 4;
|
|||||||
|
|
||||||
type CommandResult = { stdout: string; stderr: string; exitCode: number | null };
|
type CommandResult = { stdout: string; stderr: string; exitCode: number | null };
|
||||||
|
|
||||||
function shellQuote(text: string): string {
|
function getPiInvocation(args: string[]): { command: string; args: string[] } {
|
||||||
return JSON.stringify(text);
|
const currentScript = process.argv[1];
|
||||||
}
|
if (currentScript) {
|
||||||
|
return { command: process.execPath, args: [currentScript, ...args] };
|
||||||
function runShellCommand(command: string, signal?: AbortSignal): Promise<CommandResult> {
|
}
|
||||||
return new Promise((resolve, reject) => {
|
return { command: "pi", args };
|
||||||
const child = spawn("bash", ["-lc", command], { stdio: ["ignore", "pipe", "pipe"] });
|
|
||||||
const stdoutChunks: Buffer[] = [];
|
|
||||||
const stderrChunks: Buffer[] = [];
|
|
||||||
child.stdout.on("data", (data) => stdoutChunks.push(data));
|
|
||||||
child.stderr.on("data", (data) => stderrChunks.push(data));
|
|
||||||
child.on("error", reject);
|
|
||||||
const onAbort = () => child.kill();
|
|
||||||
signal?.addEventListener("abort", onAbort, { once: true });
|
|
||||||
child.on("close", (exitCode) => {
|
|
||||||
signal?.removeEventListener("abort", onAbort);
|
|
||||||
if (signal?.aborted) {
|
|
||||||
reject(new Error("aborted"));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
resolve({
|
|
||||||
stdout: Buffer.concat(stdoutChunks).toString("utf-8"),
|
|
||||||
stderr: Buffer.concat(stderrChunks).toString("utf-8"),
|
|
||||||
exitCode,
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractRobotReviewJson(output: string): Record<string, unknown> {
|
function extractRobotReviewJson(output: string): Record<string, unknown> {
|
||||||
@@ -128,11 +107,69 @@ async function runAutomaticRobotReview(
|
|||||||
task: any,
|
task: any,
|
||||||
signal?: AbortSignal,
|
signal?: AbortSignal,
|
||||||
): Promise<{ review: Omit<RobotReviewRecord, "iteration">; command: string }> {
|
): Promise<{ review: Omit<RobotReviewRecord, "iteration">; command: string }> {
|
||||||
const reviewerCommand = process.env.PI_LGTM_ROBOT_REVIEW_CMD?.trim()
|
|
||||||
|| "acpx --approve-reads --non-interactive-permissions deny opencode exec";
|
|
||||||
const prompt = buildRobotReviewPrompt(task);
|
const prompt = buildRobotReviewPrompt(task);
|
||||||
const command = `${reviewerCommand} ${shellQuote(prompt)}`;
|
const args = ["--mode", "json", "-p", "--no-session"];
|
||||||
const result = await runShellCommand(command, signal);
|
const reviewerModel = process.env.PI_LGTM_ROBOT_REVIEW_MODEL?.trim();
|
||||||
|
if (reviewerModel) args.push("--model", reviewerModel);
|
||||||
|
args.push(prompt);
|
||||||
|
const invocation = getPiInvocation(args);
|
||||||
|
const commandLabel = `${invocation.command} ${args.slice(0, reviewerModel ? 6 : 4).join(" ")}`;
|
||||||
|
const result = await new Promise<CommandResult>((resolve, reject) => {
|
||||||
|
const child = spawn(invocation.command, invocation.args, { shell: false, stdio: ["ignore", "pipe", "pipe"] });
|
||||||
|
const stdoutChunks: Buffer[] = [];
|
||||||
|
const stderrChunks: Buffer[] = [];
|
||||||
|
let buffer = "";
|
||||||
|
let finalAssistantText = "";
|
||||||
|
child.stdout.on("data", (data) => {
|
||||||
|
stdoutChunks.push(data);
|
||||||
|
buffer += data.toString("utf-8");
|
||||||
|
const lines = buffer.split("\n");
|
||||||
|
buffer = lines.pop() || "";
|
||||||
|
for (const line of lines) {
|
||||||
|
if (!line.trim()) continue;
|
||||||
|
try {
|
||||||
|
const event = JSON.parse(line) as any;
|
||||||
|
if (event.type === "message_end" && event.message?.role === "assistant") {
|
||||||
|
const text = Array.isArray(event.message.content)
|
||||||
|
? event.message.content.find((part: any) => part.type === "text")?.text
|
||||||
|
: undefined;
|
||||||
|
if (typeof text === "string") finalAssistantText = text;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// ignore malformed line noise
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
child.stderr.on("data", (data) => stderrChunks.push(data));
|
||||||
|
child.on("error", reject);
|
||||||
|
const onAbort = () => child.kill();
|
||||||
|
signal?.addEventListener("abort", onAbort, { once: true });
|
||||||
|
child.on("close", (exitCode) => {
|
||||||
|
signal?.removeEventListener("abort", onAbort);
|
||||||
|
if (signal?.aborted) {
|
||||||
|
reject(new Error("aborted"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (buffer.trim()) {
|
||||||
|
try {
|
||||||
|
const event = JSON.parse(buffer) as any;
|
||||||
|
if (event.type === "message_end" && event.message?.role === "assistant") {
|
||||||
|
const text = Array.isArray(event.message.content)
|
||||||
|
? event.message.content.find((part: any) => part.type === "text")?.text
|
||||||
|
: undefined;
|
||||||
|
if (typeof text === "string") finalAssistantText = text;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// ignore malformed trailing line
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resolve({
|
||||||
|
stdout: finalAssistantText || Buffer.concat(stdoutChunks).toString("utf-8"),
|
||||||
|
stderr: Buffer.concat(stderrChunks).toString("utf-8"),
|
||||||
|
exitCode,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
if (result.exitCode !== 0) {
|
if (result.exitCode !== 0) {
|
||||||
throw new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`);
|
throw new Error(`Robot reviewer failed (${result.exitCode ?? "?"}): ${(result.stderr || result.stdout).trim()}`);
|
||||||
}
|
}
|
||||||
@@ -143,9 +180,9 @@ async function runAutomaticRobotReview(
|
|||||||
? parsed.missing_evidence.filter((item): item is string => typeof item === "string")
|
? parsed.missing_evidence.filter((item): item is string => typeof item === "string")
|
||||||
: [];
|
: [];
|
||||||
return {
|
return {
|
||||||
command: reviewerCommand,
|
command: commandLabel,
|
||||||
review: {
|
review: {
|
||||||
reviewer: typeof parsed.reviewer === "string" ? parsed.reviewer : reviewerCommand,
|
reviewer: typeof parsed.reviewer === "string" ? parsed.reviewer : commandLabel,
|
||||||
scope: typeof parsed.scope === "string" ? parsed.scope : "task evidence package",
|
scope: typeof parsed.scope === "string" ? parsed.scope : "task evidence package",
|
||||||
observations,
|
observations,
|
||||||
blind_spots: typeof parsed.blind_spots === "string" ? parsed.blind_spots : "not stated",
|
blind_spots: typeof parsed.blind_spots === "string" ? parsed.blind_spots : "not stated",
|
||||||
@@ -502,7 +539,6 @@ After this, task enters pending sign-off state — only completable via /lgtm <i
|
|||||||
falsification_test: Type.String({ description: "What you ran and what you got, presented so both you and the human can sanity-check it. State: what you ran (command/experiment/log check), the actual output or result, and why that result could not occur if a failure mode were real. Must be traceable: include file paths, log snippets, counts, or commit. The human should be able to verify without re-running anything." }),
|
falsification_test: Type.String({ description: "What you ran and what you got, presented so both you and the human can sanity-check it. State: what you ran (command/experiment/log check), the actual output or result, and why that result could not occur if a failure mode were real. Must be traceable: include file paths, log snippets, counts, or commit. The human should be able to verify without re-running anything." }),
|
||||||
verification_hints: Type.Array(Type.String(), { description: "Where to look and what to check. Descriptions of evidence locations, not bare file paths. E.g. 'lines 45-60 in src/loss.py show the gradient check' not 'src/loss.py'." }),
|
verification_hints: Type.Array(Type.String(), { description: "Where to look and what to check. Descriptions of evidence locations, not bare file paths. E.g. 'lines 45-60 in src/loss.py show the gradient check' not 'src/loss.py'." }),
|
||||||
remaining_uncertainty: Type.String({ description: "What's NOT tested, known limitations, edge cases deferred. If you can't articulate uncertainty, you haven't thought hard enough." }),
|
remaining_uncertainty: Type.String({ description: "What's NOT tested, known limitations, edge cases deferred. If you can't articulate uncertainty, you haven't thought hard enough." }),
|
||||||
run_robot_review: Type.Optional(Type.Boolean({ description: "If true, run the configured automatic robot reviewer immediately after storing evidence." })),
|
|
||||||
}),
|
}),
|
||||||
|
|
||||||
async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
|
async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
|
||||||
@@ -525,34 +561,33 @@ After this, task enters pending sign-off state — only completable via /lgtm <i
|
|||||||
},
|
},
|
||||||
});
|
});
|
||||||
let robotReviewNote = "";
|
let robotReviewNote = "";
|
||||||
const shouldRunRobotReview = params.run_robot_review ?? process.env.PI_LGTM_AUTO_ROBOT_REVIEW === "1";
|
const refreshedTask = store.get(params.taskId);
|
||||||
if (shouldRunRobotReview) {
|
if (!refreshedTask) return textResult(`Task #${params.taskId} not found after evidence update`);
|
||||||
const refreshedTask = store.get(params.taskId);
|
try {
|
||||||
if (!refreshedTask) return textResult(`Task #${params.taskId} not found after evidence update`);
|
const { review, command } = await runAutomaticRobotReview(refreshedTask, signal);
|
||||||
try {
|
store.update(params.taskId, {
|
||||||
const { review, command } = await runAutomaticRobotReview(refreshedTask, signal);
|
pending_approval: review.accepted,
|
||||||
store.update(params.taskId, {
|
metadata: appendRobotReviewMetadata(refreshedTask, review),
|
||||||
pending_approval: review.evidence_complete && review.evidence_convincing,
|
});
|
||||||
metadata: appendRobotReviewMetadata(refreshedTask, review),
|
robotReviewNote =
|
||||||
});
|
`\n\n### Automatic robot review\n` +
|
||||||
robotReviewNote =
|
`Reviewer: ${command}\n` +
|
||||||
`\n\n### Automatic robot review\n` +
|
`Accepted: ${review.accepted ? "yes" : "no"}\n` +
|
||||||
`Reviewer command: ${command}\n` +
|
`Evidence complete: ${review.evidence_complete ? "yes" : "no"}\n` +
|
||||||
`Evidence complete: ${review.evidence_complete ? "yes" : "no"}\n` +
|
`Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}\n` +
|
||||||
`Evidence convincing: ${review.evidence_convincing ? "yes" : "no"}\n` +
|
`${review.observations.map(o => `- ${o}`).join("\n")}`;
|
||||||
`${review.observations.map(o => `- ${o}`).join("\n")}`;
|
if (review.missing_evidence.length > 0) {
|
||||||
if (review.missing_evidence.length > 0) {
|
robotReviewNote += `\nMissing evidence:\n${review.missing_evidence.map(item => `- ${item}`).join("\n")}`;
|
||||||
robotReviewNote += `\nMissing evidence:\n${review.missing_evidence.map(item => `- ${item}`).join("\n")}`;
|
|
||||||
}
|
|
||||||
if (!(review.evidence_complete && review.evidence_convincing)) {
|
|
||||||
robotReviewNote += `\nResult: human sign-off has been held back until the evidence is strengthened and reviewed again.`;
|
|
||||||
}
|
|
||||||
} catch (err: any) {
|
|
||||||
robotReviewNote =
|
|
||||||
`\n\n### Automatic robot review\n` +
|
|
||||||
`Reviewer failed: ${err.message}\n` +
|
|
||||||
`Task remains pending human sign-off; rerun with stronger evidence or call \`robot_review_run\` after fixing reviewer setup.`;
|
|
||||||
}
|
}
|
||||||
|
if (!review.accepted) {
|
||||||
|
robotReviewNote += `\nResult: human sign-off has been held back until the evidence is strengthened and reviewed again.`;
|
||||||
|
}
|
||||||
|
} catch (err: any) {
|
||||||
|
store.update(params.taskId, { pending_approval: false });
|
||||||
|
robotReviewNote =
|
||||||
|
`\n\n### Automatic robot review\n` +
|
||||||
|
`Reviewer failed: ${err.message}\n` +
|
||||||
|
`Human sign-off is blocked until the reviewer stage succeeds.`;
|
||||||
}
|
}
|
||||||
widget.update();
|
widget.update();
|
||||||
|
|
||||||
@@ -574,7 +609,7 @@ After this, task enters pending sign-off state — only completable via /lgtm <i
|
|||||||
uncertaintySection +
|
uncertaintySection +
|
||||||
robotReviewNote +
|
robotReviewNote +
|
||||||
`\n\n---\n` +
|
`\n\n---\n` +
|
||||||
`Task #${task.id} is now ${shouldRunRobotReview && !store.get(task.id)?.pending_approval ? "not yet ready for human sign-off" : `pending human sign-off via \`/lgtm ${task.id}\``}.\n\n` +
|
`Task #${task.id} is now ${store.get(task.id)?.pending_approval ? `pending human sign-off via \`/lgtm ${task.id}\`` : "not yet ready for human sign-off"}.\n\n` +
|
||||||
`**Self-check (non-blocking):** Look at this as the human will see it. ` +
|
`**Self-check (non-blocking):** Look at this as the human will see it. ` +
|
||||||
`Does the evidence directly address the done_criterion "${task.done_criterion}"? ` +
|
`Does the evidence directly address the done_criterion "${task.done_criterion}"? ` +
|
||||||
`Would a skeptical reviewer find this convincing, or would they immediately ask ` +
|
`Would a skeptical reviewer find this convincing, or would they immediately ask ` +
|
||||||
@@ -654,8 +689,7 @@ This does not complete the task. Human /lgtm remains the only completion path.`,
|
|||||||
label: "robot_review_run",
|
label: "robot_review_run",
|
||||||
description: `Run the configured automatic robot reviewer against the current task evidence.
|
description: `Run the configured automatic robot reviewer against the current task evidence.
|
||||||
|
|
||||||
Uses PI_LGTM_ROBOT_REVIEW_CMD if set, otherwise defaults to:
|
Runs the same Pi-native reviewer stage used automatically by \`lgtm_ask\`.
|
||||||
\`acpx --approve-reads --non-interactive-permissions deny opencode exec\`
|
|
||||||
|
|
||||||
This appends a new robot-review iteration. If the reviewer marks evidence incomplete or unconvincing, pending human sign-off is cleared until stronger evidence is submitted and reviewed again.`,
|
This appends a new robot-review iteration. If the reviewer marks evidence incomplete or unconvincing, pending human sign-off is cleared until stronger evidence is submitted and reviewed again.`,
|
||||||
parameters: Type.Object({
|
parameters: Type.Object({
|
||||||
@@ -671,7 +705,7 @@ This appends a new robot-review iteration. If the reviewer marks evidence incomp
|
|||||||
|
|
||||||
const { review, command } = await runAutomaticRobotReview(task, signal);
|
const { review, command } = await runAutomaticRobotReview(task, signal);
|
||||||
store.update(params.taskId, {
|
store.update(params.taskId, {
|
||||||
pending_approval: review.evidence_complete && review.evidence_convincing ? task.pending_approval : false,
|
pending_approval: review.accepted ? task.pending_approval : false,
|
||||||
metadata: appendRobotReviewMetadata(task, review),
|
metadata: appendRobotReviewMetadata(task, review),
|
||||||
});
|
});
|
||||||
widget.update();
|
widget.update();
|
||||||
|
|||||||
Reference in New Issue
Block a user